/////////////////////////////// // GASNet-EX API Description // /////////////////////////////// // This is *not* a final normative document. // This is "beta documentation" for a work-in-progress. // // This document assumes a reasonable degree of familiarity with the current // (aka GASNet-1) specification: https://gasnet.lbl.gov/dist/docs/gasnet.pdf // // Except where otherwise noted, all definitions in this document // are provided by gasnetex.h. // Document Conventions // // This document includes the annotation [UNIMPLEMENTED] in several places // where we feel we have a suitable design ready for consideration, but // have yet to provide a complete and/or correct implementation. // // This document includes the annotation [EXPERIMENTAL] in several places // where we feel we have a suitable design and an implementation which is // sufficiently complete to be used. However, based on feedback received // from early use, the design may change in non-trivial ways (to the degree // that client code may need to change). // // Specification and release versioning: // // Release version tuple // // This takes the form YEAR.MONTH.PATCH in GASNet-EX releases, // (where YY, MM and PP below represent the appropriate digits) // providing a clear distinction from GASNet-1 with MAJOR==1. #define GASNET_RELEASE_VERSION_MAJOR 20YY #define GASNET_RELEASE_VERSION_MINOR MM #define GASNET_RELEASE_VERSION_PATCH PP // Major and Minor versions of the GASNet-EX specification. // // This is currently a version number for *this* document. #define GEX_SPEC_VERSION_MAJOR 0 #define GEX_SPEC_VERSION_MINOR 17 // Major and Minor versions of the GASNet-1 specification. // // This is the version to which the gasnet_* APIs adhere // and which prevails for all matters which this document // does not (yet) address. #define GASNET_SPEC_VERSION_MAJOR 1 #define GASNET_SPEC_VERSION_MINOR 8 // Major and Minor versions of the GASNet-Tools specification. 
// // This is the spec version for the GASNet Tools #define GASNETT_SPEC_VERSION_MAJOR 1 #define GASNETT_SPEC_VERSION_MINOR 20 // // Relationship to GASNet-1 APIs: // // This release should continue to support nearly all GASNet-1 APIs, // provided the client #includes <gasnet.h>, which implements the // GASNet-1 APIs in terms of the new GASNet-EX interfaces. // // Most gasnet_ APIs have gex_ counterparts that are either interoperable, // or which provide a superset of the most closely-related gasnet_ APIs. // // Where a gex_/GEX_ identifier is interoperable or synonymous with // a gasnet_/GASNET_ identifier, that is noted below. // // Hybrid/transitional client support: // // All clients must initialize GASNet using *either* the legacy // gasnet_init()/gasnet_attach() calls *or* the new gex_Client_Init() call // described in a subsequent section. // // Clients who are incrementally adopting GASNet-EX may have a period of time // when they are using deprecated GASNet-1 calls (see below). A process using // such deprecated calls must enable legacy support, which is done implicitly // when a process calls gasnet_init() or explicitly when a caller passes the // GEX_FLAG_USES_GASNET1 flag to gex_Client_Init(). // // The following functions from GASNet-1 have been deprecated in favor of new // GASNet-EX equivalents. These `gasnet_`-prefixed deprecated functions shall // only be invoked if legacy support has been enabled. // + gasnet_AMRequest*() // + gasnet_get*(), including bulk, non-bulk, value-based and VIS // + gasnet_put*(), including bulk, non-bulk, value-based and VIS // + gasnet_memset*() // NOTE: This list is subject to expansion as new GASNet-EX APIs are // implemented which displace less-capable GASNet-1 APIs. // // Note that gasnet_init()/gasnet_attach() may only be called once per process, // and currently only one client per process can use the deprecated GASNet-1 // functions listed above. // #define GEX_FLAG_USES_GASNET1 ((gex_Flags_t)???) 
// The following API allows jobs that enable legacy support (as described // immediately above) to access the key GASNet-EX objects created explicitly by // gex_Client_Init(), or implicitly by gasnet_init()/gasnet_attach(). The types // and usage of these objects are described below. // // This call is defined in gasnet.h (not gasnetex.h). // // The arguments are all pointers to locations for outputs, each of which // may be NULL if the caller does not need a particular value. // // client_p: receives the gex_Client_t // endpoint_p: receives the gex_EP_t // tm_p: receives the gex_TM_t // segment_p: receives the gex_Segment_t, if any // extern void gasnet_QueryGexObjects(gex_Client_t *client_p, gex_EP_t *endpoint_p, gex_TM_t *tm_p, gex_Segment_t *segment_p); // Calls from restricted context // // The only GASNet functions which may be called within AM handler context, // or while holding a GASNet handler-safe lock are as follows: // // gasnet_mynode(), gasnet_nodes(), gasnet_hsl_*(), gasnet_exit(), // gasnet_QueryGexObjects(), gex_System_QueryNbrhdInfo(), gex_System_QueryHostInfo(), // gex_System_QueryMyPosition(), gex_System_QueryJob{Rank,Size}(), gex_HSL_*(), // gex_*_{Set,Query}CData(), gex_{Client,Segment,EP,TM,AD}_Query*(), gex_TM_Pair(), // gex_AM_Max*(), gex_AM_LUB*(), gex_Token_Max*(), gex_Token_Info(), gasnet_AMGetMsgSource(), // gex_System_GetVerboseErrors(), gex_System_SetVerboseErrors(), // gex_System_QueryMaxThreads(), gex_System_QueryHiddenAMConcurrencyLevel() // // The following are conditionally permitted in handler context, the condition being that the // caller must be within an AMRequest handler and not holding a handler-safe lock: // // gasnet_AMReply*(), gex_AM_Reply*(), gex_AM_{Prepare,Commit}Reply*(), gex_AM_SrcDesc*() // // The following are conditionally permitted in handler context, the condition // being that the 'flags' argument must include GEX_FLAG_IMMEDIATE: // // gex_EP_QueryBoundSegmentNB() // // All other functions are prohibited to be 
called from a thread within the // dynamic context of an AM handler, or while holding a handler-safe lock. // This prohibition notably prohibits all communication initiation (aside from Reply // injection from a Request handler), explicit polling and test/wait operations on handles/events. // // Glossary: // The following terms will be used with specific meanings in this document. // // "Collective Call" // // Several APIs in this specification are described as being "collective // calls". All collective calls are collective with respect to a specific // ordered set of participants, which is usually specified by an argument // naming a team (discussed later in detail). The designation of a call // as collective over a given team means: // + For every given team that exists in an execution of the program, all // collective calls made over that team are initiated "in the same order" // by all team members -- otherwise behavior is undefined. // + Here "in the same order" means that for every member of a given team, // the calls over that team and their arguments are "compatible" across all // members at every point in their respective sequence of collective calls // over the team. // + The definition of "compatible" as used here may vary slightly as // defined individually for each call. However, in the absence of per-call // documentation to the contrary the following rules apply: // - The function called must be the same, or from a related group of calls // explicitly documented as mutually compatible. // - Any arguments documented as "single-valued" must be identical across // all callers. // - Any additional argument compatibility constraints documented for a // given call must be satisfied. // // In addition to the requirement on compatibility of collective calls over // any given team, all collective calls over *distinct* teams must be ordered // such that no deadlock would occur if all such calls were replaced by // blocking barriers. 
A formal specification of this constraint will appear // in a future revision of this document. // "Single-valued" // // This term is used to designate an argument to a collective call as one that // must have the same value on all callers participating in the collective, or // on a well-defined subset of callers. // // In the case of 'flags' arguments, this term may be applied in a qualified // form as "partially single-valued" when the constraint applies only to some // bits (with freedom to differ in the remaining bits). // // Basic types: // // Rank // The type gex_Rank_t is used for a position within, or size of, an ordered // set (such as a team). // Guaranteed to be an unsigned integer type // This type is interoperable with gasnet_node_t typedef [some unsigned integer type] gex_Rank_t; // Pre-defined constant used to indicate "not a rank". // Use may have different semantics in various contexts. // Guaranteed to be larger than any valid rank. // However, a specific value is NOT defined by specification. // In particular, might NOT be equal to GASNET_MAXNODES #define GEX_RANK_INVALID ((gex_Rank_t)???) // "Job rank": // In a non-resilient build this will be the same as the rank in the team // constructed by gex_Client_Init() and will be identical across clients. // This is semantically equivalent to gasnet_mynode(). // // Semantics in a resilient build will be defined in a later release. gex_Rank_t gex_System_QueryJobRank(void); // "Job size": // In a non-resilient build this will be the same as the size in the team // constructed by gex_Client_Init() and will be identical across clients. // This is semantically equivalent to gasnet_nodes(). // // Semantics in a resilient build will be defined in a later release. gex_Rank_t gex_System_QueryJobSize(void); // Utility // By default, certain non-fatal error returns in GASNet-EX will print messages // to the console. This behavior can be queried and set with the following. 
// Returns non-zero if console messages are enabled for certain non-fatal errors. int gex_System_GetVerboseErrors(); // Enable (1) or disable (0) console messages for certain non-fatal errors. // Values other than 0 and 1 are currently reserved. void gex_System_SetVerboseErrors(int enable); // Threads // The maximum number of live client threads permitted to enter GASNet. // // In threaded (non-SEQ) builds of GASNet, client threads making GASNet calls may // implicitly become associated with thread-specific state managed by the GASNet // library. When such a thread exits, a thread destructor registered by the library // cleans up any associated thread-specific state. The library is permitted to limit // the number of live client threads that may concurrently be implicitly associated // with GASNet-managed state. // // + The limit is per-process and the value returned is for the calling process. // + The limit is process-wide, independent of gex_Client_t. // + Threads internal to GASNet, if any, do not count against this limit. // + Client threads which have not yet entered GASNet do not count against // this limit. // + Client threads which exit after having entered GASNet cease to count // against this limit. // // In a SEQ build of GASNet, this query always returns 1. uint64_t gex_System_QueryMaxThreads(void); // Events // An "Event" is an opaque scalar type, representing a handle // to an asynchronous event that will be generated by a pending operation. // Events are a generalization of GASNet-1 handles, in that // a single non-blocking operation may expose several events // associated with its progress (eg local and remote completion). // Initiation of an event-based (NB-suffix) non-blocking operation will // usually generate one root event (representing the completion // of the entire operation), and zero or more leaf events // (representing completion of intermediate steps). 
// Root events must eventually be synchronized by passing them // to a Wait or successful Test function, which recycles all the // events (root and leaf) associated with the operation in question. // Leaf events may optionally be synchronized before that point. // This type is interoperable with gasnet_handle_t // - Sync operation: test/wait with one/all/some flavors // + Success consumes the event typedef ... gex_Event_t; // Pre-defined output values of type gex_Event_t // - GEX_EVENT_INVALID // + result for already-completed operation // + synonymous with GASNET_INVALID_HANDLE // + guaranteed to be zero // - GEX_EVENT_NO_OP // + result for a failed communication attempt (eg immediate-mode // injection that encountered backpressure) // + guaranteed to be non-zero // + Erroneous to pass this value to test/wait operations #define GEX_EVENT_INVALID ((gex_Event_t)0) #define GEX_EVENT_NO_OP ((gex_Event_t)???) // Pre-defined input values of type gex_Event_t* // These are passed to communication injection operations // in place of a pointer to an actual gex_Event_t for certain leaf // events, to forgo an independent leaf event and instead request // specific predefined behavior: // - GEX_EVENT_NOW // + Pass to require completion of the leaf event before returning // from the initiation call // - GEX_EVENT_DEFER // + Pass to allow deferring completion of the leaf event to as late // as completion of the root event // - GEX_EVENT_GROUP // + Pass to NBI initiation calls to allow client to use NBI-based // calls to detect event completion (or to use an explicit event // returned/generated by gex_NBI_EndAccessRegion()). #define GEX_EVENT_NOW ((gex_Event_t*)???) #define GEX_EVENT_DEFER ((gex_Event_t*)???) #define GEX_EVENT_GROUP ((gex_Event_t*)???) // Integer flag type used to pass hints/assertions/modifiers to various functions // Flag value bits to a given API are guaranteed to be disjoint, although // flag values used for unrelated functions might share bits. 
typedef [some integer type] gex_Flags_t; // // Flags for point-to-point communication initiation // // // IMMEDIATE // // This flag indicates that GASNet-EX *may* return without initiating // any communication if the conduit could determine that it would // need to block temporarily to obtain the necessary resources. In // this case calls with return type 'gex_Event_t' return // GEX_EVENT_NO_OP while those with return type 'int' will // return non-zero. // // Additionally, calls with this flag are not required to make any // progress toward recovery of the "necessary resources". Therefore, // clients should not assume that repeated calls with this flag will // eventually succeed. In the presence of multiple threads, it is // even possible that calls with this flag may never succeed due to // racing for resources. // #define GEX_FLAG_IMMEDIATE ((gex_Flags_t)???) // // LC_COPY_{YES,NO} // // This mutually-exclusive pair of flags *may* override GASNet-EX's // choice of whether or not to make a copy of a source payload (of a // non-blocking Put or AM) for the purpose of accelerating local // completion. In the absence of these flags the conduit-specific // logic will apply. // // NOTE: these need more thought w.r.t. the implementation and // specification #define GEX_FLAG_LC_COPY_YES ((gex_Flags_t)???) [UNIMPLEMENTED] #define GEX_FLAG_LC_COPY_NO ((gex_Flags_t)???) [UNIMPLEMENTED] // // PEER_NEVER_{SELF,NBRHD} // [Since spec v0.14] // // These flags, passed to a supporting communication initiation API, assert to // the GASNet-EX library that the '(tm,rank)' tuple (or equivalent) does NOT // name an endpoint in certain processes. // // Use of these flags *may* allow the library to omit its own checks for the // asserted condition. However, to have this desired impact, the compiler must // be capable of statically deciding their presence in the 'flags' argument. 
// Therefore, non-trivial logic to determine whether or not to pass either of // these flags is strongly discouraged. The intended use case for these flags // is in situations where the asserted property is known without additional // logic at the specific call site. // // Providing an assertion which is untrue will yield undefined results (though // in a high-quality implementation, a debug build will report the discrepancy). // // SELF - Asserts that the "remote" peer in a communication call is not // an endpoint in the initiating process. // NBRHD - Asserts that the "remote" peer in a communication call is not // an endpoint in any process in the initiator's nbrhd. // // Use of GEX_FLAG_PEER_NEVER_NBRHD implies GEX_FLAG_PEER_NEVER_SELF. // However, their use is not mutually exclusive. // // Currently these flags are valid to pass to: // gex_RMA_*() // // A future revision may permit these flags for additional communication // injection calls. #define GEX_FLAG_PEER_NEVER_SELF ((gex_Flags_t)???) #define GEX_FLAG_PEER_NEVER_NBRHD ((gex_Flags_t)???) // // AD_MY_{RANK,NBRHD} // // This mutually-exclusive pair of flags each assert a locality property of // the target of a remote atomic operation, and are described in detail in // the "Remote Atomic Operations" section. // #define GEX_FLAG_AD_MY_RANK ((gex_Flags_t)???) #define GEX_FLAG_AD_MY_NBRHD ((gex_Flags_t)???) // // AD_FAVOR_{MY_RANK,MY_NBRHD,REMOTE} // // This mutually-exclusive group of flags each request that gex_AD_Create() // bias its algorithm selection to favor calls with a given locality property // for the target locations, and are described in detail in the "Remote Atomic // Operations" section. // #define GEX_FLAG_AD_FAVOR_MY_RANK ((gex_Flags_t)???) #define GEX_FLAG_AD_FAVOR_MY_NBRHD ((gex_Flags_t)???) #define GEX_FLAG_AD_FAVOR_REMOTE ((gex_Flags_t)???) 
// // AD_{ACQ,REL} // // This pair of flags requests memory fencing behaviors for remote atomic // operations, and are described in detail in the "Remote Atomic Operations" // section. It is permitted to include zero, one, or both of these flags // when calling gex_AD_Op*(). // #define GEX_FLAG_AD_ACQ ((gex_Flags_t)???) #define GEX_FLAG_AD_REL ((gex_Flags_t)???) // // RANK_IS_JOBRANK // // This flag indicates, to those calls explicitly documented as accepting it, // that the 'rank' (or equivalent argument) is a jobrank rather than a rank // within the normal associated team. // // Currently this flag is accepted by: // gex_AD_Op*() // #define GEX_FLAG_RANK_IS_JOBRANK ((gex_Flags_t)???) // // AM_PREPARE_LEAST_{CLIENT,ALLOC} [EXPERIMENTAL] // // This pair of mutually exclusive flags modify the behavior of the // gex_AM_Max{Request,Reply}{Medium,Long}() queries to request the largest // legal 'least_payload' argument to the corresponding "gex_AM_Prepare*()" // rather than the default behavior (returning the largest legal 'nbytes' // argument to the corresponding "gex_AM_{Request,Reply}*()"). // // CLIENT - query largest 'least_payload' for a Prepare call with a // client-provided buffer (non-NULL 'client_buf' argument). // ALLOC - query largest 'least_payload' for a Prepare call with a // GASNet-allocated buffer (NULL 'client_buf' argument). // // Legal (and meaningful) in gex_AM_Max{Request,Reply}{Medium,Long}() calls. // Ignored in gex_AM_Prepare{Request,Reply}{Medium,Long}() calls. // Invalid in gex_AM_{Request,Reply}{Medium,Long}*() calls. // #define GEX_FLAG_AM_PREPARE_LEAST_CLIENT ((gex_Flags_t)???) #define GEX_FLAG_AM_PREPARE_LEAST_ALLOC ((gex_Flags_t)???) // SEGMENT DISPOSITION // // The following family of flags assert the segment disposition of // address ranges provided to communication initiation operations. 
// // The segment disposition flags come in two varieties: // // SELF - describes the segment disposition of addresses associated // with local buffers and the initiating endpoint (ie the EP // which is usually implicitly named by a gex_TM_t argument). // Eg in a Put operation this variety describes source locations, // and in a Get this variety describes destination locations. // // PEER - describes the segment disposition of buffers associated // with (potentially) remote memory and the peer endpoint(s) // (the EPs usually explicitly named by gex_Rank_t arguments). // Eg in a Put operation this variety describes destination locations, // and in a Get this variety describes source locations. // // The following flags are mutually exclusive within each variety - // a given operation may specify at most one SELF flag and one PEER flag. // Unless otherwise noted, the default behavior for each variety in the // absence of an explicitly provided flag corresponds to: // + When the local EP is unbound or bound to host memory: // GEX_FLAG_SELF_SEG_UNKNOWN, GEX_FLAG_PEER_SEG_BOUND // + When the local EP is bound to a device segment: // GEX_FLAG_SELF_SEG_BOUND, GEX_FLAG_PEER_SEG_BOUND // These are backwards-compatible with GASNet-1 segment behavior (where // there is no support for device memory). // NOTE: the flags below are currently [UNIMPLEMENTED], and consequently // these defaults are also the only supported settings for all APIs. // // Each explicit flag has a distinct bit pattern. // Unless otherwise noted, the caller is responsible for ensuring the // assertions expressed by these flags to a given call remain true for // the entire period of time that the described address sequences are "active" // with respect to the operation requested by the call. The definition of // "active" varies based on call type, but generally extends from entry to // the call accepting the assertions until completion is signalled for // all described address ranges. 
// // {SELF,PEER}_SEG_UNKNOWN // // These flag bits indicate that the corresponding address range(s) // are not known by the caller to reside within current GASNet-EX segments. // Example 1: the address ranges are known to lie partially or entirely // outside any segments in the process hosting the respective endpoint(s). // Example 2: the caller lacks information about the segment disposition // of the address ranges, and passes this flag to reflect a lack of // such assertions and request maximally permissive behavior // (potentially incurring a performance cost). #define GEX_FLAG_SELF_SEG_UNKNOWN ((gex_Flags_t)???) [UNIMPLEMENTED] #define GEX_FLAG_PEER_SEG_UNKNOWN ((gex_Flags_t)???) [UNIMPLEMENTED] // // {SELF,PEER}_SEG_SOME // // These flag bits assert that the corresponding address range(s) // are contained entirely within the union of current GASNet-EX segments // created by any client in the process hosting the respective endpoint. #define GEX_FLAG_SELF_SEG_SOME ((gex_Flags_t)???) [UNIMPLEMENTED] #define GEX_FLAG_PEER_SEG_SOME ((gex_Flags_t)???) [UNIMPLEMENTED] // // {SELF,PEER}_SEG_BOUND // // These flag bits assert that the corresponding address range(s) // are contained entirely within the segment bound to the respective endpoint. // Implies that the respective endpoint has a bound segment. #define GEX_FLAG_SELF_SEG_BOUND ((gex_Flags_t)???) [UNIMPLEMENTED] #define GEX_FLAG_PEER_SEG_BOUND ((gex_Flags_t)???) [UNIMPLEMENTED] // // {SELF,PEER}_SEG_OFFSET // // These flag bits indicate that the corresponding address argument(s) // are byte *offsets* relative to the bound segment base address. // Implies that the respective endpoint has a bound segment, and // that the specified range(s) are contained entirely within that segment. #define GEX_FLAG_SELF_SEG_OFFSET ((gex_Flags_t)???) [UNIMPLEMENTED] #define GEX_FLAG_PEER_SEG_OFFSET ((gex_Flags_t)???) 
[UNIMPLEMENTED] // COLLECTIVE SCRATCH ALLOCATION // // The following family of flags control the interpretation of address ranges // provided to team construction APIs to describe collective scratch spaces. // // TM_{GLOBAL,LOCAL,SYMMETRIC,NO}_SCRATCH // This mutually-exclusive group indicates the number and meaning of // a gex_Addr_t specified to certain team construction APIs. // [Since spec v0.9:] // GLOBAL: gex_Addr_t per member of the output team // LOCAL: gex_Addr_t per local member of the output team // SYMMETRIC: single gex_Addr_t used for all members of the output team // [Since spec v0.11:] // NO: no gex_Addr_t (and no scratch space is allocated). // #define GEX_FLAG_TM_GLOBAL_SCRATCH ((gex_Flags_t)???) // gex_TM_Create only #define GEX_FLAG_TM_LOCAL_SCRATCH ((gex_Flags_t)???) // gex_TM_Create only #define GEX_FLAG_TM_SYMMETRIC_SCRATCH ((gex_Flags_t)???) // gex_TM_Create only #define GEX_FLAG_TM_NO_SCRATCH ((gex_Flags_t)???) // gex_TM_Create and gex_TM_Split // // SCRATCH_SEG_OFFSET // // This flag bit indicates that the corresponding gex_Addr_t argument(s) // are byte *offsets* relative to the bound segment base address. // Implies that the respective endpoint has a bound segment, and // that the specified range(s) are contained entirely within that segment. #define GEX_FLAG_SCRATCH_SEG_OFFSET ((gex_Flags_t)???) [UNIMPLEMENTED] // GEX_FLAG_GLOBALLY_QUIESCED // [Since spec v0.10] // // This flag bit indicates to the corresponding object destructor call that // the client has satisfied the call's documented global quiescence criteria. // This permits, but does not require, the implementation to elide // synchronization which might otherwise be required. #define GEX_FLAG_GLOBALLY_QUIESCED ((gex_Flags_t)???) // A "token" is an opaque scalar type // This type is interoperable with gasnet_token_t typedef ... 
gex_Token_t; // Handler index - a fixed-width integer type, used to name an AM handler // This type is interoperable with gasnet_handler_t typedef uint8_t gex_AM_Index_t; // Handler argument - a fixed-width integer type, used for client-defined handler arguments // This type is interoperable with gasnet_handlerarg_t typedef int32_t gex_AM_Arg_t; // Handler function pointer type typedef ... gex_AM_Fn_t; // Widest scalar and width // This type is interoperable with gasnet_register_value_t typedef [some unsigned integer type] gex_RMA_Value_t; // Preprocess-time constant size of gex_RMA_Value_t // Synonymous with SIZEOF_GASNET_REGISTER_VALUE_T #define SIZEOF_GEX_RMA_VALUE_T ... // gex_Addr_t [PROPOSED] // Type which is suitable to hold both addresses and offsets. // // This is always an alias for `void*`, but is given a distinct type to make // prototypes self-documenting with respect to arguments which may (with the // proper flags) be interpreted alternatively as addresses or offsets. typedef void* gex_Addr_t; // Memvec // A "memvec" describes a tuple of memory address and length // gex_Memvec_t is guaranteed to have the same in-memory representation as gasnet_memvec_t; // these two struct types name their fields differently so they are technically // incompatible as far as the compiler is concerned -- it *is* safe to type-pun // pointers to them with explicit casts. typedef struct { void *gex_addr; // [EXPERIMENTAL]: will eventually have type gex_Addr_t size_t gex_len; } gex_Memvec_t; // gex_EP_t is an opaque scalar handle to an Endpoint (EP), // a local representative of an isolated communication context typedef ... gex_EP_t; // Pre-defined value of type gex_EP_t // This zero value is guaranteed never to alias a valid endpoint #define GEX_EP_INVALID ((gex_EP_t)0) // gex_EP_Index_t is an unsigned integer type. // // Every EP within a given gex_Client_t can be uniquely identified by // the jobrank of a process and an endpoint index. 
// The primordial endpoint, created by gex_Client_Init(), will always have // an index of 0. At this time, there are no other guarantees regarding how // endpoint indices are allocated/assigned. typedef ... gex_EP_Index_t; // Max supported number of endpoints per client in each process. // This is an optimistic compile-time constant which cannot account // for limitations due to scarcity of network resources and/or memory. // The value is implementation-defined and may be conduit-specific. #define GASNET_MAXEPS ... // gex_EP_Location_t is a (rank, ep_index) tuple. typedef struct { gex_Rank_t gex_rank; gex_EP_Index_t gex_ep_index; } gex_EP_Location_t; // gex_Client_t is an opaque scalar handle to a Client, // an instance of the client interface to the GASNet library typedef ... gex_Client_t; // Pre-defined value of type gex_Client_t // This zero value is guaranteed never to alias a valid client #define GEX_CLIENT_INVALID ((gex_Client_t)0) // gex_Segment_t is an opaque scalar handle to a Segment, // a local client-declared memory range for use in communication typedef ... gex_Segment_t; // Pre-defined value of type gex_Segment_t // Used, for instance, to indicate no bound segment #define GEX_SEGMENT_INVALID ((gex_Segment_t)0) // In general, gex_TM_t is an opaque scalar handle to a Team Member, // a collective communication context used for remote endpoint naming. // There is also a less-general form, known as a "TM-pair" which carries only // sufficient information for naming an endpoint in point-to-point communication // or queries. // In collective calls, an argument of type gex_TM_t specifies both an ordered // set of Endpoints (local or remote), and a local gex_EP_t, a local // representative of that team. Use of a TM-pair is prohibited in such calls. // In point-to-point calls the local and remote gex_EP_t are named by a tuple // consisting of one argument of type gex_TM_t and another of type gex_Rank_t // together. 
Similarly, several queries take a '(tm,rank)' tuple to name an // endpoint. Use of a TM-pair or a fully general gex_TM_t are both permitted // in these non-collective calls. typedef ... gex_TM_t; // Pre-defined value of type gex_TM_t // This zero value is guaranteed never to alias a valid gex_TM_t (including TM-pairs) #define GEX_TM_INVALID ((gex_TM_t)0) // // Client-Data (CData) // // The major opaque object types in GASNet-EX provide the means for the client // to set and retrieve one void* field of client-specific data for each object // instance, which is NULL for newly created objects. void gex_Client_SetCData(gex_Client_t client, const void *val); void* gex_Client_QueryCData(gex_Client_t client); void gex_Segment_SetCData(gex_Segment_t seg, const void *val); void* gex_Segment_QueryCData(gex_Segment_t seg); void gex_TM_SetCData(gex_TM_t tm, const void *val); void* gex_TM_QueryCData(gex_TM_t tm); void gex_EP_SetCData(gex_EP_t ep, const void *val); void* gex_EP_QueryCData(gex_EP_t ep); // // Operations on gex_Client_t // // Query flags passed to gex_Client_Init() gex_Flags_t gex_Client_QueryFlags(gex_Client_t client); // Query client name passed to gex_Client_Init() const char * gex_Client_QueryName(gex_Client_t client); // Initialize the client // This is a collective call over all processes comprising this GASNet job. // Currently supports only one call per job. // * clientName must reference a string that uniquely identifies this client // within the process, and must match the pattern: [A-Z][A-Z0-9_]+ // The contents of the string referenced by clientName must be single-valued. // In a future release this string will be used in such contexts as error messages // and naming of environment variables to control per-client aspects of GASNet. // * argc/argv are optional references to the command-line arguments received by main(). // The caller is permitted to pass NULL for both arguments, if this is done // by all callers. 
However, providing pointers to the values received in // main() may improve portability or supplementary services. // * client_p, ep_p and tm_p are OUT parameters that receive references to the // newly-created Client, the primordial (thread-safe) Endpoint for this process/client, // and the primordial Team (which contains all the primordial Endpoints, one // for every process in this job). // * flags control the creation of the primordial objects. Supported flags: // + GEX_FLAG_USES_GASNET1 - created client requests the use of GASNet-1 APIs // (defined in gasnet.h). Only permitted for use in one client per process. // Otherwise must be 0. // This is a single-valued parameter. // // There is an implicit barrier synchronization prior to return from this call // to ensure that creation of communications resources has completed on all // callers prior to return on any caller. extern int gex_Client_Init( gex_Client_t *client_p, gex_EP_t *ep_p, gex_TM_t *tm_p, const char *clientName, int *argc, char ***argv, gex_Flags_t flags); // // Operations on gex_Segment_t // // NOTE: *currently* gex_Segment_Attach() is the only way to create a segment // suitable for use as the bound segment of a primordial endpoint (one created // by gex_Client_Init). In particular, the current release does not *yet* // support use of the APIs gex_Segment_Create(), gex_EP_BindSegment() and // gex_EP_PublishBoundSegment() as an alternative to Attach. However, support // for that usage may appear in a future release. // // See also in [PROPOSED] section: // gex_Segment_Create() // gex_EP_BindSegment() // // Query owning client gex_Client_t gex_Segment_QueryClient(gex_Segment_t seg); // Query flags passed when segment was created // There are no segment flags defined in the current release. 
gex_Flags_t gex_Segment_QueryFlags(gex_Segment_t seg); // Query base address of a segment // For segments created using gex_Segment_Create() with a 'kind' not equal to // GEX_MK_HOST, the return value is a device address. // Otherwise, it is a host address. void * gex_Segment_QueryAddr(gex_Segment_t seg); // Query length of a segment uintptr_t gex_Segment_QuerySize(gex_Segment_t seg); // Collective allocation and creation of Segments // Analogous to gasnet_attach (but see below) // // This is a collective call over the team named by the 'tm' argument that // allocates and binds a local GASNet segment on each caller. // // There is an implicit barrier synchronization prior to return from this call // to ensure that the creation and binding of a segment has completed on all // callers prior to return on any caller. // // segment_p: An OUT parameter that receives the newly created gex_Segment_t. // This is not a single-valued parameter. // tm: The call is collective over this team. // size: Size of the local segment to allocate and bind to the local // Endpoint represented by tm. The value must be a non-zero // multiple of GASNET_PAGESIZE, not larger than // gasnet_getMaxLocalSegmentSize(). // This is not a single-valued parameter. // // The current release allows up to one call per process. // // The current release requires that 'tm' be the team created by // gex_Client_Init(). // // NOTE: gex_Segment_Attach() does not provide alignment of segments across ranks. // Use of --enable-aligned-segments at configure time and definition of // GASNET_ALIGNED_SEGMENTS at compile time are relevant only to the legacy // gasnet_attach() interface. 
// // NOTE: In the current release, when the legacy GASNET_SEGMENT_EVERYTHING // configuration is in effect, the following additional rules apply: // - In this mode, the primordial endpoint is implicitly bound to the entire // virtual address space by gex_Client_Init(), and this call has no semantic // effect (aside from a barrier synchronization). // - If the optional call is made, the size argument is ignored, and the resulting // gex_Segment_t in `*segment_p` shall be GEX_SEGMENT_INVALID. extern int gex_Segment_Attach( gex_Segment_t *segment_p, gex_TM_t tm, uintptr_t size); // // Operations on gex_TM_t // // Query owning client gex_Client_t gex_TM_QueryClient(gex_TM_t tm); // Query corresponding endpoint gex_EP_t gex_TM_QueryEP(gex_TM_t tm); // Query flags passed when tm was created gex_Flags_t gex_TM_QueryFlags(gex_TM_t tm); // Query rank of team member, and size of team gex_Rank_t gex_TM_QueryRank(gex_TM_t tm); gex_Rank_t gex_TM_QuerySize(gex_TM_t tm); // Split a Team into zero or more disjoint teams // // This is a collective call over the team named by the 'parent_tm' argument // that creates zero or more new teams. While this call is collective, the // arguments are NOT required to be single-valued over the parent team, except // as noted for certain bits in 'flags'. However, the value of 'scratch_size' // (if applicable) must be collective over callers passing the same 'color'. // // + When passing any of the GEX_FLAG_TM_SCRATCH_SIZE_* family of flags, this // call is a collective query to determine the minimum or recommended value // for the 'scratch_size' argument, based on the other parameters (excluding // scratch_addr and scratch_size). No teams are created and nothing is // written into `new_tm_p`. Otherwise, this call creates zero or more teams // as described in the remaining semantics. // + When not operating as a query, the return value is currently undefined. // + Callers passing NULL for 'new_tm_p' do not participate in team creation. 
// This assists in following the collective call requirement without the // need to create teams that are not needed by the client. // + For callers passing non-NULL for 'new_tm_p', this call creates a new team // consisting of the associated endpoints of all such callers passing the // same value of 'color'. // + Within each newly created team, ranks are assigned (contiguously from // zero) by increasing order of the 'key' argument of the members. In the // case of equal 'key', ties are broken by ranks in the 'parent_tm' team. // In particular this implies that if all ranks pass the same 'key' value, // then relative rank order from the 'parent_tm' is preserved in all created // teams. // + The client may optionally provide scratch space within the bound segment // of the endpoint corresponding to 'parent_tm', for use by the // implementation. No portion of this memory may be written by the client // or passed to any GASNet function, nor may the segment be destroyed, for // the lifetime of the newly created team. When the team is destroyed, // ownership of this memory is returned to the client. // To NOT provide a scratch space, the client must pass 'flags' containing // 'GEX_FLAG_TM_NO_SCRATCH'. // [TBD: what about Unbind of the segment w/o destroying it?] // // new_tm_p: An OUT parameter that receives the gex_TM_t representing the // newly-created team, if any. // parent_tm: The call is collective over this team. // color: A non-negative integer used to match callers to belong to the // same new team. // key: An integer used to order the ranks within newly created teams. // scratch_addr, scratch_size: // If 'GEX_FLAG_TM_NO_SCRATCH' appears in 'flags', then these two // arguments are ignored. Otherwise, the memory // [scratch_addr, scratch_addr+scratch_size) // is granted to the implementation for internal use. // The value of 'scratch_size' must be single-valued over the members // of each new team to be created (non-NULL 'new_tm_p' and same 'color'). 
// The value of 'scratch_size' must be non-zero. // flags: // Single valued: // GEX_FLAG_TM_SCRATCH_SIZE_* // These mutually exclusive flags convert this call into a collective query. // No team is created in the presence of any flag in this family. // - GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED // This query returns the recommended optimal value to be passed in // 'scratch_size' for a subsequent call to gex_TM_Split() with the same // value for the other arguments. In particular, a NULL value of the // 'new_tm_p' indicates the caller will not be a member of any team // created by the subsequent split (and thus the return will be zero). // Return values are guaranteed to be single-valued over the members // of each new team to be created (non-NULL 'new_tm_p' and same 'color'). // - GEX_FLAG_TM_SCRATCH_SIZE_MIN [DEPRECATED at spec version 0.11] // This flag is deprecated and will be removed in a future release. // Use in this release will print a warning at runtime. // Partially single valued: // GEX_FLAG_TM_NO_SCRATCH // This flag causes creation of a team without a scratch space. The // 'scratch_addr' and 'scratch_size' arguments are ignored. This flag // is intended for use when creating teams which will not perform any // significant collectives, and its use otherwise will most likely // degrade the performance of collectives. // Presence/absence of this flag must be single-valued over the members // of each new team to be created (non-NULL 'new_tm_p' and same 'color'). // Non-single valued: // None currently defined // size_t gex_TM_Split(gex_TM_t *new_tm_p, gex_TM_t parent_tm, int color, int key, void *scratch_addr, size_t scratch_size, gex_Flags_t flags); // Create zero or more new disjoint Teams // [Since spec v0.9] // // This is a collective call which provides the means to construct one or more // teams per call (at most one per caller) with greater generality than the // gex_TM_Split(), including the ability to incorporate endpoints not yet in any // team. 
// // While this call is collective, the arguments are NOT required to be // single-valued over the parent team, except as noted for certain bits in // 'flags'. However, the value of some arguments must be collective over // callers which comprise the same "output team". // // + Collective over parent_tm, which must contain at least one member for every // process named in the args[] of any caller. // + When flags contains GEX_FLAG_TM_SCRATCH_SIZE_RECOMMENDED (presence of which // must be single-valued over the parent team), this API behaves analogously // to that documented for gex_TM_Split(): returning the recommended size for // the collective scratch space of the team which would otherwise be created // for this caller based on the arguments num_new_tms, numargs and args[], and // ignoring the arguments new_tms, scratch_length and scratch_addrs. // Similarly, passing the (deprecated) GEX_FLAG_TM_SCRATCH_SIZE_MIN returns // the minimum scratch size. // In the absence of these flags, the remaining semantics apply. // + Creates either zero (for numargs == 0) teams or one team (for numargs > 0) // per caller. // + When passing numargs == 0, the caller must provide a value for flags which // is consistent with any "single-valued over the parent team" constraints. // However, all arguments other than parent_tm, numargs and flags are ignored // (and subsequent semantics constraining the ignored arguments do not apply). // + The args[] must contain numargs > 0 distinct elements naming every endpoint // to become a member of the team the caller is creating, in rank order. // + The gex_rank field of args[] specifies a process by jobrank if // GEX_FLAG_RANK_IS_JOBRANK is present in flags, otherwise the gex_rank field // is a rank relative to parent_tm and the process is the one associated with // that team member. // + The presence/absence of GEX_FLAG_RANK_IS_JOBRANK in flags must be // single-valued over the output team. 
// + The value of numargs and content of args[] must be single-valued over the // output team. // + Taken over all callers, any two non-empty args[] arrays must either be // identical (constructing the same team) or name a disjoint set of endpoints // (creating a distinct, non-overlapping team). A numargs == 0 caller is // always disjoint. // + The immediately preceding restriction applies not only to callers in // distinct processes, but also to the case of multiple callers per process // (due to multiple members in parent_tm). // + The value of numargs and content of args[] are not required to be // single-valued over parent_tm, allowing for creation of multiple teams per // collective call (but at most one per caller). // + The endpoint corresponding to parent_tm is not required to be among the // entries in args[]. // + The value of num_new_tms must equal the number of local endpoints named in // args[], and the location named by new_tms[] must have sufficient space to // receive num_new_tms entries. // + On output, the array new_tms[] will be populated with a distinct gex_TM_t // for each local member in the newly created team, in their respective rank // order. No entries will be populated or skipped/reserved for non-local // members. // + Each new team is created with a collective scratch space, which may be // optionally provided from the bound segment of the corresponding endpoint // via the scratch_length and scratch_addrs arguments. // + As with gex_TM_Split(), this "option" is actually required in the current // implementation. // + The argument scratch_length must be single-valued over the output team. // + If GEX_FLAG_SCRATCH_SEG_OFFSET is set in flags, then the value(s) in // scratch_addrs[] are byte offsets into the respective bound segments of the // endpoints being joined into the new team. Otherwise, these values are // virtual addresses in those same bound segments. 
// + The presence/absence of GEX_FLAG_SCRATCH_SEG_OFFSET in flags must be // single-valued over the output team. // + The length and contents of scratch_addrs[] depends on which of the // following mutually-exclusive values are included in the value of flags // (there is currently no default). // - GEX_FLAG_TM_SYMMETRIC_SCRATCH // There is exactly one entry in scratch_addrs[] and it provides the address // or offset used for all members of the output team. // - GEX_FLAG_TM_LOCAL_SCRATCH // The array scratch_addrs[] has length num_new_tms and provides the // addresses or offsets for each local member in the output team. // - GEX_FLAG_TM_GLOBAL_SCRATCH // The array scratch_addrs[] has length numargs and provides the // addresses or offsets for every member in the output team. // - GEX_FLAG_TM_NO_SCRATCH // The arguments scratch_length and scratch_addrs[] are ignored. // No scratch space is assigned and collectives over this team are prohibited // (this prohibition may be relaxed in the future). // + Scratch space, if any, must always reside in a bound segment with kind // GEX_MK_HOST. Consequently, calls to this team constructor that include // endpoints bound to segments with other memory kinds (such as devices) // currently MUST pass GEX_FLAG_TM_NO_SCRATCH. // This restriction might be relaxed in the future. // + The mutually exclusive choice of // GEX_FLAG_TM_{SYMMETRIC,LOCAL,GLOBAL,NO}_SCRATCH in flags must be // single-valued over the output team. // + This call is guaranteed to provide sufficient synchronization that each // caller may begin using the new handles in new_tms[] immediately following // return. If flags included GEX_FLAG_TM_LOCAL_SCRATCH then this call provides // barrier synchronization individually over each new team created by the call. // In all other cases the implementation is permitted but not required to include // barrier synchronization, which may or may not be necessary to allow immediate // use of the resulting team. 
// // NOTE: The current implementation only supports creation of teams composed // entirely of primordial endpoints, even with conduits which support creation // of additional endpoints. This limitation will be removed in a later release. size_t gex_TM_Create( gex_TM_t *new_tms, // OUT size_t num_new_tms, // Length of new_tms gex_TM_t parent_tm, gex_EP_Location_t *args, // IN size_t numargs, // single-valued over output team gex_Addr_t *scratch_addrs, // IN size_t scratch_size, // single-valued over output team gex_Flags_t flags); // Flags (partially single-valued) // Destroy a (quiesced) team // [Since spec v0.10] // // This is a collective call to destroy a team which is no longer needed and // reclaim associated resources. // // + This call is collective over members of the team named by tm. // + Destroys the team, releasing resources allocated to it by the // implementation. // + It is erroneous to destroy the primordial team. // + Use of tm after return from this call is erroneous. // + Does not destroy the endpoint associated with tm. // + For the purpose of this API, a tm has been "locally quiesced" only when // all of the following are true with respect to calls initiated on the local // process: // - No calls taking this tm as an argument are executing concurrently on // other threads. // - All collective operations using this tm are complete (client has synced // their gex_Event_t's). // - Any gex_AD_t objects created using this tm have been destroyed. // + The identifier GEX_FLAG_GLOBALLY_QUIESCED is a preprocessor macro // expanding to a constant integer expression suitable for use as a value of // type gex_Flags_t. // + By default, the tm must be locally quiesced on *each* caller before it may // invoke this API. However, if GEX_FLAG_GLOBALLY_QUIESCED is passed in // flags, then the caller is additionally asserting that the tm has been // quiesced on *all* callers (globally) prior to any caller invoking this API. 
// + The presence/absence of GEX_FLAG_GLOBALLY_QUIESCED in flags must be // single-valued. // + Regardless of the presence/absence of GEX_FLAG_GLOBALLY_QUIESCED in flags, // this call is permitted, but not required, to incur barrier synchronization // across tm. // + The scratch_p argument may be NULL. If non-NULL then if-and-only-if the // collective scratch space used by the team was provided by the client, then // its location is written to the location named by the scratch_p argument. // + If a value is written to *scratch_p then return value is non-zero. // Otherwise, zero is returned. // + [UNIMPLEMENTED] If GEX_FLAG_SCRATCH_SEG_OFFSET is set in flags, then the // value (if any) written to the gex_addr field of *scratch_p is assigned the // byte offset into the bound segment of the endpoint associated with tm. // Otherwise, the value (if any) assigned to this field is a virtual address. // + The presence/absence of GEX_FLAG_SCRATCH_SEG_OFFSET in flags need not be // single-valued, and need not match the value used at team construction. // + Any cleanup action with respect to ClientData associated with the tm is // the client's responsibility. // // The specification of GEX_FLAG_GLOBALLY_QUIESCED is intended to make the // synchronization optional in order to remove unnecessary barriers. For // instance given a scenario in which a client has a "row team" and a "column // team" with a common parent, it would be sufficient to locally quiesce both // teams, followed by a barrier over their common parent, followed by making // back-to-back calls to destroy these row and column teams with this flag. // // The definition of "locally quiesced" intentionally excludes completion of // non-blocking point-to-point operations using tm at their initiation. This // is possible because the semantics of such operations depend on the endpoints // involved, and not on the tm used to name them. 
// // The optional scratch_p argument is intended to assist the client in // reclaiming use of the space it may have granted to the collectives // implementation when the team was created, without creating a requirement // for the client to track something GASNet-EX already tracks. int gex_TM_Destroy( gex_TM_t tm, gex_Memvec_t *scratch_p, // OUT gex_Flags_t flags); // Create an "ad hoc" TM for point-to-point communication // [Since spec v0.12] // // This API provides the means to locally construct a value which can be passed // as the tm argument to point-to-point communication calls in lieu of a // collectively created team, allowing communication between endpoints which // might not be members of any common team (or of any team at all). // // With the exception of AM Replies, all GASNet-EX point-to-point // communications APIs name both the local and remote endpoints using a pair of // arguments of type gex_TM_t and gex_Rank_t. However, a gex_TM_t // corresponding to a team has associated semantics that are not well-suited to // inclusion of endpoints which lack corresponding host CPU threads to perform // collective calls. This API allows for communication to/from the memory in // segments bound to any endpoint in the job without the need to include it in // a team. // // + This is not a collective operation. // + Returns a value of type gex_TM_t representing an ad hoc "TM-pair" // consisting of the given local_ep in the calling process and the endpoint // with index remote_ep_index in the process with a jobrank given by the rank // argument passed along with this gex_TM_t in a point-to-point communication // call. // + gex_TM_Pair is a lightweight, non-communicating utility call. // + The result is a TM-pair value which may be stored, reused or discarded, // and has no corresponding free or release call (although it only remains // valid for use while the referenced endpoints exist). 
// + Two TM-pair values will compare equal if and only if they were created by // calls to gex_TM_Pair() with the same arguments, and will never compare // equal to a gex_TM_t created by other means. // + The result is not a valid argument to any API with a prefix of gex_TM_, // gex_AD_ or gex_Coll_, nor to any API documented as collective over the // argument (regardless of prefix). // + The result is valid for use in AM payload limit queries: // gex_AM_Max{Request,Reply}{Medium,Long}() // + The result is valid for use in bound segment queries: // gex_Segment_QueryBound() [DEPRECATED] and gex_EP_QueryBoundSegmentNB() // + The result is valid for use in point-to-point communication calls in the // gex_RMA_*(), gex_VIS_*() and gex_AM_*() families when used in a manner // similar to what is shown in examples below. // // Example 1. // A call to gex_RMA_GetNBI() to read from the endpoint with index rem_idx on // the process with the given jobrank, and initiated using the local endpoint // loc_ep: // gex_RMA_GetNBI(gex_TM_Pair(loc_ep, rem_idx), dest, jobrank, src, nbytes, flags); // // Example 2. // Communicating between a local endpoint ep0 and the remote endpoints with // index 1 in several processes, using a single TM-Pair: // gex_TM_t tm_pair_01 = gex_TM_Pair(ep0, 1); // for (int i = 0; i < num_peers; ++i) // gex_RMA_GetNBI(tm_pair_01, dest[i], jobrank[i], src[i], nbytes, flags); gex_TM_t gex_TM_Pair( gex_EP_t local_ep, gex_EP_Index_t remote_ep_index); // Translations between (tm,rank) and jobrank // // These functions provide translations in either direction between a // (tm,rank) pair and a jobrank. // // gex_Rank_t gex_TM_TranslateRankToJobrank(tm, rank) // Returns the jobrank of the endpoint in 'tm' with the given 'rank'. // Requires 0 <= rank < gex_TM_QuerySize(tm) // gex_Rank_t gex_TM_TranslateJobrankToRank(tm, jobrank) // If there is an endpoint in 'tm' with the given 'jobrank', return its // rank in 'tm'. Otherwise, returns GEX_RANK_INVALID. 
// Requires 0 <= jobrank < gex_System_QueryJobSize() // // These queries MAY communicate. // [TBD: exception for 'self' in one or both directions?] // These calls are not legal in contexts which prohibit communication, // including (but not limited to) AM Handler context or when holding an HSL. // gex_Rank_t gex_TM_TranslateRankToJobrank(gex_TM_t tm, gex_Rank_t rank); gex_Rank_t gex_TM_TranslateJobrankToRank(gex_TM_t tm, gex_Rank_t jobrank); // Translation from (tm,rank) to gex_EP_Location_t // // This function provides translation from a (tm,rank) pair to a // gex_EP_Location_t, which is a (jobrank,epidx) pair. // // tm: A valid gex_TM_t // rank: The rank of some member of tm. // Requires 0 <= rank < gex_TM_QuerySize(tm). // flags: Flags are reserved for future use and must currently be zero // // Returns: A gex_EP_Location_t describing the given member of tm. // // This query MAY communicate. // [TBD: exception for 'self' in one or both directions?] // This call is not legal in contexts which prohibit communication, // including (but not limited to) AM Handler context or when holding an HSL. // gex_EP_Location_t gex_TM_TranslateRankToEP( gex_TM_t tm, gex_Rank_t rank, gex_Flags_t flags); // // Operations on gex_EP_t // NOTE: currently gex_Client_Init() is the only way to create an EP. // However, additional APIs for EP creation will be added. // // Query owning client gex_Client_t gex_EP_QueryClient(gex_EP_t ep); // Query flags passed when ep was created gex_Flags_t gex_EP_QueryFlags(gex_EP_t ep); // Query the bound segment // Newly-created EPs have no bound segment and will yield GEX_SEGMENT_INVALID. gex_Segment_t gex_EP_QuerySegment(gex_EP_t ep); // Query the endpoint index gex_EP_Index_t gex_EP_QueryIndex(gex_EP_t ep); // Query addresses and length of a (possibly remote) bound segment // [Since spec v0.13] // // This query takes a gex_TM_t and gex_Rank_t, which together name an endpoint. 
// Other than flags, the remaining arguments are pointers to locations for // outputs, each of which may be NULL if the caller does not need a particular // value. // // If the value of flags does NOT include GEX_FLAG_IMMEDIATE, then this API // behaves as follows: // + The return value is a root event which can be successfully synchronized // (return from gex_Event_Wait*() or zero return from gex_Event_Test*()) // once the query results have been written to the output locations. // It is permitted to be GEX_EVENT_INVALID (but not GEX_EVENT_NO_OP). // + Between entering this call and synchronizing the event it returns, the // content of the output locations is undefined. // + A "successful" query is one in which the endpoint named by (tm, rank) has a // bound segment *and* one or more of the following are true: // + The endpoint resides in the calling process // + The endpoint has a segment that was bound via gex_Segment_Attach() // + The endpoint had the bound segment at the time it was the subject of a // preceding call to gex_EP_PublishBoundSegment() in which the calling // process was a participant. // + A successful query writes the corresponding segment's properties to each of // the non-NULL output locations as described in "Segment properties and output // locations", below. // + If the endpoint named by (tm, rank) does not satisfy the above conditions // for a successful query, then the query may be "unsuccessful", whereby the // size_p output (unless NULL) will receive the value 0 and the remaining // outputs are undefined. The implementation is thus permitted, but not // required, to be successful for a non-primordial bound segment which has not // yet been published to the calling process. // + Since a segment cannot have zero-length, a caller can reliably distinguish // between a successful or unsuccessful query via the size_p output. 
// + The current definition of "unsuccessful" notably includes the case of a // remote endpoint with a bound segment which has not been published to the // calling process. However, the behavior for this case is subject to // possible change in a future release. // // In the case that flags DOES include GEX_FLAG_IMMEDIATE, then this API // behaves as follows: // + If the query can be resolved without communication, then the return value // is GEX_EVENT_INVALID, with the behavior otherwise identical to the case // without GEX_FLAG_IMMEDIATE. // + Queries which would require communication to resolve will return // GEX_EVENT_NO_OP. // + All queries for which (tm, rank) names an endpoint which resides in the // calling process are guaranteed to return GEX_EVENT_INVALID. // + Queries for which (tm, rank) names an endpoint which does not reside in the // calling process may return either GEX_EVENT_INVALID or GEX_EVENT_NO_OP and // the same query is not guaranteed to return the same value each time. // This permits an implementation to cache information for remote endpoints. // // Segment properties and output locations: // owneraddr_p: receives the address of the segment in the address space // of the process which owns the segment. // For segments of kind GEX_MK_HOST, this is a host address // while for all other kinds this is a device address. In // either case it is the address which would be returned by // gex_Segment_QueryAddr() immediately after segment creation // (via either gex_Segment_Attach() or gex_Segment_Create()). // localaddr_p: receives the address of the segment in the address space // of the calling process, *if* mapped, and NULL otherwise. // size_p: receives the length of the segment. // // Only segments of kind GEX_MK_HOST may report a non-NULL localaddr property, // and all other kinds will yield NULL. The current release additionally // limits the reporting of non-NULL values to primordial segments (those // created by gex_Segment_Attach()). 
// // Passing GEX_RANK_INVALID as the rank argument is *not* permitted. // Use of a TM-pair for the 'tm' argument *is* permitted. // Passing a '(tm,rank)' tuple naming an endpoint residing on the calling // process *is* permitted. // // When passing a '(tm,rank)' tuple naming an endpoint not residing on the // calling process, this query MAY communicate unless GEX_FLAG_IMMEDIATE is // included in flags. // If and only if GEX_FLAG_IMMEDIATE is included in flags, then this call is // permitted in contexts which prohibit communication (such as AM Handler // context or when holding an HSL). extern gex_Event_t gex_EP_QueryBoundSegmentNB( gex_TM_t tm, gex_Rank_t rank, void **owneraddr_p, void **localaddr_p, uintptr_t *size_p, gex_Flags_t flags); // Query addresses and length of a (possibly remote) bound segment // [DEPRECATED since spec v0.13 - see gex_EP_QueryBoundSegmentNB(), above] // // This query provides semantics similar to // gex_Event_Wait( gex_EP_QueryBoundSegmentNB([...args...], 0) ) // where "[...args...]" represent the five arguments to this query. // // The semantic differences are as follows: // + Success/failure // - This call returns zero for a "successful" query, defined as one in which // (tm, rank) names an endpoint with a bound segment (and, if remote, that // segment is primordial or has been published to the caller). Otherwise, // a non-zero value is returned. // - A successful query with gex_EP_QueryBoundSegmentNB() is distinguishable // by a non-zero size output, while an unsuccessful query will write zero // to the size output. // + Preservation of outputs on failure // - This call guarantees that an unsuccessful query leaves the outputs // unmodified. // - An unsuccessful query with gex_EP_QueryBoundSegmentNB() writes zero to // the size output and leaves the others undefined. // // This call is not legal in contexts which prohibit communication, including // (but not limited to) AM Handler context or when holding an HSL. 
int gex_Segment_QueryBound( gex_TM_t tm, gex_Rank_t rank, void **owneraddr_p, void **localaddr_p, uintptr_t *size_p); // Publish of EP's Bound Segment "RMA Credentials" // // Description: // Some conduits require "credentials" to initiate communication targeting // the bound segment of a remote endpoint. This call performs any // communication and setup necessary to ensure that after successful return // the local process may safely initiate such communication with any // endpoint named in this call which had a bound segment at the time of // this call. // // Semantics: // + On success, returns GASNET_OK. // + Non-fatal failures return a documented error code. // + Lack of sufficient resources to satisfy the given request will yield a // return of GASNET_ERR_RESOURCE. // + This call is collective over tm, which identifies a team used for // underlying communication. // + The eps argument is an array of length num_eps (possibly zero) of valid // endpoints. // + The num_eps argument may vary by caller (it is not required to be // single-valued). // + This call publishes the bound segments, if any, of the endpoints named // by the eps argument. // + The endpoint associated with tm is not implicitly Published, but it may // be explicitly included in eps if Publication is desired. // + The concatenation of eps arrays must name distinct endpoints. // Duplication is prohibited both within a given eps array, and across eps // arrays passed by multiple tm (from the same team) within a given // process. This restriction may be relaxed in a future release. // + Upon successful return, the local process may safely initiate // communication targeting the bound segment of any endpoint named by the // eps arguments which had a bound segment prior to the corresponding entry // to this collective call. // + It is permitted for eps to contain endpoints without a bound segment, in // which case no credential will be published for such endpoints. 
// + It is permitted for the same endpoint to be the subject of multiple // successive Publish operations and any bound segment will replace a prior // Publish in which an endpoint had no bound segment. // + The allowance for multiple Publish operations includes the one implicit // in gex_Segment_Attach(). // + The endpoints named by eps must be idle for the duration of this operation. // - No communication operations may be in-flight on any named endpoint // when this operation starts. // - No communication operations may be initiated on any named endpoint // concurrent with this operation. // - No AM Request may target any named endpoint for the duration of this // operation. // - As an exception to the restrictions above, inclusion of the endpoint // associated with tm in eps is explicitly permitted. // - A named endpoint may not be the subject of concurrent segment // operations including (but not limited to) gex_Segment_QueryBound, // gex_EP_BindSegment, gex_EP_PublishBoundSegment, and // gex_EP_QueryBoundSegmentNB. // + The publication of credentials is per local process and remote endpoint, // independent of the specific team used to perform this operation. This // means that upon return, initiation of communication is permitted using // any (tm_x, rank) pair from a participating process naming a participating // remote endpoint, including initiation using a gex_TM_t created using // gex_TM_Pair(). Additionally, this persists beyond destruction of the // team used to Publish. // + The flags argument is reserved for future use and must currently be // zero. // + This call is permitted but not required to incur barrier synchronization // across the team. extern int gex_EP_PublishBoundSegment( gex_TM_t tm, gex_EP_t *eps, // IN size_t num_eps, gex_Flags_t flags); // Minimum permitted fixed index for AM handler registration. // Applies to both gasnet_attach() and gex_EP_RegisterHandlers(). // An integer constant, guaranteed to be 128 or less. 
#define GEX_AM_INDEX_BASE ??? // Implementation-induced concurrency of AM handlers // This value (always defined) has a non-zero value iff the implementation // may run AM handlers on a thread not owned by the client // (and in particular, concurrently with the client when no client // thread is inside a synchronous call to GASNet). // Note this is orthogonal to SEQ/PAR/PARSYNC mode - in particular, in PAR // mode multiple client threads concurrently entering GASNet may result // in AM handler concurrency, independent of this value. #define GASNET_HIDDEN_AM_CONCURRENCY_LEVEL ??? // Returns the runtime value of AM concurrency level for the calling process // which may be more precise than the conservative static value provided by // GASNET_HIDDEN_AM_CONCURRENCY_LEVEL // Only valid after gex_Client_Init() // [Since spec v0.14] int gex_System_QueryHiddenAMConcurrencyLevel(void); // Client-facing type for describing one AM handler // This type is an alternative to (*not* interchangeable with) gasnet_handlerentry_t // // gex_index may either be in the range [GEX_AM_INDEX_BASE .. 255] to register // at a fixed index, or 0 for "don't care" (see gex_EP_RegisterHandlers() for // more information on this case). // // The gex_nargs and gex_flags fields are used by the client to supply the implementation // with assertions regarding the future invocations and behavior of each AM handler. // If a handler invocation (eg via an AM injection targeting a given handler) or // execution of an AM handler violates its registration assertions, behavior is undefined. typedef struct { gex_AM_Index_t gex_index; // 0 or in [GEX_AM_INDEX_BASE .. 255] gex_AM_Fn_t gex_fnptr; // Pointer to the handler on this process gex_Flags_t gex_flags; // Incl. required S/M/L and REQ/REP, see below unsigned int gex_nargs; // Required in [0 .. 
gex_AM_MaxArgs()] // Optional fields (both are "shallow copy") const void *gex_cdata; // Available to handler const char *gex_name; // Used in debug messages } gex_AM_Entry_t; // Required flags for gex_flags field when registering AM handlers. // // When registering AM handlers, the gex_flags field of each // gex_AM_Entry_t must indicate how the handler may be called. // This requires ORing one constant from each of the following // two groups. // AM Category Flags: #define GEX_FLAG_AM_SHORT ??? // Called only as a Short #define GEX_FLAG_AM_MEDIUM ??? // Called only as a Medium #define GEX_FLAG_AM_LONG ??? // Called only as a Long #define GEX_FLAG_AM_MEDLONG ??? // Called as a Medium or Long // AM Request/Reply Flags: #define GEX_FLAG_AM_REQUEST ??? // Called only as a Request #define GEX_FLAG_AM_REPLY ??? // Called only as a Reply #define GEX_FLAG_AM_REQREP ??? // Called as a Request or Reply // gex_EP_RegisterHandlers() // // Registers a client-provided list of AM handlers with the given EP, with // semantics similar to gasnet_attach(). However, unlike gasnet_attach() // this function is not collective and does not include an implicit barrier. // Therefore the client must provide for any synchronization required to // ensure handlers are registered before any process may send a corresponding // AM to the Endpoint. // // May be called multiple times on the same Endpoint to incrementally register handlers. // Like gasnet_attach() the handler indices specified in the table (other than // "don't care" zero indices) must be unique. That now extends across multiple // calls on the same gex_EP_t (though provisions to selectively relax this // restriction are planned for a later release). // // Registration of handlers via a call to gasnet_attach() does *not* preclude // use of this function to register additional handlers. 
// // As in GASNet-1, handlers with a handler index (gex_index) of 0 on entry are // assigned values by GASNet after the non-zero (fixed index) entries have been // registered. While GASNet-1 leaves the algorithm for the assignment // unspecified (only promising that it is deterministic) this specification // guarantees that entries with gex_index==0 are processed in the same order // they appear in 'table' and are assigned the highest-numbered index which is // then still unallocated (where 255 is the highest possible). However, in // the case of concurrent calls to gex_EP_RegisterHandlers() and/or // gasnet_attach() on the same endpoint with gex_index==0, the order in which // such entries are processed is unspecified and may be non-deterministic. // // Updating of gex_index fields that were passed as 0 upon input is the only // modification this function will perform upon the contents of 'table' // (whose elements are otherwise treated as if const-qualified by this call). // Upon return from this function, the relevant information from 'table' // has been copied into storage internal to the endpoint implementation, // and the client is permitted to overwrite or free the contents of 'table'. 
// // If any sequence of calls attempts to register a total of more than (256 - // GEX_AM_INDEX_BASE) handlers to a single gex_EP_t, the result is undefined. // // Returns: GASNET_OK == 0 on success int gex_EP_RegisterHandlers( gex_EP_t ep, gex_AM_Entry_t *table, size_t numentries); // // Active Message (AM) limit queries // // Maximum number of supported AM arguments // Semantically identical to gasnet_AMMaxArgs() unsigned int gex_AM_MaxArgs(void); // Maximum payload size queries // Superset of gasnet_AMMax{Medium,LongRequest,LongReply}() // // This family of calls provides maximum payload queries of two types: // + In the absence of the GEX_FLAG_AM_PREPARE_LEAST_{CLIENT,ALLOC} flags, // these queries return the maximum legal 'nbytes' argument value for the // corresponding gex_AM_{Request,Reply}{Medium,Long}*() call (collectively // known as "fixed-payload AM" injection calls) using the named local and // remote endpoint and the same 'lc_opt', 'numargs' and 'flags' arguments. // + When passed either of the GEX_FLAG_AM_PREPARE_LEAST_{CLIENT,ALLOC} flags, // these queries return the maximum legal 'least_payload' argument value for // the corresponding gex_AM_Prepare{Request,Reply}{Medium,Long}() call // (collectively known as "negotiated-payload AM" prepare calls) using the // named local and remote endpoint and the same 'lc_opt', 'numargs' and 'flags' // arguments. // // 1. If 'tm' names a local endpoint which is not AM-capable, then the call // is erroneous. Here "AM-capable endpoint" is defined as any primordial // endpoint or a non-primordial endpoint which was created with // GEX_EP_CAPABILITY_AM. // 2. When (other_rank != GEX_RANK_INVALID) // a. The result of each query is a function of the 'numargs', 'lc_opt' and // 'flags' arguments, and the two endpoints (one local and one remote) // named by the tuple consisting of the 'tm' and 'other_rank' arguments. // b.
The result is independent of *how* the endpoints are named, such as by // distinct 'tm' values with overlapping membership or use of a TM-pair. // c. If the remote endpoint named by the '(tm,other_rank)' tuple is not // AM-capable or does not exist (only possible with a TM-pair), then the // call is erroneous. // 3. When (other_rank == GEX_RANK_INVALID) // a. The result of each query is a min-of-maxes over all AM-capable remote // endpoints that are addressable with the given 'tm' when 'other_rank' // is varied over its valid range, with the given 'numargs', 'lc_opt' and // 'flags' arguments. // b. This valid range excludes any endpoints which are not AM-capable. // c. In the case that 'tm' is a TM-pair, the valid range also excludes // jobranks which do not have an endpoint at the associated remote // endpoint index. // d. If valid range defined above is empty (no AM-capable endpoints are // addressable), then the call is erroneous. // 4. The result of each query function is guaranteed to be symmetric with // respect to exchanging the local and remote endpoints. Two calls, by // appropriate processes, that reverse the local and remote endpoint roles // while keeping all other input arguments equal, are guaranteed to return // the same value. Note this does NOT imply any relationship between the // results of different query functions (eg MaxRequestMedium versus // MaxReplyMedium). // 5. When (other_rank == GEX_RANK_INVALID) all callers providing a 'tm' naming // the same set of participating endpoints are guaranteed to get the same // result when given the same values for the other input arguments. Due to // the symmetry noted above, this includes two calls using TM-pairs to // identify the same two endpoints. // 6. Due to the symmetry properties described above, 'other_rank' can (and // therefore should) always name the other party in the communication, // regardless of whether that rank or the caller is to be the sender or the // receiver. // 7. 
'numargs' must be between 0 and gex_AM_MaxArgs(), inclusive. It is // guaranteed that increasing 'numargs' will produce monotonically non- // increasing results when all other parameters are held fixed. // 8. 'lc_opt' indicates the payload local completion option to be used for // the AM injection or prepare call in question. The predefined constants // GEX_EVENT_NOW and GEX_EVENT_GROUP should be used directly, while a // pointer to any variable of type gex_Event_t (or a NULL pointer) may be // used interchangeably to indicate that the injection or prepare call // passes any such value (without requiring that the same pointer value be // passed). // 9. 'flags' indicates the flags that will be provided to the corresponding // AM injection or prepare function (and should not be confused with the // handler registration flags). The result of the query is only guaranteed // to be correct for an injection or prepare call with exactly the same // 'flags', excepting only that the GEX_FLAG_AM_PREPARE_LEAST_* flags may // be omitted from a prepare call. // // The result of all four query functions is guaranteed to be at least 512 (bytes). // // The result is guaranteed to be stable throughout a given job execution - ie // for the same set of input arguments, it will always return the same value. // // Aside from the explicit guarantees above, the result may otherwise vary with // the input arguments in unspecified ways, and thus only defines the documented // limit for a call with corresponding local and remote endpoints and values of // lc_opt, flags and numargs. For example, limits often vary between different // conduits and may also vary based on job layout, between pairs of ranks in // the same team, or between different pairs of endpoints linking the same two // processes.
size_t gex_AM_MaxRequestLong( gex_TM_t tm, gex_Rank_t other_rank, const gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int numargs); size_t gex_AM_MaxReplyLong( gex_TM_t tm, gex_Rank_t other_rank, const gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int numargs); size_t gex_AM_MaxRequestMedium( gex_TM_t tm, gex_Rank_t other_rank, const gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int numargs); size_t gex_AM_MaxReplyMedium( gex_TM_t tm, gex_Rank_t other_rank, const gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int numargs); // Token-specific max fixed-payload queries for specific nargs, lc_opt and flags // // Semantics are identical to the max payload queries above, except that // a gex_Token_t replaces the (tm,rank) tuple. The token names the local // endpoint on which the AM has been received and the remote endpoint which // sent it. In particular, this implies the queries return the limits // governing the AM Reply operations that can be performed using this token. // // These are only permitted in Request handlers. size_t gex_Token_MaxReplyLong( gex_Token_t token, const gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int numargs); size_t gex_Token_MaxReplyMedium( gex_Token_t token, const gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int numargs); // Least-upper-bound fixed-payload queries (unknown team/peer, nargs, lc_opt and flags) // Guaranteed to be less than or equal to the result of the corresponding AM_Max* // function, for all valid input parameters to that function (excluding use of the // GEX_FLAG_AM_PREPARE_LEAST_* flags). // The result of all four query functions is guaranteed to be at least 512 (bytes). // These functions correspond semantically to the gasnet_AMMax*() queries in GASNet-1, // which return a globally conservative maximum.
size_t gex_AM_LUBRequestLong(void); size_t gex_AM_LUBReplyLong(void); size_t gex_AM_LUBRequestMedium(void); size_t gex_AM_LUBReplyMedium(void); // // AM Token Info // // Struct type for gex_Token_Info queries contains *at least* the following // fields, in some *unspecified* order typedef struct { // "Job rank" of the sending process, as defined with the description // of gex_System_QueryJobRank(). gex_Rank_t gex_srcrank; // Destination (receiving) endpoint gex_EP_t gex_ep; // Entry describing the currently-running handler corresponding to this token. // The referenced gex_AM_Entry_t object resides in library-owned storage, // and should not be directly modified by client code. // If handler was registered using the legacy gasnet_attach() call, this // value may be set to a valid pointer to a gex_AM_Entry_t, with undefined // contents. const gex_AM_Entry_t *gex_entry; // 1 if the current handler is a Request, 0 otherwise. [some integral type] gex_is_req; // 1 if the current handler is a Long, 0 otherwise. [some integral type] gex_is_long; } gex_Token_Info_t; // Bitmask constants to request specific info from gex_Token_Info(): // All listed constants are required, but the corresponding queries // are divided into Required ones and Optional ones (with the // exception of GEX_TI_ALL). typedef [some integer type] gex_TI_t; // REQUIRED: All implementations must support these queries: #define GEX_TI_SRCRANK ((gex_TI_t)???) # required since spec v0.1 #define GEX_TI_EP ((gex_TI_t)???) # required since spec v0.1 // OPTIONAL: Some implementations might not support these queries: #define GEX_TI_ENTRY ((gex_TI_t)???) # optional since spec v0.1 #define GEX_TI_IS_REQ ((gex_TI_t)???) # optional since spec v0.1 #define GEX_TI_IS_LONG ((gex_TI_t)???) # optional since spec v0.1 // Convenience: all defined queries (Required and Optional) #define GEX_TI_ALL ((gex_TI_t)???) 
# required since spec v0.1 // Support indicators for Optional token info queries // Available since spec v0.17 // // GASNET_SUPPORTS_TI_* preprocessor identifiers are defined to 1 or undefined // to indicate whether (or not, respectively) the implementation of // gex_Token_Info() supports the corresponding query for all valid tokens. // // When any of these is defined for an Optional query, it is an indication that // the current implementation of the current conduit supports the // corresponding query. However, it is not a guarantee of such support in // other conduits or in future releases of the current conduit. // // When any of these is undefined, the implementation is still permitted to // support the query conditionally. For instance, the shared-memory transport // may support an Optional query that is not supported for AMs travelling // outside of the shared-memory nbrhd, or vice-versa. #define GASNET_SUPPORTS_TI_SRCRANK 1 #define GASNET_SUPPORTS_TI_EP 1 #define GASNET_SUPPORTS_TI_ENTRY 1 or undefined #define GASNET_SUPPORTS_TI_IS_REQ 1 or undefined #define GASNET_SUPPORTS_TI_IS_LONG 1 or undefined // Takes a token, address of client-allocated gex_Token_Info_t, and a mask. // The mask is a bit-wise OR of GEX_TI_* constants, which indicates which // fields of the gex_Token_Info_t should be set by the call. // // The return value is of the same form as the mask. // The implementation is permitted to set fields not requested by the // caller to valid or *invalid* values. The returned mask will indicate // which fields contain valid results, and may include bits not present // in the mask. // // Each GEX_TI_* corresponds to either a Required or Optional query. // When a client requests a Required query, a conforming implementation // MUST set these fields and the corresponding bit in the return value. // An Optional query may not be implemented on all conduits or all // configurations, or even under various conditions (e.g.
may not be // supported in a Reply handler). If the client makes an Optional request // the presence of the corresponding bit in the return value is the only // indication that the struct field is valid. extern gex_TI_t gex_Token_Info( gex_Token_t token, gex_Token_Info_t *info, gex_TI_t mask); // // Fixed-payload AM APIs // // NOTE 0: Prototypes in this section are "patterns" // // These APIs instantiate the "[M]" at the end of each prototype with // the integers 0 through gex_AM_MaxArgs(), inclusive. // The '[,arg0, ... ,argM-1]' then represent the arguments // (each of type gex_AM_Arg_t). // Additionally, on compilers supporting the __VA_ARGS__ preprocessor feature // (added in C99 and C++11) the "[M]" may optionally be omitted entirely and // is inferred based on the argument count. // // NOTE 1: Return value // // An AM Request or Reply call is a "no op" IF AND ONLY IF the value // GEX_FLAG_IMMEDIATE is included in the 'flags' argument AND the // conduit could determine that it would need to block temporarily to // obtain the necessary resources. This case is distinguished by a // non-zero return. In all other cases the return value is zero. // // In the "no op" case no communication has been performed and the // contents of the location named by the 'lc_opt' argument (if any) is // undefined. // // NOTE 2: The 'lc_opt' argument for local completion // // The AM interfaces never detect or report remote completion, but do // have selectable behavior with respect to local completion (which // means that the source buffer may safely be written, free()ed, etc). // // Short AMs have no payload and therefore have no 'lc_opt' argument. // // The Medium and Long Requests accept the pre-defined constant values // GEX_EVENT_NOW and GEX_EVENT_GROUP, and pointers to variables of type // 'gex_Event_t' (note that GEX_EVENT_DEFER is prohibited). // The NOW constant requires that the Request call not // return until after local completion.
The GROUP constant allows the // Request call to return without delaying for local completion and adds // the AM operation to the set of operations for which // gex_NBI_{Test,Wait}() call may check local completion when passed // GEX_EC_AM. Use of a pointer to a variable of type 'gex_Event_t' // allows the call to return without delay, and requires the client to later // check local completion of this root event using gex_Event_{Test,Wait}*(). // // The 'lc_opt' argument to Medium and Long Reply calls behaves as for the // Requests with the exception that GEX_EVENT_GROUP is *not* permitted. // It is also important to note that it is not legal to "test", or // "wait" on a 'gex_Event_t' in AM handler context. // [TBD: we *could* allow handlers to make bounded calls to "test", which // does not Poll, if we wanted to.] // // NOTE 3: The 'flags' argument for segment disposition [UNIMPLEMENTED] // // The 'flags' argument to Medium and Long Request/Reply calls may include // GEX_FLAG_SELF_SEG_* flags to assert segment disposition properties of the // address range described by [source_addr..(source_addr+nbytes-1)]. Any such // assertions must remain true until local completion is signalled (see above). // // The 'flags' argument to Long Request/Reply calls may include // GEX_FLAG_PEER_SEG_* flags to assert segment disposition properties of the // address range described by [dest_addr..(dest_addr+nbytes-1)]. Any such // assertions must remain true until the AM handler begins execution at the target. // // NOTE 4: Overlap // // Within a single gex_AM_*Long* operation, if the specified source and destination // memory regions overlap, behavior is undefined. High-quality implementations // may choose to diagnose such errors. // // Other arguments behave as in the analogous GASNet-1 functions.
// Misc semantic strengthening: // * dest_addr for Long is guaranteed to be passed to the handler as provided // by the initiator, even for the degenerate case when nbytes==0 // Long int gex_AM_RequestLong[M]( gex_TM_t tm, // Names a local context ("return address") gex_Rank_t rank, // Together with 'tm', names a remote context gex_AM_Index_t handler, // Index into handler table of remote context const void *source_addr, // Payload address (or OFFSET) size_t nbytes, // Payload length void *dest_addr, // Payload destination address (or OFFSET) gex_Event_t *lc_opt, // Local completion control (see above) gex_Flags_t flags // Flags to control this operation [,arg0, ... ,argM-1]) // Handler argument list, each of type gex_AM_Arg_t int gex_AM_ReplyLong[M]( gex_Token_t token, // Names local and remote contexts gex_AM_Index_t handler, const void *source_addr, size_t nbytes, void *dest_addr, gex_Event_t *lc_opt, gex_Flags_t flags [,arg0, ... ,argM-1]); // Medium int gex_AM_RequestMedium[M]( gex_TM_t tm, gex_Rank_t rank, gex_AM_Index_t handler, const void *source_addr, size_t nbytes, gex_Event_t *lc_opt, gex_Flags_t flags [,arg0, ... ,argM-1]); int gex_AM_ReplyMedium[M]( gex_Token_t token, gex_AM_Index_t handler, const void *source_addr, size_t nbytes, gex_Event_t *lc_opt, gex_Flags_t flags [,arg0, ... ,argM-1]); // Short int gex_AM_RequestShort[M]( gex_TM_t tm, gex_Rank_t rank, gex_AM_Index_t handler, gex_Flags_t flags [,arg0, ... ,argM-1]); int gex_AM_ReplyShort[M]( gex_Token_t token, gex_AM_Index_t handler, gex_Flags_t flags [,arg0, ... ,argM-1]); // // Negotiated-payload AM APIs (aka "NPAM") // // The fixed-payload APIs for Active Message Mediums and Longs (brought // forward from GASNet-1) allow sending any payload up to defined maximum // lengths. However, this comes with the potential costs of extra in-memory // copies of the payload and/or conservative maximum lengths. 
Use of the // negotiated-payload APIs can overcome these limitations to yield performance // improvements in two important cases. First, when the client can begin the // negotiation before the payload is assembled (for instance concatenation of // a client-provided header and application-provided data) payload negotiation // can ensure that the GASNet conduit will not need to make an additional // in-memory copy to prepend its own header, or to send from pre-registered // memory. Second, when the client has a need for fragmentation and // reassembly (due to a payload exceeding the maximums) use of negotiated // payload may permit a smaller number of fragments by taking advantage of // transient conditions (for instance in GASNet's buffer management) that // allow sending AMs with a larger payload than can be guaranteed in general. // // The basis of negotiated-payload AMs is a split-phase interface: "Prepare" // and "Commit". The first phase is a Prepare function to which the client // passes an optional source buffer address, the minimum and maximum lengths // it is willing to send, and many (but not all) of the other parameters // normally passed when injecting an Active Message. In this phase, GASNet // determines how much of the payload can be sent. // // The return from the Prepare call provides the client with an address and a // length. The length is in the range defined by the minimum and maximum // lengths. When the client_buf argument is non-NULL, the address provided by // the return will be exactly that value. Otherwise the address will be a // GASNet-allocated buffer of the indicated length, suitably aligned to hold any // data type. // // It is important to note that passing NULL for the client_buf argument to a // Prepare call requires GASNet to allocate buffer space of size no smaller // than least_payload. 
Use of gex_AM_Max{Request,Reply}{Medium,Long}() with // the GEX_FLAG_AM_PREPARE_LEAST_ALLOC flag gives the limits on the space GASNet // is required to allocate. Larger values of least_payload are erroneous. // // Between the Prepare and the Commit calls the client is responsible for // assembling its payload (or the prefix of the given length) at the selected // address (potentially a no-op). The client may send a length shorter than // the value returned from the Prepare, for instance rounding down to some // natural boundary. The client may also defer until the Prepare-Commit // interval its selection of the AM handler and arguments, which might depend // on the address and length returned by the Prepare call (though the number // of args must be fixed at Prepare). In the case of a Long, the client may // also defer selecting the destination address. These various parameters are // passed to the Commit function which performs the actual AM injection. // // It is important to note that in the interval between a Prepare and Commit, // the client is bound by the same restrictions as in an Active Message Reply // handler (ie all communication calls are prohibited). Prepare/commit pairs // do not nest. Additionally, the Prepare returns a thread-specific object // that must be consumed (exactly once) by a Commit in the same thread. Calls // to Prepare are permitted in the same places as the corresponding // fixed-payload AM injection call. // // Currently the semantics of the least_payload==0 case are unspecified. // We advise avoiding that case until a later release has resolved this. // Opaque type for AM Source Descriptor // Used in negotiated-payload AM calls: // Produced by (returned from) gex_AM_Prepare*() // Consumed by (passed to) gex_AM_Commit*() typedef ... gex_AM_SrcDesc_t; // Predefined value of type gex_AM_SrcDesc_t // Guaranteed to be zero. 
// May be returned by gex_AM_Prepare*() when the GEX_FLAG_IMMEDIATE flag // was passed, but required resources are not available. // Must not be passed to gex_AM_Commit*() calls or the // gex_AM_SrcDesc*() queries. #define GEX_AM_SRCDESC_NO_OP ((gex_AM_SrcDesc_t)0) // Query the address component of a gex_AM_SrcDesc_t // // Will be identical to the 'client_buf' passed to the Prepare call if that // value was non-NULL, and otherwise will be GASNet-allocated memory suitably // aligned to hold any data type. void *gex_AM_SrcDescAddr(gex_AM_SrcDesc_t sd); // Query the length component of a gex_AM_SrcDesc_t // // Indicates the maximum length of the buffer located at gex_AM_SrcDescAddr() // that can be sent in the Commit call. // Will be between the 'least_payload' and 'most_payload' passed // to the Prepare call (inclusive). size_t gex_AM_SrcDescSize(gex_AM_SrcDesc_t sd); // Native implementation indicators for negotiated-payload active messages // GASNET_NATIVE_NP_ALLOC_{REQ,REP}_{MEDIUM,LONG} symbols are defined to 1 or // undefined to indicate whether (or not, respectively) the implementation // of negotiated-payload AM Request/Reply Medium/Long (with a GASNet-allocated // source buffer, i.e., initiated with client_buf == NULL) for the network // transport of the current conduit are "native". This is a performance hint // to clients, and does not affect correctness or normative behavior. // The native designation implies that AM injection using these calls can avoid // one or more payload copies relative to the corresponding fixed-payload AM // call under the right conditions (which may be implementation dependent). // Note that in configurations providing GASNet shared-memory bypass for AM // to intra-nbrhd peers (activated by --enable-pshm, enabled by default), // these only denote the behavior of the network transport (AM to peers outside // the caller's nbrhd). 
The shared-memory transport for all conduits always // provides native behavior for Medium requests and replies. #define GASNET_NATIVE_NP_ALLOC_REQ_MEDIUM 1 or undefined #define GASNET_NATIVE_NP_ALLOC_REP_MEDIUM 1 or undefined #define GASNET_NATIVE_NP_ALLOC_REQ_LONG 1 or undefined #define GASNET_NATIVE_NP_ALLOC_REP_LONG 1 or undefined // // gex_AM_Prepare calls // // RETURNS: gex_AM_SrcDesc_t // + An opaque scalar type (with accessors) described above // + This is a thread-specific value // + This object is "consumed" by (cannot be used after) the // Commit call // ARGUMENTS: // gex_TM_t tm, gex_Rank_t rank [REQUEST ONLY] // + These arguments name the destination of an AMRequest // gex_Token_t token [REPLY ONLY] // + This argument identifies (implicitly) the destination of // an AMReply // const void *client_buf // + If non-NULL the client is offering this buffer as a // source_addr // + If NULL, the client is requesting a GASNet-allocated source // buffer to populate // size_t least_payload // + This is the minimum length that the Prepare call may // return on success - ie the least-sized payload the // client is willing to send at this time. // + The value must not exceed the value of the // gex_AM_Max[...]() call with the analogous Prepare arguments // size_t most_payload // + This is the maximum length that the Prepare call may // return on success - ie a (not necessarily tight) upper // bound on the payload size the client is willing to send at // this time. // + The value must not be less than least_payload (but they may // be equal). // + The value *may* exceed the corresponding gex_AM_Max[...](). // void *dest_addr [LONG ONLY] // + If this value is non-NULL then GASNet may use this value // (and flags in the GEX_FLAG_PEER_SEG_* family) to guide its // choice of outputs (addr and size) // + If this value is non-NULL then the client is required to // pass the same value to the Commit call.
// + May be NULL to request conservative behavior // + In all cases the actual dest_addr is supplied at Commit. // gex_Event_t *lc_opt // + If client_buf is NULL, this argument must also be NULL. // + If client_buf is non-NULL, this argument operates in the same // manner as the 'lc_opt' argument to the fixed-payload AM calls. // Between Prepare and Commit, the contents of the gex_Event_t // referenced by lc_opt, if any, is indeterminate. Only after // return from the Commit call may such a value be used by the // caller. // gex_Flags_t flags // + Bitwise OR of flags valid for the corresponding // fixed-payload AM injection // + GEX_FLAG_IMMEDIATE: the Prepare call may return // GEX_AM_SRCDESC_NO_OP==0 if injection resources (in // particular a buffer of size least_payload or longer) cannot // be obtained. // The Commit-time behavior is unaffected by this flag. // + [UNIMPLEMENTED] GEX_FLAG_SELF_SEG_OFFSET: is prohibited // + [UNIMPLEMENTED] GEX_FLAG_SELF_SEG_*: these flags may only be // passed if client_buf is non-NULL, and assert segment disposition // properties for the range [client_buf..(client_buf+most_payload-1)] // that must be true upon entry to Prepare. If gex_AM_SrcDescAddr() // on the Prepare result is equal to client_buf, then the assertion // must remain true until after local completion is signalled via `lc_opt`. // + [UNIMPLEMENTED] GEX_FLAG_PEER_SEG_*: [LONG ONLY] if `dest_addr` is // non-NULL, these flags assert segment disposition properties for the // range [dest_addr..(dest_addr+most_payload-1)] that must be true upon // entry to Prepare and remain true until entry to the AM handler at // the target. If `dest_addr` is NULL at Prepare and non-NULL at Commit, // these flags assert segment disposition properties for the Commit-time // range [dest_addr..(dest_addr+nbytes-1)] that must be true upon // entry to Commit and remain true until entry to the AM handler at // the target. 
// unsigned int numargs // + The number of arguments to be passed to the Commit call // extern gex_AM_SrcDesc_t gex_AM_PrepareRequestMedium( gex_TM_t tm, gex_Rank_t rank, const void *client_buf, size_t least_payload, size_t most_payload, gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int numargs); extern gex_AM_SrcDesc_t gex_AM_PrepareReplyMedium( gex_Token_t token, const void *client_buf, size_t least_payload, size_t most_payload, gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int numargs); extern gex_AM_SrcDesc_t gex_AM_PrepareRequestLong( gex_TM_t tm, gex_Rank_t rank, const void *client_buf, size_t least_payload, size_t most_payload, void *dest_addr, gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int numargs); extern gex_AM_SrcDesc_t gex_AM_PrepareReplyLong( gex_Token_t token, const void *client_buf, size_t least_payload, size_t most_payload, void *dest_addr, gex_Event_t *lc_opt, gex_Flags_t flags, unsigned int numargs); // // gex_AM_Commit calls // // NOTE: Prototypes in this section are "patterns" // These APIs instantiate the "[M]" at the end of each prototype with // the integers 0 through gex_AM_MaxArgs(), inclusive. // The '[,arg0, ... ,argM-1]' then represents the arguments // (each of type gex_AM_Arg_t). // Additionally, on compilers supporting the __VA_ARGS__ preprocessor feature // (added in C99 and C++11) the "[M]" may optionally be omitted entirely and // is inferred based on the argument count. // // RETURNS: void // ARGUMENTS: // gex_AM_SrcDesc_t sd // + The value returned by the immediately preceding Prepare // call on this thread. // gex_AM_Index_t handler // + The index of the AM handler to run at the destination // size_t nbytes // + The client's payload length // + Must be in the range: [0 ..
gex_AM_SrcDescSize(sd)] // + The base address of the source payload buffer is implicitly // specified by gex_AM_SrcDescAddr(sd) // void *dest_addr [LONG ONLY] // + The destination address for transfer of Long payloads // + If non-NULL dest_addr was passed to Prepare, this must // be the same value // extern void gex_AM_CommitRequestMedium[M]( gex_AM_SrcDesc_t sd, gex_AM_Index_t handler, size_t nbytes [,arg0, ... ,argM-1]); extern void gex_AM_CommitReplyMedium[M]( gex_AM_SrcDesc_t sd, gex_AM_Index_t handler, size_t nbytes [,arg0, ... ,argM-1]); extern void gex_AM_CommitRequestLong[M]( gex_AM_SrcDesc_t sd, gex_AM_Index_t handler, size_t nbytes, void *dest_addr [,arg0, ... ,argM-1]); extern void gex_AM_CommitReplyLong[M]( gex_AM_SrcDesc_t sd, gex_AM_Index_t handler, size_t nbytes, void *dest_addr [,arg0, ... ,argM-1]); // // Extended API // // NOTE 1: Return value // // An Extended API initiation call is a "no op" IF AND ONLY IF the value // GEX_FLAG_IMMEDIATE is included in the 'flags' argument AND the // conduit could determine that it would need to block temporarily to // obtain the necessary resources. The blocking and NBI calls return a // non-zero value *only* in the "no op" case, while the NB calls return // GEX_EVENT_NO_OP. // // In the "no op" case no communication has been performed and the // contents of the location named by the 'lc_opt' argument (if any) is // undefined. // // NOTE 2a: The 'lc_opt' argument for local completion (NBI case) // // Implicit-event non-blocking Puts have an 'lc_opt' argument which // controls the behavior with respect to local completion. The value can // be the pre-defined constants GEX_EVENT_NOW, GEX_EVENT_DEFER, or // GEX_EVENT_GROUP. The NOW constant requires that the call not // return until the operation is locally complete. 
The DEFER constant // permits the call to return without delaying for local completion, // which may occur as late as in the call which syncs (retires) the // operation (could be an explicit-event call if using an NBI access // region). The GROUP constant allows the call to return without // delaying for local completion and adds the operation to the set for // which gex_NBI_{Test,Wait}() calls may check local completion when // passed GEX_EC_LC. // // NOTE 2b: The 'lc_opt' argument for local completion (NB case) // // Explicit-event non-blocking Puts have an 'lc_opt' argument which // controls the behavior with respect to local completion. The value can // be the pre-defined constants GEX_EVENT_NOW or GEX_EVENT_DEFER, or // a pointer to a variable of type 'gex_Event_t'. The NOW // constant requires that the call not return until the operation is // locally complete. The DEFER constant permits the call to return // without delaying for local completion, which may occur as late as in // the call which syncs (retires) the returned event. Use of a pointer // to a variable of type 'gex_Event_t' allows the call to return without // delay, and allows the client to check local completion using // gex_Event_{Test,Wait}*(). // // NOTE 3: Local addressing // // Let "the local endpoint" refer to the endpoint associated with 'tm'. // // Let "device segment" denote a segment created using gex_Segment_Create() // with a 'kind' argument other than GEX_MK_HOST. // // Let "in the local bound segment" mean that a given range of addresses // lies entirely within the range of the segment as might be determined by // applying gex_Segment_QueryAddr() and gex_Segment_QuerySize() to the // segment bound to the local endpoint.
// // The local address (src of a Put, dest of a Get) is interpreted and // constrained as follows: // + [UNIMPLEMENTED] In the presence of GEX_FLAG_SELF_SEG_OFFSET in 'flags' // the address argument is interpreted as an unsigned offset in bytes from // the start address of the local endpoint's (required) bound segment. // The memory so named must be in the local bound segment. // + In the absence of GEX_FLAG_SELF_SEG_OFFSET in 'flags': // - If the local endpoint has a bound device segment, then the address is // a device address and the memory so named must be in the local bound // segment. // - Otherwise the address is a host address, and the named memory is not // constrained to lie within the local bound segment (if any). // // NOTE 4: Remote addressing // // Let "the remote endpoint" refer to the endpoint named by '(tm,rank)'. // // Let "device segment" denote a segment created using gex_Segment_Create() // with a 'kind' argument other than GEX_MK_HOST. // // Let "in the remote bound segment" mean that a given range of addresses // lies entirely within the range of the segment as might be determined from // the owneraddr and size properties obtained using gex_Segment_QueryBound() // or gex_EP_QueryBoundSegmentNB() applied to the '(tm,rank)' tuple. // // The remote address (dest of a Put, src of a Get) is interpreted as // follows: // + [UNIMPLEMENTED] In the presence of GEX_FLAG_PEER_SEG_OFFSET in 'flags' // the address argument is interpreted as an unsigned offset in bytes from // the start address of the remote endpoint's bound segment. // + In the absence of GEX_FLAG_PEER_SEG_OFFSET in 'flags': // - If the bound segment of the remote endpoint is a device segment, then // the address is a device address. // - Otherwise the address is a host address. // In all cases, the remote memory must be in the remote bound segment. 
// // NOTE 5: Overlap // // Within a single gex_RMA_* operation, if the specified source and destination // memory regions overlap, behavior is undefined. High-quality implementations // may choose to diagnose such errors. // Put int gex_RMA_PutBlocking( gex_TM_t tm, // Names a local context ("return address") gex_Rank_t rank, // Together with 'tm', names a remote context void *dest, // Remote (destination) address (or OFFSET) const void *src, // Local (source) address (or OFFSET) size_t nbytes, // Length of xfer gex_Flags_t flags); // Flags to control this operation int gex_RMA_PutNBI( gex_TM_t tm, gex_Rank_t rank, void *dest, const void *src, size_t nbytes, gex_Event_t *lc_opt, // Local completion control (see above) gex_Flags_t flags); gex_Event_t gex_RMA_PutNB( gex_TM_t tm, gex_Rank_t rank, void *dest, const void *src, size_t nbytes, gex_Event_t *lc_opt, gex_Flags_t flags); // Get int gex_RMA_GetBlocking( // Returns non-zero *only* in "no op" case (IMMEDIATE flag) gex_TM_t tm, // Names a local context ("return address") void *dest, // Local (destination) address (or OFFSET) gex_Rank_t rank, // Together with 'tm', names a remote context void *src, // Remote (source) address (or OFFSET) size_t nbytes, // Length of xfer gex_Flags_t flags); // Flags to control this operation int gex_RMA_GetNBI( // Returns non-zero *only* in "no op" case (IMMEDIATE flag) gex_TM_t tm, void *dest, gex_Rank_t rank, void *src, size_t nbytes, gex_Flags_t flags); gex_Event_t gex_RMA_GetNB( gex_TM_t tm, void *dest, gex_Rank_t rank, void *src, size_t nbytes, gex_Flags_t flags); // Value-based payloads gex_RMA_Value_t gex_RMA_GetBlockingVal( gex_TM_t tm, gex_Rank_t rank, void *src, size_t nbytes, gex_Flags_t flags); int gex_RMA_PutBlockingVal( gex_TM_t tm, gex_Rank_t rank, void *dest, gex_RMA_Value_t value, size_t nbytes, gex_Flags_t flags); int gex_RMA_PutNBIVal( gex_TM_t tm, gex_Rank_t rank, void *dest, gex_RMA_Value_t value, size_t nbytes, gex_Flags_t flags); gex_Event_t gex_RMA_PutNBVal( 
gex_TM_t tm, gex_Rank_t rank, void *dest, gex_RMA_Value_t value, size_t nbytes, gex_Flags_t flags); // NBI Access regions: // These are interoperable with, and have the same semantics as, // gasnet_{begin,end}_nbi_accessregion() // flags are reserved for future use and must currently be zero. void gex_NBI_BeginAccessRegion(gex_Flags_t flags); gex_Event_t gex_NBI_EndAccessRegion(gex_Flags_t flags); // Event test/wait operations // The operation is indicated by the suffix // + _Test: no Poll call is made, returns zero on success, and non-zero otherwise. // + _Wait: Polls until success, void return // // In general it is not permitted to test or wait on a leaf event after // synchronization of its corresponding root event because synchronization of a // root event *implicitly* synchronizes any/all leaves. // However, when using NB event array APIs gex_Event_{Test,Wait}{Some,All}() // one may mix leaf events with their corresponding root events in the same // array without concern for their relative order. In other words, placement // of a leaf event later in the array than its corresponding root event is // not "test or wait on a leaf event after synchronization of its corresponding // root event". // Completion of a single NB event // Success is defined as when the passed event is complete. int gex_Event_Test (gex_Event_t event); void gex_Event_Wait (gex_Event_t event); // Completion of an NB event array - "some" // Success is defined as one or more events have been completed, OR // the input array contains only GEX_EVENT_INVALID (which are otherwise ignored). // Completed events, if any, are overwritten with GEX_EVENT_INVALID. // These are the same semantics as gasnet_{try,wait}_syncnb_some(), // except that "Test" does not AMPoll as "try" does. // flags are reserved for future use and must currently be zero. 
int gex_Event_TestSome (gex_Event_t *pevent, size_t numevents, gex_Flags_t flags); void gex_Event_WaitSome (gex_Event_t *pevent, size_t numevents, gex_Flags_t flags); // Completion of an NB event array - "all" // Success is defined as all passed events have been completed, OR // the input array contains only GEX_EVENT_INVALID (which are otherwise ignored). // Completed events, if any, are overwritten with GEX_EVENT_INVALID. // These are the same semantics as gasnet_{try,wait}_syncnb_all(), // except that "Test" does not AMPoll as "try" does. // flags are reserved for future use and must currently be zero. int gex_Event_TestAll (gex_Event_t *pevent, size_t numevents, gex_Flags_t flags); void gex_Event_WaitAll (gex_Event_t *pevent, size_t numevents, gex_Flags_t flags); // Identifiers to name Event Categories (such as local completion from NBI Puts) // TODO: will eventually include categories for collectives, VIS metadata, ... typedef [some integer type] gex_EC_t; #define GEX_EC_ALL ((gex_EC_t)???) #define GEX_EC_GET ((gex_EC_t)???) #define GEX_EC_PUT ((gex_EC_t)???) #define GEX_EC_AM ((gex_EC_t)???) #define GEX_EC_LC ((gex_EC_t)???) #define GEX_EC_RMW ((gex_EC_t)???) // Sync of specified subset of NBI operations // The 'event_mask' argument is a bitwise-OR of GEX_EC_* constants // flags are reserved for future use and must currently be zero. int gex_NBI_Test(gex_EC_t event_mask, gex_Flags_t flags); void gex_NBI_Wait(gex_EC_t event_mask, gex_Flags_t flags); // Extract a leaf event from the root event // NOTE: name is subject to change // // The 'root' argument must be a valid root event, such as returned by an // NB initiation function (gex_*NB()) or gex_NBI_EndAccessRegion. // It is permitted to be GEX_EVENT_INVALID (but not GEX_EVENT_NO_OP). // The 'event_category' argument is a GEX_EC_* constant. // It cannot be a bitwise-OR of multiple such values, nor GEX_EC_ALL.
// // There are additional validity constraints to be documented, such as one // cannot ask for an event that was "suppressed" by passing GEX_EVENT_NOW or GEX_EVENT_DEFER. // Violating those constraints gives undefined results (though we want a debug // build to report the violation). // // For root==GEX_EVENT_INVALID, or equivalently for // an event that has "already happened" the implementation may return // either GEX_EVENT_INVALID or a valid event that tests as done. The // implementation is not constrained to pick consistently between these two // options (and in the extreme could choose between them at random). // // This is a *query* and does not instantiate a new object, and so multiple // calls with the same argument (that don't return GEX_EVENT_INVALID) must return // the *same* event. gex_Event_t gex_Event_QueryLeaf( gex_Event_t root, gex_EC_t event_category); // // Neighborhood and Host: [EXPERIMENTAL] // // A "neighborhood" is defined as a set of GEX processes that can share // memory via the GASNet PSHM feature, and is abbreviated to Nbrhd. // // A "host" is an abstract boundary in the system hierarchy that is guaranteed // to be a superset of the neighborhood, but the exact definition may be // system-specific. Generally it encompasses processing resources associated // with a single physical address space and OS kernel image. When using // GASNet-Tools from the same release, it is guaranteed that the definition // for "host" is consistent with the following: // gasnett_cpu_count(), gasnett_getPhysMemSz() // However, there is no guarantee of correspondence to gasnett_gethostname(). // // As with all functions in the gex_System_*() namespace, the following queries // return information about the global GASNet job, independent of any // particular client, team or endpoint.
// Const-qualified struct type for describing a member of a neighborhood typedef const struct { gex_Rank_t gex_jobrank; // the Job Rank (as defined above) // Reserved for future expansion and/or internal-use fields } gex_RankInfo_t; // Query information about the neighborhood of the calling process. // // All arguments are pointers to locations for outputs, each of which // may be NULL if the caller does not need a particular value. // // info_p: // Receives the address of an array with elements of type // gex_RankInfo_t (defined above), which includes one entry // for each process in the neighborhood of the calling process. // Entries are sorted by increasing gex_jobrank. // The storage of this array is owned by GASNet and must not be // written to or free()ed. // High-quality implementations will store this array in shared memory // to reduce memory footprint. Therefore, clients should consider using // it in-place to avoid creating a less-scalable copy per process. // info_count_p: // Receives the number of processes in the neighborhood of the calling // process. This includes the caller, and is therefore always non-zero. // my_info_index_p: // Receives the 0-based index of the calling process relative to its // neighborhood. In particular, the following formula holds: // (*info_p)[*my_info_index_p].gex_jobrank == gex_System_QueryJobRank() // // Semantics in a resilient build will be defined in a later release. extern void gex_System_QueryNbrhdInfo( gex_RankInfo_t **info_p, gex_Rank_t *info_count_p, gex_Rank_t *my_info_index_p); // Query information about the Host of the calling process. // // Operates analogously to gex_System_QueryNbrhdInfo, except that instead of // querying information about the neighborhood, this function instead queries // information about the "host" enclosing the calling process and its // neighborhood. // // Argument semantics are identical to gex_System_QueryNbrhdInfo with // "neighborhood" replaced with "host". 
extern void gex_System_QueryHostInfo( gex_RankInfo_t **info_p, gex_Rank_t *info_count_p, gex_Rank_t *my_info_index_p); // Query information about the sets of Neighborhoods and Hosts // // All arguments are pointers to locations for outputs, each of which // may be NULL if the caller does not need a particular value. // // nbrhd_set_size_p: // Receives the number of neighborhoods in the job. // nbrhd_set_rank_p: // Receives the 0-based rank of the caller's neighborhood within // the set of neighborhoods in the job (a value between 0 and // nbrhd_set_size-1, inclusive). // host_set_size_p: // Receives the number of hosts in the job. // host_set_rank_p: // Receives the 0-based rank of the caller's host within the // set of hosts in the job (a value between 0 and host_set_size-1, // inclusive). // // In a non-resilient build, the values returned by this query are constant for // any given caller over the lifetime of the job. Semantics in a resilient // build will be defined in a later release. // // Information returned by this query is guaranteed to be self-consistent: // (where the value received into the variable referenced by "PROPERTY_p" // is referred to below as "PROPERTY") // + All callers receive identical nbrhd_set_size. // + Callers in the same neighborhood receive identical nbrhd_set_rank. // + Callers in distinct neighborhoods receive distinct nbrhd_set_rank. // + All callers receive identical host_set_size. // + Callers on the same host receive identical host_set_rank. // + Callers on distinct hosts receive distinct host_set_rank. // Other than these rules, and the [0,set_size) ranges, there are no other // guarantees as to how the ranks are assigned.
extern void gex_System_QueryMyPosition( gex_Rank_t *nbrhd_set_size_p, gex_Rank_t *nbrhd_set_rank_p, gex_Rank_t *host_set_size_p, gex_Rank_t *host_set_rank_p); // // Handler-safe locks (HSLs) // Lock semantics are identical to those in GASNet-1 // // Type for an HSL // This type is interoperable with gasnet_hsl_t typedef {...} gex_HSL_t; // Static-initializer for an HSL // Synonymous with GASNET_HSL_INITIALIZER #define GEX_HSL_INITIALIZER {...} // The following operations on HSLs are semantically identical // to the corresponding gasnet_hsl_* functions: void gex_HSL_Init (gex_HSL_t *hsl); void gex_HSL_Destroy(gex_HSL_t *hsl); void gex_HSL_Lock (gex_HSL_t *hsl); void gex_HSL_Unlock (gex_HSL_t *hsl); int gex_HSL_Trylock(gex_HSL_t *hsl); // // Data types for atomics and reductions // // GASNet-EX defines (as preprocess-time constants) at least the following // data type codes for use with remote atomic and reduction operations. // These are known as the "built-in data types". // // GEX Constant C Data Type // ------------ ----------- // Integer types: // GEX_DT_I32 int32_t // GEX_DT_U32 uint32_t // GEX_DT_I64 int64_t // GEX_DT_U64 uint64_t // Floating-point types: // GEX_DT_FLT float // GEX_DT_DBL double // // In addition to the built-in data types, the following is used to denote an // opaque user-defined data type in the context of a reduction operation. // GEX_DT_USER // // It is guaranteed that all GEX_DT_* values are represented by disjoint non-zero bits. // // Currently, Remote Atomics support all built-in data types listed above. // Currently, Reductions support all data types (built-in and user-defined) // listed above. // // Note that GASNet-EX supports signed and unsigned exact-width integer types. // Any mapping to types such as 'int', 'long' and 'long long' is the // responsibility of the client. typedef [some integer type] gex_DT_t; #define GEX_DT_??? ((gex_DT_t)???)
// For each GEX_DT_* above // // Operation codes (opcodes) for atomics and reductions // // GASNet-EX defines (as preprocess-time constants) at least the following // operation codes for use with atomic and reduction operations. Not all // operations are valid in all contexts, as indicated below. // See documentation for the atomic and reduction operations for more details. // // The following apply to the operation definitions which follow: // For atomics: // 'op0' denotes the value at the target location prior to the operation // 'op1' and 'op2' denote the value of the corresponding function arguments // 'expr' denotes the value of the target location after the operation // Fetching operations always return 'op0' // For reductions: // 'op0' represents the "left" (first) reduction operand // 'op1' represents the "right" (second) reduction operand // 'expr' denotes the value of the result of the pairwise reduction // // Except where otherwise noted, the expressions below are evaluated according // to C language rules. // // The following are known as the "built-in operations": // + Non-fetching Operations // - Binary Arithmetic Operations // Valid for Atomics and Reductions // Valid for all built-in data types // GEX_OP_ADD expr = (op0 + op1) // GEX_OP_MULT expr = (op0 * op1) // GEX_OP_MIN expr = ((op0 < op1) ? op0 : op1) // GEX_OP_MAX expr = ((op0 > op1) ? 
op0 : op1) // - Non-commutative Binary Arithmetic Operations // Valid only for Atomics // Valid for all built-in data types // GEX_OP_SUB expr = (op0 - op1) // - Unary Arithmetic Operations // Valid only for Atomics // Valid for all built-in data types // GEX_OP_INC expr = (op0 + 1) // GEX_OP_DEC expr = (op0 - 1) // - Bit-wise Operations // Valid for Atomics and Reductions // Valid only for Integer built-in types // GEX_OP_AND expr = (op0 & op1) // GEX_OP_OR expr = (op0 | op1) // GEX_OP_XOR expr = (op0 ^ op1) // + Fetching Operations // Valid only for Atomics // Each GEX_OP_Fxxx performs the same operation as GEX_OP_xxx, above, // and is valid for the same types. // Additionally these operations fetch 'op0' as the result of the atomic. // - Binary Arithmetic Operations // GEX_OP_FADD // GEX_OP_FMULT // GEX_OP_FMIN // GEX_OP_FMAX // - Non-commutative Binary Arithmetic Operations // GEX_OP_FSUB // - Unary Arithmetic Operations // GEX_OP_FINC // GEX_OP_FDEC // - Bit-wise Operations // GEX_OP_FAND // GEX_OP_FOR // GEX_OP_FXOR // + Accessor Operations // Valid only for Atomics // Valid for all built-in data types // - Non-fetching Accessor // GEX_OP_SET expr = op1 (writes 'op1' to the target location) // GEX_OP_CAS expr = ((op0 == op1) ? op2 : op0) // With a guarantee to be free of spurious failures as from // cache events. // - Fetching Accessors (fetch 'op0' as the result of the atomic) // GEX_OP_GET expr = op0 (does not modify the target location) // GEX_OP_SWAP expr = op1 (swaps 'op1' with the target location) // GEX_OP_FCAS Fetching variant of GEX_OP_CAS // // NOTE: GEX_OP_CSWAP is a deprecated alias for GEX_OP_FCAS // // In addition to the built-in operations, the following constants are defined: // + User-defined Operations // Valid only for Reductions // Valid for all built-in data types and GEX_DT_USER // The client code, not this specification, determines the operation. 
// - Commutative User-Defined Reduction Operation // GEX_OP_USER // - Non-commutative User-Defined Reduction Operation // GEX_OP_USER_NC // // It is guaranteed that all GEX_OP_* values are represented by disjoint non-zero bits. typedef [some integer type] gex_OP_t; #define GEX_OP_??? ((gex_OP_t)???) // For each GEX_OP_* above // Opcode conversion // // The macro GEX_OP_TO_FETCHING(op) takes a non-fetching opcode as an argument // and returns the corresponding fetching opcode. The value of 'op' must be // GEX_OP_SET, GEX_OP_CAS or an opcode listed under "Non-fetching Operations", // above. All other values return undefined results. // // The macro GEX_OP_TO_NONFETCHING(op) takes a fetching opcode as an argument // and returns the corresponding non-fetching opcode. The value of 'op' must be // GEX_OP_SWAP, GEX_OP_FCAS or an opcode listed under "Fetching Operations", // above. All other values return undefined results. // // In addition to the natural result when applied to the arithmetic opcodes and // (F)CAS, SWAP/SET are considered to be a fetching/non-fetching pair: // GEX_OP_TO_FETCHING(GEX_OP_SET) == GEX_OP_SWAP // GEX_OP_TO_NONFETCHING(GEX_OP_SWAP) == GEX_OP_SET #define GEX_OP_TO_FETCHING(op) ??? #define GEX_OP_TO_NONFETCHING(op) ??? //---------------------------------------------------------------------- // // Remote Atomic Operations // APIs in this section are provided by gasnet_ratomic.h // // // Atomic Domains // // An "Atomic Domain" is an opaque scalar type. // // Just as all point-to-point RMA calls take a gex_TM_t argument, calls to // initiate Remote Atomic operations take a gex_AD_t, where "AD" is short for // "Atomic Domain". // // + Creation of an AD associates it with a specific gex_TM_t. // // This association defines the memory locations which can be accessed using // the AD. Only memory within the address space of a process hosting an // endpoint that is a member of this team may be accessed by atomic // operations which pass a given AD. 
// // Currently, there is an additional constraint that target locations must // lie within the bound segments of the team's endpoints. // // + Creation of an AD associates with it one data type and a set of operations. // // This permits selection of the best possible implementation which can // provide correct results for the given set of operations on the given data // type. This is important because the best possible implementation of an // operation "X" may not be compatible with operation "Y". So, this best // "X" can only be used when it is known that "Y" will not be used. This // issue arises because a NIC may offload "X" (but not "Y") and use of a // CPU-based implementation of "Y" would not be coherent with the NIC // performing a concurrent "X" operation. // // + Use of an AD is conceptually tied to specific data and time. // // Correct operation of gex_AD_Op*() APIs is only assured if the client code // can ensure that there are no other accesses to the same target locations // concurrent with the operations on a given AD. // // The prohibition against concurrent access applies to all access by CPUs, // GPUs and any other hardware that references memory; and to all GASNet-EX // operations other than the atomic accesses defined in this section. The // write by a fetching remote atomic operation to an output location on the // initiator is NOT an atomic access for the purposes of this prohibition. // // Prohibited accesses by CPUs and GPUs include not only load/store, but // also any atomic operations provided by languages such as C11 and C++11, // by compiler intrinsics, operating system facilities, etc. // // This prohibition also extends to concurrent access via multiple ADs, even // if created with identical arguments. However, this specification does // not prohibit concurrent access to distinct (non-overlapping) data using // distinct ADs. // // GASNet-EX does not provide any mechanisms to detect violations of the // prohibitions described above.
// // + Atomic Access Phases [INCOMPLETE / OPEN ISSUE] // // It is the intent of this specification to permit access to the same data // using remote atomics and other (non-atomic) mechanisms, and to the same // data using multiple atomic domains. However, such different accesses // must be NON-concurrent. This separation is into what we will call // "atomic access phases": // During a given atomic access phase, any given byte in the memory of any // GASNet process shall NOT be accessed by more than ONE of: // (1) gex_AD_Op*() calls that reference that byte as part of the target // object. // (2) any means except for (1). // Furthermore, during a given atomic access phase, all gex_AD_Op*() calls // accessing a given target byte shall use the same AD object. // // Note that the byte-granularity of this definition has consequences for // the use of union types and of type-punning, either of which may result in // a given byte being considered part of multiple C objects. // // The means for a transition between atomic access phases has not yet been // fully specified. We do NOT expect that the resolution to this open issue // will invalidate any interface defined in this current specification. // However, when implementations of remote atomics are introduced with // properties such as caching, it may become necessary for clients using // remote atomics to take additional steps to transition between atomic // access phases. // // FOR *THIS* RELEASE we believe it is sufficient to separate atomic access // phases by a barrier synchronization. However, it is necessary to ensure // that any GASNet-EX accesses which may conflict have been completed // (synced) prior to the barrier. This includes completing all remote atomic // operations before a transition to non-atomic access or accesses by a // different atomic domain; and completing all other GASNet data-movement // operations (RMA, Collective, etc.) before a transition to atomic access.
// // + Memory Ordering/Fencing/Consistency // // By default calls to the gex_AD_Op*() APIs are not guaranteed to be ordered // with respect to other memory accesses. However, one can request Acquire // or Release fencing through the use of 'flags' as described in more detail // with the description of gex_AD_Op*(). The definitions given below for // Acquire and Release are intended to be compatible with the same concepts // in the C11 and C++11 language specifications for atomic operations. // Opaque scalar type for Atomic Domain typedef ... gex_AD_t; // Pre-defined constant, guaranteed to be zero #define GEX_AD_INVALID ((gex_AD_t)0) // Create an Atomic Domain // // This is a collective call over the team named by the 'tm' argument that // creates an atomic domain for the operations in the 'ops' argument performed // on data type 'dt'. // // The 'ad_p' argument is an OUT parameter that receives a reference to the // newly created atomic domain. // // The 'dt' and 'ops' arguments define the type and operations. // + 'dt' is a value of type gex_DT_t // + 'ops' is a bitwise-OR of one or more GEX_OP_* constants of type gex_OP_t. // If 'dt' and 'ops' do not define only valid combinations (as described in the // definitions of gex_OP_t), then the behavior is undefined. // // The 'flags' argument provides additional control over the created domain. // + GEX_FLAG_AD_FAVOR_{MY_RANK,MY_NBRHD,REMOTE} // This family of mutually-exclusive flags are hints to influence the // selection of implementation to favor PERFORMANCE of accesses initiated // for target locations having certain locality properties. Presence or // absence of these flags will never impact correctness. // - GEX_FLAG_AD_FAVOR_MY_RANK: // Favor calls with the initiating and target endpoint being the same. // (e.g. use of GEX_FLAG_AD_MY_RANK would be legal at initiation).
// - GEX_FLAG_AD_FAVOR_MY_NBRHD: // Favor calls with the initiating and target endpoints belonging to // processes in the same "Neighborhood", as defined previously. (e.g. // use of GEX_FLAG_AD_MY_NBRHD would be legal at initiation). // - GEX_FLAG_AD_FAVOR_REMOTE: // Favor calls with the initiating and target endpoints belonging to // distinct Neighborhoods. // If a call to gex_AD_Create does not include any flag from this group, the // behavior is not required to correspond to any of the behaviors described // above. A high-quality implementation should examine the composition of // 'tm' and when possible favor either RANK (TM with a single member) or // NBRHD (TM with all members in the same Neighborhood). // // The 'dt', 'ops' and 'flags' parameters are each single-valued. // void gex_AD_Create( gex_AD_t *ad_p, // Output gex_TM_t tm, // The team gex_DT_t dt, // The data type gex_OP_t ops, // OR of operations gex_Flags_t flags); // flags // Destroy an Atomic Domain // // This is a collective call over the team named at creation of the 'ad' // argument that destroys an atomic domain. // // All operations initiated on the atomic domain must be complete prior to any // rank making this call (or the behavior is undefined). In practice, this // means completing (syncing) all atomic operations at their initiators, // followed by a barrier prior to calling this function. // // [INCOMPLETE / OPEN ISSUE] // Once this specification includes a complete specification of atomic access // phases, this call will provide any/all aspects of division between such // phases which are stronger than the quiescence pre-condition. We do NOT // expect that the resolution to this open issue will invalidate this API's // specification. // // Though this function is collective, it does not guarantee barrier // synchronization.
// void gex_AD_Destroy(gex_AD_t ad); // // Query operations on gex_AD_t // // Query the parameters passed when atomic domain was created gex_Flags_t gex_AD_QueryFlags(gex_AD_t ad); gex_TM_t gex_AD_QueryTM(gex_AD_t ad); gex_DT_t gex_AD_QueryDT(gex_AD_t ad); gex_OP_t gex_AD_QueryOps(gex_AD_t ad); // Client-Data (CData) support for gex_AD_t // These calls provide the means for the client to set and retrieve one void* // field of client-specific data for each AD, which is NULL for a newly // created AD. void gex_AD_SetCData(gex_AD_t ad, const void *val); void* gex_AD_QueryCData(gex_AD_t ad); // // Remote Atomic Operations // // Remote atomic operations are point-to-point communication calls that // perform read-modify-write and accessor operations on typed data (the // "target location") in the address space of a process hosting an endpoint // that is a member of the team passed to gex_AD_Create(). // // These operations are guaranteed to be atomic with respect to all other // accesses to the same target location made using the same AD, from any rank. // When using a thread-safe endpoint this includes atomicity of concurrent // access by multiple threads within a rank. No other atomicity guarantees // are provided. [Currently all endpoints are "thread-safe" when using a // GASNET_PAR build, and no endpoints are thread-safe otherwise.] // // Despite "Remote" in the name, it is explicitly permitted to apply these // operations to the caller's own memory (and a high-quality implementation // will optimize this case when possible). // // Additional semantics are described following the "Argument synopsis". // // Return value: // // Atomic operations are available with explicit-event (NB) or implicit-event // (NBI) completion, with different return types: // // + The gex_AD_OpNB_*() APIs return a gex_Event_t. 
// // If (and only if) the GEX_FLAG_IMMEDIATE flag is passed to remote atomic // initiation, these calls are *permitted* to return GEX_EVENT_NO_OP to // indicate that no operation was initiated. Otherwise, the return value // is an event to be used in calls to gex_Event_{Test,Wait}*() to check for // completion. These calls may return GEX_EVENT_INVALID if the operation // was completed synchronously. // // + The gex_AD_OpNBI_*() APIs return an integer. // // If (and only if) the GEX_FLAG_IMMEDIATE flag is passed to remote atomic // initiation, these calls are *permitted* to return non-zero to indicate // that no operation was initiated. Otherwise, the return value is zero // and a gex_NBI_{Test,Wait}() call must be used to check completion. For // the opcodes GEX_OP_SET and GEX_OP_GET, one should use GEX_EC_PUT and // GEX_EC_GET, respectively, to check completion. All other opcodes // correspond to an event category of GEX_EC_RMW. // // Data types and prototypes: // The APIs for remote atomic initiation are typed. Therefore, descriptions // and prototypes below use "[DATATYPE]" to denote the tokens corresponding // to the "???" in each supported GEX_DT_???, and "[TYPE]" to denote the // corresponding C type. There is an instance of each function (NB and NBI) // for each supported data type. // See the "Data types for atomics and reductions" section for which data // types are supported for remote atomics, and their corresponding C types. // // "Fetching": // Text below uses "fetching" to denote operations that write to an output // location ('*result_p') at the initiator (and "non-fetching" for all // others). // See the "Operation codes (opcodes) for atomics and reductions" section // for which opcodes are fetching vs non-fetching. // // Endpoints: // Let 'tm' denote the corresponding argument passed to gex_AD_Create(). // The endpoint associated with 'tm' is known as the "initiating endpoint".
// The endpoint named by (tm, tgt_rank) is known as the "target endpoint". // // Argument synopsis: // gex_AD_t ad // + The Atomic Domain for this operation. // [TYPE] * result_p // + Address (or offset) of the output location for fetching operations. // Ignored for non-fetching operations. // gex_Rank_t tgt_rank // + Rank of the target location // void * tgt_addr // + Address (or offset) of the target location // gex_OP_t opcode // + Indicates the operation to perform atomically. // Operations are described with the definition of gex_OP_t. // [TYPE] operand1 // + First operand, if any. // Ignored if the given opcode takes no operands. // [TYPE] operand2 // + Second operand, if any. // Ignored if the given opcode takes fewer than two operands. // gex_Flags_t flags // + Per-operation flags // A bitwise OR of zero or more of the GEX_FLAG_* constants. // // Semantics of gex_AD_Op*(): // // + Successful synchronization of a fetching remote atomic operation means // that the local output value (at *result_p) is ready to be examined, and // will contain a value that was held at the target location at some time in // the interval between the call to the initiation function and the // successful completion of the synchronization. This value will be the one // present at the start of the atomic operation and denoted as 'op0' in the // definition of the applicable opcode. // [THIS PARAGRAPH IS NOT INTENDED TO BE A FORMAL MODEL. // HOWEVER, ONE IS FORTHCOMING.] // // + Successful synchronization of any remote atomic operation means the // operation has been performed atomically (including any constituent Read // and Write access to the target location) and any remote atomic issued // subsequently by any thread on any rank with the same AD and target // location will observe the Write, if any (assuming no intervening updates). // [THIS PARAGRAPH IS NOT INTENDED TO BE A FORMAL MODEL. // HOWEVER, ONE IS FORTHCOMING.] // // + Atomicity guarantees apply only to "target locations". 
They do not apply // to the output of a fetching operation. Therefore, clients must check for // operation completion before the output value of a fetching operation can // safely be read (analogous to the destination of a gex_RMA_Get*()). // Additionally, a given 'result_p' location must not be used as the target // location of remote atomic operations in the same atomic access phase. // (see "Atomic Access Phases"). // // + If two target objects accessed by gex_AD_Op*() overlap (partially or // completely) those accesses are subject to the restrictions documented in // "Atomic Access Phases" above. In particular, such accesses are // permitted during the same atomic access phase *only* if the accessed // bytes exactly coincide and the calls use the same AD object. // // + Currently, the target location must be contained entirely within the // bound segment of the target endpoint (though this may eventually be // relaxed). // // + The data type associated with 'ad' and that of the gex_AD_Op*() call must // be equal. // // + The 'result_p' argument to fetching operations must be a valid pointer to // an object of the given type [TYPE] on the initiator. (See also the // description of the [UNIMPLEMENTED] GEX_FLAG_SELF_SEG_OFFSET flag, below.) // // + The 'result_p' argument to non-fetching operations is ignored. // // + The 'tgt_rank' argument names the target endpoint. By default, this // argument must be a valid rank relative to the team associated with the AD // at its creation. However, in the presence of GEX_FLAG_RANK_IS_JOBRANK, // this argument instead names the target endpoint by a valid rank in the // primordial team, created by gex_Client_Init(). In this latter case the // named endpoint must be a member of the team associated with the AD at its // creation.
// // + The 'tgt_addr' argument names the target location, which must be properly // aligned for its data type [TYPE] and (for any operation except // GEX_OP_SET) must contain an object with compatible effective type, // including a qualified version of [TYPE], and (for integer types only) // including signed or unsigned variants. (See also the description of the // [UNIMPLEMENTED] GEX_FLAG_PEER_SEG_OFFSET flag, below.) // // + The 'opcode' argument gives the operation to be performed atomically. // See the "Operation codes (opcodes) for atomics and reductions" section // for definitions of each operation. // // + The 'opcode' must be a single GEX_OP_* value, not a bitwise OR of two or // more GEX_OP_* values. // // + The 'opcode' must be a member of the set of opcodes passed to // gex_AD_Create(). // // + Operations on floating-point data types are not guaranteed to obey all // rules in the IEEE 754 standard even when the C float and double types // otherwise do conform. Deviations from IEEE 754 include (at least): // - Operations on signalling NaNs have undefined behavior. // - (F)CAS *may* be performed as if on integers of the same width. // This could result in non-conforming behavior with quiet NaNs // or negative zero. // - MIN, MAX, FMIN and FMAX *may* be performed as if on "sign // and magnitude representation integers" of the same width. // This could result in non-conforming behavior with quiet NaNs. // (see https://en.wikipedia.org/wiki/IEEE_754-1985, and especially // the section Comparing_floating-point_numbers) // [THIS PARAGRAPH MAY NOT BE A COMPLETE LIST OF NON-IEEE BEHAVIORS] // // + If the given opcode requires one or more operands, the 'operand1' // argument provides the first ('op1' in the gex_OP_t documentation). // Otherwise, 'operand1' is ignored. // // + If the given opcode requires two operands, the 'operand2' argument // provides the second ('op2' in the gex_OP_t documentation). // Otherwise, 'operand2' is ignored. 
// // + The 'flags' argument must either be zero, or a bitwise OR of one or more // of the following flags. // - GEX_FLAG_IMMEDIATE: the call is permitted (but not required) to // return a distinguishing value without initiating any communication if // the conduit could determine that it would need to block temporarily // to obtain the necessary resources. The NBI calls return a non-zero // value (only) in this "no op" case, while the NB calls will return // GEX_EVENT_NO_OP. // - At most one flag from the following mutually-exclusive group: // - GEX_FLAG_AD_MY_RANK: asserts that the initiating endpoint and target // endpoint are the same endpoint. This may allow the implementation to // perform the operation more efficiently. // The precise definition of the assertion is: // (tgt_rank == gex_TM_QueryRank(gex_AD_QueryTM(ad))). // - GEX_FLAG_AD_MY_NBRHD: asserts that the target EP belongs to // a process within the "Neighborhood" (defined earlier in this // document) of the calling process. This may allow the // implementation to perform the operation more efficiently. // - GEX_FLAG_AD_REL: this atomic operation shall perform a "release". // Within the thread that initiates this operation, memory accesses by // the processor, issued before the initiation call, shall not be // reordered after that call. Additionally, this includes accesses to // memory by any GASNet operations synchronized by that thread before // initiation. However, there is no ordering with respect to other // GASNet operations. // - GEX_FLAG_AD_ACQ: this atomic operation shall perform an "acquire". // Within the thread that synchronizes this operation, memory accesses by // the processor, issued after the synchronization call, shall not be // reordered before that call. Additionally, this includes accesses to // memory by any GASNet operations initiated by that thread after // synchronization. However, there is no ordering with respect to other // GASNet operations. 
// - GEX_FLAG_RANK_IS_JOBRANK: this flag indicates that the 'tgt_rank' // argument is a jobrank (rank in the primordial team created by // gex_Client_Init()), rather than the rank in the team associated with // the AD at its creation. // - [UNIMPLEMENTED] GEX_FLAG_SELF_SEG_OFFSET: 'result_p' is to be // interpreted as an offset relative to the bound segment of the // initiating endpoint (instead of as a virtual address). // Ignored for non-fetching operations. // - [UNIMPLEMENTED] GEX_FLAG_PEER_SEG_OFFSET: 'tgt_addr' is to be // interpreted as an offset relative to the bound segment of the target // endpoint (instead of as a virtual address). // gex_Event_t gex_AD_OpNB_[DATATYPE]( gex_AD_t ad, // The atomic domain [TYPE] * result_p, // Output location, if any, else ignored gex_Rank_t tgt_rank, // Rank of target endpoint void * tgt_addr, // Address (or OFFSET) of target location gex_OP_t opcode, // The operation (GEX_OP_*) to perform [TYPE] operand1, // First operand, if any, else ignored [TYPE] operand2, // Second operand, if any, else ignored gex_Flags_t flags); // Flags to control this operation int gex_AD_OpNBI_[DATATYPE]( gex_AD_t ad, [TYPE] * result_p, gex_Rank_t tgt_rank, void * tgt_addr, gex_OP_t opcode, [TYPE] operand1, [TYPE] operand2, gex_Flags_t flags); // End of section describing APIs provided by gasnet_ratomic.h //---------------------------------------------------------------------- // // Vector/Indexed/Strided (VIS) // // APIs in this section are provided by gasnet_vis.h // This API is an updated and expanded version of the VIS prototype offered // in GASNet-1, which is documented here: https://gasnet.lbl.gov/pubs/upc_memcpy_gasnet-2.0.pdf // The following semantics apply to all VIS functions, superseding the above document: // For NB variants, return type for all functions in this section is gex_Event_t. 
// For NBI/Blocking variants, the return type is int which is non-zero *only* in the // "no op" case (IMMEDIATE flag), exactly analogous to the gex_RMA_{Put,Get}*() functions. // // By default, local completion of all client-owned input buffers (ie payload // buffers and metadata arrays) passed to non-blocking initiation functions // can occur as late as operation completion, and thus must remain valid until // that time (as in GASNet-1). // As an exception, the metadata arrays passed to Strided variants ({src,dst}strides[] and count[]) // are guaranteed to be consumed synchronously before return from initiation. // gex_VIS_*Put{NB,NBI} optionally expose local completion of data payload buffers - // this functionality must be requested using the GEX_FLAG_ENABLE_LEAF_LC flag (see below). // // A future revision may expose other intermediate completion events [UNIMPLEMENTED] // // Within a single VIS operation, if any destination location overlaps a source location // or another destination location, then behavior is undefined. // Source locations are permitted to overlap with each other. // // The 'flags' argument must either be zero, or a bitwise OR of one or more // of the following flags: // - GEX_FLAG_IMMEDIATE: the call is permitted (but not required) to return a // distinguishing value without initiating any communication if the conduit // could determine that it would need to block temporarily to obtain the // necessary resources. The Blocking and NBI calls return a non-zero value // (only) in this "no op" case, while the NB calls will return GEX_EVENT_NO_OP. // - GEX_FLAG_ENABLE_LEAF_LC: (gex_VIS_*Put{NB,NBI} only) This flag requests // asynchronous local completion indication for the local data payload buffers // comprising the source region(s) of the VIS Put operation. Without this flag, // local completion behaves as GEX_EVENT_DEFER, i.e. folded into operation completion. 
// When this flag is passed to gex_VIS_*PutNBI, asynchronous local completion indication // behaves as specified in sec:`Extended API` for lc_opt=GEX_EVENT_GROUP. // When this flag is passed to gex_VIS_*PutNB, asynchronous local completion indication // behaves as specified in sec:`Extended API` for lc_opt=&(gex_Event_t variable). // In the latter case, the client should retrieve the gex_Event_t corresponding to // local completion by passing the root gex_Event_t returned by the Put initiation // call to gex_Event_QueryLeaf(), for example: // gex_Event_t VISput_RC = gex_VIS_VectorPutNB(..., GEX_FLAG_ENABLE_LEAF_LC); // gex_Event_t VISput_LC = gex_Event_QueryLeaf(VISput_RC, GEX_EC_LC); // The second call is only valid when GEX_FLAG_ENABLE_LEAF_LC was passed to the _VIS_*PutNB() // call, and otherwise has undefined behavior. // NOTE: All of the (void *) types in this API will eventually be gex_Addr_t [UNIMPLEMENTED] // // Vector and Indexed Puts and Gets // // These operate analogously to those in the GASNet-1 prototype gasnet_{put,get}[vi]_* API {gex_Event_t,int} gex_VIS_VectorGet{NB,NBI,Blocking}( gex_TM_t tm, // Names a local context size_t dstcount, gex_Memvec_t const dstlist[], // Local destination data description gex_Rank_t srcrank, // Together with 'tm', names a remote context size_t srccount, gex_Memvec_t const srclist[], // Remote source data description gex_Flags_t flags); // Flags to control this operation {gex_Event_t,int} gex_VIS_VectorPut{NB,NBI,Blocking}( gex_TM_t tm, gex_Rank_t dstrank, size_t dstcount, gex_Memvec_t const dstlist[], size_t srccount, gex_Memvec_t const srclist[], gex_Flags_t flags); {gex_Event_t,int} gex_VIS_IndexedGet{NB,NBI,Blocking}( gex_TM_t tm, size_t dstcount, void * const dstlist[], size_t dstlen, gex_Rank_t srcrank, size_t srccount, void * const srclist[], size_t srclen, gex_Flags_t flags); {gex_Event_t,int} gex_VIS_IndexedPut{NB,NBI,Blocking}( gex_TM_t tm, gex_Rank_t dstrank, size_t dstcount, void * const dstlist[], size_t 
dstlen, size_t srccount, void * const srclist[], size_t srclen, gex_Flags_t flags); // // Strided Puts and Gets // // These operate similarly to the GASNet-1 prototype gasnet_{put,get}s_* API, // but the metadata format is changing slightly in EX. Notable changes: // + The stride arrays change type from (const size_t[]) to (const ptrdiff_t[]) // + The 'count[0]' datum moves to a new parameter 'elemsz', and the subsequent // elements 'count[1..stridelevels]' "slide down", meaning 'count' now references // an array with 'stridelevels' entries (down from 'stridelevels+1'). // Note that 'elemsz' need not match the "native" element size of the underlying // datastructure, it just needs to indicate a size of contiguous data chunks // (eg, it could be the length of an entire row of doubles stored contiguously). // These interface changes enable the Strided interface to support more generalized // strided data movements (specifically, transpose and reflection). // // Degenerate cases: // // * If elemsz == 0: // the operation is a no-op and all other arguments are ignored // * If stridelevels == 0: // the operation is a contiguous copy of elemsz bytes, and the // srcstrides, dststrides, count arguments are all ignored // * If any entry in count[0..stridelevels-1] == 0: // the operation is a no-op and tm, rank, srcaddr, dstaddr are ignored // ({src,dst}strides must still reference valid arrays) // {gex_Event_t,int} gex_VIS_StridedGet{NB,NBI,Blocking}( gex_TM_t tm, void *dstaddr, const ptrdiff_t dststrides[], gex_Rank_t srcrank, void *srcaddr, const ptrdiff_t srcstrides[], size_t elemsz, const size_t count[], size_t stridelevels, gex_Flags_t flags); {gex_Event_t,int} gex_VIS_StridedPut{NB,NBI,Blocking}( gex_TM_t tm, gex_Rank_t dstrank, void *dstaddr, const ptrdiff_t dststrides[], void *srcaddr, const ptrdiff_t srcstrides[], size_t elemsz, const size_t count[], size_t stridelevels, gex_Flags_t flags); // // VIS Put Peer Completion [EXPERIMENTAL] // // The following call "arms" 
a peer completion callback that will // signal completion of the next VIS operation initiated by the current thread // to the (possibly remote) peer endpoint. When the selected VIS data movement // operation is complete with respect to the peer, an Active Message // (with some restrictions defined below) is delivered to the peer endpoint. // Currently this feature is only supported for VIS Put operations (not Gets). // // Argument synopsis: // gex_AM_Index_t handler // + The AM handler index to invoke at the peer endpoint. // const void * source_addr // size_t nbytes // + The local source address and length of an optional client-provided payload // to be delivered in the AM notification. // gex_Flags_t flags // + Unused in the current release, should be set to zero. // // + This call "arms" a peer completion handler and binds it to the next VIS operation // successfully initiated by the current thread. Only the next such operation is affected, after // which the peer completion binding for this thread is automatically "disarmed". // In this release, the VIS operation in question may not be passed GEX_FLAG_IMMEDIATE. // [this restriction may be relaxed in a future release] // + If `handler == 0` then any previous gex_VIS_SetPeerCompletionHandler() call from // this thread (if any) is "disarmed" and cancelled. // + Otherwise, `handler` specifies a 0-argument AM Medium Reply handler to invoke at the peer // endpoint selected by the VIS initiation call. The handler is invoked after the data // movement associated with the VIS operation is complete with respect to the peer process. // + The selected AM handler must have been registered at the peer endpoint using gex_EP_RegisterHandlers() // with gex_flags == GEX_FLAG_AM_MEDIUM | GEX_FLAG_AM_REPLY and gex_nargs == 0 // [this restriction may be relaxed in a future release] // and must adhere to the signature and restrictions of a 0-argument AM Medium Reply handler. // + If `nbytes == 0`, then `source_addr` is ignored. 
// + Otherwise, `nbytes` must be no greater than GEX_VIS_MAX_PEERCOMPLETION. // + If `nbytes > 0`, the specified source memory must remain valid and unchanged starting from the // call to gex_VIS_SetPeerCompletionHandler and lasting until the earlier of either operation completion // or local completion (if enabled) of the VIS operation is signalled to the initiating rank. // + The specified payload is delivered to the invoked AM Medium handler as usual. // + The thread running the peer completion AM handler is guaranteed to observe the results // of the VIS data movement operation upon which it depends. However if it wishes to hand-off // completion notification to other local threads it should use normal cross-thread // synchronization mechanisms (including issuing a write memory barrier on most architectures) // to ensure other cores also observe the payload delivery. void gex_VIS_SetPeerCompletionHandler(gex_AM_Index_t handler, const void *source_addr, size_t nbytes, gex_Flags_t flags); // The largest permissible size (in bytes) for a client payload in a VIS peer completion handler. // Guaranteed to be at least 127 bytes. #define GEX_VIS_MAX_PEERCOMPLETION ((size_t)???) // End of section describing APIs provided by gasnet_vis.h //---------------------------------------------------------------------- // // Collectives (Coll) // // With the exception of gex_Coll_BarrierNB(), APIs in this section are provided // by gasnet_coll.h // // This API is an updated and expanded version of the collectives prototype // offered in GASNet-1, and previously documented in docs/collective_notes.txt. // As these APIs are fully specified and implemented, the corresponding // portions of the GASNet-1 collectives prototype will be removed from // gasnet_coll.h and replaced with GEX variants. The GASNet-1 collectives API // signatures will not be supported in future releases. 
// The following semantics apply to all Coll functions: // For NB variants, return type for all functions in this section is gex_Event_t. // There are no NBI or Blocking variants at this time. // All functions in this section are "Collective Calls" as defined in the // Glossary. // Multiple collective operations from this section may be active concurrently, // over multiple teams or over a single team. There is no longer an exception // regarding gex_Coll_BarrierNB(), as was the case in an earlier release. // GASNet-EX collectives and the GASNet-1 barrier must not operate concurrently. // Specifically, the gasnet_barrier*() family of calls may not operate // concurrently with any gex_Coll_*() operation (including barriers) over the // primordial team (created by gex_Client_Init(flags=GEX_FLAG_USES_GASNET1) or // obtained from a call to gasnet_QueryGexObjects()). // - Collective operations over the primordial team issued prior to a // GASNet-1 barrier over the same team must be complete/synchronized prior // to initiating the barrier. // - No collective call may be initiated over the primordial team between the // initiation and completion/synchronization of any GASNet-1 barrier over // the same team. // [This restriction may be relaxed in a future release] // // Uses of the GASNet-1 barrier APIs which do not violate the restriction above // are permitted in the same program as GASNet-EX collectives. // In contrast to the UPC-influenced design of the GASNet-1 collectives, the // GASNet-EX collectives do not support "NOSYNC" or "ALLSYNC" flags (they // behave as if IN_MYSYNC|OUT_MYSYNC), nor single-valued address information. // [A future release may re-introduce single-valued addressing for symmetric // heaps via offset-based addressing] // In this respect, the intuition one may hold from MPI-3 non-blocking // collectives is largely applicable. 
// By default, local completion of all client-owned input buffers ('src' // arguments) passed to the collective initiation functions can occur as late // as operation completion, and thus these buffers must remain valid until that time. // // A future revision may expose intermediate completion events [UNIMPLEMENTED] // Upon operation completion (synchronization of the gex_Event_t returned at // initiation) the following will hold: // + Any input buffer ('src' argument) will be locally complete (analogous // to the source of a gex_RMA_PutNB() with GEX_EVENT_DEFER). // + Any output buffer ('dst' argument) is ready to be examined by the thread // performing the sync of the gex_Event_t (analogous to the destination of // a gex_RMA_GetNB()). // + Unless otherwise noted in the description of a given operation, there are // no guarantees regarding the state on other ranks participating in the // collective operation nor their associated input and output buffers. // Unless noted explicitly, no API in this section, other than a Barrier, is // required to synchronize the calling ranks. However, the implementation is // *permitted* to do so in any call in this section. // NOTE: All of the (void *) types for source and destination buffers in these // APIs will eventually be gex_Addr_t [UNIMPLEMENTED] // // Collectives Part I. Barrier // // Split-phase barrier over a Team // // This is a collective call over the team named by the 'tm' argument that // initiates a split-phase (non-blocking) barrier over the callers. // // + The return value is a root event which can be successfully synchronized // (return from gex_Event_Wait*() or zero return from gex_Event_Test*()) // only after all members of the team have issued a corresponding call. // + This call is non-blocking (may return before other team members have // issued a corresponding call). 
// + Calls to gex_Coll_BarrierNB() are not "compatible" with calls to // gasnet_barrier() or gasnet_barrier_notify() for the purpose of // determining collective calling order. // + The barrier operation provides the following memory ordering behaviors: // - Initiating a barrier operation shall perform a "release". // Within the thread that initiates the operation, memory accesses by the // processor, issued before the initiation call, shall not be reordered // after that call. Additionally, this includes accesses to memory by any // GASNet operations synchronized by that thread before initiation. // However, there is no ordering with respect to other GASNet operations. // - Synchronizing a barrier operation shall perform an "acquire". // Within the thread that synchronizes the operation, memory accesses by // the processor, issued after the synchronization call, shall not be // reordered before that call. Additionally, this includes accesses to // memory by any GASNet operations initiated by that thread after // synchronization. However, there is no ordering with respect to other // GASNet operations. // // tm: The call is collective over the associated team. // flags: Flags are reserved for future use and must currently be zero // gex_Event_t gex_Coll_BarrierNB(gex_TM_t tm, gex_Flags_t flags); // // Collectives Part II. Data Movement // // The following argument descriptions are applicable to all collective data // movement APIs in this section using arguments with these names. // // root: // The rank within 'tm' of one distinguished endpoint. More information // on the distinguishing role of the root is provided with the detailed // description of each such collective operation. // This is always a single-valued parameter. // src: // The local address of the caller's input buffer, if any. // This is not a single-valued parameter. // dst: // The local address of the caller's output buffer, if any. // This is not a single-valued parameter. 
// nbytes: // The length in bytes of one element of data. // This is a single-valued parameter. // flags: // A bitwise OR of zero or more of the permitted GEX_FLAG_* constants. // Currently no flags are defined for data-movement collective // operations, and the value zero should be passed. // However, a future release will support the "segment disposition" // flags [UNIMPLEMENTED]. // Individual flag bits may or may not be single-valued, as will // be documented with each supported flag. // Broadcast // // This operation copies 'nbytes' bytes of data starting at 'src' on rank // 'root' of 'tm', to 'dst' on every rank within the 'tm'. // // The value of 'src' is ignored on all ranks other than 'root'. // // On the 'root' rank, the data is copied from 'src' to 'dst' except in the // case these pointers are equal. However, any other overlap between 'src' // and 'dst' buffers on the root rank yields undefined behavior. gex_Event_t gex_Coll_BroadcastNB( gex_TM_t tm, // The team gex_Rank_t root, // Root rank (single-valued) void * dst, // Destination (all ranks) const void * src, // Source (root rank only) size_t nbytes, // Length of data (single-valued) gex_Flags_t flags); // Flags (partially single-valued) // // Collectives Part III. Computational // // User-Defined Reduction Operations // // GASNet provides a set of useful built-in reduction operations. These // should be favored whenever possible in performance-critical reductions, // because using a built-in operator is generally a prerequisite to leveraging // hardware-offload support for reductions which is available in some network // hardware. However for situations where none of the provided built-in // operations fit client requirements, GASNet also allows clients to provide // code for their own reduction operation. // // Reduction operations which do not correspond to a built-in opcode // (GEX_OP_*) constant are supported by passing GEX_OP_USER or GEX_OP_USER_NC // as the 'op' when initiating a reduction.
// + GEX_OP_USER: denotes a user-defined operation that is both // associative and commutative. // + GEX_OP_USER_NC: denotes a user-defined operation that is // associative but NOT commutative. [UNIMPLEMENTED] // // The implementation will invoke the user-provided function an unspecified // number of times to perform the user's operation on a pair of vectors of // operands. // // The user-defined reduction operation is passed to the initiation call using // a function pointer with type gex_Coll_ReduceFn_t: typedef void (*gex_Coll_ReduceFn_t)( const void * arg1, // "Left" operands void * arg2_and_out, // "Right" operands and result size_t count, // Operand count const void * cdata); // Client-data // These arguments are defined as follows, with additional semantics below: // arg1: // This is a pointer to memory containing 'count' consecutive operands. // These may be caller-provided input values or intermediate results. // In the case of a non-commutative operation, these are the operands on // the "left-hand side" of the nominal operator. // The reduction operation is not permitted to write to this memory. // arg2_and_out: // This is a pointer to memory containing 'count' consecutive operands. // These may be caller-provided input values or intermediate results. // In the case of a non-commutative operation, these are the operands on // the "right-hand side" of the nominal operator. // The reduction operation must write the result(s) to this memory, as // described below. // count: // This is the number of "fields" on which to perform the reduction, and // thus the length of the accessible memory at 'arg1' and 'arg2_and_out' // is equal to 'count' times the size of each element (passed as 'dt_sz' // at initiation of the reduction). // Note that this argument may take on any positive value, which may be // either smaller or larger than the 'dt_cnt' passed at initiation of // the reduction. 
// cdata: // This is the value of the 'user_cdata' argument passed locally at // initiation of the reduction, and is intended to assist in implementing // more than a single data type and/or operation with a common C function. // // The function implementing a user-defined reduction operation: // + May use the 'cdata' argument to receive information (such as the // operation or data type) not provided by the other arguments. // + May assume 'arg1' and 'arg2_and_out' do not overlap each other. However, // they can overlap the 'src' buffer (and 'dst' buffer, if any) passed at // operation initiation. // + May perform the element-wise operations in any order, and parallel // computation is explicitly permitted. // + Shall interpret the 'arg1' and 'arg2_and_out' as arrays of length 'count' // with an element type corresponding to the data type passed at initiation // of the reduction. // + Shall apply the desired operation element-wise to each of the 'count' // pairs of operands, storing the result in the location from which the // second (right-hand) operand is retrieved. Here "element-wise" // application can be expressed in pseudo-code as follows, using 'T' to // denote the C data type and '(+)' to denote the operator: // T* x = (T*)arg1; // T* y = (T*)arg2_and_out; // For all i in [0..count) do y[i] = x[i] (+) y[i]; // + Shall not assume 'count' is equal to the 'dt_cnt' passed at operation // initiation, since the implementation is free to process the 'dt_cnt' // elements passed in at initiation in smaller groups, or to group more // than 'dt_cnt' pairs of operands into a single call to the user's // function. // + Shall not block pending any condition the satisfaction of which is // dependent on progress in GASNet (whether local or global). 
// + Shall not make any GASNet calls other than those enumerated as permitted // while holding a handler-safe lock, in the section "Calls from restricted // context". // + Shall not assume that the executing thread was created by the client. // + Shall allow for the possibility that the implementation invokes the // function concurrently with itself, even if the client has not spawned // threads. Handler-safe locks or GASNet-Tools atomics are // recommended mechanisms to deal with any access to global/persistent // state. // User-Defined Data Types // // Reductions on types without a corresponding built-in data type (GEX_DT_*) // constant are supported by passing GEX_DT_USER as the 'dt' when initiating a // reduction. In this case data elements are treated as indivisible byte // sequences with length given as 'dt_sz' at initiation of the reduction. // Reduction operations passing GEX_DT_USER for the data type must pass either // GEX_OP_USER or GEX_OP_USER_NC for the operation. // Limitations for Built-in Data Types // // + Operations on floating-point data types are not guaranteed to obey all // rules in the IEEE 754 standard even when the C float and double types // otherwise do conform. Deviations from IEEE 754 include (at least): // - Operations on signalling NaNs have undefined behavior. // - MIN and MAX *may* be performed as if on "sign and magnitude // representation integers" of the same width, resulting in // non-conforming behavior with quiet NaNs. // (See https://en.wikipedia.org/wiki/IEEE_754-1985, and especially // the section "Comparing_floating-point_numbers") // [THIS PARAGRAPH MAY NOT BE A COMPLETE LIST OF NON-IEEE BEHAVIORS] // // The following argument descriptions are applicable to all computational // collective APIs in this section using arguments with these names. // // src: // The local address of the caller's input buffer. // This is not a single-valued parameter. // dst: // The local address of the caller's output buffer, if any.
// This is not a single-valued parameter. // dt: // The data type for the reduction operation. // Must be a GEX_DT_* constant documented as valid for Reductions. // This is a single-valued parameter. // dt_sz: // The length in bytes of one element of data. // When 'dt' is a built-in type, this value must be the size in bytes of // the corresponding built-in C type. When 'dt' is GEX_DT_USER, this // value must be the size in bytes of the user-defined data type. // This length must be non-zero. // This is a single-valued parameter. // dt_cnt: // The per-rank count of data elements to reduce (not a length in bytes). // This count must be non-zero. // This is a single-valued parameter. // op: // The opcode, of type gex_OP_t, naming the reduction operator. // Must be a GEX_OP_* constant documented as valid for Reductions. // This is a single-valued parameter. // user_op, user_cdata: // If 'op' is neither GEX_OP_USER nor GEX_OP_USER_NC, then these two // arguments are ignored. Otherwise 'user_op' is a local function // pointer of type gex_Coll_ReduceFn_t (described above), and // 'user_cdata' is a client data pointer to be passed to each local // invocation of 'user_op'. The 'user_cdata' is treated as opaque by the // implementation and therefore is not required to be a pointer to valid // memory. In particular, it may be NULL. // flags: // A bitwise OR of zero or more of permitted GEX_FLAG_* constants. // Currently no flags are defined for computational collective // operations, and the value zero should be passed. // However, a future release will support the "segment disposition" // flags [UNIMPLEMENTED]. // Individual flags bits may or may not be single-valued, as will // be documented with each supported flag. 
// Common Semantics // // All reductions are performed via an unspecified pattern of applications of // the operator to pairs of operands, under the assumptions that (1) all // operations are mathematically associative and (2) operations other than // GEX_OP_USER_NC are mathematically commutative. // // The implementation is not required to take measures to accommodate any // divergence (for instance of IEEE floating-point arithmetic) from the // assumptions in the preceding paragraph. Specifically, in the presence of // such divergence, the implementation is not required to provide equality of // the results of calls with mathematically equivalent arguments; neither // between distinct calls in the same execution, nor between the same call in // distinct executions. However, high-quality implementations will provide // reproducibility among calls with the same parameters within a single // execution (e.g. by applying the operation in a deterministic order). // // The implementation is not required to preserve the vector of 'dt_cnt' // elements as an indivisible unit. It is permitted not only to break the // vector into shorter ones, but also to concatenate multiple vectors to // lessen the number of calls to a user-defined reduction operator. // // This specification does not require that a user-defined function applies // the semantically equivalent operation to every pair of inputs. Nothing in // this specification prohibits passing a different user-defined operator on // each caller, nor does it prohibit a user-defined operator from applying a // different operation depending on an element's location in the argument // vectors. However, both behaviors are strongly discouraged. Since this // specification explicitly permits the implementation freedom in the order of // reductions in both rank and vector dimensions, either of these behaviors // will result in unpredictable output values.
Nothing in this paragraph is // intended to prohibit, or discourage use of, user-defined operations with // behaviors which depend on characteristics encoded in a user-defined data // type (which may include position in the rank or vector dimensions); the // intent is to discourage operators which *infer* such position information. // Reduction to one // // This is a collective call over the team named by the 'tm' argument that // initiates a non-blocking reduction applying the operation denoted by 'op' // repeatedly to reduce a collection of operands of type denoted by 'dt'. // Each member of 'tm' provides a 'src' vector of length 'dt_cnt' (in // elements), and the elements are reduced element-wise such that the i'th // element of the output vector is the reduction over the i'th elements of the // 'src' vectors of all team members. The result is written to the 'dst' of // one 'root' rank. // // Using `(+)` to represent the nominal reduction operator, and `src_i[j]` to // denote the i'th element of the 'src' vector passed by rank 'j', the result // produced on the 'root' rank can be expressed as: // dst_i = src_i[0] (+) src_i[1] ... (+) src_i[N-1] // where `N = gex_TM_QuerySize(tm)`. // // This call is non-blocking (may return before other team members have issued // a corresponding call). // // On the 'root' rank the 'dst' buffer has length in bytes of 'dt_sz * dt_cnt'. // The value of 'dst' is ignored on all other ranks. // // On all ranks the 'src' buffer has length in bytes of 'dt_sz * dt_cnt'. // // On the 'root' rank, it is permitted that 'src' and 'dst' be equal. // However, any other overlap between 'src' and 'dst' buffers on the root rank // yields undefined behavior. // // LIMITATIONS of the current release: // + The current implementation may limit `dt_sz` for user-defined types to as // little as 32KB in some configurations and with default parameters.
// The precise limit depends on the network, the sizes of the job and team, // and the size of the team's collective scratch space. gex_Event_t gex_Coll_ReduceToOneNB( gex_TM_t tm, // The team gex_Rank_t root, // Root rank (single-valued) void * dst, // NOT single-valued const void * src, // NOT single-valued gex_DT_t dt, // Data type (single-valued) size_t dt_sz, // Data type size (single-valued) size_t dt_cnt, // Element count (single-valued) gex_OP_t op, // Operation (single-valued) gex_Coll_ReduceFn_t user_op, // NOT single-valued void * user_cdata, // NOT single-valued gex_Flags_t flags); // Flags (partially single-valued) // Reduction to all // // This is a collective call over the team named by the 'tm' argument that // initiates a non-blocking reduction applying the operation denoted by 'op' // repeatedly to reduce a collection of operands of type denoted by 'dt'. // Each member of 'tm' provides a 'src' vector of length 'dt_cnt' (in // elements), and the elements are reduced element-wise such that the i'th // element of the output vector is the reduction over the i'th elements of the // 'src' vectors of all team members. The result is written to the 'dst' of // all ranks. // // The definition of the element-wise reduction is the same as was given above // for gex_Coll_ReduceToOneNB(). // // This call produces an output in the 'dst' buffer of all ranks. However, // the implementation is free to apply associativity (and commutativity for // operators other than GEX_OP_USER_NC) *differently* in producing the // multiple outputs. Therefore, when the operator differs from the assumed // mathematical properties, the results on different ranks might not be // identical. // // The 'dst' and 'src' buffers have length in bytes of 'dt_sz * dt_cnt'. // // It is permitted that 'src' and 'dst' be equal pairwise either on every rank, // or on none of them. Any other overlap between 'src' and 'dst' buffers // yields undefined behavior. 
This includes any case in which 'src' and 'dst' // are equal on at least one rank, but fewer than all ranks in the team (though // this last restriction may be relaxed in a future release). // // LIMITATIONS of the current release: // + The current implementation may limit `dt_sz` for user-defined types to as // little as 32KB in some configurations and with default parameters. // The precise limit depends on the network and sizes of the job and team. gex_Event_t gex_Coll_ReduceToAllNB( gex_TM_t tm, // The team void * dst, // NOT single-valued const void * src, // NOT single-valued gex_DT_t dt, // Data type (single-valued) size_t dt_sz, // Data type size (single-valued) size_t dt_cnt, // Element count (single-valued) gex_OP_t op, // Operation (single-valued) gex_Coll_ReduceFn_t user_op, // NOT single-valued void * user_cdata, // NOT single-valued gex_Flags_t flags); // Flags (partially single-valued) // End of section describing APIs provided by gasnet_coll.h //---------------------------------------------------------------------- // [PROPOSED] // This section of this document covers APIs described in more detail in // GASNet-EX API Proposal: Memory Kinds, Revision 2022.3.0 // // For functions, only prototypes and supported flags are provided here. // For function semantics, one should consult the document cited above. // Copies of the most recent revision of that document are available on // request from gasnet-staff@lbl.gov.
int gex_Segment_Create( gex_Segment_t *segment_p, // OUT gex_Client_t client, gex_Addr_t address, uintptr_t length, gex_MK_t kind, gex_Flags_t flags); // Valid flags: // NONE - zero is currently required void gex_Segment_Destroy( gex_Segment_t segment, gex_Flags_t flags); // Valid flags: // NONE - zero is currently required int gex_EP_BindSegment( gex_EP_t ep, gex_Segment_t segment, gex_Flags_t flags); // Valid flags: // NONE - zero is currently required typedef [some integer type] gex_EP_Capabilities_t; #define GEX_EP_CAPABILITY_{RMA,AM,VIS,COLL,AD,ALL} ??? #define GEX_FLAG_HINT_ACCEL_{AD,COLL,ALL} ??? // NOTE: implemented only by ibv, ofi and ucx conduits, // and only for capabilities == GEX_EP_CAPABILITY_RMA int gex_EP_Create( gex_EP_t *ep_p, // OUT gex_Client_t client, gex_EP_Capabilities_t capabilities, gex_Flags_t flags); // Valid flags: // NONE - zero is currently required // gex_MK_t is an opaque scalar handle to a Memory Kind (MK) typedef ... gex_MK_t; // GEX_MK_INVALID is a predefined constant guaranteed to have the value // zero and never alias a valid memory kind #define GEX_MK_INVALID ((gex_MK_t)0) // GEX_MK_HOST is a predefined constant for the "kind" for host memory #define GEX_MK_HOST ((gex_MK_t)???) // **** // The APIs in the remainder of this section are provided by gasnet_mk.h // **** // gex_MK_Class_t is enum naming available "classes" of memory kinds. // It includes at least the following values (in unspecified order): typedef enum { GEX_MK_CLASS_HOST, // "normal" memory (eg GEX_MK_HOST) GEX_MK_CLASS_CUDA_UVA, // CUDA UVA memory [since 2020.11.0] GEX_MK_CLASS_HIP, // HIP device memory [since 2021.9.0] GEX_MK_CLASS_ZE, // oneAPI Level Zero device memory [EXPERIMENTAL] ??? } gex_MK_Class_t; // The gex_MK_Create_args_t struct is passed to gex_MK_Create to create a // per-device instance of a memory kind of the given class, and includes // at least the following members (in unspecified order): typedef struct { uint64_t gex_flags; // Reserved. 
Must be 0 currently. gex_MK_Class_t gex_class; union { struct {// CUDA UVA memory [since 2020.11.0] int gex_CUdevice; } gex_class_cuda_uva; struct {// HIP device memory [since 2021.9.0] int gex_hipDevice; } gex_class_hip; struct {// oneAPI Level Zero device memory [EXPERIMENTAL] void* gex_zeDevice; void* gex_zeContext; uint32_t gex_zeMemoryOrdinal; } gex_class_ze; } gex_args; } gex_MK_Create_args_t; int gex_MK_Create( gex_MK_t *memkind_p, // OUT gex_Client_t client, const gex_MK_Create_args_t *args, // IN gex_Flags_t flags // Reserved. Must be 0 currently. ); // Valid flags: // NONE - zero is currently required void gex_MK_Destroy( gex_MK_t memkind, gex_Flags_t flags // Reserved. Must be 0 currently. ); // Valid flags: // NONE - zero is currently required // **** // End of APIs provided by gasnet_mk.h // **** // End of "GASNet-EX API Proposal: Memory Kinds, Revision 2022.3.0" section //---------------------------------------------------------------------- // vim: syntax=c