Module Cudajit.Context

All CUDA tasks are run under a context, usually under the current context. See: Context Management.

type flag =
  1. | SCHED_AUTO
    (*

    Automatic scheduling.

    *)
  2. | SCHED_SPIN
    (*

    Instruct CUDA to actively spin when waiting for results from the GPU.

    *)
  3. | SCHED_YIELD
    (*

    Instruct CUDA to yield its thread when waiting for results from the GPU.

    *)
  4. | SCHED_BLOCKING_SYNC
    (*

    Set blocking synchronization as default scheduling.

    *)
  5. | SCHED_MASK
  6. | MAP_HOST
    (*

    Deprecated: this behavior is always enabled, regardless of the flags passed.

    *)
  7. | LMEM_RESIZE_TO_MAX
    (*

    Keep local memory allocation after launch.

    *)
  8. | COREDUMP_ENABLE
    (*

    Trigger coredumps from exceptions in this context.

    *)
  9. | USER_COREDUMP_ENABLE
    (*

    Enable user pipe to trigger coredumps in this context.

    *)
  10. | SYNC_MEMOPS
    (*

    Ensure synchronous memory operations on this context will synchronize.

    *)
val sexp_of_flag : flag -> Sexplib0.Sexp.t
val flag_of_sexp : Sexplib0.Sexp.t -> flag
type flags = flag list
val sexp_of_flags : flags -> Sexplib0.Sexp.t
val flags_of_sexp : Sexplib0.Sexp.t -> flags
type t
val sexp_of_t : t -> Sexplib0.Sexp.t
val create : flags -> Device.t -> t

NOTE: In most cases it is recommended to use get_primary instead! The context is pushed to the CPU-thread-local stack. See cuCtxCreate.

The context value is finalized using cuCtxDestroy.

val get_flags : unit -> flags
val get_primary : Device.t -> t

The context is not pushed to the stack. See cuDevicePrimaryCtxRetain.

The context is finalized using cuDevicePrimaryCtxRelease. The underlying CUDA context will be reset once the last reference to it is released.

val get_device : unit -> Device.t
val pop_current : unit -> t
val get_current : unit -> t
val push_current : t -> unit

Pushes a context on the current CPU thread. See cuCtxPushCurrent.

val set_current : t -> unit

If there exists a CUDA context stack on the calling CPU thread, this will replace the top of that stack with the given context. See cuCtxSetCurrent.

val synchronize : unit -> unit

Blocks until the current context's tasks complete. See cuCtxSynchronize.

val disable_peer_access : t -> unit

Disables peer access between the current context and the given context. See cuCtxDisablePeerAccess.

val enable_peer_access : ?flags:Unsigned.uint -> t -> unit

Flags are unused. See cuCtxEnablePeerAccess.

type limit =
  1. | STACK_SIZE
  2. | PRINTF_FIFO_SIZE
  3. | MALLOC_HEAP_SIZE
  4. | DEV_RUNTIME_SYNC_DEPTH
    (*

    GPU device runtime launch synchronize depth.

    *)
  5. | DEV_RUNTIME_PENDING_LAUNCH_COUNT
  6. | MAX_L2_FETCH_GRANULARITY
    (*

    A hint, in bytes, between 0 and 128.

    *)
  7. | PERSISTING_L2_CACHE_SIZE
val sexp_of_limit : limit -> Sexplib0.Sexp.t
val limit_of_sexp : Sexplib0.Sexp.t -> limit
val set_limit : limit -> int -> unit
val get_limit : limit -> int