Module (cudajit.Cudajit.Module)

type jit_target =

| COMPUTE_30
| COMPUTE_32
| COMPUTE_35
| COMPUTE_37
| COMPUTE_50
| COMPUTE_52
| COMPUTE_53
| COMPUTE_60
| COMPUTE_61
| COMPUTE_62
| COMPUTE_70
| COMPUTE_72
| COMPUTE_75
| COMPUTE_80
| COMPUTE_86
| COMPUTE_87
| COMPUTE_89
| COMPUTE_90
| COMPUTE_90A
(*
Compute device class 9.0 with accelerated features.
*)

Compute device classes. See enum CUjit_target.

val sexp_of_jit_target : jit_target -> Sexplib0.Sexp.t

val jit_target_of_sexp : Sexplib0.Sexp.t -> jit_target

type jit_fallback =

| PREFER_PTX
| PREFER_BINARY

Cubin matching fallback strategies. See CUjit_fallback.

val sexp_of_jit_fallback : jit_fallback -> Sexplib0.Sexp.t

val jit_fallback_of_sexp : Sexplib0.Sexp.t -> jit_fallback

type jit_cache_mode =

| NONE
| CG
(*
Compile with L1 cache disabled.
*)
| CA
(*
Compile with L1 cache enabled.
*)

Caching modes for dlcm. See CUjit_cacheMode.

val sexp_of_jit_cache_mode : jit_cache_mode -> Sexplib0.Sexp.t

val jit_cache_mode_of_sexp : Sexplib0.Sexp.t -> jit_cache_mode

type jit_option =

| MAX_REGISTERS of int
(*
Max number of registers that a thread may use.
*)
| THREADS_PER_BLOCK of int
(*
Specifies the minimum number of threads per block to target compilation for, or returns the number of threads the compiler actually targeted. Cannot be combined with TARGET.
*)
| WALL_TIME of {
milliseconds : float;
}
| INFO_LOG_BUFFER of bigstring
| ERROR_LOG_BUFFER of bigstring
| OPTIMIZATION_LEVEL of int
(*
0 to 4, with 4 being the default and highest level of optimizations.
*)
| TARGET_FROM_CUCONTEXT
| TARGET of jit_target
| FALLBACK_STRATEGY of jit_fallback
| GENERATE_DEBUG_INFO of bool
(*
Helpful for cuda-gdb.
*)
| LOG_VERBOSE of bool
| GENERATE_LINE_INFO of bool
(*
Helpful for cuda-gdb.
*)
| CACHE_MODE of jit_cache_mode
| POSITION_INDEPENDENT_CODE of bool

See CUjit_option.

val sexp_of_jit_option : jit_option -> Sexplib0.Sexp.t

val jit_option_of_sexp : Sexplib0.Sexp.t -> jit_option

type func

See CUfunction.

type t

See CUmodule.

val load_data_ex : Nvrtc.compile_to_ptx_result -> jit_option list -> t

Currently, the image passed via this call is the PTX source. See cuModuleLoadDataEx.

The module is finalized using cuModuleUnload. The finalizer captures the context that is current when load_data_ex is called, and temporarily pushes that context onto the stack in order to unload the module.

val get_function : t -> name:string -> func

See cuModuleGetFunction.

val get_global : t -> name:string -> Deviceptr.t * Unsigned.size_t

See cuModuleGetGlobal.