Module Cudajit.Module

A CUDA module represents CUDA code that has been loaded and is ready to execute. See: Module Management.

type jit_target =
  1. | COMPUTE_30
  2. | COMPUTE_32
  3. | COMPUTE_35
  4. | COMPUTE_37
  5. | COMPUTE_50
  6. | COMPUTE_52
  7. | COMPUTE_53
  8. | COMPUTE_60
  9. | COMPUTE_61
  10. | COMPUTE_62
  11. | COMPUTE_70
  12. | COMPUTE_72
  13. | COMPUTE_75
  14. | COMPUTE_80
  15. | COMPUTE_86
  16. | COMPUTE_87
  17. | COMPUTE_89
  18. | COMPUTE_90
  19. | COMPUTE_90A
    (*

    Compute device class 9.0 with accelerated features.

    *)

Compute device classes. See enum CUjit_target.

val sexp_of_jit_target : jit_target -> Sexplib0.Sexp.t
val jit_target_of_sexp : Sexplib0.Sexp.t -> jit_target
type jit_fallback =
  1. | PREFER_PTX
  2. | PREFER_BINARY

Cubin matching fallback strategies. See CUjit_fallback.

val sexp_of_jit_fallback : jit_fallback -> Sexplib0.Sexp.t
val jit_fallback_of_sexp : Sexplib0.Sexp.t -> jit_fallback
type jit_cache_mode =
  1. | NONE
  2. | CG
    (*

    Compile with L1 cache disabled.

    *)
  3. | CA
    (*

    Compile with L1 cache enabled.

    *)

Caching modes for dlcm. See CUjit_cacheMode.

val sexp_of_jit_cache_mode : jit_cache_mode -> Sexplib0.Sexp.t
val jit_cache_mode_of_sexp : Sexplib0.Sexp.t -> jit_cache_mode
type jit_option =
  1. | MAX_REGISTERS of int
    (*

    Max number of registers that a thread may use.

    *)
  2. | THREADS_PER_BLOCK of int
    (*

    Specifies the minimum number of threads per block to target compilation for, or returns the number of threads the compiler actually targeted. Cannot be combined with TARGET.

    *)
  3. | WALL_TIME of {
    1. milliseconds : float;
    }
  4. | INFO_LOG_BUFFER of bigstring
  5. | ERROR_LOG_BUFFER of bigstring
  6. | OPTIMIZATION_LEVEL of int
    (*

    0 to 4, with 4 being the default and highest level of optimizations.

    *)
  7. | TARGET_FROM_CUCONTEXT
  8. | TARGET of jit_target
  9. | FALLBACK_STRATEGY of jit_fallback
  10. | GENERATE_DEBUG_INFO of bool
    (*

    Helpful for cuda-gdb.

    *)
  11. | LOG_VERBOSE of bool
  12. | GENERATE_LINE_INFO of bool
    (*

    Helpful for cuda-gdb.

    *)
  13. | CACHE_MODE of jit_cache_mode
  14. | POSITION_INDEPENDENT_CODE of bool
val sexp_of_jit_option : jit_option -> Sexplib0.Sexp.t
val jit_option_of_sexp : Sexplib0.Sexp.t -> jit_option
type func
type t
val load_data_ex : Nvrtc.compile_to_ptx_result -> jit_option list -> t

Currently, the image passed via this call is the PTX source. See cuModuleLoadDataEx.

The module is finalized using cuModuleUnload. The finalizer captures the context that is current when load_data_ex is called, and temporarily pushes that context onto the stack while unloading the module.

val get_function : t -> name:string -> func
val get_global : t -> name:string -> Deviceptr.t * Unsigned.size_t