Stream (cudajit.Cudajit.Stream)

type t

Stores a stream pointer and manages lifetimes of kernel launch arguments. See CUstream.

val sexp_of_t : t -> Sexplib0.Sexp.t

val mem_alloc : t -> size_in_bytes:int -> Deviceptr.t

The pointer is finalized using cuMemFreeAsync.

val mem_free : t -> Deviceptr.t -> unit

val memcpy_H_to_D_unsafe : 
  dst:Deviceptr.t ->
  src:unit Ctypes.ptr ->
  size_in_bytes:int ->
  t ->
  unit

See cuMemcpyHtoDAsync.

val memcpy_H_to_D : 
  ?host_offset:int ->
  ?length:int ->
  dst:Deviceptr.t ->
  src:('a, 'b, 'c) Stdlib.Bigarray.Genarray.t ->
  t ->
  unit

Copies the bigarray (or its interval) into the device memory asynchronously. host_offset and length are in numbers of elements. See memcpy_H_to_D_unsafe and cuMemcpyHtoDAsync.

type kernel_param =

| Tensor of Deviceptr.t
| Int of int
(*
Passed as C int.
*)
| Size_t of Unsigned.size_t
| Single of float
(*
Passed as C float.
*)
| Double of float
(*
Passed as C double.
*)

Parameters to pass to a kernel.

val sexp_of_kernel_param : kernel_param -> Sexplib0.Sexp.t

val no_stream : t

The NULL stream, which is the main synchronization stream of a device. It manages the lifetimes of the corresponding kernel launch parameters.

val launch_kernel : 
  Module.func ->
  grid_dim_x:int ->
  ?grid_dim_y:int ->
  ?grid_dim_z:int ->
  block_dim_x:int ->
  ?block_dim_y:int ->
  ?block_dim_z:int ->
  shared_mem_bytes:int ->
  t ->
  kernel_param list ->
  unit

See cuLaunchKernel.

val memcpy_D_to_H_unsafe : 
  dst:unit Ctypes.ptr ->
  src:Deviceptr.t ->
  size_in_bytes:int ->
  t ->
  unit

See cuMemcpyDtoHAsync.

val memcpy_D_to_H : 
  ?host_offset:int ->
  ?length:int ->
  dst:('a, 'b, 'c) Stdlib.Bigarray.Genarray.t ->
  src:Deviceptr.t ->
  t ->
  unit

Copies from the device memory into the bigarray (or its interval) asynchronously. host_offset and length are in numbers of elements. See memcpy_D_to_H_unsafe and cuMemcpyDtoHAsync.

val memcpy_D_to_D : 
  ?kind:('a, 'b) Stdlib.Bigarray.kind ->
  ?length:int ->
  ?size_in_bytes:int ->
  dst:Deviceptr.t ->
  src:Deviceptr.t ->
  t ->
  unit

Copies between two memory positions on the same device asynchronously. The size to copy can optionally be provided in numbers of elements via kind and length. Provide either both kind and length, or just size_in_bytes. See cuMemcpyDtoDAsync.

val memcpy_peer : 
  ?kind:('a, 'b) Stdlib.Bigarray.kind ->
  ?length:int ->
  ?size_in_bytes:int ->
  dst:Deviceptr.t ->
  dst_ctx:Context.t ->
  src:Deviceptr.t ->
  src_ctx:Context.t ->
  t ->
  unit

Copies between memory positions on two different devices asynchronously. The size to copy can optionally be provided in numbers of elements via kind and length. Provide either both kind and length, or just size_in_bytes. See cuMemcpyPeerAsync.

type attach_mem =