Module Cudajit.Device

Managing a CUDA GPU device and its primary context. See: Device Management and Primary Context Management.

type t
include Sexplib0.Sexpable.S with type t := t
val t_of_sexp : Sexplib0.Sexp.t -> t
val sexp_of_t : t -> Sexplib0.Sexp.t
val get_count : unit -> int

Returns the number of CUDA-capable NVIDIA devices. See cuDeviceGetCount.

val get : ordinal:int -> t

Returns a handle to the device with the given ordinal, which must be in the range from 0 to get_count () - 1. See cuDeviceGet.

val primary_ctx_reset : t -> unit

Destroys all allocations and resets all state on the primary context of the given device. See cuDevicePrimaryCtxReset.

val get_free_and_total_mem : unit -> int * int

Returns the pair (free, total): the free memory on the device of the current context according to the OS, and the total memory on that device, both in bytes. See cuMemGetInfo.

type p2p_attribute =
  | PERFORMANCE_RANK of int
  | ACCESS_SUPPORTED of bool
  | NATIVE_ATOMIC_SUPPORTED of bool
  | CUDA_ARRAY_ACCESS_SUPPORTED of bool
val sexp_of_p2p_attribute : p2p_attribute -> Sexplib0.Sexp.t
val p2p_attribute_of_sexp : Sexplib0.Sexp.t -> p2p_attribute
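val get_p2p_attributes : dst:t -> src:t -> p2p_attribute list

Queries the peer-to-peer attributes of the link between the src and dst devices. See cuDeviceGetP2PAttribute.

val can_access_peer : dst:t -> src:t -> bool

Queries whether direct peer-to-peer memory access is possible between the two devices. See cuDeviceCanAccessPeer.
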
type computemode =
  | DEFAULT  (* Multiple contexts allowed per device. *)
  | PROHIBITED  (* No contexts can be created on this device at this time. *)
  | EXCLUSIVE_PROCESS
      (* Only one context used by a single process can be present on this device at a time. *)
val sexp_of_computemode : computemode -> Sexplib0.Sexp.t
val computemode_of_sexp : Sexplib0.Sexp.t -> computemode
type flush_GPU_direct_RDMA_writes_options =
  | HOST
  | MEMOPS
val sexp_of_flush_GPU_direct_RDMA_writes_options : flush_GPU_direct_RDMA_writes_options -> Sexplib0.Sexp.t
val flush_GPU_direct_RDMA_writes_options_of_sexp : Sexplib0.Sexp.t -> flush_GPU_direct_RDMA_writes_options
type mem_allocation_handle_type =
  | NONE
  | POSIX_FILE_DESCRIPTOR
  | WIN32
  | WIN32_KMT
  | FABRIC
val sexp_of_mem_allocation_handle_type : mem_allocation_handle_type -> Sexplib0.Sexp.t
val mem_allocation_handle_type_of_sexp : Sexplib0.Sexp.t -> mem_allocation_handle_type
type attributes = {
  name : string;
  max_threads_per_block : int;
  max_block_dim_x : int;
  max_block_dim_y : int;
  max_block_dim_z : int;
  max_grid_dim_x : int;
  max_grid_dim_y : int;
  max_grid_dim_z : int;
  max_shared_memory_per_block : int;  (* In bytes. *)
  total_constant_memory : int;  (* In bytes. *)
  warp_size : int;  (* In threads. *)
  max_pitch : int;  (* In bytes. *)
  max_registers_per_block : int;  (* 32-bit registers. *)
  clock_rate : int;  (* In kilohertz. *)
  texture_alignment : int;
  multiprocessor_count : int;
  kernel_exec_timeout : bool;
  integrated : bool;
  can_map_host_memory : bool;
  compute_mode : computemode;
  maximum_texture1d_width : int;
  maximum_texture2d_width : int;
  maximum_texture2d_height : int;
  maximum_texture3d_width : int;
  maximum_texture3d_height : int;
  maximum_texture3d_depth : int;
  maximum_texture2d_layered_width : int;
  maximum_texture2d_layered_height : int;
  maximum_texture2d_layered_layers : int;
  surface_alignment : int;
  concurrent_kernels : bool;
      (* Whether the device supports executing multiple kernels within the same context simultaneously. *)
  ecc_enabled : bool;
      (* Whether error correction is supported and enabled on the device. *)
  pci_bus_id : int;
  pci_device_id : int;
      (* PCI device (also known as slot) identifier of the device. *)
  tcc_driver : bool;
  memory_clock_rate : int;  (* In kilohertz. *)
  global_memory_bus_width : int;  (* In bits. *)
  l2_cache_size : int;  (* In bytes. *)
  max_threads_per_multiprocessor : int;
  async_engine_count : int;
  unified_addressing : bool;
  maximum_texture1d_layered_width : int;
  maximum_texture1d_layered_layers : int;
  maximum_texture2d_gather_width : int;
  maximum_texture2d_gather_height : int;
  maximum_texture3d_width_alternate : int;
  maximum_texture3d_height_alternate : int;
  maximum_texture3d_depth_alternate : int;
  pci_domain_id : int;
  texture_pitch_alignment : int;
  maximum_texturecubemap_width : int;
  maximum_texturecubemap_layered_width : int;
  maximum_texturecubemap_layered_layers : int;
  maximum_surface1d_width : int;
  maximum_surface2d_width : int;
  maximum_surface2d_height : int;
  maximum_surface3d_width : int;
  maximum_surface3d_height : int;
  maximum_surface3d_depth : int;
  maximum_surface1d_layered_width : int;
  maximum_surface1d_layered_layers : int;
  maximum_surface2d_layered_width : int;
  maximum_surface2d_layered_height : int;
  maximum_surface2d_layered_layers : int;
  maximum_surfacecubemap_width : int;
  maximum_surfacecubemap_layered_width : int;
  maximum_surfacecubemap_layered_layers : int;
  maximum_texture2d_linear_width : int;
  maximum_texture2d_linear_height : int;
  maximum_texture2d_linear_pitch : int;  (* In bytes. *)
  maximum_texture2d_mipmapped_width : int;
  maximum_texture2d_mipmapped_height : int;
  compute_capability_major : int;
  compute_capability_minor : int;
  maximum_texture1d_mipmapped_width : int;
  stream_priorities_supported : bool;
  global_l1_cache_supported : bool;
  local_l1_cache_supported : bool;
  max_shared_memory_per_multiprocessor : int;  (* In bytes. *)
  max_registers_per_multiprocessor : int;  (* 32-bit registers. *)
  managed_memory : bool;
  multi_gpu_board : bool;
  multi_gpu_board_group_id : int;
  host_native_atomic_supported : bool;
  single_to_double_precision_perf_ratio : int;
  pageable_memory_access : bool;
      (* Device supports coherently accessing pageable memory without calling cudaHostRegister. *)
  concurrent_managed_access : bool;
  compute_preemption_supported : bool;
  can_use_host_pointer_for_registered_mem : bool;
  cooperative_launch : bool;
  max_shared_memory_per_block_optin : int;
  can_flush_remote_writes : bool;
  host_register_supported : bool;
  pageable_memory_access_uses_host_page_tables : bool;
  direct_managed_mem_access_from_host : bool;
  virtual_memory_management_supported : bool;
  handle_type_posix_file_descriptor_supported : bool;
  handle_type_win32_handle_supported : bool;
  handle_type_win32_kmt_handle_supported : bool;
  max_blocks_per_multiprocessor : int;
  generic_compression_supported : bool;
  max_persisting_l2_cache_size : int;  (* In bytes. *)
  max_access_policy_window_size : int;
      (* For CUaccessPolicyWindow::num_bytes. *)
  gpu_direct_rdma_with_cuda_vmm_supported : bool;
  reserved_shared_memory_per_block : int;  (* In bytes. *)
  sparse_cuda_array_supported : bool;
  read_only_host_register_supported : bool;
  timeline_semaphore_interop_supported : bool;
  memory_pools_supported : bool;
  gpu_direct_rdma_supported : bool;
  gpu_direct_rdma_flush_writes_options : flush_GPU_direct_RDMA_writes_options list;
  gpu_direct_rdma_writes_ordering : bool;
  mempool_supported_handle_types : mem_allocation_handle_type list;
      (* Handle types supported with mempool based IPC. *)
  cluster_launch : bool;
  deferred_mapping_cuda_array_supported : bool;
  can_use_64_bit_stream_mem_ops : bool;
  can_use_stream_wait_value_nor : bool;
  dma_buf_supported : bool;
  ipc_event_supported : bool;
  mem_sync_domain_count : int;
      (* Number of memory domains the device supports. *)
  tensor_map_access_supported : bool;
  unified_function_pointers : bool;
  multicast_supported : bool;
      (* Device supports switch multicast and reduction operations. *)
}
val sexp_of_attributes : attributes -> Sexplib0.Sexp.t
val attributes_of_sexp : Sexplib0.Sexp.t -> attributes
val get_attributes : t -> attributes

Queries every attribute of the given device and returns them as an attributes record. See cuDeviceGetAttribute.