NVIDIA · mdboom · May 5, 2026 · May 5, 2026 · May 5, 2026 · May 5, 2026
diff --git a/cuda_core/cuda/core/_memory/_device_memory_resource.pyx b/cuda_core/cuda/core/_memory/_device_memory_resource.pyx
@@ -25,7 +25,7 @@ import multiprocessing
 import platform  # no-cython-lint
 import uuid
 
-from ._peer_access_utils import plan_peer_access_update
+from cuda.core._memory._peer_access_utils import plan_peer_access_update
 from cuda.core._utils.cuda_utils import check_multiprocessing_start_method
 
 __all__ = ['DeviceMemoryResource', 'DeviceMemoryResourceOptions']

diff --git a/cuda_core/cuda/core/system/__init__.py b/cuda_core/cuda/core/system/__init__.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
 
@@ -18,6 +18,8 @@
 ]
 
 
+from cuda.core.system import typing
+
 from ._system import *
 
 if CUDA_BINDINGS_NVML_IS_COMPATIBLE:

diff --git a/cuda_core/cuda/core/system/_clock.pxi b/cuda_core/cuda/core/system/_clock.pxi
@@ -3,41 +3,12 @@
 # SPDX-License-Identifier: Apache-2.0
 
 
-class ClockId(StrEnum):
-    """
-    Clock Ids. These are used in combination with :class:`ClockType` to specify a single clock value.
-    """
-    CURRENT = "current"
-    CUSTOMER_BOOST_MAX = "customer_boost_max"
-    # APP_CLOCK_TARGET and APP_CLOCK_DEFAULT are deprecated so not included here
-
-
-ClockId.CURRENT.__doc__ = "Current actual clock value."
-ClockId.CUSTOMER_BOOST_MAX.__doc__ = "OEM-defined maximum clock rate"
-
-
 _CLOCK_ID_MAPPING = {
     ClockId.CURRENT: nvml.ClockId.CURRENT,
     ClockId.CUSTOMER_BOOST_MAX: nvml.ClockId.CUSTOMER_BOOST_MAX,
 }
 
 
-class ClocksEventReasons(StrEnum):
-    """
-    Reasons for a clocks event.  These are used in combination with :class:`ClockType` to specify the reason for a clocks event.
-    """
-    NONE = "none"
-    GPU_IDLE = "gpu_idle"
-    APPLICATIONS_CLOCKS_SETTING = "applications_clocks_setting"
-    SW_POWER_CAP = "sw_power_cap"
-    HW_SLOWDOWN = "hw_slowdown"
-    SYNC_BOOST = "sync_boost"
-    SW_THERMAL_SLOWDOWN = "sw_thermal_slowdown"
-    HW_THERMAL_SLOWDOWN = "hw_thermal_slowdown"
-    HW_POWER_BRAKE_SLOWDOWN = "hw_power_brake_slowdown"
-    DISPLAY_CLOCK_SETTING = "display_clock_setting"
-
-
 _CLOCKS_EVENT_REASONS_MAPPING = {
     nvml.ClocksEventReasons.EVENT_REASON_NONE: ClocksEventReasons.NONE,
     nvml.ClocksEventReasons.EVENT_REASON_GPU_IDLE: ClocksEventReasons.GPU_IDLE,
@@ -52,16 +23,6 @@ _CLOCKS_EVENT_REASONS_MAPPING = {
 }
 
 
-class ClockType(StrEnum):
-    """
-    Clock types. All speeds are in Mhz.
-    """
-    GRAPHICS = "graphics"
-    SM = "sm"
-    MEMORY = "memory"
-    VIDEO = "video"
-
-
 _CLOCK_TYPE_MAPPING = {
     ClockType.GRAPHICS: nvml.ClockType.CLOCK_GRAPHICS,
     ClockType.SM: nvml.ClockType.CLOCK_SM,

diff --git a/cuda_core/cuda/core/system/_cooler.pxi b/cuda_core/cuda/core/system/_cooler.pxi
@@ -3,46 +3,12 @@
 # SPDX-License-Identifier: Apache-2.0
 
 
-class CoolerControl(StrEnum):
-    """
-    Cooler control type.
-    """
-    TOGGLE = "toggle"
-    VARIABLE = "variable"
-
-
-CoolerControl.TOGGLE.__doc__ = """
-This cooler can only be toggled either ON or OFF (e.g. a switch).
-"""
-CoolerControl.VARIABLE.__doc__ = """
-This cooler's level can be adjusted from some minimum to some maximum (e.g. a knob).
-"""
-
-
 _COOLER_CONTROL_MAPPING = {
     nvml.CoolerControl.THERMAL_COOLER_SIGNAL_TOGGLE: CoolerControl.TOGGLE,
     nvml.CoolerControl.THERMAL_COOLER_SIGNAL_VARIABLE: CoolerControl.VARIABLE,
 }
 
 
-class CoolerTarget(StrEnum):
-    """
-    Cooler target.
-    """
-    NONE = "none"
-    GPU = "gpu"
-    MEMORY = "memory"
-    POWER_SUPPLY = "power_supply"
-    # THERMAL_GPU_RELATED is a composite target, so it is omitted here and will
-    # get returned as 3 separate targets: GPU, MEMORY, and POWER_SUPPLY.
-
-
-CoolerTarget.NONE.__doc__ = "This cooler controls nothing."
-CoolerTarget.GPU.__doc__ = "This cooler can cool the GPU."
-CoolerTarget.MEMORY.__doc__ = "This cooler can cool the memory."
-CoolerTarget.POWER_SUPPLY.__doc__ = "This cooler can cool the power supply."
-
-
 _COOLER_TARGET_MAPPING = {
     nvml.CoolerTarget.THERMAL_NONE: CoolerTarget.NONE,
     nvml.CoolerTarget.THERMAL_GPU: CoolerTarget.GPU,

diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx
@@ -5,22 +5,34 @@
 from libc.stdint cimport intptr_t, uint64_t
 from libc.math cimport ceil
 
-import sys
-if sys.version_info >= (3, 11):
-    from enum import StrEnum
-else:
-    from backports.strenum import StrEnum
 from multiprocessing import cpu_count
 from typing import Iterable
 import warnings
 
 from cuda.bindings import nvml
-try:
-    from cuda.bindings._internal._fast_enum import FastEnum
-except ImportError:
-    from enum import IntEnum as FastEnum
 
 from ._nvml_context cimport initialize
+# Enums are defined in cuda.core.system.typing; imported here for the NVML mapping tables.
+from cuda.core.system.typing import (
+    AddressingMode,
+    AffinityScope,
+    ClockId,
+    ClocksEventReasons,
+    ClockType,
+    CoolerControl,
+    CoolerTarget,
+    DeviceArch,
+    EventType,
+    FanControlPolicy,
+    FieldId,
+    GpuP2PCapsIndex,
+    GpuP2PStatus,
+    GpuTopologyLevel,
+    InforomObject,
+    TemperatureThresholds,
+    ThermalController,
+    ThermalTarget,
+)
 
 
 cdef object _pstate_to_int(object pstate):
@@ -57,53 +69,12 @@ include "_temperature.pxi"
 include "_utilization.pxi"
 
 
-class AddressingMode(StrEnum):
-    """
-    Addressing mode of a device.
-
-    For Kepler™ or newer fully supported devices.
-    """
-    HMM = "hmm"
-    ATS = "ats"
-
-
-AddressingMode.HMM.__doc__ = """
-    System allocated memory (``malloc``, ``mmap``) is addressable from the device
-    (GPU), via software-based mirroring of the CPU's page tables, on the GPU.
-"""
-
-
-AddressingMode.ATS.__doc__ = """
-    System allocated memory (``malloc``, ``mmap``) is addressable from the device
-    (GPU), via Address Translation Services. This means that there is (effectively)
-    a single set of page tables, and the CPU and GPU both use them.
-"""
-
-
 _ADDRESSING_MODE_MAPPING = {
     nvml.DeviceAddressingModeType.DEVICE_ADDRESSING_MODE_HMM: AddressingMode.HMM,
     nvml.DeviceAddressingModeType.DEVICE_ADDRESSING_MODE_ATS: AddressingMode.ATS,
 }
 
 
-class AffinityScope(StrEnum):
-    """
-    Scope for affinity queries.
-    """
-    NODE = "node"
-    SOCKET = "socket"
-
-
-AffinityScope.NODE.__doc__ = """
-The NUMA node is the scope of the affinity query.  This is the default scope.
-"""
-
-
-AffinityScope.SOCKET.__doc__ = """
-The CPU socket is the scope of the affinity query.
-"""
-
-
 _AFFINITY_SCOPE_MAPPING = {
     AffinityScope.NODE: nvml.AffinityScope.NODE,
     AffinityScope.SOCKET: nvml.AffinityScope.SOCKET,
@@ -132,37 +103,6 @@ _BRAND_TYPE_MAPPING = {
 }
 
 
-# This uses FastEnum instead of StrEnum because the ordering of the values is
-# meaningful, e.g. Kepler "or later"
-class DeviceArch(FastEnum):
-    """
-    Device architecture.
-    """
-    KEPLER = int(nvml.DeviceArch.KEPLER)
-    MAXWELL = int(nvml.DeviceArch.MAXWELL)
-    PASCAL = int(nvml.DeviceArch.PASCAL)
-    VOLTA = int(nvml.DeviceArch.VOLTA)
-    TURING = int(nvml.DeviceArch.TURING)
-    AMPERE = int(nvml.DeviceArch.AMPERE)
-    ADA = int(nvml.DeviceArch.ADA)
-    HOPPER = int(nvml.DeviceArch.HOPPER)
-    BLACKWELL = int(nvml.DeviceArch.BLACKWELL)
-    UNKNOWN = int(nvml.DeviceArch.UNKNOWN)
-
-
-class GpuP2PCapsIndex(StrEnum):
-    """
-    GPU peer-to-peer capabilities index.
-    """
-    READ = "read"
-    WRITE = "write"
-    NVLINK = "nvlink"
-    ATOMICS = "atomics"
-    PCI = "pci"
-    PROP = "prop"
-    UNKNOWN = "unknown"
-
-
 _GPU_P2P_CAPS_INDEX_MAPPING = {
     GpuP2PCapsIndex.READ: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_READ,
     GpuP2PCapsIndex.WRITE: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_WRITE,
@@ -174,19 +114,6 @@ _GPU_P2P_CAPS_INDEX_MAPPING = {
 }
 
 
-class GpuP2PStatus(StrEnum):
-    """
-    GPU peer-to-peer status.
-    """
-    OK = "ok"
-    CHIPSET_NOT_SUPPORTED = "chipset not supported"
-    GPU_NOT_SUPPORTED = "GPU not supported"
-    IOH_TOPOLOGY_NOT_SUPPORTED = "IOH topology not supported"
-    DISABLED_BY_REGKEY = "disabled by regkey"
-    NOT_SUPPORTED = "not supported"
-    UNKNOWN = "unknown"
-
-
 _GPU_P2P_STATUS_MAPPING = {
     nvml.GpuP2PStatus.P2P_STATUS_OK: GpuP2PStatus.OK,
     nvml.GpuP2PStatus.P2P_STATUS_CHIPSET_NOT_SUPPORTED: GpuP2PStatus.CHIPSET_NOT_SUPPORTED,
@@ -198,18 +125,6 @@ _GPU_P2P_STATUS_MAPPING = {
 }
 
 
-class GpuTopologyLevel(StrEnum):
-    """
-    Represents level relationships within a system between two GPUs.
-    """
-    INTERNAL = "internal"
-    SINGLE = "single"
-    MULTIPLE = "multiple"
-    HOSTBRIDGE = "hostbridge"
-    NODE = "node"
-    SYSTEM = "system"
-
-
 _GPU_TOPOLOGY_LEVEL_MAPPING = {
     GpuTopologyLevel.INTERNAL: nvml.GpuTopologyLevel.TOPOLOGY_INTERNAL,
     GpuTopologyLevel.SINGLE: nvml.GpuTopologyLevel.TOPOLOGY_SINGLE,
@@ -1204,27 +1119,8 @@ def get_p2p_status(device1: Device, device2: Device, index: GpuP2PCapsIndex | st
 
 
 __all__ = [
-    "AddressingMode",
-    "AffinityScope",
-    "ClockId",
-    "ClocksEventReasons",
-    "ClockType",
-    "CoolerControl",
-    "CoolerTarget",
     "Device",
-    "DeviceArch",
-    "EventType",
-    "FanControlPolicy",
-    "FieldId",
     "get_p2p_status",
     "get_topology_common_ancestor",
-    "GpuP2PCapsIndex",
-    "GpuP2PStatus",
-    "GpuTopologyLevel",
-    "InforomObject",
     "NvlinkInfo",
-    "TemperatureThresholds",
-    "ThermalController",
-    "ThermalTarget",
-    "Utilization",
 ]
diff --git a/cuda_core/cuda/core/system/_event.pxi b/cuda_core/cuda/core/system/_event.pxi
@@ -3,36 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 
-class EventType(StrEnum):
-    """
-    Event types that can be waited on with :class:`DeviceEvents`.
-    """
-    NONE = "none"
-    SINGLE_BIT_ECC_ERROR = "single_bit_ecc_error"
-    DOUBLE_BIT_ECC_ERROR = "double_bit_ecc_error"
-    PSTATE = "pstate"
-    XID_CRITICAL_ERROR = "xid_critical_error"
-    CLOCK = "clock"
-    POWER_SOURCE_CHANGE = "power_source_change"
-    MIG_CONFIG_CHANGE = "mig_config_change"
-    SINGLE_BIT_ECC_ERROR_STORM = "single_bit_ecc_error_storm"
-    DRAM_RETIREMENT_EVENT = "dram_retirement_event"
-    DRAM_RETIREMENT_FAILURE = "dram_retirement_failure"
-    NON_FATAL_POISON_ERROR = "non_fatal_poison_error"
-    FATAL_POISON_ERROR = "fatal_poison_error"
-    GPU_UNAVAILABLE_ERROR = "gpu_unavailable_error"
-    GPU_RECOVERY_ACTION = "gpu_recovery_action"
-
-
-EventType.PSTATE.__doc__ = """
-Event about PState changes
-
-On Fermi™ architecture, PState changes are also an indicator that GPU is throttling down due to
-no work being executed on the GPU, power capping or thermal capping. In a typical situation,
-Fermi-based GPU should stay in P0 for the duration of the execution of the compute process.
-"""
-
-
 _EVENT_TYPE_MAPPING = {
     nvml.EventType.NONE: EventType.NONE,
     nvml.EventType.SINGLE_BIT_ECC_ERROR: EventType.SINGLE_BIT_ECC_ERROR,

diff --git a/cuda_core/cuda/core/system/_fan.pxi b/cuda_core/cuda/core/system/_fan.pxi
@@ -3,14 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 
-class FanControlPolicy(StrEnum):
-    """
-    Fan control policies.
-    """
-    TEMPERATURE_CONTROLLED = "temperature_controlled"
-    MANUAL = "manual"
-
-
 _FAN_CONTROL_POLICY_MAPPING = {
     nvml.FanControlPolicy.TEMPERATURE_CONTINUOUS_SW: FanControlPolicy.TEMPERATURE_CONTROLLED,
     nvml.FanControlPolicy.MANUAL: FanControlPolicy.MANUAL,

diff --git a/cuda_core/cuda/core/system/_field_values.pxi b/cuda_core/cuda/core/system/_field_values.pxi
@@ -3,9 +3,6 @@
 # SPDX-License-Identifier: Apache-2.0
 
 
-FieldId = nvml.FieldId
-
-
 cdef class FieldValue:
     """
     Represents the data from a single field value.