Commit 4d0d4a6

fix for issue #87: ComfyUI-MultiGPU not supporting all device types currently supported by Comfy Core.
Refactor device detection into dedicated utility module
- Extract device enumeration and compatibility checks to device_utils.py
- Add support for additional device types (NPU, MLU, DirectML, CoreX)
- Update all modules to use centralized device utilities
- Implement caching for device list to improve performance
- Reduce code duplication across distorch, nodes, and wanvideo modules
1 parent 06bc2c3 commit 4d0d4a6

7 files changed

Lines changed: 242 additions & 62 deletions
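
For orientation, a minimal sketch of how a consuming module calls the centralized utilities after this refactor. The pick_compute_device wrapper is hypothetical and not part of the commit; get_device_list and is_accelerator_available are the actual entry points added in device_utils.py:

    # Hypothetical usage sketch. Inside the package the import is relative:
    # from .device_utils import get_device_list, is_accelerator_available
    from device_utils import get_device_list, is_accelerator_available

    def pick_compute_device():
        # Mirrors the CPU fallback in the patched functions in __init__.py:
        # no accelerator of any supported type means everything stays on CPU.
        if not is_accelerator_available():
            return "cpu"
        # get_device_list() is computed once and cached; it might return
        # e.g. ["cpu", "cuda:0", "cuda:1"] on a dual-GPU NVIDIA machine.
        return next((d for d in get_device_list() if d != "cpu"), "cpu")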

__init__.py

Lines changed: 4 additions & 28 deletions
@@ -6,6 +6,7 @@
 import folder_paths
 import comfy.model_management as mm
 from nodes import NODE_CLASS_MAPPINGS as GLOBAL_NODE_CLASS_MAPPINGS
+from .device_utils import get_device_list, is_accelerator_available

 # --- DisTorch V2 Logging Configuration ---
 # Set to "E" for Engineering (DEBUG) or "P" for Production (INFO)
@@ -29,31 +30,6 @@
 current_device = mm.get_torch_device()
 current_text_encoder_device = mm.text_encoder_device()

-def _has_xpu():
-    try:
-        return hasattr(torch, "xpu") and hasattr(torch.xpu, "is_available") and torch.xpu.is_available()
-    except Exception:
-        return False
-
-def get_device_list():
-    devs = ["cpu"]
-    try:
-        if hasattr(torch, "cuda") and hasattr(torch.cuda, "is_available") and torch.cuda.is_available():
-            devs += [f"cuda:{i}" for i in range(torch.cuda.device_count())]
-    except Exception:
-        pass
-    try:
-        if _has_xpu():
-            devs += [f"xpu:{i}" for i in range(torch.xpu.device_count())]
-    except Exception:
-        pass
-    try:
-        if torch.backends.mps.is_available():
-            devs += ["mps"]
-    except Exception:
-        pass
-    return devs
-
 def set_current_device(device):
     global current_device
     current_device = device
@@ -119,7 +95,7 @@ def override(self, *args, device=None, **kwargs):

 def get_torch_device_patched():
     device = None
-    if (not (torch.cuda.is_available() or _has_xpu()) or mm.cpu_state == mm.CPUState.CPU or "cpu" in str(current_device).lower()):
+    if (not is_accelerator_available() or mm.cpu_state == mm.CPUState.CPU or "cpu" in str(current_device).lower()):
         device = torch.device("cpu")
     else:
         devs = set(get_device_list())
@@ -129,7 +105,7 @@ def get_torch_device_patched():

 def text_encoder_device_patched():
     device = None
-    if (not (torch.cuda.is_available() or _has_xpu()) or mm.cpu_state == mm.CPUState.CPU or "cpu" in str(current_text_encoder_device).lower()):
+    if (not is_accelerator_available() or mm.cpu_state == mm.CPUState.CPU or "cpu" in str(current_text_encoder_device).lower()):
         device = torch.device("cpu")
     else:
         devs = set(get_device_list())
@@ -367,4 +343,4 @@ def register_and_count(module_names, node_map):
 logger.info(dash_line)


-logger.info(f"[MultiGPU] Registration complete. Final mappings: {', '.join(NODE_CLASS_MAPPINGS.keys())}")
+logger.info(f"[MultiGPU] Registration complete. Final mappings: {', '.join(NODE_CLASS_MAPPINGS.keys())}")

device_utils.py

Lines changed: 231 additions & 0 deletions
@@ -0,0 +1,231 @@
+"""
+Device detection and management utilities for ComfyUI-MultiGPU.
+Single source of truth for all device enumeration and compatibility checks.
+Handles all device types supported by ComfyUI core.
+"""
+
+import torch
+import logging
+
+logger = logging.getLogger("MultiGPU")
+
+# Module-level cache for device list (populated once on first call)
+_DEVICE_LIST_CACHE = None
+
+def get_device_list():
+    """
+    Enumerate ALL physically available devices that can store torch tensors.
+    This includes all device types supported by ComfyUI core.
+    Results are cached after first call since devices don't change during runtime.
+
+    Returns a comprehensive list of all available devices across all types:
+    CPU (always available)
+    CUDA devices (NVIDIA GPUs)
+    XPU devices (Intel GPUs)
+    NPU devices (Ascend NPUs from Huawei)
+    MLU devices (Cambricon MLUs)
+    MPS device (Apple Metal)
+    DirectML devices (Windows DirectML)
+    CoreX/IXUCA devices
+    """
+    global _DEVICE_LIST_CACHE
+
+    # Return cached result if already populated
+    if _DEVICE_LIST_CACHE is not None:
+        return _DEVICE_LIST_CACHE
+
+    # First time - do the actual detection
+    devs = []
+
+    # CPU is always physically present and can store tensors
+    devs.append("cpu")
+
+    # CUDA devices (NVIDIA GPUs)
+    try:
+        if hasattr(torch, "cuda") and hasattr(torch.cuda, "is_available") and torch.cuda.is_available():
+            device_count = torch.cuda.device_count()
+            devs += [f"cuda:{i}" for i in range(device_count)]
+            logger.debug(f"[MultiGPU] Found {device_count} CUDA device(s)")
+    except Exception as e:
+        logger.debug(f"[MultiGPU] CUDA detection failed: {e}")
+
+    # XPU devices (Intel GPUs)
+    try:
+        # Try to import intel extension first (may be required for XPU support)
+        import intel_extension_for_pytorch as ipex
+    except ImportError:
+        pass
+    try:
+        if hasattr(torch, "xpu") and hasattr(torch.xpu, "is_available") and torch.xpu.is_available():
+            device_count = torch.xpu.device_count()
+            devs += [f"xpu:{i}" for i in range(device_count)]
+            logger.debug(f"[MultiGPU] Found {device_count} XPU device(s)")
+    except Exception as e:
+        logger.debug(f"[MultiGPU] XPU detection failed: {e}")
+
+    # NPU devices (Ascend NPUs from Huawei)
+    try:
+        import torch_npu
+        if hasattr(torch, "npu") and hasattr(torch.npu, "is_available") and torch.npu.is_available():
+            device_count = torch.npu.device_count()
+            devs += [f"npu:{i}" for i in range(device_count)]
+            logger.debug(f"[MultiGPU] Found {device_count} NPU device(s)")
+    except Exception as e:
+        logger.debug(f"[MultiGPU] NPU detection failed: {e}")
+
+    # MLU devices (Cambricon MLUs)
+    try:
+        import torch_mlu
+        if hasattr(torch, "mlu") and hasattr(torch.mlu, "is_available") and torch.mlu.is_available():
+            device_count = torch.mlu.device_count()
+            devs += [f"mlu:{i}" for i in range(device_count)]
+            logger.debug(f"[MultiGPU] Found {device_count} MLU device(s)")
+    except Exception as e:
+        logger.debug(f"[MultiGPU] MLU detection failed: {e}")
+
+    # MPS device (Apple Metal - single device only)
+    try:
+        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            devs.append("mps")
+            logger.debug("[MultiGPU] Found MPS device")
+    except Exception as e:
+        logger.debug(f"[MultiGPU] MPS detection failed: {e}")
+
+    # DirectML devices (Windows DirectML for AMD/Intel/NVIDIA)
+    try:
+        import torch_directml
+        adapter_count = torch_directml.device_count()
+        if adapter_count > 0:
+            devs += [f"directml:{i}" for i in range(adapter_count)]
+            logger.debug(f"[MultiGPU] Found {adapter_count} DirectML adapter(s)")
+    except Exception as e:
+        logger.debug(f"[MultiGPU] DirectML detection failed: {e}")
+
+    # IXUCA/CoreX devices (special accelerator)
+    try:
+        if hasattr(torch, "corex"):
+            # CoreX typically exposes single device, but check if there's a count method
+            if hasattr(torch.corex, "device_count"):
+                device_count = torch.corex.device_count()
+                devs += [f"corex:{i}" for i in range(device_count)]
+                logger.debug(f"[MultiGPU] Found {device_count} CoreX device(s)")
+            else:
+                devs.append("corex:0")
+                logger.debug("[MultiGPU] Found CoreX device")
+    except Exception as e:
+        logger.debug(f"[MultiGPU] CoreX detection failed: {e}")
+
+    # Cache the result for future calls
+    _DEVICE_LIST_CACHE = devs
+
+    # Log only once when initially populated
+    logger.info(f"[MultiGPU] Device list initialized: {devs}")
+
+    return devs
+
+
+def is_accelerator_available():
+    """
+    Check if any accelerator device is available.
+    Used by patched functions to determine CPU fallback.
+
+    Returns True if any GPU/accelerator is available, False otherwise.
+    """
+    # Check CUDA
+    try:
+        if torch.cuda.is_available():
+            return True
+    except:
+        pass
+
+    # Check XPU (Intel GPU)
+    try:
+        if hasattr(torch, "xpu") and torch.xpu.is_available():
+            return True
+    except:
+        pass
+
+    # Check NPU (Ascend)
+    try:
+        import torch_npu
+        if hasattr(torch, "npu") and torch.npu.is_available():
+            return True
+    except:
+        pass
+
+    # Check MLU (Cambricon)
+    try:
+        import torch_mlu
+        if hasattr(torch, "mlu") and torch.mlu.is_available():
+            return True
+    except:
+        pass
+
+    # Check MPS (Apple Metal)
+    try:
+        if hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            return True
+    except:
+        pass
+
+    # Check DirectML
+    try:
+        import torch_directml
+        if torch_directml.device_count() > 0:
+            return True
+    except:
+        pass
+
+    # Check CoreX/IXUCA
+    try:
+        if hasattr(torch, "corex"):
+            return True
+    except:
+        pass
+
+    return False
+
+
+def is_device_compatible(device_string):
+    """
+    Check if a device string represents a valid, available device.
+
+    Args:
+        device_string: Device identifier like "cuda:0", "cpu", "xpu:1", etc.
+
+    Returns:
+        True if the device is available, False otherwise.
+    """
+    available_devices = get_device_list()
+    return device_string in available_devices
+
+
+def get_device_type(device_string):
+    """
+    Extract the device type from a device string.
+
+    Args:
+        device_string: Device identifier like "cuda:0", "cpu", "xpu:1", etc.
+
+    Returns:
+        Device type string (e.g., "cuda", "cpu", "xpu", "npu", "mlu", "mps", "directml", "corex")
+    """
+    if ":" in device_string:
+        return device_string.split(":")[0]
+    return device_string
+
+
+def parse_device_string(device_string):
+    """
+    Parse a device string into type and index.
+
+    Args:
+        device_string: Device identifier like "cuda:0", "cpu", "xpu:1", etc.
+
+    Returns:
+        Tuple of (device_type, device_index) where index is None for non-indexed devices
+    """
+    if ":" in device_string:
+        parts = device_string.split(":")
+        return parts[0], int(parts[1])
+    return device_string, None
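
To show how the helper functions above compose, a short sketch; the printed values assume a machine whose only accelerator is a single NVIDIA GPU:

    # Assumes get_device_list() enumerates ["cpu", "cuda:0"] on this machine.
    from device_utils import (get_device_list, is_device_compatible,
                              get_device_type, parse_device_string)

    devices = get_device_list()             # ["cpu", "cuda:0"], cached after first call
    print(is_device_compatible("cuda:0"))   # True  - present in the enumerated list
    print(is_device_compatible("xpu:1"))    # False - never detected on this machine
    print(get_device_type("cuda:0"))        # "cuda"
    print(parse_device_string("cuda:0"))    # ("cuda", 0)
    print(parse_device_string("mps"))       # ("mps", None) - non-indexed device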

distorch.py

Lines changed: 3 additions & 5 deletions
@@ -12,6 +12,7 @@
 import copy
 from collections import defaultdict
 import comfy.model_management as mm
+from .device_utils import get_device_list

 # Global store for model allocations
 model_allocation_store = {}
@@ -292,7 +293,6 @@ def calculate_vvram_allocation_string(model, virtual_vram_str):

 def override_class_with_distorch_gguf(cls):
     """Legacy DisTorch wrapper for GGUF models for backward compatibility."""
-    from .nodes import get_device_list
     from . import current_device

     class NodeOverrideDisTorchGGUFLegacy(cls):
@@ -330,7 +330,7 @@ def override(self, *args, device=None, expert_mode_allocations=None, use_other_v
             vram_string = ""
             if virtual_vram_gb > 0:
                 if use_other_vram:
-                    available_devices = [d for d in get_device_list() if d.startswith(("cuda", "xpu"))]
+                    available_devices = [d for d in get_device_list() if d != "cpu"]
                     other_devices = [d for d in available_devices if d != device]
                     other_devices.sort(key=lambda x: int(x.split(':')[1] if ':' in x else x[-1]), reverse=False)
                     device_string = ','.join(other_devices + ['cpu'])
@@ -354,7 +354,6 @@ def override(self, *args, device=None, expert_mode_allocations=None, use_other_v

 def override_class_with_distorch_gguf_v2(cls):
     """DisTorch 2.0 wrapper for GGUF models."""
-    from .nodes import get_device_list
     from . import current_device

     class NodeOverrideDisTorchGGUFv2(cls):
@@ -406,7 +405,6 @@ def override(self, *args, compute_device=None, virtual_vram_gb=4.0,

 def override_class_with_distorch_clip(cls):
     """DisTorch wrapper for CLIP models with GGUF support"""
-    from .nodes import get_device_list
     from . import current_text_encoder_device

     class NodeOverrideDisTorch(cls):
@@ -441,7 +439,7 @@ def override(self, *args, device=None, expert_mode_allocations=None, use_other_v
             vram_string = ""
             if virtual_vram_gb > 0:
                 if use_other_vram:
-                    available_devices = [d for d in get_device_list() if d.startswith(("cuda", "xpu"))]
+                    available_devices = [d for d in get_device_list() if d != "cpu"]
                     other_devices = [d for d in available_devices if d != device]
                     other_devices.sort(key=lambda x: int(x.split(':')[1] if ':' in x else x[-1]), reverse=False)
                     device_string = ','.join(other_devices + ['cpu'])
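
The use_other_vram change above is the behavioral core of the fix: the old filter admitted only CUDA and XPU devices as virtual VRAM donors, so on NPU- or MLU-only machines no donor was ever found. A sketch with a hypothetical mixed device list:

    # Hypothetical device list for a machine with one NVIDIA GPU and one
    # Ascend NPU; "device" is the compute device selected on the node.
    device_list = ["cpu", "cuda:0", "npu:0"]
    device = "cuda:0"

    old = [d for d in device_list if d.startswith(("cuda", "xpu"))]  # ["cuda:0"] - NPU ignored
    new = [d for d in device_list if d != "cpu"]                     # ["cuda:0", "npu:0"]

    other_devices = [d for d in new if d != device]                  # ["npu:0"]
    device_string = ','.join(other_devices + ['cpu'])                # "npu:0,cpu"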

distorch_2.py

Lines changed: 1 addition & 2 deletions
@@ -16,7 +16,7 @@
 import comfy.model_management as mm
 import comfy.model_patcher
 from . import current_device
-from .nodes import get_device_list
+from .device_utils import get_device_list

 safetensor_allocation_store = {}
 safetensor_settings_store = {}
@@ -549,7 +549,6 @@ def calculate_safetensor_vvram_allocation(model_patcher, virtual_vram_str):

 def override_class_with_distorch_safetensor_v2(cls):
     """DisTorch 2.0 wrapper for safetensor models"""
-    from .nodes import get_device_list
     from . import current_device

     class NodeOverrideDisTorchSafetensorV2(cls):

nodes.py

Lines changed: 1 addition & 20 deletions
@@ -2,26 +2,7 @@
 import folder_paths
 from pathlib import Path
 from nodes import NODE_CLASS_MAPPINGS
-
-def _has_xpu():
-    try:
-        return hasattr(torch, "xpu") and hasattr(torch.xpu, "is_available") and torch.xpu.is_available()
-    except Exception:
-        return False
-
-def get_device_list():
-    devs = ["cpu"]
-    try:
-        if hasattr(torch, "cuda") and hasattr(torch.cuda, "is_available") and torch.cuda.is_available():
-            devs += [f"cuda:{i}" for i in range(torch.cuda.device_count())]
-    except Exception:
-        pass
-    try:
-        if _has_xpu():
-            devs += [f"xpu:{i}" for i in range(torch.xpu.device_count())]
-    except Exception:
-        pass
-    return devs
+from .device_utils import get_device_list

 class DeviceSelectorMultiGPU:
     @classmethod
