Skip to content

Commit afafc80

Browse files
committed
Add no-device variants for multi-GPU CLIP loaders
1 parent 5bb7add commit afafc80

3 files changed

Lines changed: 193 additions & 13 deletions

File tree

__init__.py

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,31 @@ def override(self, *args, device=None, **kwargs):
9292

9393
return NodeOverride
9494

95+
def override_class_clip_no_device(cls):
    """Wrap *cls* so its node exposes an optional ``device`` selector.

    The ``device`` input is added to the node's ``optional`` section; when
    a device is supplied at execution time, the current text-encoder
    device is switched before delegating to the wrapped loader.
    """

    class NodeOverride(cls):
        CATEGORY = "multigpu"
        FUNCTION = "override"

        @classmethod
        def INPUT_TYPES(s):
            # Deep-copy so the wrapped class's input schema is never mutated.
            inputs = copy.deepcopy(cls.INPUT_TYPES())
            devices = get_device_list()
            # Prefer the second entry (first accelerator) when more than one
            # device is available; otherwise fall back to the only device.
            if len(devices) > 1:
                default_device = devices[1]
            else:
                default_device = devices[0]
            optional = inputs.setdefault("optional", {})
            optional["device"] = (devices, {"default": default_device})
            return inputs

        def override(self, *args, device=None, **kwargs):
            # Only redirect the text-encoder device when one was chosen.
            if device is not None:
                set_current_text_encoder_device(device)
            original_fn = getattr(super(), cls.FUNCTION)
            return original_fn(*args, **kwargs)

    return NodeOverride
118+
119+
95120
def get_torch_device_patched():
96121
device = None
97122
if (not is_accelerator_available() or mm.cpu_state == mm.CPUState.CPU or "cpu" in str(current_device).lower()):
@@ -183,6 +208,7 @@ def check_module_exists(module_path):
183208
override_class_with_distorch_gguf,
184209
override_class_with_distorch_gguf_v2,
185210
override_class_with_distorch_clip,
211+
override_class_with_distorch_clip_no_device,
186212
override_class_with_distorch
187213
)
188214

@@ -194,7 +220,8 @@ def check_module_exists(module_path):
194220
analyze_safetensor_loading,
195221
calculate_safetensor_vvram_allocation,
196222
override_class_with_distorch_safetensor_v2,
197-
override_class_with_distorch_safetensor_v2_clip
223+
override_class_with_distorch_safetensor_v2_clip,
224+
override_class_with_distorch_safetensor_v2_clip_no_device
198225
)
199226

200227
# Import advanced checkpoint loaders
@@ -217,10 +244,10 @@ def check_module_exists(module_path):
217244
NODE_CLASS_MAPPINGS["CLIPLoaderMultiGPU"] = override_class_clip(GLOBAL_NODE_CLASS_MAPPINGS["CLIPLoader"])
218245
NODE_CLASS_MAPPINGS["DualCLIPLoaderMultiGPU"] = override_class_clip(GLOBAL_NODE_CLASS_MAPPINGS["DualCLIPLoader"])
219246
if "TripleCLIPLoader" in GLOBAL_NODE_CLASS_MAPPINGS:
220-
NODE_CLASS_MAPPINGS["TripleCLIPLoaderMultiGPU"] = override_class_clip(GLOBAL_NODE_CLASS_MAPPINGS["TripleCLIPLoader"])
247+
NODE_CLASS_MAPPINGS["TripleCLIPLoaderMultiGPU"] = override_class_clip_no_device(GLOBAL_NODE_CLASS_MAPPINGS["TripleCLIPLoader"])
221248
if "QuadrupleCLIPLoader" in GLOBAL_NODE_CLASS_MAPPINGS:
222-
NODE_CLASS_MAPPINGS["QuadrupleCLIPLoaderMultiGPU"] = override_class_clip(GLOBAL_NODE_CLASS_MAPPINGS["QuadrupleCLIPLoader"])
223-
NODE_CLASS_MAPPINGS["CLIPVisionLoaderMultiGPU"] = override_class_clip(GLOBAL_NODE_CLASS_MAPPINGS["CLIPVisionLoader"])
249+
NODE_CLASS_MAPPINGS["QuadrupleCLIPLoaderMultiGPU"] = override_class_clip_no_device(GLOBAL_NODE_CLASS_MAPPINGS["QuadrupleCLIPLoader"])
250+
NODE_CLASS_MAPPINGS["CLIPVisionLoaderMultiGPU"] = override_class_clip_no_device(GLOBAL_NODE_CLASS_MAPPINGS["CLIPVisionLoader"])
224251
NODE_CLASS_MAPPINGS["CheckpointLoaderSimpleMultiGPU"] = override_class(GLOBAL_NODE_CLASS_MAPPINGS["CheckpointLoaderSimple"])
225252
NODE_CLASS_MAPPINGS["ControlNetLoaderMultiGPU"] = override_class(GLOBAL_NODE_CLASS_MAPPINGS["ControlNetLoader"])
226253
if "DiffusersLoader" in GLOBAL_NODE_CLASS_MAPPINGS:
@@ -234,10 +261,10 @@ def check_module_exists(module_path):
234261
NODE_CLASS_MAPPINGS["CLIPLoaderDisTorch2MultiGPU"] = override_class_with_distorch_safetensor_v2_clip(GLOBAL_NODE_CLASS_MAPPINGS["CLIPLoader"])
235262
NODE_CLASS_MAPPINGS["DualCLIPLoaderDisTorch2MultiGPU"] = override_class_with_distorch_safetensor_v2_clip(GLOBAL_NODE_CLASS_MAPPINGS["DualCLIPLoader"])
236263
if "TripleCLIPLoader" in GLOBAL_NODE_CLASS_MAPPINGS:
237-
NODE_CLASS_MAPPINGS["TripleCLIPLoaderDisTorch2MultiGPU"] = override_class_with_distorch_safetensor_v2_clip(GLOBAL_NODE_CLASS_MAPPINGS["TripleCLIPLoader"])
264+
NODE_CLASS_MAPPINGS["TripleCLIPLoaderDisTorch2MultiGPU"] = override_class_with_distorch_safetensor_v2_clip_no_device(GLOBAL_NODE_CLASS_MAPPINGS["TripleCLIPLoader"])
238265
if "QuadrupleCLIPLoader" in GLOBAL_NODE_CLASS_MAPPINGS:
239-
NODE_CLASS_MAPPINGS["QuadrupleCLIPLoaderDisTorch2MultiGPU"] = override_class_with_distorch_safetensor_v2_clip(GLOBAL_NODE_CLASS_MAPPINGS["QuadrupleCLIPLoader"])
240-
NODE_CLASS_MAPPINGS["CLIPVisionLoaderDisTorch2MultiGPU"] = override_class_with_distorch_safetensor_v2_clip(GLOBAL_NODE_CLASS_MAPPINGS["CLIPVisionLoader"])
266+
NODE_CLASS_MAPPINGS["QuadrupleCLIPLoaderDisTorch2MultiGPU"] = override_class_with_distorch_safetensor_v2_clip_no_device(GLOBAL_NODE_CLASS_MAPPINGS["QuadrupleCLIPLoader"])
267+
NODE_CLASS_MAPPINGS["CLIPVisionLoaderDisTorch2MultiGPU"] = override_class_with_distorch_safetensor_v2_clip_no_device(GLOBAL_NODE_CLASS_MAPPINGS["CLIPVisionLoader"])
241268
NODE_CLASS_MAPPINGS["CheckpointLoaderSimpleDisTorch2MultiGPU"] = override_class_with_distorch_safetensor_v2(GLOBAL_NODE_CLASS_MAPPINGS["CheckpointLoaderSimple"])
242269
NODE_CLASS_MAPPINGS["ControlNetLoaderDisTorch2MultiGPU"] = override_class_with_distorch_safetensor_v2(GLOBAL_NODE_CLASS_MAPPINGS["ControlNetLoader"])
243270
if "DiffusersLoader" in GLOBAL_NODE_CLASS_MAPPINGS:
@@ -305,20 +332,20 @@ def register_and_count(module_names, node_map):
305332
"UnetLoaderGGUFAdvancedDisTorchMultiGPU": override_class_with_distorch_gguf(UnetLoaderGGUFAdvanced),
306333
"CLIPLoaderGGUFDisTorchMultiGPU": override_class_with_distorch_clip(CLIPLoaderGGUF),
307334
"DualCLIPLoaderGGUFDisTorchMultiGPU": override_class_with_distorch_clip(DualCLIPLoaderGGUF),
308-
"TripleCLIPLoaderGGUFDisTorchMultiGPU": override_class_with_distorch_clip(TripleCLIPLoaderGGUF),
309-
"QuadrupleCLIPLoaderGGUFDisTorchMultiGPU": override_class_with_distorch_clip(QuadrupleCLIPLoaderGGUF),
335+
"TripleCLIPLoaderGGUFDisTorchMultiGPU": override_class_with_distorch_clip_no_device(TripleCLIPLoaderGGUF),
336+
"QuadrupleCLIPLoaderGGUFDisTorchMultiGPU": override_class_with_distorch_clip_no_device(QuadrupleCLIPLoaderGGUF),
310337
"UnetLoaderGGUFDisTorch2MultiGPU": override_class_with_distorch_safetensor_v2(UnetLoaderGGUF),
311338
"UnetLoaderGGUFAdvancedDisTorch2MultiGPU": override_class_with_distorch_safetensor_v2(UnetLoaderGGUFAdvanced),
312339
"CLIPLoaderGGUFDisTorch2MultiGPU": override_class_with_distorch_safetensor_v2_clip(CLIPLoaderGGUF),
313340
"DualCLIPLoaderGGUFDisTorch2MultiGPU": override_class_with_distorch_safetensor_v2_clip(DualCLIPLoaderGGUF),
314-
"TripleCLIPLoaderGGUFDisTorch2MultiGPU": override_class_with_distorch_safetensor_v2_clip(TripleCLIPLoaderGGUF),
315-
"QuadrupleCLIPLoaderGGUFDisTorch2MultiGPU": override_class_with_distorch_safetensor_v2_clip(QuadrupleCLIPLoaderGGUF),
341+
"TripleCLIPLoaderGGUFDisTorch2MultiGPU": override_class_with_distorch_safetensor_v2_clip_no_device(TripleCLIPLoaderGGUF),
342+
"QuadrupleCLIPLoaderGGUFDisTorch2MultiGPU": override_class_with_distorch_safetensor_v2_clip_no_device(QuadrupleCLIPLoaderGGUF),
316343
"UnetLoaderGGUFMultiGPU": override_class(UnetLoaderGGUF),
317344
"UnetLoaderGGUFAdvancedMultiGPU": override_class(UnetLoaderGGUFAdvanced),
318345
"CLIPLoaderGGUFMultiGPU": override_class_clip(CLIPLoaderGGUF),
319346
"DualCLIPLoaderGGUFMultiGPU": override_class_clip(DualCLIPLoaderGGUF),
320-
"TripleCLIPLoaderGGUFMultiGPU": override_class_clip(TripleCLIPLoaderGGUF),
321-
"QuadrupleCLIPLoaderGGUFMultiGPU": override_class_clip(QuadrupleCLIPLoaderGGUF)
347+
"TripleCLIPLoaderGGUFMultiGPU": override_class_clip_no_device(TripleCLIPLoaderGGUF),
348+
"QuadrupleCLIPLoaderGGUFMultiGPU": override_class_clip_no_device(QuadrupleCLIPLoaderGGUF)
322349
}
323350
register_and_count(["ComfyUI-GGUF", "comfyui-gguf"], gguf_nodes)
324351

distorch.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,64 @@ def override(self, *args, device=None, expert_mode_allocations=None, use_other_v
462462
return out
463463

464464
return NodeOverrideDisTorch
465+
def override_class_with_distorch_clip_no_device(cls):
    """DisTorch wrapper for CLIP models with GGUF support.

    Variant of the DisTorch CLIP wrapper whose ``device`` selector lives in
    the node's *optional* inputs. When a device is chosen it becomes the
    current text-encoder device before the wrapped loader runs, and the
    resulting model's DisTorch virtual-VRAM allocation string is recorded
    for the patched model patcher to consume later.
    """
    # NOTE: the previous version did `from . import current_text_encoder_device`
    # here, but that name was never used; the setter is imported lazily inside
    # override() instead. The dead import has been removed.

    class NodeOverrideDisTorchClipNoDevice(cls):
        @classmethod
        def INPUT_TYPES(s):
            # Deep-copy so the wrapped class's input schema is never mutated.
            inputs = copy.deepcopy(cls.INPUT_TYPES())
            devices = get_device_list()
            # Default to the first accelerator when more than one device exists.
            default_device = devices[1] if len(devices) > 1 else devices[0]
            inputs["optional"] = inputs.get("optional", {})
            inputs["optional"]["device"] = (devices, {"default": default_device})
            inputs["optional"]["virtual_vram_gb"] = ("FLOAT", {"default": 4.0, "min": 0.0, "max": 24.0, "step": 0.1})
            inputs["optional"]["use_other_vram"] = ("BOOLEAN", {"default": False})
            inputs["optional"]["expert_mode_allocations"] = ("STRING", {
                "multiline": False,
                "default": "",
                "tooltip": "Expert use only: Manual VRAM allocation string. Incorrect values can cause crashes. Do not modify unless you fully understand DisTorch memory management."
            })
            return inputs

        CATEGORY = "multigpu"
        FUNCTION = "override"

        def override(self, *args, device=None, expert_mode_allocations=None, use_other_vram=None, virtual_vram_gb=0.0, **kwargs):
            # Imported lazily to avoid a circular import at module load time.
            from . import set_current_text_encoder_device
            if device is not None:
                set_current_text_encoder_device(device)

            register_patched_ggufmodelpatcher()
            fn = getattr(super(), cls.FUNCTION)
            out = fn(*args, **kwargs)

            # Build the "<device>;<gb>;<donor,...>" virtual-VRAM spec.
            vram_string = ""
            if virtual_vram_gb > 0:
                if use_other_vram:
                    # Donate VRAM from every other accelerator (sorted by
                    # ascending device index), then finally the CPU.
                    available_devices = [d for d in get_device_list() if d != "cpu"]
                    other_devices = [d for d in available_devices if d != device]
                    other_devices.sort(key=lambda x: int(x.split(':')[1] if ':' in x else x[-1]), reverse=False)
                    device_string = ','.join(other_devices + ['cpu'])
                    vram_string = f"{device};{virtual_vram_gb};{device_string}"
                else:
                    vram_string = f"{device};{virtual_vram_gb};cpu"

            full_allocation = f"{expert_mode_allocations}#{vram_string}" if expert_mode_allocations or vram_string else ""

            logging.info(f"[MultiGPU_DisTorch] Full allocation string: {full_allocation}")

            # Key the allocation by a hash of the loaded model so the patched
            # GGUF model patcher can look it up when the model is dispatched.
            if hasattr(out[0], 'model'):
                model_hash = create_model_hash(out[0], "override")
                model_allocation_store[model_hash] = full_allocation
            elif hasattr(out[0], 'patcher') and hasattr(out[0].patcher, 'model'):
                model_hash = create_model_hash(out[0].patcher, "override")
                model_allocation_store[model_hash] = full_allocation

            return out

    return NodeOverrideDisTorchClipNoDevice
466523

467524
# Alias for backward compatibility
468525
override_class_with_distorch = override_class_with_distorch_gguf

distorch_2.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -772,3 +772,99 @@ def override(self, *args, device=None, virtual_vram_gb=4.0, # Changed from comp
772772
return out
773773

774774
return NodeOverrideDisTorchSafetensorV2Clip
775+
776+
def override_class_with_distorch_safetensor_v2_clip_no_device(cls):
    """DisTorch 2.0 wrapper for safetensor CLIP models.

    Variant whose ``device`` selector is an *optional* node input. When a
    device is supplied it becomes the current text-encoder device before
    the wrapped loader runs; the DisTorch virtual-VRAM allocation string
    and a hash of the node settings are then recorded against the loaded
    model so the patched safetensor model patcher can retrieve them.
    """
    # NOTE: the previous version did `from . import current_device` here, but
    # that name was never used anywhere in this factory; removed as dead code.

    class NodeOverrideDisTorchSafetensorV2ClipNoDevice(cls):
        @classmethod
        def INPUT_TYPES(s):
            # Deep-copy so the wrapped class's input schema is never mutated.
            inputs = copy.deepcopy(cls.INPUT_TYPES())
            devices = get_device_list()
            default_device = devices[1] if len(devices) > 1 else devices[0]

            inputs["optional"] = inputs.get("optional", {})
            inputs["optional"]["device"] = (devices, {"default": default_device})
            inputs["optional"]["virtual_vram_gb"] = ("FLOAT", {"default": 4.0, "min": 0.0, "max": 128.0, "step": 0.1})
            inputs["optional"]["donor_device"] = (devices, {"default": "cpu"})
            inputs["optional"]["expert_mode_allocations"] = ("STRING", {"multiline": False, "default": ""})
            inputs["optional"]["high_precision_loras"] = ("BOOLEAN", {"default": True})
            return inputs

        CATEGORY = "multigpu/distorch_2"
        FUNCTION = "override"
        TITLE = f"{cls.TITLE if hasattr(cls, 'TITLE') else cls.__name__} (DisTorch2)"

        @classmethod
        def IS_CHANGED(s, *args, device=None, virtual_vram_gb=4.0,
                       donor_device="cpu", expert_mode_allocations="", high_precision_loras=True, **kwargs):
            # Hash all DisTorch-specific settings so ComfyUI re-executes the
            # node whenever any of them changes.
            settings_str = f"{device}{virtual_vram_gb}{donor_device}{expert_mode_allocations}{high_precision_loras}"
            return hashlib.sha256(settings_str.encode()).hexdigest()

        def override(self, *args, device=None, virtual_vram_gb=4.0,
                     donor_device="cpu", expert_mode_allocations="", high_precision_loras=True, **kwargs):

            # Imported lazily to avoid a circular import at module load time.
            from . import set_current_text_encoder_device
            if device is not None:
                set_current_text_encoder_device(device)

            # Register our patched ModelPatcher before loading.
            register_patched_safetensor_modelpatcher()

            # Resolve the wrapped loader's entry point.
            fn = getattr(super(), cls.FUNCTION)

            # --- Check if we need to unload the model due to settings change ---
            # This logic is partly redundant with IS_CHANGED, but provides
            # clear logging about cache hits/misses.
            # NOTE(review): unlike IS_CHANGED, this hash omits
            # high_precision_loras — confirm whether that is intentional.
            settings_str = f"{device}{virtual_vram_gb}{donor_device}{expert_mode_allocations}"
            settings_hash = hashlib.sha256(settings_str.encode()).hexdigest()

            # First call loads (or fetches the cached) model so we can hash it
            # without applying our patch; relies on the loader's own caching
            # to keep the second call below cheap.
            temp_out = fn(*args, **kwargs)
            model_to_check = None
            if hasattr(temp_out[0], 'model'):
                model_to_check = temp_out[0]
            elif hasattr(temp_out[0], 'patcher') and hasattr(temp_out[0].patcher, 'model'):
                model_to_check = temp_out[0].patcher

            if model_to_check:
                model_hash = create_safetensor_model_hash(model_to_check, "override_check")
                last_settings_hash = safetensor_settings_store.get(model_hash)

                if last_settings_hash != settings_hash:
                    logger.info(f"[MultiGPU_DisTorch2] Settings changed for model {model_hash[:8]}. Previous settings hash: {last_settings_hash}, New settings hash: {settings_hash}. Forcing reload.")
                else:
                    logger.info(f"[MultiGPU_DisTorch2] Settings unchanged for model {model_hash[:8]}. Using cached model.")

            out = fn(*args, **kwargs)

            # Store high_precision_loras on the model for later retrieval.
            if hasattr(out[0], 'model'):
                out[0].model._distorch_high_precision_loras = high_precision_loras
            elif hasattr(out[0], 'patcher') and hasattr(out[0].patcher, 'model'):
                out[0].patcher.model._distorch_high_precision_loras = high_precision_loras

            # Build the "<device>;<gb>;<donor>" virtual-VRAM spec.
            vram_string = ""
            if virtual_vram_gb > 0:
                vram_string = f"{device};{virtual_vram_gb};{donor_device}"
            elif expert_mode_allocations:  # Only include device if there's an expert string
                vram_string = device

            full_allocation = f"{expert_mode_allocations}#{vram_string}" if expert_mode_allocations or vram_string else ""

            logger.info(f"[MultiGPU_DisTorch2] Full allocation string: {full_allocation}")

            # Record allocation + settings hash keyed by the model's hash.
            if hasattr(out[0], 'model'):
                model_hash = create_safetensor_model_hash(out[0], "override")
                safetensor_allocation_store[model_hash] = full_allocation
                safetensor_settings_store[model_hash] = settings_hash
            elif hasattr(out[0], 'patcher') and hasattr(out[0].patcher, 'model'):
                model_hash = create_safetensor_model_hash(out[0].patcher, "override")
                safetensor_allocation_store[model_hash] = full_allocation
                safetensor_settings_store[model_hash] = settings_hash

            return out

    return NodeOverrideDisTorchSafetensorV2ClipNoDevice

0 commit comments

Comments
 (0)