
Commit d1c88a7

feat(distorch): Add universal .safetensors support & memory-based distribution
This commit introduces DisTorch v2.0.0, a major overhaul that extends multi-device model distribution to standard `.safetensors` models. Key changes include:

- **Universal `.safetensors` Support:** The core distribution logic is no longer limited to GGUF models. It now fully supports `.safetensors`, allowing any UNet supported by native Comfy loaders to have its layers distributed across multiple devices (GPUs and CPU/RAM).
1 parent fb6e2e6 commit d1c88a7

5 files changed: 114 additions & 102 deletions


README.md

Lines changed: 6 additions & 6 deletions
@@ -1,4 +1,4 @@
-# ComfyUI-MultiGPU: Tools to free up your primary GPU’s VRAM by using your CPU or additional GPUs, now with tighter integration into kijai's WanVideoWrapper[^1]
+# ComfyUI-MultiGPU v2.0.0: Universal `.safetensors` and GGUF Multi-GPU Distribution with DisTorch
 <p align="center">
 <img src="https://raw.githubusercontent.com/pollockjj/ComfyUI-MultiGPU/main/assets/distorch_average.png" width="600">
 <br>
@@ -18,10 +18,10 @@ ComfyUI-MultiGPU now includes a custom, tightly integrated implementation for Wa
 ## The Core of ComfyUI-MultiGPU:
 [^1]: This **enhances memory management,** not parallel processing. Workflow steps still execute sequentially, but with components (in full or in part) loaded across your specified devices. *Performance gains* come from avoiding repeated model loading/unloading when VRAM is constrained. *Capability gains* come from offloading as much of the model (VAE/CLIP/UNet) off of your main **compute** device as possible—allowing you to maximize latent space for actual computation.

-1. **DisTorch Virtual VRAM for UNet Loaders**: Move UNet layers off your compute GPU
-   - Automatic distribution to RAM or other GPUs
-   - One-number control of VRAM usage
-   - Support for all GGUF models
+1. **DisTorch Virtual VRAM for `.safetensors` and GGUF Models**: Move model layers off your compute GPU
+   - Automatic, memory-size based distribution to RAM or other GPUs
+   - One-number control of VRAM usage
+   - Universal support for `.safetensors` and GGUF models

 2. **CLIP Offloading**: Two solutions for LLM-based and standard CLIP models:
    - **MultiGPU CLIP**: Full offload to CPU or secondary GPU
@@ -83,7 +83,7 @@ With a 12GB GPU running an 8GB model:
 - Your GPU now has extra VRAM for larger batches, higher resolutions, or longer video

 ## 🚀 Compatibility
-Works with all GGUF-quantized ComfyUI/ComfyUI-GGUF-supported UNet/CLIP models.
+Works with all `.safetensors` and GGUF-quantized models.

 ⚙️ Expert users: For those of you who were here for the 1.0 release of DisTorch, manual allocation strings are still available for advanced configurations. Each log will contain the allocation string for the run so it can be easily recreated and/or manipulated for more sophisticated setups.
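
The "one-number control" highlighted above maps directly onto the arithmetic that appears in `calculate_vvram_allocation_string` later in this commit. A minimal sketch of that bookkeeping, reusing the README's 12GB GPU / 8GB model example (all sizes hypothetical):

```python
# Sketch of DisTorch's virtual VRAM bookkeeping, mirroring the arithmetic
# in calculate_vvram_allocation_string below. All sizes are hypothetical.
recipient_vram = 12.0    # GB physically on the compute GPU (e.g. cuda:0)
model_size_gb = 8.0      # GB of UNet weights to place
virtual_vram_gb = 4.0    # the single number the user dials in

recipient_virtual = recipient_vram + virtual_vram_gb        # 16.00 GB virtual budget
on_compute_gpu = max(0, model_size_gb - virtual_vram_gb)    # 4.00 GB stays resident
offloaded = model_size_gb - on_compute_gpu                  # 4.00 GB goes to donors

print(f"{on_compute_gpu:.2f}GB on cuda:0, {offloaded:.2f}GB on RAM/other GPUs")
```

Every gigabyte offloaded is a gigabyte of VRAM freed on the compute device for latents, which is where the "larger batches, higher resolutions, or longer video" claim comes from.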

__init__.py

Lines changed: 12 additions & 4 deletions
@@ -9,12 +9,20 @@

 # --- DisTorch V2 Logging Configuration ---
 # Set to "E" for Engineering (DEBUG) or "P" for Production (INFO)
-LOG_LEVEL = "E"
+LOG_LEVEL = "P"

 # Configure logger
-log_level = logging.DEBUG if LOG_LEVEL == "E" else logging.INFO
-logging.basicConfig(level=log_level, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("MultiGPU")
+logger.propagate = False
+
+if not logger.handlers:
+    log_level = logging.DEBUG if LOG_LEVEL == "E" else logging.INFO
+    handler = logging.StreamHandler()
+    formatter = logging.Formatter('%(message)s')
+    handler.setFormatter(formatter)
+    logger.addHandler(handler)
+    logger.setLevel(log_level)
+
 # --- End Logging Configuration ---

 # Global device state management
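
The switch from `logging.basicConfig` to a dedicated "MultiGPU" logger is what lets every module in the package share one output configuration. A minimal sketch of how a sibling module picks it up (file name hypothetical):

```python
# some_node.py (hypothetical sibling module in the same package)
import logging

# Same named logger configured once in __init__.py: same handler, same level.
logger = logging.getLogger("MultiGPU")

def example():
    logger.debug("engineering-only detail")  # visible only when LOG_LEVEL = "E"
    logger.info("shown in Production mode too")
```

Because `logger.propagate = False` and the `if not logger.handlers:` guard run at import time, repeated imports or ComfyUI reloads will not stack duplicate handlers or double-print log lines.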

distorch.py

Lines changed: 35 additions & 35 deletions
@@ -7,6 +7,8 @@
 import torch
 import logging
 import hashlib
+
+logger = logging.getLogger("MultiGPU")
 import copy
 from collections import defaultdict
 import comfy.model_management as mm
@@ -22,7 +24,7 @@ def create_model_hash(model, caller):
     first_layers = str(list(model.model_state_dict().keys())[:3])
     identifier = f"{model_type}_{model_size}_{first_layers}"
     final_hash = hashlib.sha256(identifier.encode()).hexdigest()
-    logging.debug(f"[MultiGPU_DisTorch_HASH] Created hash for {caller}: {final_hash[:8]}...")
+    logger.debug(f"[MultiGPU_DisTorch_HASH] Created hash for {caller}: {final_hash[:8]}...")
     return final_hash
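
`create_model_hash` fingerprints a model by its type, byte size, and first three state-dict keys, so identical weights resolve to the same identity regardless of which loader produced them. A standalone sketch of the scheme (the example inputs are invented):

```python
# Sketch of the fingerprinting used by create_model_hash above; model_type,
# model_size, and the layer keys stand in for values read from the real
# ComfyUI model object.
import hashlib

def sketch_model_hash(model_type: str, model_size: int, layer_keys: list) -> str:
    first_layers = str(layer_keys[:3])
    identifier = f"{model_type}_{model_size}_{first_layers}"
    return hashlib.sha256(identifier.encode()).hexdigest()

digest = sketch_model_hash("flux", 11_900_000_000,
                           ["img_in.weight", "time_in.weight", "vector_in.weight"])
print(digest[:8])  # only the short prefix is logged, as in the debug line above
```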

@@ -99,22 +101,21 @@ def analyze_ggml_loading(model, allocations_str):
             "alloc_gb": alloc_gb
         }

-    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-    logging.info(eq_line)
-    logging.info(" DisTorch Model Device Allocations")
-    logging.info(eq_line)
-    logging.info(fmt_assign.format("Device", "Alloc %", "Total (GB)", " Alloc (GB)"))
-    logging.info(dash_line)
+    logger.info(eq_line)
+    logger.info(" DisTorch Model Device Allocations")
+    logger.info(eq_line)
+    logger.info(fmt_assign.format("Device", "Alloc %", "Total (GB)", " Alloc (GB)"))
+    logger.info(dash_line)

     sorted_devices = sorted(device_table.keys(), key=lambda d: (d == "cpu", d))

     for dev in sorted_devices:
         frac = device_table[dev]["fraction"]
         tot_gb = device_table[dev]["total_gb"]
         alloc_gb = device_table[dev]["alloc_gb"]
-        logging.info(fmt_assign.format(dev,f"{int(frac * 100)}%",f"{tot_gb:.2f}",f"{alloc_gb:.2f}"))
+        logger.info(fmt_assign.format(dev,f"{int(frac * 100)}%",f"{tot_gb:.2f}",f"{alloc_gb:.2f}"))

-    logging.info(dash_line)
+    logger.info(dash_line)

     layer_summary = {}
     layer_list = []
@@ -134,16 +135,16 @@ def analyze_ggml_loading(model, allocations_str):
         memory_by_type[layer_type] += layer_memory
         total_memory += layer_memory

-    logging.info(" DisTorch Model Layer Distribution")
-    logging.info(dash_line)
+    logger.info(" DisTorch Model Layer Distribution")
+    logger.info(dash_line)
     fmt_layer = "{:<12}{:>10}{:>14}{:>10}"
-    logging.info(fmt_layer.format("Layer Type", "Layers", "Memory (MB)", "% Total"))
-    logging.info(dash_line)
+    logger.info(fmt_layer.format("Layer Type", "Layers", "Memory (MB)", "% Total"))
+    logger.info(dash_line)
     for layer_type, count in layer_summary.items():
         mem_mb = memory_by_type[layer_type] / (1024 * 1024)
         mem_percent = (memory_by_type[layer_type] / total_memory) * 100 if total_memory > 0 else 0
-        logging.info(fmt_layer.format(layer_type,str(count),f"{mem_mb:.2f}",f"{mem_percent:.1f}%"))
-    logging.info(dash_line)
+        logger.info(fmt_layer.format(layer_type,str(count),f"{mem_mb:.2f}",f"{mem_percent:.1f}%"))
+    logger.info(dash_line)

     nonzero_devices = [d for d, r in DEVICE_RATIOS_DISTORCH.items() if r > 0]
     nonzero_total_ratio = sum(DEVICE_RATIOS_DISTORCH[d] for d in nonzero_devices)
@@ -162,11 +163,11 @@ def analyze_ggml_loading(model, allocations_str):
         device_assignments[device] = layer_list[start_idx:end_idx]
         current_layer += device_layer_count

-    logging.info("DisTorch Model Final Device/Layer Assignments")
-    logging.info(dash_line)
+    logger.info("DisTorch Model Final Device/Layer Assignments")
+    logger.info(dash_line)
     fmt_assign = "{:<12}{:>10}{:>14}{:>10}"
-    logging.info(fmt_assign.format("Device", "Layers", "Memory (MB)", "% Total"))
-    logging.info(dash_line)
+    logger.info(fmt_assign.format("Device", "Layers", "Memory (MB)", "% Total"))
+    logger.info(dash_line)
     total_assigned_memory = 0
     device_memories = {}
     for device, layers in device_assignments.items():
@@ -185,8 +186,8 @@ def analyze_ggml_loading(model, allocations_str):
         layers = device_assignments[dev]
         mem_mb = device_memories[dev] / (1024 * 1024)
         mem_percent = (device_memories[dev] / total_memory) * 100 if total_memory > 0 else 0
-        logging.info(fmt_assign.format(dev,str(len(layers)),f"{mem_mb:.2f}",f"{mem_percent:.1f}%"))
-    logging.info(dash_line)
+        logger.info(fmt_assign.format(dev,str(len(layers)),f"{mem_mb:.2f}",f"{mem_percent:.1f}%"))
+    logger.info(dash_line)

     return {"device_assignments": device_assignments}
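
The assignment step above slices one flat layer list into contiguous runs sized by each device's share of the total ratio. A self-contained sketch of that splitting (the remainder handling in distorch.py may differ slightly; inputs are made up):

```python
# Proportional layer splitting in the style of analyze_ggml_loading:
# each device with a nonzero ratio gets a contiguous slice of the layers.
def split_layers(layer_list, device_ratios):
    nonzero = {d: r for d, r in device_ratios.items() if r > 0}
    total_ratio = sum(nonzero.values())
    assignments, current = {}, 0
    for i, (device, ratio) in enumerate(nonzero.items()):
        if i == len(nonzero) - 1:
            count = len(layer_list) - current  # last device takes the remainder
        else:
            count = int(len(layer_list) * ratio / total_ratio)
        assignments[device] = layer_list[current:current + count]
        current += count
    return assignments

layers = [f"block_{i}" for i in range(40)]
print({d: len(v) for d, v in split_layers(layers, {"cuda:0": 0.25, "cpu": 0.75}).items()})
# {'cuda:0': 10, 'cpu': 30}
```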

@@ -200,17 +201,16 @@ def calculate_vvram_allocation_string(model, virtual_vram_str):
     dash_line = "-" * 47
     fmt_assign = "{:<8} {:<6} {:>11} {:>9} {:>9}"

-    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-    logging.info(eq_line)
-    logging.info(" DisTorch Model Virtual VRAM Analysis")
-    logging.info(eq_line)
-    logging.info(fmt_assign.format("Object", "Role", "Original(GB)", "Total(GB)", "Virt(GB)"))
-    logging.info(dash_line)
+    logger.info(eq_line)
+    logger.info(" DisTorch Model Virtual VRAM Analysis")
+    logger.info(eq_line)
+    logger.info(fmt_assign.format("Object", "Role", "Original(GB)", "Total(GB)", "Virt(GB)"))
+    logger.info(dash_line)

     recipient_vram = mm.get_total_memory(torch.device(recipient_device)) / (1024**3)
     recipient_virtual = recipient_vram + virtual_vram_gb

-    logging.info(fmt_assign.format(recipient_device, 'recip', f"{recipient_vram:.2f}GB",f"{recipient_virtual:.2f}GB", f"+{virtual_vram_gb:.2f}GB"))
+    logger.info(fmt_assign.format(recipient_device, 'recip', f"{recipient_vram:.2f}GB",f"{recipient_virtual:.2f}GB", f"+{virtual_vram_gb:.2f}GB"))

     ram_donors = [d for d in donors.split(',') if d != 'cpu']
     remaining_vram_needed = virtual_vram_gb
@@ -228,15 +228,15 @@ def calculate_vvram_allocation_string(model, virtual_vram_str):
         donor_allocations[donor] = donation

         donor_device_info[donor] = (donor_vram, donor_virtual)
-        logging.info(fmt_assign.format(donor, 'donor', f"{donor_vram:.2f}GB", f"{donor_virtual:.2f}GB", f"-{donation:.2f}GB"))
+        logger.info(fmt_assign.format(donor, 'donor', f"{donor_vram:.2f}GB", f"{donor_virtual:.2f}GB", f"-{donation:.2f}GB"))

     system_dram_gb = mm.get_total_memory(torch.device('cpu')) / (1024**3)
     cpu_donation = remaining_vram_needed
     cpu_virtual = system_dram_gb - cpu_donation
     donor_allocations['cpu'] = cpu_donation
-    logging.info(fmt_assign.format('cpu', 'donor', f"{system_dram_gb:.2f}GB", f"{cpu_virtual:.2f}GB", f"-{cpu_donation:.2f}GB"))
+    logger.info(fmt_assign.format('cpu', 'donor', f"{system_dram_gb:.2f}GB", f"{cpu_virtual:.2f}GB", f"-{cpu_donation:.2f}GB"))

-    logging.info(dash_line)
+    logger.info(dash_line)

     layer_summary = {}
     layer_list = []
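
In plain numbers: GPU donors listed ahead of `cpu` give up VRAM first, and the CPU absorbs whatever is still unmet, which is exactly what `cpu_donation = remaining_vram_needed` does above. An illustrative trace (the per-donor cap is computed in code outside this hunk, so the 4GB figure is an assumption):

```python
# Hypothetical donor trace: 6GB of virtual VRAM requested, one GPU donor
# (cuda:1) able to give 4GB in this example, CPU covering the remainder.
virtual_vram_gb = 6.0
remaining_vram_needed = virtual_vram_gb

donation = 4.0                          # assumed cap for cuda:1
remaining_vram_needed -= donation       # 2.0 GB still unmet
cpu_donation = remaining_vram_needed    # cpu takes the rest, as in the code above

print(f"cuda:1 donates {donation:.2f}GB, cpu donates {cpu_donation:.2f}GB")
```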
@@ -259,12 +259,12 @@ def calculate_vvram_allocation_string(model, virtual_vram_str):
     model_size_gb = total_memory / (1024**3)
     new_model_size_gb = max(0, model_size_gb - virtual_vram_gb)

-    logging.info(fmt_assign.format('model', 'model', f"{model_size_gb:.2f}GB",f"{new_model_size_gb:.2f}GB", f"-{virtual_vram_gb:.2f}GB"))
+    logger.info(fmt_assign.format('model', 'model', f"{model_size_gb:.2f}GB",f"{new_model_size_gb:.2f}GB", f"-{virtual_vram_gb:.2f}GB"))

     if model_size_gb > (recipient_vram * 0.9):
         on_recipient = recipient_vram * 0.9
         on_virtuals = model_size_gb - on_recipient
-        logging.info(f"\nWarning: Model size is greater than 90% of recipient VRAM. {on_virtuals:.2f} GB of GGML Layers Offloaded Automatically to Virtual VRAM.\n")
+        logger.info(f"\nWarning: Model size is greater than 90% of recipient VRAM. {on_virtuals:.2f} GB of GGML Layers Offloaded Automatically to Virtual VRAM.\n")
     else:
         on_recipient = model_size_gb
         on_virtuals = 0
@@ -285,7 +285,7 @@ def calculate_vvram_allocation_string(model, virtual_vram_str):

     allocation_string = ";".join(allocation_parts)
     fmt_mem = "{:<20}{:>20}"
-    logging.info(fmt_mem.format("\n v1 Expert String", allocation_string))
+    logger.info(fmt_mem.format("\n v1 Expert String", allocation_string))

     return allocation_string
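
The 90% guard above pushes layers to virtual VRAM even when the user asked for less offload than the model actually needs. Worked numbers (hypothetical sizes):

```python
# Worked example of the 90% guard: a 14GB model on a 12GB recipient GPU.
recipient_vram = 12.0
model_size_gb = 14.0

if model_size_gb > (recipient_vram * 0.9):      # 14.0 > 10.8
    on_recipient = recipient_vram * 0.9         # 10.80 GB kept on the GPU
    on_virtuals = model_size_gb - on_recipient  # 3.20 GB auto-offloaded
else:
    on_recipient, on_virtuals = model_size_gb, 0

print(f"{on_virtuals:.2f} GB of layers offloaded automatically")
```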

@@ -390,7 +390,7 @@ def override(self, *args, compute_device=None, virtual_vram_gb=4.0,
         full_allocation = f"{expert_mode_allocations}#{vram_string}" if expert_mode_allocations or vram_string else ""

-        logging.info(f"[MultiGPU_DisTorch] Full allocation string: {full_allocation}")
+        logger.info(f"[MultiGPU_DisTorch] Full allocation string: {full_allocation}")

         if hasattr(out[0], 'model'):
             model_hash = create_model_hash(out[0], "override")
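
Logging `full_allocation` on every run is what the README's expert-user note relies on: the printed string can be pasted back in to reproduce or hand-tune a distribution. A sketch of how the string is composed (the `vram_string` value is a placeholder, not a documented format):

```python
# The override joins an optional v1 expert string and the computed vram
# string with '#'. Both values below are illustrative placeholders.
expert_mode_allocations = ""             # empty unless the user supplies one
vram_string = "cuda:0,0.33;cpu,0.67"     # hypothetical computed allocation

full_allocation = f"{expert_mode_allocations}#{vram_string}" if expert_mode_allocations or vram_string else ""
print(full_allocation)  # "#cuda:0,0.33;cpu,0.67"
```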
