@@ -21,11 +21,11 @@
 def patch_load_state_dict_guess_config():
     """Monkey patch comfy.sd.load_state_dict_guess_config with MultiGPU-aware checkpoint loading."""
     global original_load_state_dict_guess_config
-
+
     if original_load_state_dict_guess_config is not None:
         logger.debug("[MultiGPU Checkpoint] load_state_dict_guess_config is already patched.")
         return
-
+
     logger.info("[MultiGPU Core Patching] Patching comfy.sd.load_state_dict_guess_config for advanced MultiGPU loading.")
     original_load_state_dict_guess_config = comfy.sd.load_state_dict_guess_config
     comfy.sd.load_state_dict_guess_config = patched_load_state_dict_guess_config
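The guard above makes the patch idempotent: the original function is stashed once in a module-level global, so running a loader node repeatedly never wraps the wrapper. A minimal sketch of the reverse operation, assuming the same global; no unpatch helper appears in this diff, so the name below is hypothetical:

    def unpatch_load_state_dict_guess_config():
        # Hypothetical helper: restore ComfyUI's stock loader.
        global original_load_state_dict_guess_config
        if original_load_state_dict_guess_config is not None:
            comfy.sd.load_state_dict_guess_config = original_load_state_dict_guess_config
            original_load_state_dict_guess_config = None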
@@ -35,7 +35,7 @@ def patched_load_state_dict_guess_config(sd, output_vae=True, output_clip=True,
                                          te_model_options={}, metadata=None):
     """Patched checkpoint loader with MultiGPU and DisTorch2 device placement support."""
     from . import set_current_device, set_current_text_encoder_device, get_current_device, get_current_text_encoder_device
-
+
     sd_size = sum(p.numel() for p in sd.values() if hasattr(p, 'numel'))
     config_hash = str(sd_size)
     device_config = checkpoint_device_config.get(config_hash)
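This lookup is the rendezvous point with the loader nodes further down: each node computes the same key (the total element count of the state dict, as a string) and stores its device map in checkpoint_device_config before delegating to the stock loader. A sketch of the handshake with illustrative devices; note that two different checkpoints with an identical total parameter count would collide on this key:

    # Node side, before calling CheckpointLoaderSimple (illustrative values):
    checkpoint_device_config[config_hash] = {
        'unet_device': 'cuda:0', 'clip_device': 'cuda:1', 'vae_device': 'cpu'}
    # Patched-loader side, as above: recompute the key from sd and fetch the map.
    device_config = checkpoint_device_config.get(config_hash)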
@@ -53,7 +53,7 @@ def patched_load_state_dict_guess_config(sd, output_vae=True, output_clip=True,
     vae = None
     model = None
     model_patcher = None
-
+
     # Capture the current devices at runtime so we can restore them after loading
     original_main_device = get_current_device()
     original_clip_device = get_current_text_encoder_device()
@@ -68,7 +68,7 @@ def patched_load_state_dict_guess_config(sd, output_vae=True, output_clip=True,
         sd, metadata = comfy.utils.convert_old_quants(sd, diffusion_model_prefix, metadata=metadata)
 
         model_config = comfy.model_detection.model_config_from_unet(sd, diffusion_model_prefix, metadata=metadata)
-
+
         if model_config is None:
             logger.warning("[MultiGPU] Warning: Not a standard checkpoint file. Trying to load as diffusion model only.")
             # Simplified fallback for non-checkpoints
@@ -83,13 +83,13 @@ def patched_load_state_dict_guess_config(sd, output_vae=True, output_clip=True,
         unet_weight_dtype = list(model_config.supported_inference_dtypes)
         if model_config.scaled_fp8 is not None:
             weight_dtype = None
-
+
         if custom_operations is not None:
             model_config.custom_operations = custom_operations
         unet_dtype = model_options.get("dtype", model_options.get("weight_dtype", None))
         if unet_dtype is None:
             unet_dtype = mm.unet_dtype(model_params=parameters, supported_dtypes=unet_weight_dtype, weight_dtype=weight_dtype)
-
+
         unet_compute_device = device_config.get('unet_device', original_main_device)
         if model_config.scaled_fp8 is not None:
             manual_cast_dtype = mm.unet_manual_cast(None, torch.device(unet_compute_device), model_config.supported_inference_dtypes)
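The manual-cast decision depends on the capabilities of the resolved compute device, not on the device ComfyUI would pick by default, which is why the stored unet_device is read back out of device_config before the call. A rough illustration of that dependency, reusing the call shape above with made-up devices:

    # Illustrative only: the same checkpoint can resolve to different cast
    # behavior depending on which device it is placed on.
    for dev in ('cuda:0', 'cpu'):
        cast = mm.unet_manual_cast(None, torch.device(dev), model_config.supported_inference_dtypes)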
@@ -104,7 +104,7 @@ def patched_load_state_dict_guess_config(sd, output_vae=True, output_clip=True,
 
         if output_model:
             unet_compute_device = device_config.get('unet_device', original_main_device)
-            set_current_device(unet_compute_device)
+            set_current_device(unet_compute_device)
             inital_load_device = mm.unet_inital_load_device(parameters, unet_dtype)
 
             multigpu_memory_log(f"unet:{config_hash[:8]}", "pre-load")
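Each component load is bracketed by multigpu_memory_log calls tagged with the first eight characters of the config hash, so per-device memory deltas from interleaved loads can be told apart in the log. The pairing as it appears for the UNet (the post-load call sits in unchanged context below this hunk, so its exact tag is assumed from the CLIP counterpart shown later):

    multigpu_memory_log(f"unet:{config_hash[:8]}", "pre-load")
    # ... UNet weights are loaded here ...
    multigpu_memory_log(f"unet:{config_hash[:8]}", "post-load")  # assumed counterpart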
@@ -131,7 +131,7 @@ def patched_load_state_dict_guess_config(sd, output_vae=True, output_clip=True,
             vae_target_device = torch.device(device_config.get('vae_device', original_main_device))
             set_current_device(vae_target_device)  # Use main device context for VAE
             multigpu_memory_log(f"vae:{config_hash[:8]}", "pre-load")
-
+
             vae_sd = comfy.utils.state_dict_prefix_replace(sd, {k: "" for k in model_config.vae_key_prefix}, filter_keys=True)
             vae_sd = model_config.process_vae_state_dict(vae_sd)
             vae = VAE(sd=vae_sd, metadata=metadata)
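The VAE is constructed without an explicit device, so it falls back to the device-resolution path this extension patches; per the comment above, the code therefore points the main device context at vae_device just for this call. The pattern in isolation, using the setter imported at the top of the function:

    set_current_device(vae_target_device)   # VAE() will now resolve to this device
    vae = VAE(sd=vae_sd, metadata=metadata)
    # The original device is restored later, in the function's finally block.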
@@ -151,7 +151,7 @@ def patched_load_state_dict_guess_config(sd, output_vae=True, output_clip=True,
                 for pref in scaled_fp8_list:
                     skip = skip or k.startswith(pref)
                 if not skip:
-                    out_sd[k] = sd[k]
+                    out_sd[k] = sd[k]
 
             for pref in scaled_fp8_list:
                 quant_sd, qmetadata = comfy.utils.convert_old_quants(sd, pref, metadata={})
@@ -161,7 +161,7 @@ def patched_load_state_dict_guess_config(sd, output_vae=True, output_clip=True,
 
             clip_target_device = device_config.get('clip_device', original_clip_device)
             set_current_text_encoder_device(clip_target_device)
-
+
             clip_target = model_config.clip_target(state_dict=sd)
             if clip_target is not None:
                 clip_sd = model_config.process_clip_state_dict(sd)
@@ -182,15 +182,17 @@ def patched_load_state_dict_guess_config(sd, output_vae=True, output_clip=True,
                         clip.patcher.model._distorch_high_precision_loras = distorch_config.get('high_precision_loras', True)
 
                     m, u = clip.load_sd(clip_sd, full_model=True)  # This respects the patched text_encoder_device
-                    if len(m) > 0: logger.warning(f"CLIP missing keys: {m}")
-                    if len(u) > 0: logger.debug(f"CLIP unexpected keys: {u}")
+                    if len(m) > 0:
+                        logger.warning(f"CLIP missing keys: {m}")
+                    if len(u) > 0:
+                        logger.debug(f"CLIP unexpected keys: {u}")
                     logger.info("CLIP Loaded.")
                     multigpu_memory_log(f"clip:{config_hash[:8]}", "post-load")
                 else:
                     logger.warning("No CLIP/text encoder weights in checkpoint.")
             else:
                 logger.warning("CLIP target not found in model config.")
-
+
     finally:
         set_current_device(original_main_device)
         set_current_text_encoder_device(original_clip_device)
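The try/finally shape is what keeps the patch safe under failure: whatever happens while loading the UNet, VAE, or CLIP, the device contexts captured at entry are always restored, so a failed load cannot leak a device override into later nodes. The skeleton of the control flow above, condensed:

    original_main_device = get_current_device()
    original_clip_device = get_current_text_encoder_device()
    try:
        pass  # UNet, VAE, and CLIP each load under their own device context
    finally:
        set_current_device(original_main_device)
        set_current_text_encoder_device(original_clip_device)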
@@ -206,7 +208,7 @@ def INPUT_TYPES(s):
         import folder_paths
         devices = get_device_list()
         default_device = devices[1] if len(devices) > 1 else devices[0]
-
+
         return {
             "required": {
                 "ckpt_name": (folder_paths.get_filename_list("checkpoints"), ),
@@ -215,27 +217,27 @@ def INPUT_TYPES(s):
215217 "vae_device" : (devices , {"default" : default_device }),
216218 }
217219 }
218-
220+
219221 RETURN_TYPES = ("MODEL" , "CLIP" , "VAE" )
220222 FUNCTION = "load_checkpoint"
221223 CATEGORY = "multigpu"
222224 TITLE = "Checkpoint Loader Advanced (MultiGPU)"
223-
225+
224226 def load_checkpoint (self , ckpt_name , unet_device , clip_device , vae_device ):
225227 patch_load_state_dict_guess_config ()
226-
228+
227229 import folder_paths
228230 import comfy .utils
229-
231+
230232 ckpt_path = folder_paths .get_full_path ("checkpoints" , ckpt_name )
231233 sd = comfy .utils .load_torch_file (ckpt_path )
232234 sd_size = sum (p .numel () for p in sd .values () if hasattr (p , 'numel' ))
233235 config_hash = str (sd_size )
234-
236+
235237 checkpoint_device_config [config_hash ] = {
236238 'unet_device' : unet_device , 'clip_device' : clip_device , 'vae_device' : vae_device
237239 }
238-
240+
239241 # Load using standard loader, our patch will intercept
240242 from nodes import CheckpointLoaderSimple
241243 return CheckpointLoaderSimple ().load_checkpoint (ckpt_name )
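End to end, the node is a thin front: it records the device map under the parameter-count key, then lets the stock CheckpointLoaderSimple run so the patched loader applies the map. A hypothetical direct call outside the graph UI (the class name and filename here are assumed, not taken from this diff):

    loader = CheckpointLoaderAdvancedMultiGPU()   # hypothetical class name
    model, clip, vae = loader.load_checkpoint(
        'sd_xl_base_1.0.safetensors',             # assumed checkpoint filename
        unet_device='cuda:0', clip_device='cuda:1', vae_device='cpu')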
@@ -247,7 +249,7 @@ def INPUT_TYPES(s):
         import folder_paths
         devices = get_device_list()
         compute_device = devices[1] if len(devices) > 1 else devices[0]
-
+
         return {
             "required": {
                 "ckpt_name": (folder_paths.get_filename_list("checkpoints"), ),
@@ -265,18 +267,18 @@ def INPUT_TYPES(s):
265267 "eject_models" : ("BOOLEAN" , {"default" : True }),
266268 }
267269 }
268-
270+
269271 RETURN_TYPES = ("MODEL" , "CLIP" , "VAE" )
270272 FUNCTION = "load_checkpoint"
271273 CATEGORY = "multigpu/distorch_2"
272274 TITLE = "Checkpoint Loader Advanced (DisTorch2)"
273-
275+
274276 def load_checkpoint (self , ckpt_name , unet_compute_device , unet_virtual_vram_gb , unet_donor_device ,
275277 clip_compute_device , clip_virtual_vram_gb , clip_donor_device , vae_device ,
276278 unet_expert_mode_allocations = "" , clip_expert_mode_allocations = "" , high_precision_loras = True , eject_models = True ):
277-
279+
278280 if eject_models :
279- logger .mgpu_mm_log (f "[EJECT_MODELS_SETUP] eject_models=True - marking all loaded models for eviction" )
281+ logger .mgpu_mm_log ("[EJECT_MODELS_SETUP] eject_models=True - marking all loaded models for eviction" )
280282 ejection_count = 0
281283 for i , lm in enumerate (mm .current_loaded_models ):
282284 model_name = type (getattr (lm .model , 'model' , lm .model )).__name__ if lm .model else 'Unknown'
@@ -289,17 +291,17 @@ def load_checkpoint(self, ckpt_name, unet_compute_device, unet_virtual_vram_gb,
                     logger.mgpu_mm_log(f"[EJECT_MARKED] Model {i}: {model_name} (direct patcher) → marked for eviction")
                     ejection_count += 1
             logger.mgpu_mm_log(f"[EJECT_MODELS_SETUP_COMPLETE] Marked {ejection_count} models for Comfy Core eviction during load_models_gpu")
-
-        patch_load_state_dict_guess_config()
-
+
+        patch_load_state_dict_guess_config()
+
         import folder_paths
         import comfy.utils
-
+
         ckpt_path = folder_paths.get_full_path("checkpoints", ckpt_name)
         sd = comfy.utils.load_torch_file(ckpt_path)
         sd_size = sum(p.numel() for p in sd.values() if hasattr(p, 'numel'))
         config_hash = str(sd_size)
-
+
         checkpoint_device_config[config_hash] = {
             'unet_device': unet_compute_device,
             'clip_device': clip_compute_device,
@@ -312,7 +314,7 @@ def load_checkpoint(self, ckpt_name, unet_compute_device, unet_virtual_vram_gb,
         elif unet_expert_mode_allocations:
             unet_vram_str = unet_compute_device
         unet_alloc = f"{unet_expert_mode_allocations}#{unet_vram_str}" if unet_expert_mode_allocations or unet_vram_str else ""
-
+
         clip_vram_str = ""
         if clip_virtual_vram_gb > 0:
             clip_vram_str = f"{clip_compute_device};{clip_virtual_vram_gb};{clip_donor_device}"
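Both virtual-VRAM strings use the same compact grammar, 'compute_device;virtual_vram_gb;donor_device', and unet_alloc above shows how expert-mode allocations are joined on with '#'. A worked example with illustrative values:

    clip_compute_device, clip_virtual_vram_gb, clip_donor_device = 'cuda:1', 4.0, 'cpu'
    clip_vram_str = f"{clip_compute_device};{clip_virtual_vram_gb};{clip_donor_device}"
    # -> 'cuda:1;4.0;cpu'; with expert allocations this becomes '<expert>#cuda:1;4.0;cpu'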
@@ -327,6 +329,6 @@ def load_checkpoint(self, ckpt_name, unet_compute_device, unet_virtual_vram_gb,
             'unet_settings': hashlib.sha256(f"{unet_alloc}{high_precision_loras}".encode()).hexdigest(),
             'clip_settings': hashlib.sha256(f"{clip_alloc}{high_precision_loras}".encode()).hexdigest(),
         }
-
+
         from nodes import CheckpointLoaderSimple
         return CheckpointLoaderSimple().load_checkpoint(ckpt_name)
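The sha256 digests fingerprint each component's DisTorch settings (allocation string plus the precision flag), which gives the patched loader a cheap equality check for whether a cached model was loaded under the same configuration; the consuming side is outside these hunks, so that purpose is inferred. The digest construction in isolation:

    import hashlib
    unet_alloc, high_precision_loras = 'cuda:0;8.0;cpu', True  # illustrative values
    digest = hashlib.sha256(f"{unet_alloc}{high_precision_loras}".encode()).hexdigest()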