Commit 842ee65

Browse files
committed
Optimize DisTorchV2 loader and FP8 casting logic
- Remove redundant logging and counters in safetensor model patcher
- Add model original dtype detection for better precision handling
- Streamline FP8 casting conditions and remove verbose debug logs
- Improve static allocation parsing and device assignment flow
1 parent: afd8fec

1 file changed: distorch_2.py (1 addition, 11 deletions)
@@ -60,29 +60,20 @@ def new_partially_load(self, device_to, extra_memory=0, full_load=False, force_p
     """Override to use our static device assignments"""
     global safetensor_allocation_store
 
-    # Check if we have a device allocation for this model
     debug_hash = create_safetensor_model_hash(self, "partial_load")
     allocations = safetensor_allocation_store.get(debug_hash)
-
 
     if not hasattr(self.model, '_distorch_high_precision_loras') or not allocations:
         result = original_partially_load(self, device_to, extra_memory, force_patch_weights)
-        # Clean up
         if hasattr(self, '_distorch_block_assignments'):
             del self._distorch_block_assignments
-
-
         return result
 
-    logger.info(f"[MultiGPU_DisTorch2] DisTorchV2 Loader activated")
-
     mem_counter = 0
-    patch_counter = 0
 
     logger.info(f"[MultiGPU_DisTorch2] Using static allocation for model {debug_hash[:8]}")
-    # Parse allocation string and apply static assignment
     device_assignments = analyze_safetensor_loading(self, allocations)
-
+    model_original_dtype = comfy.utils.weight_dtype(self.model.state_dict())
     high_precision_loras = self.model._distorch_high_precision_loras
     loading = self._load_list()
     loading.sort(reverse=True)
@@ -109,7 +100,6 @@ def new_partially_load(self, device_to, extra_memory=0, full_load=False, force_p
         has_patches = weight_key in self.patches or bias_key in self.patches
 
         if not high_precision_loras and block_target_device == "cpu" and has_patches and model_original_dtype in [torch.float8_e4m3fn, torch.float8_e5m2]:
-            logger.info(f"[MultiGPU_DisTorch2] FP8 casting conditions met for {module_name}")
             for param_name, param in module_object.named_parameters():
                 if param.dtype.is_floating_point:
                     cast_data = comfy.float.stochastic_rounding(param.data, torch.float8_e4m3fn)
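
Note on the new gate: comfy.utils.weight_dtype scans a state dict and reports its dominant weight dtype, so the added model_original_dtype check means the cast path only fires for checkpoints that natively shipped in FP8. A minimal sketch of that condition, assuming a ComfyUI environment; should_fp8_cast and its parameters are illustrative names, not part of this patch:

import torch
import comfy.utils  # assumes ComfyUI is importable

FP8_DTYPES = (torch.float8_e4m3fn, torch.float8_e5m2)

def should_fp8_cast(patcher, block_target_device, has_patches):
    """Re-derive the diff's gate: cast only CPU-bound, LoRA-patched
    blocks of a model whose checkpoint is natively FP8."""
    # Dominant dtype across the state dict, computed once up front.
    model_original_dtype = comfy.utils.weight_dtype(patcher.model.state_dict())
    # Opt-out flag set by the DisTorch loader; default to keeping precision.
    high_precision_loras = getattr(patcher.model, '_distorch_high_precision_loras', True)
    return (not high_precision_loras
            and block_target_device == "cpu"
            and has_patches
            and model_original_dtype in FP8_DTYPES)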
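
The cast itself goes through comfy.float.stochastic_rounding rather than a plain .to(). As a self-contained illustration of why, here is a dither-based approximation of stochastic rounding to FP8 (this is not ComfyUI's implementation; stochastic_round_fp8 is a hypothetical name, and the ULP estimate is only approximate at exponent boundaries):

import torch

def stochastic_round_fp8(x, dtype=torch.float8_e4m3fn, generator=None):
    """Round x to an FP8 dtype without the bias of round-to-nearest.

    Adding uniform dither of one FP8 ULP before a nearest cast makes the
    expected result equal x, so tiny LoRA deltas survive on average
    instead of being systematically rounded away.
    """
    mantissa_bits = 3 if dtype == torch.float8_e4m3fn else 2  # e5m2: 2 bits
    x32 = x.float()
    # Spacing of the FP8 grid near x: 2**(exponent - mantissa_bits).
    exponent = torch.floor(torch.log2(x32.abs().clamp_min(1e-30)))
    ulp = torch.exp2(exponent - mantissa_bits)
    # Symmetric dither in [-ulp/2, +ulp/2), then an ordinary nearest cast.
    noise = (torch.rand(x32.shape, generator=generator, device=x32.device) - 0.5) * ulp
    return (x32 + noise).to(dtype)

# A plain cast discards a delta far below the FP8 ULP every time; the
# stochastic version preserves it in expectation:
w = torch.full((10000,), 1.0) + 1e-3
print(w.to(torch.float8_e4m3fn).float().mean())   # 1.0: delta vanished
print(stochastic_round_fp8(w).float().mean())     # ~1.001 on average

This is why the condition above matters: baking LoRA patches into weights that must stay FP8 on the CPU would otherwise quietly drop most of the patch contribution.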
