Merge branch 'lora_reapply_fix'

pollockjj · pollockjj · commit fabbc9e7bedf · 2025-09-10T19:44:08.000-05:00
diff --git a/distorch_2.py b/distorch_2.py
@@ -67,12 +67,22 @@ def new_partially_load(self, device_to, extra_memory=0, full_load=False, force_p
             allocations = safetensor_allocation_store.get(debug_hash)
 
             if not hasattr(self.model, '_distorch_high_precision_loras') or not allocations:
+
                 result = original_partially_load(self, device_to, extra_memory, force_patch_weights)
                 if hasattr(self, '_distorch_block_assignments'):
                     del self._distorch_block_assignments
                 return result
-            
-            # soft_empty_cache_multigpu(logger)
+
+            if not hasattr(self.model, 'current_weight_patches_uuid'):
+                self.model.current_weight_patches_uuid = None
+
+            unpatch_weights = self.model.current_weight_patches_uuid is not None and (self.model.current_weight_patches_uuid != self.patches_uuid or force_patch_weights)
+
+            if unpatch_weights:
+                logger.info(f"[MultiGPU_DisTorch2] Patches changed or forced. Unpatching model.")
+                self.unpatch_model(self.offload_device, unpatch_weights=True)
+
+            self.patch_model(load_weights=False)
 
             mem_counter = 0
 
@@ -83,6 +93,22 @@ def new_partially_load(self, device_to, extra_memory=0, full_load=False, force_p
             loading = self._load_list()
             loading.sort(reverse=True)
             for module_size, module_name, module_object, params in loading:
+                if not unpatch_weights and hasattr(module_object, "comfy_patched_weights") and module_object.comfy_patched_weights == True:
+                    block_target_device = device_assignments['block_assignments'].get(module_name, device_to)
+                    current_module_device = None
+                    try:
+                        if any(p.numel() > 0 for p in module_object.parameters(recurse=False)):
+                           current_module_device = next(module_object.parameters(recurse=False)).device
+                    except StopIteration:
+                        pass
+
+                    if current_module_device is not None and str(current_module_device) != str(block_target_device):
+                        logger.debug(f"[MultiGPU_DisTorch2] Moving already patched {module_name} to {block_target_device}")
+                        module_object.to(block_target_device)
+
+                    mem_counter += module_size
+                    continue
+
                 # Step 1: Write block/tensor to compute device first
                 module_object.to(device_to)
 
@@ -123,6 +149,8 @@ def new_partially_load(self, device_to, extra_memory=0, full_load=False, force_p
                 module_object.comfy_patched_weights = True
                 mem_counter += module_size
 
+            self.model.current_weight_patches_uuid = self.patches_uuid
+
             logger.info(f"[MultiGPU_DisTorch2] DisTorch loading completed. Total memory: {mem_counter / (1024 * 1024):.2f}MB")
 
             return 0