pollockjj
diff --git a/‎README.md‎
Lines changed: 8 additions & 15 deletions b/‎README.md‎
Lines changed: 8 additions & 15 deletions
diff --git a/‎__init__.py‎
Lines changed: 0 additions & 2 deletions b/‎__init__.py‎
Lines changed: 0 additions & 2 deletions
diff --git a/‎checkpoint_multigpu.py‎
Lines changed: 7 additions & 11 deletions b/‎checkpoint_multigpu.py‎
Lines changed: 7 additions & 11 deletions
diff --git a/‎ci/example_workflows_api/ComfyUI-Florence2 detailed_caption to flux_api.json‎
Lines changed: 245 additions & 0 deletions b/‎ci/example_workflows_api/ComfyUI-Florence2 detailed_caption to flux_api.json‎
Lines changed: 245 additions & 0 deletions
@@ -137,7 +137,7 @@ All MultiGPU nodes available for your install can be found in the "multigpu" cat
 
 ## Node Documentation
 
-Detailed technical documentation is available for all **automatically-detected core MultiGPU and DisTorch2 nodes**, covering 36+ documented nodes with comprehensive parameter details, output specifications, and DisTorch2 allocation guidance where applicable.
+Detailed technical documentation is available for all **automatically detected core MultiGPU and DisTorch2 nodes**, covering 70+ nodes with comprehensive parameter details, output specifications, and DisTorch2 allocation guidance where applicable.
 
 - **To access documentation**: Click on any core MultiGPU or DisTorch2 node in ComfyUI and select "Help" (question mark inside a circle) from the resulting menu
 - **Coverage**: All standard ComfyUI loader nodes (UNet, VAE, Checkpoints, CLIP, ControlNet, Diffusers) plus popular GGUF loader variants
@@ -253,19 +253,6 @@ All workflows have been tested on a 2x 3090 + 1060ti linux setup, a 4070 win 11
   </tr>
 </table>
 
-### Florence2
-
-<table>
-  <tr>
-    <td align="center">
-      <a href="example_workflows/ComfyUI-Florence2%20detailed_caption%20to%20flux.json">
-        <img src="example_workflows/ComfyUI-Florence2%20detailed_caption%20to%20flux.jpg" alt="Florence2 Detailed Caption to FLUX Pipeline" style="max-width:160px; max-height:160px;">
-        <div>Florence2 Detailed Caption to FLUX Pipeline</div>
-      </a>
-    </td>
-  </tr>
-</table>
-
 ### GGUF
 
 <table>
@@ -286,7 +273,7 @@ All workflows have been tested on a 2x 3090 + 1060ti linux setup, a 4070 win 11
   </tr>
 </table>
 
-### HunyuanVideoWrapper
+### HunyuanVideoWrapper / Florence2
 
 <table>
   <tr>
@@ -296,6 +283,12 @@ All workflows have been tested on a 2x 3090 + 1060ti linux setup, a 4070 win 11
         <div>HunyuanVideoWrapper DisTorch (Legacy, Deprecated)</div>
       </a>
     </td>
+    <td align="center">
+      <a href="example_workflows/ComfyUI-Florence2%20detailed_caption%20to%20flux.json">
+        <img src="example_workflows/ComfyUI-Florence2%20detailed_caption%20to%20flux.jpg" alt="Florence2 Detailed Caption to FLUX Pipeline" style="max-width:160px; max-height:160px;">
+        <div>Florence2 Detailed Caption to FLUX Pipeline</div>
+      </a>
+    </td>
   </tr>
 </table>
 
 
@@ -269,8 +269,6 @@ def unet_offload_device_patched():
     override_class_with_distorch_safetensor_v2_clip_no_device,
 )
 from .distorch_2 import (
-    safetensor_allocation_store,
-    create_safetensor_model_hash,
     register_patched_safetensor_modelpatcher,
     analyze_safetensor_loading,
     calculate_safetensor_vvram_allocation,
 
@@ -9,7 +9,7 @@
 from comfy.sd import VAE, CLIP
 from .device_utils import get_device_list, soft_empty_cache_multigpu
 from .model_management_mgpu import multigpu_memory_log
-from .distorch_2 import safetensor_allocation_store, safetensor_settings_store, create_safetensor_model_hash, register_patched_safetensor_modelpatcher
+from .distorch_2 import register_patched_safetensor_modelpatcher
 
 logger = logging.getLogger("MultiGPU")
 
@@ -108,12 +108,10 @@ def patched_load_state_dict_guess_config(sd, output_vae=True, output_clip=True,
 
             if distorch_config and 'unet_allocation' in distorch_config:
                 register_patched_safetensor_modelpatcher()
-                model_hash = create_safetensor_model_hash(model_patcher, "checkpoint_loader_unet")
-                safetensor_allocation_store[model_hash] = distorch_config['unet_allocation']
-                safetensor_settings_store[model_hash] = distorch_config.get('unet_settings','')
-                model.is_distorch = True
+                inner_model = model_patcher.model
+                inner_model._distorch_v2_meta = {"full_allocation": distorch_config['unet_allocation']}
+                logger.info(f"[CHECKPOINT_META] UNET inner_model id=0x{id(inner_model):x}")
                 model._distorch_high_precision_loras = distorch_config.get('high_precision_loras', True)
-                logger.mgpu_mm_log(f"Stored DisTorch2 config for UNet (hash {model_hash[:8]}): {distorch_config['unet_allocation']}")
 
             model.load_model_weights(sd, diffusion_model_prefix)
             multigpu_memory_log(f"unet:{config_hash[:8]}", "post-weights")
@@ -145,12 +143,10 @@ def patched_load_state_dict_guess_config(sd, output_vae=True, output_clip=True,
                     if distorch_config and 'clip_allocation' in distorch_config:
                          if hasattr(clip, 'patcher'):
                             register_patched_safetensor_modelpatcher()
-                            clip_hash = create_safetensor_model_hash(clip.patcher, "checkpoint_loader_clip")
-                            safetensor_allocation_store[clip_hash] = distorch_config['clip_allocation']
-                            safetensor_settings_store[clip_hash] = distorch_config.get('clip_settings','')
-                            clip.patcher.model.is_distorch = True
+                            inner_clip = clip.patcher.model
+                            inner_clip._distorch_v2_meta = {"full_allocation": distorch_config['clip_allocation']}
+                            logger.info(f"[CHECKPOINT_META] CLIP inner_model id=0x{id(inner_clip):x}")
                             clip.patcher.model._distorch_high_precision_loras = distorch_config.get('high_precision_loras', True)
-                            logger.info(f"Stored DisTorch2 config for CLIP (hash {clip_hash[:8]}): {distorch_config['clip_allocation']}")
 
                     m, u = clip.load_sd(clip_sd, full_model=True) # This respects the patched text_encoder_device
                     if len(m) > 0: logger.warning(f"CLIP missing keys: {m}")
 
@@ -0,0 +1,245 @@
+{
+  "6": {
+    "inputs": {
+      "text": [
+        "47",
+        2
+      ],
+      "clip": [
+        "39",
+        0
+      ]
+    },
+    "class_type": "CLIPTextEncode",
+    "_meta": {
+      "title": "CLIP Text Encode (Positive Prompt)"
+    }
+  },
+  "8": {
+    "inputs": {
+      "samples": [
+        "13",
+        0
+      ],
+      "vae": [
+        "40",
+        0
+      ]
+    },
+    "class_type": "VAEDecode",
+    "_meta": {
+      "title": "VAE Decode"
+    }
+  },
+  "9": {
+    "inputs": {
+      "filename_prefix": "MultiGPU",
+      "images": [
+        "8",
+        0
+      ]
+    },
+    "class_type": "SaveImage",
+    "_meta": {
+      "title": "Save Image"
+    }
+  },
+  "13": {
+    "inputs": {
+      "noise": [
+        "25",
+        0
+      ],
+      "guider": [
+        "22",
+        0
+      ],
+      "sampler": [
+        "16",
+        0
+      ],
+      "sigmas": [
+        "17",
+        0
+      ],
+      "latent_image": [
+        "27",
+        0
+      ]
+    },
+    "class_type": "SamplerCustomAdvanced",
+    "_meta": {
+      "title": "SamplerCustomAdvanced"
+    }
+  },
+  "16": {
+    "inputs": {
+      "sampler_name": "euler"
+    },
+    "class_type": "KSamplerSelect",
+    "_meta": {
+      "title": "KSamplerSelect"
+    }
+  },
+  "17": {
+    "inputs": {
+      "scheduler": "simple",
+      "steps": 20,
+      "denoise": 1,
+      "model": [
+        "30",
+        0
+      ]
+    },
+    "class_type": "BasicScheduler",
+    "_meta": {
+      "title": "BasicScheduler"
+    }
+  },
+  "22": {
+    "inputs": {
+      "model": [
+        "30",
+        0
+      ],
+      "conditioning": [
+        "26",
+        0
+      ]
+    },
+    "class_type": "BasicGuider",
+    "_meta": {
+      "title": "BasicGuider"
+    }
+  },
+  "25": {
+    "inputs": {
+      "noise_seed": 66527593966288
+    },
+    "class_type": "RandomNoise",
+    "_meta": {
+      "title": "RandomNoise"
+    }
+  },
+  "26": {
+    "inputs": {
+      "guidance": 3.5,
+      "conditioning": [
+        "6",
+        0
+      ]
+    },
+    "class_type": "FluxGuidance",
+    "_meta": {
+      "title": "FluxGuidance"
+    }
+  },
+  "27": {
+    "inputs": {
+      "width": 1024,
+      "height": 1024,
+      "batch_size": 1
+    },
+    "class_type": "EmptySD3LatentImage",
+    "_meta": {
+      "title": "EmptySD3LatentImage"
+    }
+  },
+  "30": {
+    "inputs": {
+      "max_shift": 1.15,
+      "base_shift": 0.5,
+      "width": 1024,
+      "height": 1024,
+      "model": [
+        "38",
+        0
+      ]
+    },
+    "class_type": "ModelSamplingFlux",
+    "_meta": {
+      "title": "ModelSamplingFlux"
+    }
+  },
+  "38": {
+    "inputs": {
+      "unet_name": "flux1-dev-fp8.safetensors",
+      "weight_dtype": "default",
+      "device": "cuda:0"
+    },
+    "class_type": "UNETLoaderMultiGPU",
+    "_meta": {
+      "title": "UNETLoaderMultiGPU"
+    }
+  },
+  "39": {
+    "inputs": {
+      "clip_name1": "t5xxl_fp8_e4m3fn_scaled.safetensors",
+      "clip_name2": "clip_l.safetensors",
+      "type": "flux",
+      "device": "cpu"
+    },
+    "class_type": "DualCLIPLoaderMultiGPU",
+    "_meta": {
+      "title": "DualCLIPLoaderMultiGPU"
+    }
+  },
+  "40": {
+    "inputs": {
+      "vae_name": "ae.safetensors",
+      "device": "cuda:1"
+    },
+    "class_type": "VAELoaderMultiGPU",
+    "_meta": {
+      "title": "VAELoaderMultiGPU"
+    }
+  },
+  "44": {
+    "inputs": {
+      "image": "ComfyUI-Florence2 detailed_caption to flux.jpg"
+    },
+    "class_type": "LoadImage",
+    "_meta": {
+      "title": "Load Image"
+    }
+  },
+  "45": {
+    "inputs": {
+      "model": "MiaoshouAI/Florence-2-large-PromptGen-v2.0",
+      "precision": "fp16",
+      "attention": "sdpa",
+      "convert_to_safetensors": "cuda:1",
+      "device": "cuda:1",
+      "offload_device": "cpu"
+    },
+    "class_type": "DownloadAndLoadFlorence2ModelMultiGPU",
+    "_meta": {
+      "title": "DownloadAndLoadFlorence2ModelMultiGPU"
+    }
+  },
+  "47": {
+    "inputs": {
+      "text_input": "",
+      "task": "detailed_caption",
+      "fill_mask": true,
+      "keep_model_loaded": true,
+      "max_new_tokens": 4096,
+      "num_beams": 3,
+      "do_sample": true,
+      "output_mask_select": "",
+      "seed": 61577449829591,
+      "image": [
+        "44",
+        0
+      ],
+      "florence2_model": [
+        "45",
+        0
+      ]
+    },
+    "class_type": "Florence2Run",
+    "_meta": {
+      "title": "Florence2Run"
+    }
+  }
+}
Original file line number	Diff line number	Diff line change
`@@ -269,8 +269,6 @@ def unet_offload_device_patched():`
`269`	`269`	`override_class_with_distorch_safetensor_v2_clip_no_device,`
`270`	`270`	`)`
`271`	`271`	`from .distorch_2 import (`
`272`		`- safetensor_allocation_store,`
`273`		`- create_safetensor_model_hash,`
`274`	`272`	`register_patched_safetensor_modelpatcher,`
`275`	`273`	`analyze_safetensor_loading,`
`276`	`274`	`calculate_safetensor_vvram_allocation,`