Skip to content

Commit 04013c3

Browse files
committed
feat: add WanVideoTextEncodeCached and WanVideoTextEncodeSingle classes for enhanced text encoding functionality
1 parent d300c12 commit 04013c3

2 files changed

Lines changed: 97 additions & 29 deletions

File tree

__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,8 @@ def text_encoder_device_patched():
213213
from .wanvideo import (
214214
LoadWanVideoT5TextEncoder,
215215
WanVideoTextEncode,
216+
WanVideoTextEncodeCached,
217+
WanVideoTextEncodeSingle,
216218
WanVideoVAELoader,
217219
WanVideoTinyVAELoader,
218220
WanVideoBlockSwap,
@@ -358,6 +360,8 @@ def register_and_count(module_names, node_map):
358360
wanvideo_nodes = {
359361
"LoadWanVideoT5TextEncoderMultiGPU": LoadWanVideoT5TextEncoder,
360362
"WanVideoTextEncodeMultiGPU": WanVideoTextEncode,
363+
"WanVideoTextEncodeCachedMultiGPU": WanVideoTextEncodeCached,
364+
"WanVideoTextEncodeSingleMultiGPU": WanVideoTextEncodeSingle,
361365
"WanVideoVAELoaderMultiGPU": WanVideoVAELoader,
362366
"WanVideoTinyVAELoaderMultiGPU": WanVideoTinyVAELoader,
363367
"WanVideoBlockSwapMultiGPU": WanVideoBlockSwap,

wanvideo.py

Lines changed: 93 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,3 @@
1-
"""WanVideoWrapper integration helpers.
2-
3-
For the current progress checklist and outstanding tasks, see
4-
`.github/instructions/ComfyUI-MultiGPU.instructions.md`.
5-
"""
6-
7-
8-
9-
101
import logging
112
import torch
123
import sys
@@ -24,25 +15,6 @@
2415
import os
2516
import importlib.util
2617

27-
scheduler_list = [
28-
"unipc", "unipc/beta",
29-
"dpm++", "dpm++/beta",
30-
"dpm++_sde", "dpm++_sde/beta",
31-
"euler", "euler/beta",
32-
"deis",
33-
"lcm", "lcm/beta",
34-
"res_multistep",
35-
"flowmatch_causvid",
36-
"flowmatch_distill",
37-
"flowmatch_pusa",
38-
"multitalk",
39-
"sa_ode_stable"
40-
]
41-
42-
rope_functions = ["default", "comfy", "comfy_chunked"]
43-
44-
45-
4618
logger = logging.getLogger("MultiGPU")
4719

4820
class LoadWanVideoT5TextEncoder:
@@ -90,6 +62,55 @@ def loadmodel(self, model_name, precision, device=None, quantization="disabled")
9062
return text_encoder, device
9163

9264

65+
class WanVideoTextEncodeCached:
    """MultiGPU wrapper for the ``WanVideoTextEncodeCached`` node.

    Exposes the inputs below plus a ``load_device`` selector, makes the
    selected device current via ``set_current_device`` and then delegates the
    encoding to the node class registered as ``"WanVideoTextEncodeCached"``
    in ``NODE_CLASS_MAPPINGS``.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Return the ComfyUI input specification for this node."""
        devices = get_device_list()
        # Default to the second listed device when more than one is available,
        # otherwise fall back to the only entry.
        default_device = devices[1] if len(devices) > 1 else devices[0]
        return {
            "required": {
                "model_name": (folder_paths.get_filename_list("text_encoders"), {"tooltip": "These models are loaded from 'ComfyUI/models/text_encoders'"}),
                "precision": (["fp32", "bf16"], {"default": "bf16"}),
                "positive_prompt": ("STRING", {"default": "", "multiline": True}),
                "negative_prompt": ("STRING", {"default": "", "multiline": True}),
                "quantization": (['disabled', 'fp8_e4m3fn'], {"default": 'disabled', "tooltip": "optional quantization method"}),
                "use_disk_cache": ("BOOLEAN", {"default": True, "tooltip": "Cache the text embeddings to disk for faster re-use, under the custom_nodes/ComfyUI-WanVideoWrapper/text_embed_cache directory"}),
                "load_device": (devices, {"default": default_device}),
            },
            "optional": {
                "extender_args": ("WANVIDEOPROMPTEXTENDER_ARGS", {"tooltip": "Use this node to extend the prompt with additional text."}),
            },
        }

    RETURN_TYPES = ("WANVIDEOTEXTEMBEDS", "WANVIDEOTEXTEMBEDS", "STRING")
    RETURN_NAMES = ("text_embeds", "negative_text_embeds", "positive_prompt")
    OUTPUT_TOOLTIPS = ("The text embeddings for both prompts", "The text embeddings for the negative prompt only (for NAG)", "Positive prompt to display prompt extender results")
    FUNCTION = "process"
    CATEGORY = "multigpu/WanVideoWrapper"
    DESCRIPTION = """Encodes text prompts into text embeddings. This node loads and completely unloads the T5 after done, leaving no VRAM or RAM imprint."""

    def process(self, model_name, precision, positive_prompt, negative_prompt, quantization='disabled', use_disk_cache=True, load_device=None, extender_args=None):
        """Select ``load_device`` and delegate encoding to the wrapped node.

        Args:
            model_name: Text encoder file name, forwarded unchanged.
            precision: Precision choice, forwarded unchanged.
            positive_prompt: Positive prompt text, forwarded unchanged.
            negative_prompt: Negative prompt text, forwarded unchanged.
            quantization: Quantization choice, forwarded unchanged.
            use_disk_cache: Disk-cache flag, forwarded unchanged.
            load_device: Device identifier handed to ``set_current_device``;
                when ``None`` the current device is left untouched.
            extender_args: Optional prompt-extender arguments, forwarded
                unchanged.

        Returns:
            A 3-tuple ``(text_embeds, negative_text_embeds, positive_prompt)``
            exactly as produced by the wrapped node's ``process``.
        """
        from . import set_current_device

        if load_device is not None:
            set_current_device(load_device)

        # The wrapped node only receives "cpu" or "gpu"; any non-"cpu"
        # selection (including None) is passed on as "gpu".
        device = "cpu" if load_device == "cpu" else "gpu"

        # The tag matches the registered node name
        # "WanVideoTextEncodeCachedMultiGPU" so the lines can be found by it.
        logger.info("[MultiGPU WanVideoWrapper][WanVideoTextEncodeCachedMultiGPU] current_device set to: %s", load_device)
        logger.info("[MultiGPU WanVideoWrapper][WanVideoTextEncodeCachedMultiGPU] device set to: %s", device)

        # NOTE(review): NODE_CLASS_MAPPINGS is expected to already be in module
        # scope; its import is outside the portion of the file visible here.
        original_encoder = NODE_CLASS_MAPPINGS["WanVideoTextEncodeCached"]()
        # Arguments are passed positionally, in the order the wrapped node is
        # called with in the original code.
        text_embeds, negative_text_embeds, positive_prompt_out = original_encoder.process(
            model_name,
            precision,
            positive_prompt,
            negative_prompt,
            quantization,
            use_disk_cache,
            device,
            extender_args,
        )

        return text_embeds, negative_text_embeds, positive_prompt_out
112+
113+
93114
class WanVideoTextEncode:
94115
@classmethod
95116
def INPUT_TYPES(s):
@@ -103,7 +124,6 @@ def INPUT_TYPES(s):
103124
"force_offload": ("BOOLEAN", {"default": True}),
104125
"model_to_offload": ("WANVIDEOMODEL", {"tooltip": "Model to move to offload_device before encoding"}),
105126
"use_disk_cache": ("BOOLEAN", {"default": False, "tooltip": "Cache the text embeddings to disk for faster re-use, under the custom_nodes/ComfyUI-WanVideoWrapper/text_embed_cache directory"}),
106-
#"device": (["gpu", "cpu"], {"default": "gpu", "tooltip": "Device to run the text encoding on."}),
107127
}
108128
}
109129

@@ -141,6 +161,50 @@ def parse_prompt_weights(self, prompt):
141161
original_parser = NODE_CLASS_MAPPINGS["WanVideoTextEncode"]()
142162
return original_parser.parse_prompt_weights(prompt)
143163

164+
class WanVideoTextEncodeSingle:
    """MultiGPU wrapper for the ``WanVideoTextEncodeSingle`` node.

    Accepts an optional ``load_device``, makes it current via
    ``set_current_device`` and then delegates the encoding of a single prompt
    to the node class registered as ``"WanVideoTextEncodeSingle"`` in
    ``NODE_CLASS_MAPPINGS``.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Return the ComfyUI input specification for this node."""
        return {
            "required": {
                "prompt": ("STRING", {"default": "", "multiline": True}),
            },
            "optional": {
                "t5": ("WANTEXTENCODER",),
                "load_device": ("MULTIGPUDEVICE",),
                "force_offload": ("BOOLEAN", {"default": True}),
                "model_to_offload": ("WANVIDEOMODEL", {"tooltip": "Model to move to offload_device before encoding"}),
                "use_disk_cache": ("BOOLEAN", {"default": False, "tooltip": "Cache the text embeddings to disk for faster re-use, under the custom_nodes/ComfyUI-WanVideoWrapper/text_embed_cache directory"}),
            },
        }

    RETURN_TYPES = ("WANVIDEOTEXTEMBEDS", )
    RETURN_NAMES = ("text_embeds",)
    FUNCTION = "process"
    CATEGORY = "multigpu/WanVideoWrapper"
    DESCRIPTION = "Encodes text prompt into text embedding."

    def process(self, prompt, t5=None, load_device=None, force_offload=True, model_to_offload=None, use_disk_cache=False):
        """Select ``load_device`` and delegate encoding to the wrapped node.

        Args:
            prompt: Prompt text, forwarded unchanged.
            t5: Optional indexable value whose first element is used as the
                text encoder; ``None`` forwards ``None`` as the encoder.
            load_device: Device identifier handed to ``set_current_device``;
                when ``None`` the current device is left untouched.
            force_offload: Offload flag, forwarded unchanged.
            model_to_offload: Optional model, forwarded unchanged.
            use_disk_cache: Disk-cache flag, forwarded unchanged.

        Returns:
            Whatever the wrapped node's ``process`` returns, unmodified.
        """
        from . import set_current_device

        if load_device is not None:
            set_current_device(load_device)

        # The wrapped node only receives "cpu" or "gpu"; any non-"cpu"
        # selection (including None) is passed on as "gpu".
        device = "cpu" if load_device == "cpu" else "gpu"

        # Only the first element of `t5` is the encoder itself.
        text_encoder = t5[0] if t5 is not None else None

        # The tag matches the registered node name
        # "WanVideoTextEncodeSingleMultiGPU" so the lines can be found by it.
        logger.info("[MultiGPU WanVideoWrapper][WanVideoTextEncodeSingleMultiGPU] current_device set to: %s", load_device)
        logger.info("[MultiGPU WanVideoWrapper][WanVideoTextEncodeSingleMultiGPU] device set to: %s", device)

        # NOTE(review): NODE_CLASS_MAPPINGS is expected to already be in module
        # scope; its import is outside the portion of the file visible here.
        original_encoder = NODE_CLASS_MAPPINGS["WanVideoTextEncodeSingle"]()

        # The result is forwarded as-is. The previous `return (x)` only looked
        # like a tuple: parentheses without a trailing comma do not build one.
        # NOTE(review): presumably the wrapped process() already returns the
        # 1-tuple matching RETURN_TYPES, so it is deliberately not re-wrapped
        # here - confirm against the wrapped node.
        return original_encoder.process(
            prompt,
            text_encoder,
            force_offload,
            model_to_offload,
            use_disk_cache,
            device,
        )
207+
144208
class WanVideoVAELoader:
145209
@classmethod
146210
def INPUT_TYPES(s):

0 commit comments

Comments
 (0)