diff --git a/docs/Model Support.md b/docs/Model Support.md index 01db974c0..d6705bfa5 100644 --- a/docs/Model Support.md +++ b/docs/Model Support.md @@ -21,6 +21,7 @@ [ERNIE](#ernie) | DiT | 2026 | Baidu | 8B | Minimal | Modern, intelligent, good quality, fast | [HiDream O1](#hidream-o1) | "Pixel UiT" | 2026 | HiDream | 8B | Minimal | Modern, intelligent, fast, decent quality | [Lens](#lens) | MMDiT | 2026 | Microsoft | 4B | Minimal | Modern, lightweight | +[PixelDiT](#pixeldit) | Pixel DiT | 2026 | NVIDIA | 1.3B | Minimal | Modern, fast, pixel-space | Old or bad options also tracked listed via [Obscure Model Support](/docs/Obscure%20Model%20Support.md): @@ -640,6 +641,21 @@ For upscaling with SD3, the `Refiner Do Tiling` parameter is highly recommended - **Steps:** For Turbo, `4` is recommended, `8` works well. For Base, `20` as normal. - **Resolution:** Side length `1440` is the official default, but 1024 is a reasonable option. It retains coherence down to about 512 and up to about 2048. +# PixelDiT + +- NVIDIA's [PixelDiT]() is supported in SwarmUI! + - The smaller FP8 model can be downloaded here: [Comfy-Org/PixelDiT - fp8]() + - Or the larger BF16 version: [Comfy-Org/PixelDiT - bf16]() + - Save it in `diffusion_models` +- It does not use a VAE +- It uses the Gemma 2 2B text encoder, which will be downloaded and handled automatically +- **Parameters:** + - **Sampler:** Default is fine. + - **Scheduler:** Default is fine. + - **CFG Scale:** `4` is recommended. + - **Steps:** `30` is recommended. + - **Resolution:** Side length `1024` is the standard. + # Video Models - Video models are documented in [Video Model Support](/docs/Video%20Model%20Support.md). 
diff --git a/src/BuiltinExtensions/ComfyUIBackend/ComfyUIBackendExtension.cs b/src/BuiltinExtensions/ComfyUIBackend/ComfyUIBackendExtension.cs index 92a239823..0f5fc092e 100644 --- a/src/BuiltinExtensions/ComfyUIBackend/ComfyUIBackendExtension.cs +++ b/src/BuiltinExtensions/ComfyUIBackend/ComfyUIBackendExtension.cs @@ -635,6 +635,9 @@ public static void AssignValuesFromRaw(JObject rawObjectInfo) ], Schedulers = ["normal///Normal", "karras///Karras", "exponential///Exponential", "simple///Simple", "ddim_uniform///DDIM Uniform", "sgm_uniform///SGM Uniform", "turbo///Turbo (for turbo models, max 10 steps)", "align_your_steps///Align Your Steps (Model-specific behavior)", "beta///Beta", "linear_quadratic///Linear Quadratic (Mochi)", "ltxv///LTX-Video", "ltxv-image///LTXV-Image", "kl_optimal///KL Optimal (Nvidia AYS)", "flux2///Flux.2"]; + /// Lists PiD decoder models. + public static List PidUpscaleModels(Session session) => [.. Program.MainSDModels.ListModelsFor(session).Where(m => m.ModelClass?.CompatClass?.ID == "pid").OrderBy(m => m.Name).Select(m => $"pidmodel-{m.Name}///PiD Model: {m.Name}")]; + public static List IPAdapterModels = ["None"], IPAdapterWeightTypes = ["standard", "prompt is more important", "style transfer"]; public static List GligenModels = ["None"], YoloModels = [], StyleModels = ["None"], SetClipDevices = ["cpu"]; @@ -752,7 +755,7 @@ public override void OnInit() )); RefinerUpscaleMethod = T2IParamTypes.Register(new("Refiner Upscale Method", "How to upscale the image, if upscaling is used.", "pixel-lanczos", Group: T2IParamTypes.GroupRefiners, OrderPriority: -1, FeatureFlag: "comfyui", ChangeWeight: 1, - GetValues: (_) => UpscalerModels, DependNonDefault: T2IParamTypes.RefinerUpscale.Type.ID + GetValues: (session) => [.. UpscalerModels, .. 
PidUpscaleModels(session)], DependNonDefault: T2IParamTypes.RefinerUpscale.Type.ID )); RefinerSamplerParam = T2IParamTypes.Register(new("Refiner Sampler", SamplerParam.Type.Description + "\nThis is an override to only affect the Refine/Upscale stage.", "euler", Toggleable: true, FeatureFlag: "comfyui", Group: T2IParamTypes.GroupRefinerOverrides, OrderPriority: -2, diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs index b4472813b..66750efe1 100644 --- a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs +++ b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGenerator.cs @@ -166,6 +166,9 @@ public JArray FinalImageOut /// If true, the generator is currently working on the refiner stage. public bool IsRefinerStage = false; + /// If true, the generator is currently working on the pixel-decoder stage. + public bool IsPixelDecoderStage = false; + /// If true, the generator is currently working on Image2Video. public bool IsImageToVideo = false; @@ -959,7 +962,7 @@ public string CreateKSampler(JArray model, JArray pos, JArray neg, JArray latent } } // TODO: Registry of model default preferences instead of this - else if (IsFlux() || IsWanVideo() || IsWanVideo22() || IsOmniGen() || IsQwenImage() || IsZImage() || IsZetaChroma() || IsErnie() || IsHiDreamO1() || IsLens()) + else if (IsFlux() || IsWanVideo() || IsWanVideo22() || IsOmniGen() || IsQwenImage() || IsZImage() || IsZetaChroma() || IsErnie() || IsHiDreamO1() || IsLens() || IsPixelDiT() || IsPiD()) { defscheduler ??= "simple"; } @@ -2518,7 +2521,7 @@ public bool ShouldZeroNegative() } /// Creates a "CLIPTextEncode" or equivalent node for the given input, applying prompt-given conditioning modifiers as relevant. 
- public JArray CreateConditioning(string prompt, JArray clip, T2IModel model, bool isPositive, string firstId = null, bool isRefiner = false, bool isVideo = false, bool isVideoSwap = false) + public JArray CreateConditioning(string prompt, JArray clip, T2IModel model, bool isPositive, string firstId = null, bool isRefiner = false, bool isVideo = false, bool isVideoSwap = false, bool isPixelDecoder = false) { PromptRegion regionalizer = new(prompt); string globalPromptText = regionalizer.GlobalPrompt; @@ -2534,7 +2537,11 @@ public JArray CreateConditioning(string prompt, JArray clip, T2IModel model, boo { globalPromptText = $"{globalPromptText} {regionalizer.RefinerPrompt}"; } - else if (!isVideo && !isRefiner && !string.IsNullOrWhiteSpace(regionalizer.BasePrompt)) + else if (isPixelDecoder && !string.IsNullOrWhiteSpace(regionalizer.PixelDecoderPrompt)) + { + globalPromptText = $"{globalPromptText} {regionalizer.PixelDecoderPrompt}"; + } + else if (!isVideo && !isRefiner && !isPixelDecoder && !string.IsNullOrWhiteSpace(regionalizer.BasePrompt)) { globalPromptText = $"{globalPromptText} {regionalizer.BasePrompt}"; } diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs index 32e7c4bb8..4c0ebaaa6 100644 --- a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs +++ b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorModelSupport.cs @@ -85,6 +85,12 @@ public bool IsKontext() /// Returns true if the current model is Chroma Radiance. public bool IsChromaRadiance() => IsModelCompatClass(T2IModelClassSorter.CompatChromaRadiance); + /// Returns true if the current model is PixelDiT. + public bool IsPixelDiT() => IsModelCompatClass(T2IModelClassSorter.CompatPixelDiT); + + /// Returns true if the current model is PiD. + public bool IsPiD() => IsModelCompatClass(T2IModelClassSorter.CompatPiD); + /// Returns true if the current model is HiDream-i1. 
public bool IsHiDream() => IsModelCompatClass(T2IModelClassSorter.CompatHiDreamI1); @@ -401,7 +407,7 @@ public WGNodeData EmptyImage(int width, int height, int batchSize, string id = n ["width"] = width }, id), frames); } - else if (IsChromaRadiance() || IsZetaChroma()) + else if (IsChromaRadiance() || IsZetaChroma() || IsPixelDiT()) { return resultImage(CreateNode("EmptyChromaRadianceLatentImage", new JObject() { @@ -657,6 +663,11 @@ public string GetGemma2Model() return RequireClipModel("gemma_2_2b_fp16.safetensors", "https://huggingface.co/Comfy-Org/Lumina_Image_2.0_Repackaged/resolve/main/split_files/text_encoders/gemma_2_2b_fp16.safetensors", "29761442862f8d064d3f854bb6fabf4379dcff511a7f6ba9405a00bd0f7e2dbd", T2IParamTypes.GemmaModel); } + public string GetGemma2_2bElmModel() + { + return RequireClipModel("gemma_2_2b_it_elm_fp8_scaled.safetensors", "https://huggingface.co/Comfy-Org/PixelDiT/resolve/main/text_encoders/gemma_2_2b_it_elm_fp8_scaled.safetensors", "87692b2ab1714028e29910ea645d96db656505ca0805051048d2298b225c02d1", T2IParamTypes.GemmaModel); + } + public string GetGemma3_12bModel() { return RequireClipModel("gemma_3_12B_it.safetensors", "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", "aaca463d11e6d8d2a4bdb0d6299214c15ef78a3f73e0ef8113d5a9d0219b3f6d", T2IParamTypes.GemmaModel); @@ -907,7 +918,7 @@ public void LoadClip3(string type, string modelA, string modelB, string modelC) { dtype = "default"; } - else if (IsZImage() || IsZetaChroma() || IsAnima() || IsLens()) // Model is small and dense, so trust user preferred download format + else if (IsZImage() || IsZetaChroma() || IsAnima() || IsLens() || IsPixelDiT() || IsPiD()) // Model is small and dense, so trust user preferred download format { dtype = "default"; } @@ -1139,6 +1150,11 @@ public void LoadClip3(string type, string modelA, string modelB, string modelC) 
helpers.DoVaeLoader(UserInput.SourceSession?.User?.Settings?.VAEs?.DefaultFluxVAE, "flux-1", "flux-ae"); } } + else if (IsPixelDiT() || IsPiD()) + { + helpers.LoadClip("pixeldit", helpers.GetGemma2_2bElmModel()); + LoadingVAE = CreateVAELoader("pixel_space"); + } else if (IsHiDream()) { string loaderType = "QuadrupleCLIPLoader"; diff --git a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorSteps.cs b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorSteps.cs index 067bd1ee8..59e3813b6 100644 --- a/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorSteps.cs +++ b/src/BuiltinExtensions/ComfyUIBackend/WorkflowGeneratorSteps.cs @@ -106,7 +106,11 @@ public static void Register() (g.LoadingModel, g.LoadingClip) = g.LoadLorasForConfinement(0, g.LoadingModel, g.LoadingClip); if (g.IsRefinerStage) { - (g.LoadingModel, g.LoadingClip) = g.LoadLorasForConfinement(1, g.LoadingModel, g.LoadingClip); + (g.LoadingModel, g.LoadingClip) = g.LoadLorasForConfinement(T2IParamInput.SectionID_Refiner, g.LoadingModel, g.LoadingClip); + } + else if (g.IsPixelDecoderStage) + { + (g.LoadingModel, g.LoadingClip) = g.LoadLorasForConfinement(T2IParamInput.SectionID_PixelDecoder, g.LoadingModel, g.LoadingClip); } else if (g.IsImageToVideoSwap) { @@ -1452,10 +1456,10 @@ JArray doMaskShrinkApply(WorkflowGenerator g, JArray imgIn) prompt = g.CreateConditioning(g.UserInput.Get(T2IParamTypes.Prompt), g.CurrentTextEnc.Path, g.FinalLoadedModel, true, isRefiner: true); negPrompt = g.CreateConditioning(g.UserInput.Get(T2IParamTypes.NegativePrompt), g.CurrentTextEnc.Path, g.FinalLoadedModel, false, isRefiner: true); bool doSave = g.UserInput.Get(T2IParamTypes.OutputIntermediateImages, false); - bool doUspcale = g.UserInput.TryGet(T2IParamTypes.RefinerUpscale, out double refineUpscale) && refineUpscale != 1; + bool doUpscale = g.UserInput.TryGet(T2IParamTypes.RefinerUpscale, out double refineUpscale) && refineUpscale != 1; string upscaleMethod = 
g.UserInput.Get(ComfyUIBackendExtension.RefinerUpscaleMethod, "None"); // TODO: Better same-VAE check - bool doPixelUpscale = doUspcale && (upscaleMethod.StartsWith("pixel-") || upscaleMethod.StartsWith("model-")); + bool doPixelUpscale = doUpscale && (upscaleMethod.StartsWith("pixel-") || upscaleMethod.StartsWith("model-")); int width = (int)Math.Round(g.UserInput.GetImageWidth() * refineUpscale); int height = (int)Math.Round(g.UserInput.GetImageHeight() * refineUpscale); width = (width / 16) * 16; // avoid unworkable output sizes @@ -1517,7 +1521,7 @@ JArray doMaskShrinkApply(WorkflowGenerator g, JArray imgIn) g.CurrentMedia = decoded.EncodeToLatent(g.CurrentVae, "25"); } } - if (doUspcale && upscaleMethod.StartsWith("latent-")) + if (doUpscale && upscaleMethod.StartsWith("latent-")) { g.CurrentMedia = g.CurrentMedia.AsLatentImage(g.CurrentVae); g.CreateNode("LatentUpscaleBy", new JObject() @@ -1530,7 +1534,7 @@ JArray doMaskShrinkApply(WorkflowGenerator g, JArray imgIn) g.CurrentMedia.Width = width; g.CurrentMedia.Height = height; } - else if (doUspcale && upscaleMethod.StartsWith("latentmodel-")) + else if (doUpscale && upscaleMethod.StartsWith("latentmodel-")) { g.CreateNode("LatentUpscaleModelLoader", new JObject() { @@ -1598,6 +1602,65 @@ JArray doMaskShrinkApply(WorkflowGenerator g, JArray imgIn) explicitSampler: explicitSampler, explicitScheduler: explicitScheduler, sectionId: T2IParamInput.SectionID_Refiner); g.CurrentMedia = g.CurrentMedia.WithPath(["23", 0]); g.IsRefinerStage = false; + if (doUpscale && upscaleMethod.StartsWith("pidmodel-")) + { + string pidModelName = upscaleMethod.After("pidmodel-"); + string pidMatched = T2IParamTypes.GetBestModelInList(pidModelName, Program.MainSDModels.ListModelNamesFor(g.UserInput.SourceSession)); + if (pidMatched is not null && pidMatched.EndsWith(".safetensors")) + { + pidMatched = pidMatched.BeforeLast('.'); + } + T2IModel pidModel = pidMatched is null ? 
null : Program.MainSDModels.GetModel(pidMatched); + if (pidModel is null || pidModel.ModelClass?.CompatClass?.ID != "pid") + { + throw new SwarmUserErrorException($"Refiner Upscale Method is set to PiD model '{pidModelName}', but that model could not be found or is not a valid PiD model."); + } + string pidLatentFormat = g.IsSD3() ? "sd3" : (g.IsFlux() || g.IsAnyFlux2() || g.IsZImage() || g.IsZetaChroma()) ? "flux" : null; + if (pidLatentFormat is null) + { + throw new SwarmUserErrorException($"PiD model requires the refiner model's VAE to be Flux.1, Flux.2, or SD3, but model '{refineModel.Name}' is '{refineModel.ModelClass?.CompatClass?.ID ?? "unknown"}'."); + } + JArray refinedLatent = g.CurrentMedia.Path; + int pidWidth = g.UserInput.GetImageWidth() * 4; + int pidHeight = g.UserInput.GetImageHeight() * 4; + pidWidth = (pidWidth / 16) * 16; + pidHeight = (pidHeight / 16) * 16; + T2IModel refinerFinalModel = g.FinalLoadedModel; + List refinerFinalModelList = g.FinalLoadedModelList; + g.FinalLoadedModel = pidModel; + g.FinalLoadedModelList = [pidModel]; + g.NoVAEOverride = true; + g.IsPixelDecoderStage = true; + (g.FinalLoadedModel, g.CurrentModel, g.CurrentTextEnc, g.CurrentVae) = g.CreateModelLoader(pidModel, "PixelDecoder", sectionId: T2IParamInput.SectionID_PixelDecoder); + g.IsPixelDecoderStage = false; + g.NoVAEOverride = false; + JArray pidPos = g.CreateConditioning(g.UserInput.Get(T2IParamTypes.Prompt), g.CurrentTextEnc.Path, g.FinalLoadedModel, true, isPixelDecoder: true); + JArray pidNeg = g.CreateConditioning(g.UserInput.Get(T2IParamTypes.NegativePrompt), g.CurrentTextEnc.Path, g.FinalLoadedModel, false, isPixelDecoder: true); + string pidCond = g.CreateNode("PiDConditioning", new JObject() + { + ["positive"] = pidPos, + ["latent"] = refinedLatent, + ["latent_format"] = pidLatentFormat, + ["degrade_sigma"] = 0.0 + }); + string pidEmptyLatent = g.CreateNode("EmptyChromaRadianceLatentImage", new JObject() + { + ["batch_size"] = 
g.UserInput.Get(T2IParamTypes.BatchSize, 1), + ["width"] = pidWidth, + ["height"] = pidHeight + }); + int pidSteps = g.UserInput.GetNullable(T2IParamTypes.Steps, T2IParamInput.SectionID_PixelDecoder, false) ?? 4; + double pidCfg = g.UserInput.GetNullable(T2IParamTypes.CFGScale, T2IParamInput.SectionID_PixelDecoder, false) ?? 1.0; + string pidSampler = g.UserInput.Get(ComfyUIBackendExtension.SamplerParam, null, sectionId: T2IParamInput.SectionID_PixelDecoder, includeBase: false); + string pidScheduler = g.UserInput.Get(ComfyUIBackendExtension.SchedulerParam, null, sectionId: T2IParamInput.SectionID_PixelDecoder, includeBase: false); + string pidSampled = g.CreateKSampler(g.CurrentModel.Path, [pidCond, 0], pidNeg, [pidEmptyLatent, 0], pidCfg, pidSteps, 0, 10000, + g.UserInput.Get(T2IParamTypes.Seed) + 2, false, true, defsampler: "lcm", defscheduler: "simple", explicitSampler: pidSampler, explicitScheduler: pidScheduler, sectionId: T2IParamInput.SectionID_PixelDecoder); + g.CurrentMedia = g.CurrentMedia.WithPath([pidSampled, 0], WGNodeData.DT_LATENT_IMAGE, pidModel.ModelClass?.CompatClass); + g.CurrentMedia.Width = pidWidth; + g.CurrentMedia.Height = pidHeight; + g.FinalLoadedModel = refinerFinalModel; + g.FinalLoadedModelList = refinerFinalModelList; + } } }, -4); #endregion diff --git a/src/Text2Image/T2IModelClassSorter.cs b/src/Text2Image/T2IModelClassSorter.cs index ac2f45862..df9d24c4c 100644 --- a/src/Text2Image/T2IModelClassSorter.cs +++ b/src/Text2Image/T2IModelClassSorter.cs @@ -72,6 +72,8 @@ public static T2IModelCompatClass CompatAnima = RegisterCompat(new() { ID = "anima", ShortCode = "Anima", LorasTargetTextEnc = false }), CompatHiDreamO1 = RegisterCompat(new() { ID = "hidream-o1", ShortCode = "HiDrO1", LorasTargetTextEnc = false }), CompatLens = RegisterCompat(new() { ID = "lens", ShortCode = "Lens", LorasTargetTextEnc = false }), + CompatPiD = RegisterCompat(new() { ID = "pid", ShortCode = "PiD", LorasTargetTextEnc = false }), + CompatPixelDiT = 
RegisterCompat(new() { ID = "pixeldit", ShortCode = "PixDiT", LorasTargetTextEnc = false }), // Audio models CompatAceStep15 = RegisterCompat(new() { ID = "ace-step-1_5", ShortCode = "Ace15", IsAudioModel = true }), // Obscure old random ones @@ -206,6 +208,8 @@ bool isZImageLora(JObject h) => (hasLoraKey(h, "layers.0.adaLN_modulation.0") && bool isHiDreamO1Lora(JObject h) => hasLoraKey(h, "final_layer2.linear") && hasLoraKey(h, "language_model.layers.0.self_attn.q_proj"); bool isChroma(JObject h) => h.ContainsKey("distilled_guidance_layer.in_proj.bias") && h.ContainsKey("double_blocks.0.img_attn.proj.bias"); bool isChromaRadiance(JObject h) => h.ContainsKey("nerf_image_embedder.embedder.0.bias"); + bool isPiD(JObject h) => h.ContainsKey("net.lq_proj.latent_proj.0.weight") && h.ContainsKey("net.pixel_blocks.0.attn.q_norm.weight") && h.ContainsKey("net.pixel_blocks.0.compress_to_attn.weight"); + bool isPixelDiT(JObject h) => h.ContainsKey("core.pixel_embedder.proj.weight") && h.ContainsKey("core.pixel_blocks.0.attn.q_norm.weight") && h.ContainsKey("core.pixel_blocks.0.compress_to_attn.weight") && !isPiD(h); bool isOmniGen(JObject h) => h.ContainsKey("time_caption_embed.timestep_embedder.linear_2.weight") && h.ContainsKey("context_refiner.0.attn.norm_k.weight"); bool isQwenImage(JObject h) => (h.ContainsKey("time_text_embed.timestep_embedder.linear_1.bias") && h.ContainsKey("img_in.bias") && (h.ContainsKey("transformer_blocks.0.attn.add_k_proj.bias") || h.ContainsKey("transformer_blocks.0.attn.add_qkv_proj.bias"))) || (h.ContainsKey("model.diffusion_model.time_text_embed.timestep_embedder.linear_1.bias") && h.ContainsKey("model.diffusion_model.img_in.bias") && (h.ContainsKey("model.diffusion_model.transformer_blocks.0.attn.add_k_proj.bias") || h.ContainsKey("model.diffusion_model.transformer_blocks.0.attn.add_qkv_proj.bias"))); @@ -705,6 +709,15 @@ JToken GetEmbeddingKey(JObject h) { return isChroma(h) && isChromaRadiance(h); }}); + // ====================== PixelDiT 
/ PiD ====================== + Register(new() { ID = "pid", CompatClass = CompatPiD, Name = "PiD", StandardWidth = 1024, StandardHeight = 1024, IsThisModelOfClass = (m, h) => + { + return isPiD(h); + }}); + Register(new() { ID = "pixeldit", CompatClass = CompatPixelDiT, Name = "PixelDiT", StandardWidth = 1024, StandardHeight = 1024, IsThisModelOfClass = (m, h) => + { + return isPixelDiT(h); + }}); Register(new() { ID = "alt_diffusion_v1_512_placeholder", CompatClass = CompatAltDiffusion, Name = "Alt-Diffusion", StandardWidth = 512, StandardHeight = 512, IsThisModelOfClass = (m, h) => { return IsAlt(h); diff --git a/src/Text2Image/T2IParamInput.cs b/src/Text2Image/T2IParamInput.cs index dc8ada4f5..a8ce3b084 100644 --- a/src/Text2Image/T2IParamInput.cs +++ b/src/Text2Image/T2IParamInput.cs @@ -13,7 +13,7 @@ namespace SwarmUI.Text2Image; public class T2IParamInput { /// Core section ID numbers. - public static int SectionID_BaseOnly = 5, SectionID_Refiner = 1, SectionID_Video = 2, SectionID_VideoSwap = 3; + public static int SectionID_BaseOnly = 5, SectionID_Refiner = 1, SectionID_Video = 2, SectionID_VideoSwap = 3, SectionID_PixelDecoder = 4; /// Parameter IDs that must be loaded early on, eg extracted from presets in prompts early. Primarily things that affect backend selection. 
public static readonly string[] ParamsMustLoadEarly = ["model", "images", "internalbackendtype", "exactbackendid"]; diff --git a/src/Text2Image/T2IPromptHandling.cs b/src/Text2Image/T2IPromptHandling.cs index 669e9c70a..723af0ff6 100644 --- a/src/Text2Image/T2IPromptHandling.cs +++ b/src/Text2Image/T2IPromptHandling.cs @@ -598,6 +598,12 @@ static string estimateAsSectionBreak(string data, PromptTagContext context) return $""; }; PromptTagLengthEstimators["refiner"] = estimateAsSectionBreak; + PromptTagBasicProcessors["pixeldecoder"] = (data, context) => + { + context.SectionID = T2IParamInput.SectionID_PixelDecoder; + return $""; + }; + PromptTagLengthEstimators["pixeldecoder"] = estimateAsSectionBreak; PromptTagBasicProcessors["video"] = (data, context) => { context.SectionID = T2IParamInput.SectionID_Video; diff --git a/src/Utils/PromptRegion.cs b/src/Utils/PromptRegion.cs index 77a031a3e..b3fc5f402 100644 --- a/src/Utils/PromptRegion.cs +++ b/src/Utils/PromptRegion.cs @@ -13,6 +13,8 @@ public class PromptRegion public string RefinerPrompt = ""; + public string PixelDecoderPrompt = ""; + public string VideoPrompt = ""; public string VideoSwapPrompt = ""; @@ -26,7 +28,7 @@ public enum PartType public static HashSet CustomPartPrefixes = []; /// List of all prefixes for parts. Use to add to this. - public static List PartPrefixes = [" PartPrefixes = ["Custom Extensions can add new prompt part types here. 
/// For example, this will add prompt parsing for <example> or <example:somedata> or etc: @@ -129,6 +131,12 @@ public PromptRegion(string prompt) addMore = s => RefinerPrompt += s; continue; } + else if (prefix == "pixeldecoder") + { + PixelDecoderPrompt += content; + addMore = s => PixelDecoderPrompt += s; + continue; + } else if (prefix == "video") { VideoPrompt += content; diff --git a/src/wwwroot/js/genpage/gentab/prompttools.js b/src/wwwroot/js/genpage/gentab/prompttools.js index b688932d1..cfaa14972 100644 --- a/src/wwwroot/js/genpage/gentab/prompttools.js +++ b/src/wwwroot/js/genpage/gentab/prompttools.js @@ -137,6 +137,9 @@ class PromptTabCompleteClass { this.registerPrefix('refiner', 'Add a section of prompt text that is only used for the Refine/Upscale pass.', (prefix) => { return []; }, true); + this.registerPrefix('pixeldecoder', 'Add a section of prompt text that is only used for the PiD pixel-decoder upscale pass.', (prefix) => { + return []; + }, true); this.registerPrefix('video', 'Add a section of prompt text that replaces the prompt for the image-to-video generation pass.', (prefix) => { return []; }, true);