-
Notifications
You must be signed in to change notification settings - Fork 254
Add LoRA handling for image generation #4084
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
3ddabdc
b5ed7d8
c2ba9d8
af51476
5f89fc2
102f8af
330374f
0a65e68
eac3932
f4be12a
a2476ee
15dd08a
7e392c6
eb34c6e
9da0860
4d24c22
dec1959
f10b657
4fe7c75
0ef2ebe
519ea61
c288736
b1d2ca6
453f712
f6d52f4
8e36a01
a241d7f
59eaad2
33e7a85
d76da73
65e6bd2
77c56ed
525be69
a705a22
34ce570
e38114a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -86,6 +86,13 @@ def add_common_arguments(parser): | |
| parser_image_generation.add_argument('--max_num_images_per_prompt', type=int, default=0, help='Max allowed number of images client is allowed to request for a given prompt', dest='max_num_images_per_prompt') | ||
| parser_image_generation.add_argument('--default_num_inference_steps', type=int, default=0, help='Default number of inference steps when not specified by client', dest='default_num_inference_steps') | ||
| parser_image_generation.add_argument('--max_num_inference_steps', type=int, default=0, help='Max allowed number of inference steps client is allowed to request for a given prompt', dest='max_num_inference_steps') | ||
| parser_image_generation.add_argument('--source_loras', default=None, | ||
| help='LoRA adapters to apply. Format: alias1=org1/repo1[:alpha],alias2=org2/repo2[@file.safetensors][:alpha],' | ||
| 'composite=@alias1:alpha+@alias2:alpha. ' | ||
| '@filename specifies which .safetensors file (auto-detected when repo has exactly one). ' | ||
| ':alpha sets adapter weight (default 1.0). ' | ||
| 'Composite entries (source starts with @) blend multiple adapters. Only for image_generation task.', | ||
| dest='source_loras') | ||
|
|
||
| parser_text2speech = subparsers.add_parser('text2speech', help='export model for text2speech endpoint') | ||
| add_common_arguments(parser_text2speech) | ||
|
|
@@ -339,6 +346,17 @@ def add_common_arguments(parser): | |
| default_num_inference_steps: {{default_num_inference_steps}},{% endif %} | ||
| {%- if max_num_inference_steps > 0 %} | ||
| max_num_inference_steps: {{max_num_inference_steps}},{% endif %} | ||
| {%- for lora in lora_adapters %} | ||
| lora_adapters { alias: "{{lora.alias}}" path: "{{lora.path}}"{% if lora.alpha is not none %} alpha: {{lora.alpha}}{% endif %} mode: DYNAMIC } | ||
| {%- endfor %} | ||
| {%- for composite in composite_lora_adapters %} | ||
| composite_lora_adapters { | ||
| alias: "{{composite.alias}}" | ||
| {%- for comp in composite.components %} | ||
| components { adapter_alias: "{{comp.adapter_alias}}"{% if comp.alpha != 1.0 %} alpha: {{comp.alpha}}{% endif %} } | ||
| {%- endfor %} | ||
| } | ||
| {%- endfor %} | ||
| } | ||
| } | ||
| }""" | ||
|
|
@@ -616,7 +634,7 @@ def export_rerank_model(model_repository_path, source_model, model_name, precisi | |
| add_servable_to_config(config_file_path, model_name, os.path.relpath(os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path))) | ||
|
|
||
|
|
||
| def export_image_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path, num_streams): | ||
| def export_image_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path, num_streams, source_loras): | ||
| model_path = "./" | ||
| target_path = os.path.join(model_repository_path, model_name) | ||
| model_index_path = os.path.join(target_path, 'model_index.json') | ||
|
|
@@ -629,6 +647,74 @@ def export_image_generation_model(model_repository_path, source_model, model_nam | |
| if os.system(optimum_command): | ||
| raise ValueError("Failed to export image generation model", source_model) | ||
|
|
||
| # Download and resolve LoRA adapters | ||
| lora_adapters = [] | ||
| composite_lora_adapters = [] | ||
| if source_loras: | ||
| from huggingface_hub import snapshot_download | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think it's good practice to import it here; could it be moved to the top?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Moved to top of function as it is consistent within export models. |
||
| entries = source_loras.split(',') | ||
| for entry in entries: | ||
| entry = entry.strip() | ||
| if '=' in entry: | ||
| alias, source = entry.split('=', 1) | ||
| else: | ||
| source = entry | ||
| alias = entry.split('/')[-1] if '/' in entry else entry | ||
|
|
||
| # Composite LoRA: source starts with @ | ||
| if source.startswith('@'): | ||
| components = [] | ||
| for comp_token in source.split('+'): | ||
| comp_token = comp_token.strip().lstrip('@') | ||
| if ':' in comp_token: | ||
| ref, alpha_str = comp_token.rsplit(':', 1) | ||
| alpha = float(alpha_str) | ||
| else: | ||
| ref = comp_token | ||
| alpha = 1.0 | ||
| components.append({'adapter_alias': ref, 'alpha': alpha}) | ||
| composite_lora_adapters.append({'alias': alias, 'components': components}) | ||
| print(f"Composite LoRA: {alias} -> {components}") | ||
| continue | ||
|
|
||
| # Parse optional alpha (trailing :float after repo or filename) | ||
| alpha = None | ||
| repo_and_file = source | ||
| # Check for alpha suffix: alias=org/repo:0.8 or alias=org/repo@file.safetensors:0.8 | ||
| if ':' in repo_and_file: | ||
| last_colon = repo_and_file.rfind(':') | ||
| potential_alpha = repo_and_file[last_colon + 1:] | ||
| try: | ||
| alpha = float(potential_alpha) | ||
| repo_and_file = repo_and_file[:last_colon] | ||
| except ValueError: | ||
| pass # Not an alpha suffix (could be part of URL) | ||
|
|
||
| safetensors_file = '' | ||
| if '@' in repo_and_file: | ||
| repo, safetensors_file = repo_and_file.rsplit('@', 1) | ||
| else: | ||
| repo = repo_and_file | ||
| lora_dir = os.path.join(target_path, 'loras', repo) | ||
| if not os.path.isdir(lora_dir): | ||
| print(f"Downloading LoRA adapter: {repo} to {lora_dir}") | ||
| snapshot_download(repo_id=repo, local_dir=lora_dir) | ||
| else: | ||
| print(f"LoRA adapter directory already exists: {lora_dir}") | ||
| if not safetensors_file: | ||
| st_files = [f for f in os.listdir(lora_dir) if f.endswith('.safetensors')] | ||
| if len(st_files) == 0: | ||
| raise ValueError(f"No .safetensors files found in LoRA adapter: {repo}") | ||
| if len(st_files) > 1: | ||
| raise ValueError(f"Multiple .safetensors files in LoRA adapter: {repo}. Use @filename to specify.") | ||
| safetensors_file = st_files[0] | ||
| lora_path = 'loras/' + repo + '/' + safetensors_file | ||
| lora_entry = {'alias': alias, 'path': lora_path, 'alpha': alpha} | ||
| lora_adapters.append(lora_entry) | ||
| print(f"LoRA adapter: {alias} -> {lora_path}" + (f" (alpha={alpha})" if alpha else "")) | ||
| task_parameters['lora_adapters'] = lora_adapters | ||
| task_parameters['composite_lora_adapters'] = composite_lora_adapters | ||
|
|
||
| plugin_config = {} | ||
| assert num_streams >= 0, "num_streams should be a non-negative integer" | ||
| if num_streams > 0: | ||
|
|
@@ -711,4 +797,4 @@ def export_image_generation_model(model_repository_path, source_model, model_nam | |
| 'max_num_inference_steps', | ||
| 'extra_quantization_params' | ||
| ]} | ||
| export_image_generation_model(args['model_repository_path'], args['source_model'], args['model_name'], args['precision'], template_parameters, args['config_file_path'], args['num_streams']) | ||
| export_image_generation_model(args['model_repository_path'], args['source_model'], args['model_name'], args['precision'], template_parameters, args['config_file_path'], args['num_streams'], args['source_loras']) | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -397,7 +397,7 @@ A single servable exposes the following endpoints: | |||||
|
|
||||||
| > **Note:** Inpainting/outpainting requests are processed sequentially — concurrent requests will be queued. | ||||||
|
|
||||||
| > **Note:** For inpainting/outpainting, dedicated inpainting models (e.g. `stable-diffusion-v1-5/stable-diffusion-inpainting`) only support the `images/edits` endpoint. Check [supported models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#image-generation-models). | ||||||
| > **Note:** Dedicated inpainting models (e.g. `stable-diffusion-v1-5/stable-diffusion-inpainting`) only support the `images/edits` endpoint — they cannot be used for text-to-image generation via `images/generations`. General-purpose models (e.g. SDXL) support both endpoints. Check [supported models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#image-generation-models). | ||||||
|
|
||||||
| All requests are processed in unary format, with no streaming capabilities. | ||||||
|
|
||||||
|
|
@@ -528,6 +528,12 @@ Output file (`edit_output.png`): | |||||
|
|
||||||
| Inpainting replaces a masked region in an image based on the prompt. The `mask` is a black-and-white image where white pixels mark the area to repaint. | ||||||
|
|
||||||
| Download sample images: | ||||||
| ```console | ||||||
| curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/cat.png | ||||||
| curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/cat_mask.png | ||||||
| ``` | ||||||
|
|
||||||
|   | ||||||
|
|
||||||
| ::::{tab-set} | ||||||
|
|
@@ -599,6 +605,12 @@ Outpainting extends an image beyond its original borders. Prepare two images: | |||||
| - **outpaint_input.png** — the original image centered on a larger canvas (e.g. 768×768) with black borders | ||||||
| - **outpaint_mask.png** — white where the new content should be generated (the borders), black where the original image is | ||||||
|
|
||||||
| Download sample images: | ||||||
| ```console | ||||||
| curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/outpaint_input.png | ||||||
| curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/outpaint_mask.png | ||||||
| ``` | ||||||
|
|
||||||
|   | ||||||
|
|
||||||
| ::::{tab-set} | ||||||
|
|
@@ -718,6 +730,190 @@ ovms --rest_port 8000 ^ | |||||
|
|
||||||
| Please follow [OpenVINO notebook](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/image-to-image-genai/image-to-image-genai.ipynb) to understand how other parameters affect editing. | ||||||
|
|
||||||
| ## Multi-LoRA Image Generation | ||||||
|
|
||||||
| This section demonstrates how to serve multiple LoRA adapters with a single SDXL base model, enabling per-request style selection. It replicates the [Multi LoRA Image Generation notebook](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/multilora-image-generation/multilora-image-generation.ipynb), but uses OVMS for serving. | ||||||
|
|
||||||
| ### Start Server with Multiple LoRA Adapters | ||||||
|
|
||||||
| The following command starts OVMS with Stable Diffusion XL and 5 LoRA adapters for different artistic styles: | ||||||
|
|
||||||
| ::::{tab-set} | ||||||
| :::{tab-item} Docker (Linux) | ||||||
| :sync: docker | ||||||
| ```bash | ||||||
| mkdir -p models | ||||||
|
|
||||||
| docker run -d --rm --user $(id -u):$(id -g) -p 8000:8000 -v $(pwd)/models:/models/:rw \ | ||||||
| -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy \ | ||||||
| openvino/model_server:latest \ | ||||||
| --rest_port 8000 \ | ||||||
| --model_repository_path /models/ \ | ||||||
| --task image_generation \ | ||||||
| --source_model stabilityai/stable-diffusion-xl-base-1.0 \ | ||||||
| --source_loras "xray=DoctorDiffusion/doctor-diffusion-s-xray-xl-lora@DD-xray-v1.safetensors,thepoint=alvdansen/the-point@araminta_k_the_point.safetensors,ukiyo=KappaNeuro/ukiyo-e-art@Ukiyo-e Art.safetensors,vector=DoctorDiffusion/doctor-diffusion-s-controllable-vector-art-xl-lora@DD-vector-v2.safetensors,chalk=Norod78/sdxl-chalkboarddrawing-lora@SDXL_ChalkBoardDrawing_LoRA_r8.safetensors" | ||||||
| ``` | ||||||
| ::: | ||||||
|
|
||||||
| :::{tab-item} Bare metal (Windows) | ||||||
| :sync: bare-metal | ||||||
| ```bat | ||||||
| mkdir models | ||||||
|
|
||||||
| ovms --rest_port 8000 ^ | ||||||
| --model_repository_path ./models/ ^ | ||||||
| --task image_generation ^ | ||||||
| --source_model stabilityai/stable-diffusion-xl-base-1.0 ^ | ||||||
| --source_loras "xray=DoctorDiffusion/doctor-diffusion-s-xray-xl-lora@DD-xray-v1.safetensors,thepoint=alvdansen/the-point@araminta_k_the_point.safetensors,ukiyo=KappaNeuro/ukiyo-e-art@Ukiyo-e Art.safetensors,vector=DoctorDiffusion/doctor-diffusion-s-controllable-vector-art-xl-lora@DD-vector-v2.safetensors,chalk=Norod78/sdxl-chalkboarddrawing-lora@SDXL_ChalkBoardDrawing_LoRA_r8.safetensors" | ||||||
| ``` | ||||||
| ::: | ||||||
|
|
||||||
| :::: | ||||||
|
|
||||||
| The registered adapters and their recommended use: | ||||||
|
|
||||||
| | Alias | Repository | Style | Recommended Weight | Prompt Template | | ||||||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| |-------|-----------|-------|-------------------|-----------------| | ||||||
| | `xray` | DoctorDiffusion/doctor-diffusion-s-xray-xl-lora | X-Ray style | 0.8 | `xray <subject>` | | ||||||
| | `thepoint` | alvdansen/the-point | Artistic illustration | 0.6 | `<subject>` | | ||||||
| | `ukiyo` | KappaNeuro/ukiyo-e-art | Ukiyo-e Japanese art | 0.8 | `an illustration of <subject> in Ukiyo-e Art style` | | ||||||
| | `vector` | DoctorDiffusion/doctor-diffusion-s-controllable-vector-art-xl-lora | Vector art | 0.8 | `vector <subject>` | | ||||||
| | `chalk` | Norod78/sdxl-chalkboarddrawing-lora | Chalkboard drawing | 0.45 | `A colorful chalkboard drawing of <subject>` | | ||||||
|
|
||||||
| ### Generate Images with Different Styles | ||||||
|
|
||||||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add example images. |
||||||
| Use the adapter alias as the `model` field to select which adapter to apply per request. The adapter is activated via **model name routing** — when the `model` field matches a registered LoRA alias, that adapter is automatically applied. | ||||||
|
|
||||||
| **X-Ray style:** | ||||||
| ```bash | ||||||
| curl http://localhost:8000/v3/images/generations \ | ||||||
| -H "Content-Type: application/json" \ | ||||||
| -d '{ | ||||||
| "model": "xray", | ||||||
| "prompt": "xray a cute cat in sunglasses", | ||||||
| "num_inference_steps": 20, | ||||||
| "guidance_scale": 0.0, | ||||||
| "size": "1024x1024" | ||||||
| }' | jq -r '.data[0].b64_json' | base64 --decode > xray_cat.png | ||||||
| ``` | ||||||
|
|
||||||
| **Ukiyo-e Japanese art:** | ||||||
| ```bash | ||||||
| curl http://localhost:8000/v3/images/generations \ | ||||||
| -H "Content-Type: application/json" \ | ||||||
| -d '{ | ||||||
| "model": "ukiyo", | ||||||
| "prompt": "an illustration of a cute cat in sunglasses in Ukiyo-e Art style", | ||||||
| "num_inference_steps": 20, | ||||||
| "guidance_scale": 0.0, | ||||||
| "size": "1024x1024" | ||||||
| }' | jq -r '.data[0].b64_json' | base64 --decode > ukiyo_cat.png | ||||||
| ``` | ||||||
|
|
||||||
| **Chalkboard drawing:** | ||||||
| ```bash | ||||||
| curl http://localhost:8000/v3/images/generations \ | ||||||
| -H "Content-Type: application/json" \ | ||||||
| -d '{ | ||||||
| "model": "chalk", | ||||||
| "prompt": "A colorful chalkboard drawing of a cute cat in sunglasses", | ||||||
| "num_inference_steps": 20, | ||||||
| "guidance_scale": 0.0, | ||||||
| "size": "1024x1024" | ||||||
| }' | jq -r '.data[0].b64_json' | base64 --decode > chalk_cat.png | ||||||
| ``` | ||||||
|
|
||||||
| Optionally override the adapter alpha using `lora_alphas`: | ||||||
| ```bash | ||||||
| curl http://localhost:8000/v3/images/generations \ | ||||||
| -H "Content-Type: application/json" \ | ||||||
| -d '{ | ||||||
| "model": "xray", | ||||||
| "prompt": "xray a cute cat in sunglasses", | ||||||
| "lora_alphas": {"xray": 0.5}, | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What if someone requests the base model and specifies `lora_alphas` with xray set to 0.5 in the request? |
||||||
| "num_inference_steps": 20, | ||||||
| "guidance_scale": 0.0, | ||||||
| "size": "1024x1024" | ||||||
| }' | jq -r '.data[0].b64_json' | base64 --decode > xray_cat_half_weight.png | ||||||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| ``` | ||||||
| ### Using OpenAI Python Client with LoRA | ||||||
|
|
||||||
| ```python | ||||||
| from openai import OpenAI | ||||||
| import base64 | ||||||
| from io import BytesIO | ||||||
| from PIL import Image | ||||||
|
|
||||||
| client = OpenAI( | ||||||
| base_url="http://localhost:8000/v3", | ||||||
| api_key="unused" | ||||||
| ) | ||||||
|
|
||||||
| # Define LoRA styles — the adapter alias is used as the model name | ||||||
| styles = { | ||||||
| "xray": {"prompt": "xray {subject}"}, | ||||||
| "thepoint": {"prompt": "{subject}"}, | ||||||
| "ukiyo": {"prompt": "an illustration of {subject} in Ukiyo-e Art style"}, | ||||||
| "vector": {"prompt": "vector {subject}"}, | ||||||
| "chalk": {"prompt": "A colorful chalkboard drawing of {subject}"}, | ||||||
| } | ||||||
|
|
||||||
| subject = "a cute cat in sunglasses" | ||||||
|
|
||||||
| for style_name, style_config in styles.items(): | ||||||
| prompt = style_config["prompt"].format(subject=subject) | ||||||
| response = client.images.generate( | ||||||
| model=style_name, # adapter alias activates the LoRA | ||||||
| prompt=prompt, | ||||||
| extra_body={ | ||||||
| "num_inference_steps": 20, | ||||||
| "guidance_scale": 0.0, | ||||||
| "size": "1024x1024", | ||||||
| } | ||||||
| ) | ||||||
| image_data = base64.b64decode(response.data[0].b64_json) | ||||||
| image = Image.open(BytesIO(image_data)) | ||||||
| image.save(f'{style_name}_cat.png') | ||||||
| print(f"Saved {style_name}_cat.png") | ||||||
| ``` | ||||||
|
|
||||||
| ### Blending Multiple Adapters | ||||||
|
|
||||||
| To blend multiple adapters, define a **composite adapter** at startup using the `@alias:alpha` syntax: | ||||||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. weight->alpha |
||||||
|
|
||||||
| ```bash | ||||||
| --source_loras="xray=...,ukiyo=...,blend=@xray:0.5+@ukiyo:0.4" | ||||||
| ``` | ||||||
|
|
||||||
| Then use the composite alias as the model name: | ||||||
| ```python | ||||||
| response = client.images.generate( | ||||||
| model="blend", # activates both xray and ukiyo | ||||||
| prompt="a cute cat in sunglasses", | ||||||
| extra_body={ | ||||||
| "num_inference_steps": 20, | ||||||
| "guidance_scale": 0.0, | ||||||
| "size": "1024x1024", | ||||||
| } | ||||||
| ) | ||||||
| ``` | ||||||
|
|
||||||
| You can override individual component alphas at request time: | ||||||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| ```python | ||||||
| response = client.images.generate( | ||||||
| model="blend", | ||||||
| prompt="a cute cat in sunglasses", | ||||||
| extra_body={ | ||||||
| "lora_alphas": {"xray": 0.8, "ukiyo": 0.2}, | ||||||
| "num_inference_steps": 20, | ||||||
| "guidance_scale": 0.0, | ||||||
| "size": "1024x1024", | ||||||
| } | ||||||
| ) | ||||||
| ``` | ||||||
|
|
||||||
| > **Note:** For more details on LoRA adapter configuration, see the [Image Generation reference documentation](../../docs/image_generation/reference.md#lora-adapters). | ||||||
|
|
||||||
| ## References | ||||||
| - [Image Generation API](../../docs/model_server_rest_api_image_generation.md) | ||||||
| - [Image Edit API](../../docs/model_server_rest_api_image_edit.md) | ||||||
|
|
||||||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.