openvinotoolkit · atobiszei · Mar 2, 2026 · Mar 9, 2026 · Mar 11, 2026 · Mar 13, 2026
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
@@ -65,7 +65,10 @@ When analyzing a Pull Request, follow this protocol:
     - **Keep headers self-contained but minimal**: each header must compile on its own, but should not pull in transitive dependencies that callers don't need.
     - **Prefer opaque types / Pimpl**: for complex implementation details, consider the Pimpl idiom to keep implementation-only types out of the public header entirely.
     - **Never include a header solely for a typedef or enum**: forward-declare the enum (`enum class Foo;` in C++17) or relocate the typedef to a lightweight `fwd.hpp`-style header.
-13. Be mindful when accepting `const T&` in constructors or functions that store the reference: verify that the referenced object's lifetime outlives the usage to avoid dangling references.
+13. **No dangling references or temporaries bound to `const T&`**:
+    - Never use `const T&` parameters with default arguments that construct temporaries (e.g. `const std::string& param = ""`). This binds a reference to a temporary — use a function overload instead, or pass by value.
+    - When accepting `const T&` in constructors or functions that store the reference, verify that the referenced object's lifetime outlives the usage to avoid dangling references.
+    - Prefer overloads over default arguments for non-trivial types passed by reference.
 
 ## Build System
 

diff --git a/demos/common/export_models/export_model.py b/demos/common/export_models/export_model.py
@@ -86,6 +86,13 @@ def add_common_arguments(parser):
 parser_image_generation.add_argument('--max_num_images_per_prompt', type=int, default=0, help='Max allowed number of images client is allowed to request for a given prompt', dest='max_num_images_per_prompt')
 parser_image_generation.add_argument('--default_num_inference_steps', type=int, default=0, help='Default number of inference steps when not specified by client', dest='default_num_inference_steps')
 parser_image_generation.add_argument('--max_num_inference_steps', type=int, default=0, help='Max allowed number of inference steps client is allowed to request for a given prompt', dest='max_num_inference_steps')
+parser_image_generation.add_argument('--source_loras', default=None,
+    help='LoRA adapters to apply. Format: alias1=org1/repo1[:alpha],alias2=org2/repo2[@file.safetensors][:alpha],'
+         'composite=@alias1:alpha+@alias2:alpha '
+         '@filename specifies which .safetensors file (auto-detected when repo has exactly one). '
+         ':alpha sets adapter weight (default 1.0). '
+         'Composite entries (source starts with @) blend multiple adapters. Only for image_generation task.',
+    dest='source_loras')
 
 parser_text2speech = subparsers.add_parser('text2speech', help='export model for text2speech endpoint')
 add_common_arguments(parser_text2speech)
@@ -339,6 +346,17 @@ def add_common_arguments(parser):
       default_num_inference_steps: {{default_num_inference_steps}},{% endif %}
       {%- if max_num_inference_steps > 0 %}
       max_num_inference_steps: {{max_num_inference_steps}},{% endif %}
+      {%- for lora in lora_adapters %}
+      lora_adapters { alias: "{{lora.alias}}" path: "{{lora.path}}"{% if lora.alpha is not none %} alpha: {{lora.alpha}}{% endif %} mode: DYNAMIC }
+      {%- endfor %}
+      {%- for composite in composite_lora_adapters %}
+      composite_lora_adapters {
+            alias: "{{composite.alias}}"
+      {%- for comp in composite.components %}
+            components { adapter_alias: "{{comp.adapter_alias}}"{% if comp.alpha != 1.0 %} alpha: {{comp.alpha}}{% endif %} }
+      {%- endfor %}
+          }
+      {%- endfor %}
     }
   }
 }"""
@@ -616,7 +634,7 @@ def export_rerank_model(model_repository_path, source_model, model_name, precisi
     add_servable_to_config(config_file_path, model_name, os.path.relpath(os.path.join(model_repository_path, model_name), os.path.dirname(config_file_path)))
 
 
-def export_image_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path, num_streams):
+def export_image_generation_model(model_repository_path, source_model, model_name, precision, task_parameters, config_file_path, num_streams, source_loras):
     model_path = "./"
     target_path = os.path.join(model_repository_path, model_name)
     model_index_path = os.path.join(target_path, 'model_index.json')
@@ -629,6 +647,74 @@ def export_image_generation_model(model_repository_path, source_model, model_nam
         if os.system(optimum_command):
             raise ValueError("Failed to export image generation model", source_model)
 
+    # Download and resolve LoRA adapters
+    lora_adapters = []
+    composite_lora_adapters = []
+    if source_loras:
+        from huggingface_hub import snapshot_download
+        entries = source_loras.split(',')
+        for entry in entries:
+            entry = entry.strip()
+            if '=' in entry:
+                alias, source = entry.split('=', 1)
+            else:
+                source = entry
+                alias = entry.split('/')[-1] if '/' in entry else entry
+
+            # Composite LoRA: source starts with @
+            if source.startswith('@'):
+                components = []
+                for comp_token in source.split('+'):
+                    comp_token = comp_token.strip().lstrip('@')
+                    if ':' in comp_token:
+                        ref, alpha_str = comp_token.rsplit(':', 1)
+                        alpha = float(alpha_str)
+                    else:
+                        ref = comp_token
+                        alpha = 1.0
+                    components.append({'adapter_alias': ref, 'alpha': alpha})
+                composite_lora_adapters.append({'alias': alias, 'components': components})
+                print(f"Composite LoRA: {alias} -> {components}")
+                continue
+
+            # Parse optional alpha (trailing :float after repo or filename)
+            alpha = None
+            repo_and_file = source
+            # Check for alpha suffix: alias=org/repo:0.8 or alias=org/repo@file.safetensors:0.8
+            if ':' in repo_and_file:
+                last_colon = repo_and_file.rfind(':')
+                potential_alpha = repo_and_file[last_colon + 1:]
+                try:
+                    alpha = float(potential_alpha)
+                    repo_and_file = repo_and_file[:last_colon]
+                except ValueError:
+                    pass  # Not an alpha suffix (could be part of URL)
+
+            safetensors_file = ''
+            if '@' in repo_and_file:
+                repo, safetensors_file = repo_and_file.rsplit('@', 1)
+            else:
+                repo = repo_and_file
+            lora_dir = os.path.join(target_path, 'loras', repo)
+            if not os.path.isdir(lora_dir):
+                print(f"Downloading LoRA adapter: {repo} to {lora_dir}")
+                snapshot_download(repo_id=repo, local_dir=lora_dir)
+            else:
+                print(f"LoRA adapter directory already exists: {lora_dir}")
+            if not safetensors_file:
+                st_files = [f for f in os.listdir(lora_dir) if f.endswith('.safetensors')]
+                if len(st_files) == 0:
+                    raise ValueError(f"No .safetensors files found in LoRA adapter: {repo}")
+                if len(st_files) > 1:
+                    raise ValueError(f"Multiple .safetensors files in LoRA adapter: {repo}. Use @filename to specify.")
+                safetensors_file = st_files[0]
+            lora_path = 'loras/' + repo + '/' + safetensors_file
+            lora_entry = {'alias': alias, 'path': lora_path, 'alpha': alpha}
+            lora_adapters.append(lora_entry)
+            print(f"LoRA adapter: {alias} -> {lora_path}" + (f" (alpha={alpha})" if alpha else ""))
+    task_parameters['lora_adapters'] = lora_adapters
+    task_parameters['composite_lora_adapters'] = composite_lora_adapters
+
     plugin_config = {}
     assert num_streams >= 0, "num_streams should be a non-negative integer"
     if num_streams > 0:
@@ -711,4 +797,4 @@ def export_image_generation_model(model_repository_path, source_model, model_nam
         'max_num_inference_steps',
         'extra_quantization_params'
     ]}
-    export_image_generation_model(args['model_repository_path'], args['source_model'], args['model_name'], args['precision'], template_parameters, args['config_file_path'], args['num_streams'])
+    export_image_generation_model(args['model_repository_path'], args['source_model'], args['model_name'], args['precision'], template_parameters, args['config_file_path'], args['num_streams'], args['source_loras'])
diff --git a/demos/image_generation/README.md b/demos/image_generation/README.md
@@ -397,7 +397,7 @@ A single servable exposes the following endpoints:
 
 > **Note:** Inpainting/outpainting requests are processed sequentially — concurrent requests will be queued.
 
-> **Note:** For inpainting/outpainting, dedicated inpainting models (e.g. `stable-diffusion-v1-5/stable-diffusion-inpainting`) only support the `images/edits` endpoint. Check [supported models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#image-generation-models).
+> **Note:** Dedicated inpainting models (e.g. `stable-diffusion-v1-5/stable-diffusion-inpainting`) only support the `images/edits` endpoint — they cannot be used for text-to-image generation via `images/generations`. General-purpose models (e.g. SDXL) support both endpoints. Check [supported models](https://openvinotoolkit.github.io/openvino.genai/docs/supported-models/#image-generation-models).
 
 All requests are processed in unary format, with no streaming capabilities.
 
@@ -528,6 +528,12 @@ Output file (`edit_output.png`):
 
 Inpainting replaces a masked region in an image based on the prompt. The `mask` is a black-and-white image where white pixels mark the area to repaint.
 
+Download sample images:
+```console
+curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/cat.png
+curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/cat_mask.png
+```
+
 ![cat](./cat.png) ![cat_mask](./cat_mask.png)
 
 ::::{tab-set}
@@ -599,6 +605,12 @@ Outpainting extends an image beyond its original borders. Prepare two images:
 - **outpaint_input.png** — the original image centered on a larger canvas (e.g. 768×768) with black borders
 - **outpaint_mask.png** — white where the new content should be generated (the borders), black where the original image is
 
+Download sample images:
+```console
+curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/outpaint_input.png
+curl -O https://raw.githubusercontent.com/openvinotoolkit/model_server/main/demos/image_generation/outpaint_mask.png
+```
+
 ![outpaint_input](./outpaint_input.png) ![outpaint_mask](./outpaint_mask.png)
 
 ::::{tab-set}
@@ -718,6 +730,190 @@ ovms --rest_port 8000 ^
 
 Please follow [OpenVINO notebook](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/image-to-image-genai/image-to-image-genai.ipynb) to understand how other parameters affect editing.
 
+## Multi-LoRA Image Generation
+
+This section demonstrates how to serve multiple LoRA adapters with a single SDXL base model, enabling per-request style selection. It replicates the [Multi LoRA Image Generation notebook](https://github.com/openvinotoolkit/openvino_notebooks/blob/latest/notebooks/multilora-image-generation/multilora-image-generation.ipynb), but uses OVMS for serving.
+
+### Start Server with Multiple LoRA Adapters
+
+The following command starts OVMS with Stable Diffusion XL and 5 LoRA adapters for different artistic styles:
+
+::::{tab-set}
+:::{tab-item} Docker (Linux)
+:sync: docker
+```bash
+mkdir -p models
+
+docker run -d --rm --user $(id -u):$(id -g) -p 8000:8000 -v $(pwd)/models:/models/:rw \
+  -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e no_proxy=$no_proxy \
+  openvino/model_server:latest \
+    --rest_port 8000 \
+    --model_repository_path /models/ \
+    --task image_generation \
+    --source_model stabilityai/stable-diffusion-xl-base-1.0 \
+    --source_loras "xray=DoctorDiffusion/doctor-diffusion-s-xray-xl-lora@DD-xray-v1.safetensors,thepoint=alvdansen/the-point@araminta_k_the_point.safetensors,ukiyo=KappaNeuro/ukiyo-e-art@Ukiyo-e Art.safetensors,vector=DoctorDiffusion/doctor-diffusion-s-controllable-vector-art-xl-lora@DD-vector-v2.safetensors,chalk=Norod78/sdxl-chalkboarddrawing-lora@SDXL_ChalkBoardDrawing_LoRA_r8.safetensors"
+```
+:::
+
+:::{tab-item} Bare metal (Windows)
+:sync: bare-metal
+```bat
+mkdir models
+
+ovms --rest_port 8000 ^
+  --model_repository_path ./models/ ^
+  --task image_generation ^
+  --source_model stabilityai/stable-diffusion-xl-base-1.0 ^
+  --source_loras "xray=DoctorDiffusion/doctor-diffusion-s-xray-xl-lora@DD-xray-v1.safetensors,thepoint=alvdansen/the-point@araminta_k_the_point.safetensors,ukiyo=KappaNeuro/ukiyo-e-art@Ukiyo-e Art.safetensors,vector=DoctorDiffusion/doctor-diffusion-s-controllable-vector-art-xl-lora@DD-vector-v2.safetensors,chalk=Norod78/sdxl-chalkboarddrawing-lora@SDXL_ChalkBoardDrawing_LoRA_r8.safetensors"
+```
+:::
+
+::::
+
+The registered adapters and their recommended use:
+
+| Alias | Repository | Style | Recommended Alpha | Prompt Template |
+|-------|-----------|-------|-------------------|-----------------|
+| `xray` | DoctorDiffusion/doctor-diffusion-s-xray-xl-lora | X-Ray style | 0.8 | `xray <subject>` |
+| `thepoint` | alvdansen/the-point | Artistic illustration | 0.6 | `<subject>` |
+| `ukiyo` | KappaNeuro/ukiyo-e-art | Ukiyo-e Japanese art | 0.8 | `an illustration of <subject> in Ukiyo-e Art style` |
+| `vector` | DoctorDiffusion/doctor-diffusion-s-controllable-vector-art-xl-lora | Vector art | 0.8 | `vector <subject>` |
+| `chalk` | Norod78/sdxl-chalkboarddrawing-lora | Chalkboard drawing | 0.45 | `A colorful chalkboard drawing of <subject>` |
+
+### Generate Images with Different Styles
+
+Use the adapter alias as the `model` field to select which adapter to apply per request. The adapter is activated via **model name routing** — when the `model` field matches a registered LoRA alias, that adapter is automatically applied.
+
+**X-Ray style:**
+```bash
+curl http://localhost:8000/v3/images/generations \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "xray",
+    "prompt": "xray a cute cat in sunglasses",
+    "num_inference_steps": 20,
+    "guidance_scale": 0.0,
+    "size": "1024x1024"
+  }' | jq -r '.data[0].b64_json' | base64 --decode > xray_cat.png
+```
+
+**Ukiyo-e Japanese art:**
+```bash
+curl http://localhost:8000/v3/images/generations \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "ukiyo",
+    "prompt": "an illustration of a cute cat in sunglasses in Ukiyo-e Art style",
+    "num_inference_steps": 20,
+    "guidance_scale": 0.0,
+    "size": "1024x1024"
+  }' | jq -r '.data[0].b64_json' | base64 --decode > ukiyo_cat.png
+```
+
+**Chalkboard drawing:**
+```bash
+curl http://localhost:8000/v3/images/generations \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "chalk",
+    "prompt": "A colorful chalkboard drawing of a cute cat in sunglasses",
+    "num_inference_steps": 20,
+    "guidance_scale": 0.0,
+    "size": "1024x1024"
+  }' | jq -r '.data[0].b64_json' | base64 --decode > chalk_cat.png
+```
+
+Optionally override the adapter alpha using `lora_alphas`:
+```bash
+curl http://localhost:8000/v3/images/generations \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "xray",
+    "prompt": "xray a cute cat in sunglasses",
+    "lora_alphas": {"xray": 0.5},
+    "num_inference_steps": 20,
+    "guidance_scale": 0.0,
+    "size": "1024x1024"
+  }' | jq -r '.data[0].b64_json' | base64 --decode > xray_cat_half_alpha.png
+```
+
+### Using OpenAI Python Client with LoRA
+
+```python
+from openai import OpenAI
+import base64
+from io import BytesIO
+from PIL import Image
+
+client = OpenAI(
+    base_url="http://localhost:8000/v3",
+    api_key="unused"
+)
+
+# Define LoRA styles — the adapter alias is used as the model name
+styles = {
+    "xray": {"prompt": "xray {subject}"},
+    "thepoint": {"prompt": "{subject}"},
+    "ukiyo": {"prompt": "an illustration of {subject} in Ukiyo-e Art style"},
+    "vector": {"prompt": "vector {subject}"},
+    "chalk": {"prompt": "A colorful chalkboard drawing of {subject}"},
+}
+
+subject = "a cute cat in sunglasses"
+
+for style_name, style_config in styles.items():
+    prompt = style_config["prompt"].format(subject=subject)
+    response = client.images.generate(
+        model=style_name,  # adapter alias activates the LoRA
+        prompt=prompt,
+        extra_body={
+            "num_inference_steps": 20,
+            "guidance_scale": 0.0,
+            "size": "1024x1024",
+        }
+    )
+    image_data = base64.b64decode(response.data[0].b64_json)
+    image = Image.open(BytesIO(image_data))
+    image.save(f'{style_name}_cat.png')
+    print(f"Saved {style_name}_cat.png")
+```
+
+### Blending Multiple Adapters
+
+To blend multiple adapters, define a **composite adapter** at startup using the `@alias:alpha` syntax:
+
+```bash
+--source_loras="xray=...,ukiyo=...,blend=@xray:0.5+@ukiyo:0.4"
+```
+
+Then use the composite alias as the model name:
+```python
+response = client.images.generate(
+    model="blend",  # activates both xray and ukiyo
+    prompt="a cute cat in sunglasses",
+    extra_body={
+        "num_inference_steps": 20,
+        "guidance_scale": 0.0,
+        "size": "1024x1024",
+    }
+)
+```
+
+You can override individual component alphas at request time:
+```python
+response = client.images.generate(
+    model="blend",
+    prompt="a cute cat in sunglasses",
+    extra_body={
+        "lora_alphas": {"xray": 0.8, "ukiyo": 0.2},
+        "num_inference_steps": 20,
+        "guidance_scale": 0.0,
+        "size": "1024x1024",
+    }
+)
+```
+
+> **Note:** For more details on LoRA adapter configuration, see the [Image Generation reference documentation](../../docs/image_generation/reference.md#lora-adapters).
+
 ## References
 - [Image Generation API](../../docs/model_server_rest_api_image_generation.md)
 - [Image Edit API](../../docs/model_server_rest_api_image_edit.md)