43 changes: 43 additions & 0 deletions examples/flux2-dev/README.md
@@ -0,0 +1,43 @@
# FLUX.2-dev Example

Serves [black-forest-labs/FLUX.2-klein-9B](https://huggingface.co/black-forest-labs/FLUX.2-klein-9B) as an image generation endpoint using Sprocket.

## How to Deploy

1. Generate a unique deployment name and update `pyproject.toml` (the `sed -i ''` form below is for BSD/macOS `sed`; on GNU `sed`, drop the empty string after `-i`):

```bash
sed -i '' "s/^name = \"sprocket-flux2-dev\"/name = \"flux2-dev-$(date +%s)\"/" pyproject.toml
```

2. Set your Hugging Face token as a secret (required to download the gated model weights):

```bash
together beta jig secrets set --name HF_TOKEN --value <your-hf-token>
```

3. Deploy:

```bash
together beta jig deploy
```

4. Submit a generation request:

```bash
together beta jig submit --prompt "a cat sitting on a red chair"
```

You can follow the logs while the job is processing:

```bash
together beta jig logs --follow
```

5. Check the job output:

```bash
together beta jig job-status --request-id <request-id>
```

The response contains a base64-encoded PNG image under `outputs.image`.
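Decoding that field takes only the standard library. A minimal sketch, assuming the `job-status` response is available as a JSON string with the `outputs.image` shape described above:

```python
import base64
import json


def save_image(response_json: str, path: str = "output.png") -> str:
    """Extract the base64-encoded PNG under outputs.image and write it to disk."""
    payload = json.loads(response_json)
    png_bytes = base64.b64decode(payload["outputs"]["image"])
    with open(path, "wb") as f:
        f.write(png_bytes)
    return path
```

The exact envelope around `outputs` may differ from this assumed shape, so adjust the key lookup to match what the CLI actually prints.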
7 changes: 3 additions & 4 deletions examples/flux2-dev/pyproject.toml
@@ -2,13 +2,12 @@
 name = "sprocket-flux2-dev"
 version = "0.1.0"
 dependencies = [
-    "diffusers>=0.33.0",
-    "transformers>=4.44.0",
+    "diffusers>=0.33.1",
+    "transformers>=4.51.0",
     "torch>=2.0.0",
     "torchvision",
     "pillow",
     "accelerate",
-    "bitsandbytes",
     "safetensors",
     "sprocket",
 ]
@@ -32,7 +31,7 @@ gpu_type = "h100-80gb"
 gpu_count = 1
 cpu = 4
 memory = 32
-storage = 20
+storage = 50
 port = 8000
 min_replicas = 1
 max_replicas = 1
10 changes: 5 additions & 5 deletions examples/flux2-dev/run.py
@@ -5,27 +5,27 @@

 import sprocket
 import torch
-from diffusers import Flux2Pipeline
+from diffusers import Flux2KleinPipeline

 logging.basicConfig(level=logging.INFO)


 class Flux2Sprocket(sprocket.Sprocket):
     def setup(self) -> None:
-        model = "diffusers/FLUX.2-dev-bnb-4bit"
+        model = "black-forest-labs/FLUX.2-klein-9B"
         device = "cuda" if torch.cuda.is_available() else "cpu"

         logging.info(f"Loading Flux2 pipeline from {model} on {device}...")
-        pipe = Flux2Pipeline.from_pretrained(model, torch_dtype=torch.bfloat16)
+        pipe = Flux2KleinPipeline.from_pretrained(model, torch_dtype=torch.bfloat16)
         self.pipe = pipe.to(device)
         logging.info("Pipeline loaded successfully!")

     def predict(self, args: dict) -> dict:
         prompt = args.get("prompt", "a cat")

         # Optional parameters with defaults
-        num_inference_steps = args.get("num_inference_steps", 28)
-        guidance_scale = args.get("guidance_scale", 4.0)
+        num_inference_steps = args.get("num_inference_steps", 4)
+        guidance_scale = args.get("guidance_scale", 1.0)

         # Generate image
         logging.info(f"Generating image for prompt: {prompt[:50]}...")
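The diff excerpt ends before `predict` returns. Since the README documents a base64-encoded PNG under `outputs.image`, the encoding step typically looks like the following sketch (an illustration using Pillow, which is already among the project's dependencies — not the PR's actual code):

```python
import base64
import io

from PIL import Image


def encode_png(image: Image.Image) -> str:
    """Serialize a PIL image to a base64 PNG string suitable for a JSON response."""
    buf = io.BytesIO()
    image.save(buf, format="PNG")
    return base64.b64encode(buf.getvalue()).decode("utf-8")
```

In `predict`, the pipeline's first generated image (`.images[0]` on a diffusers pipeline output) would pass through a helper like this before being placed in the returned dict.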