Deploy FLUX.2-Dev image generation on Modal with MCP support, so AI agents (Claude, Cursor, VS Code Copilot) can generate images for you!
What you'll get:
- 🎨 FLUX.2-Dev running on an H200/H100 GPU
- 🌐 Web UI at `your-url.modal.run`
- 🤖 MCP endpoint for AI agents
- ⚡ ~15-20s per image generation

Prerequisites:
- Modal account → modal.com/signup
- Hugging Face account → huggingface.co/join

Accept the FLUX.2-dev license:
- Go to huggingface.co/black-forest-labs/FLUX.2-dev
- Click "Agree and access repository"
- Wait for approval (usually instant)

Create a Hugging Face access token:
- Go to huggingface.co/settings/tokens
- Click "New token"
- Name: `modal-flux` (or anything)
- Type: Read
- Click "Generate"
- Copy the token (it starts with `hf_...`)
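
Optionally, verify the token can actually see the gated repo before deploying. A quick local check, assuming a recent `huggingface_hub` (which provides `auth_check`):

```python
# pip install -U huggingface_hub
from huggingface_hub import auth_check

# Raises an error if the token is invalid or the license wasn't accepted.
auth_check("black-forest-labs/FLUX.2-dev", token="hf_your_token_here")
print("Token can access FLUX.2-dev")
```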

Install and authenticate the Modal CLI:

```bash
pip install modal
modal setup
```

This opens your browser → log in → done!
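
To double-check the CLI is authenticated, you can print the active profile (assuming a recent `modal` CLI, which ships the `profile` subcommand):

```bash
modal profile current
```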

Create a Modal secret holding the token:
- Go to modal.com/secrets
- Click "Create new secret"
- Choose "Custom"
- Name: `huggingface`
- Add a key-value pair:
  - Key: `HF_TOKEN`
  - Value: `hf_your_token_here` (paste your token)
- Click "Create"

Create the project and the app file:

```bash
mkdir -p flux-mcp && cd flux-mcp
touch flux_2_api.py
```

Paste the following into `flux_2_api.py`:

```python
import time
from io import BytesIO
from pathlib import Path

import modal

# --- 1. Container Images ---

# GPU backend image: CUDA base with PyTorch and bleeding-edge diffusers/transformers
flux_image = (
    modal.Image.from_registry("nvidia/cuda:12.4.0-devel-ubuntu22.04", add_python="3.11")
    .apt_install("git", "libglib2.0-0", "libsm6", "libxrender1", "libxext6", "ffmpeg", "libgl1")
    .pip_install(
        "invisible_watermark>=0.2.0",
        "huggingface_hub",
        "hf_transfer",
        "safetensors",
        "sentencepiece",
        "numpy<2",
        "torch==2.5.0",
        "git+https://github.com/huggingface/transformers.git",
        "git+https://github.com/huggingface/diffusers.git@e6d46123091afd58281dc7487c0f6b67055683b9",
        "git+https://github.com/huggingface/peft.git",
        "git+https://github.com/huggingface/accelerate.git",
        "gradio_client",
    )
    .env({
        "HF_HUB_ENABLE_HF_TRANSFER": "1",  # faster model downloads
        "HF_HUB_CACHE": "/cache",          # cache weights on the shared volume
    })
)

# Web UI image: lightweight, no GPU libraries needed
web_image = (
    modal.Image.debian_slim(python_version="3.12")
    .pip_install(
        "fastapi[standard]",
        "gradio[mcp]>=5.0.0",
        "pillow",
    )
    .env({"GRADIO_MCP_SERVER": "True"})
)

app = modal.App("flux-mcp-app")

# Imports that only exist inside the respective container images
with flux_image.imports():
    import os
    import torch
    from huggingface_hub import login
    from diffusers import Flux2Pipeline, Flux2Transformer2DModel

with web_image.imports():
    import gradio as gr
    from fastapi import FastAPI
    from PIL import Image
    import os

MINUTES = 60

# --- 2. GPU Backend ---
@app.cls(
    image=flux_image,
    gpu=["H200", "H100"],  # try H200 first, fall back to H100
    scaledown_window=20 * MINUTES,  # keep the container warm for 20 min after the last call
    timeout=60 * MINUTES,
    secrets=[modal.Secret.from_name("huggingface")],
    volumes={
        "/cache": modal.Volume.from_name("hf-hub-cache", create_if_missing=True),
    },
)
class Model:
    @modal.enter()
    def enter(self):
        # Runs once per container start: log in, then download/load the weights
        hf_token = os.environ.get("HF_TOKEN")
        if hf_token:
            login(token=hf_token)
            print("✅ Logged in to Hugging Face")

        repo_id = "black-forest-labs/FLUX.2-dev"
        print(f"⏳ Loading {repo_id}...")
        self.pipe = Flux2Pipeline.from_pretrained(
            repo_id, torch_dtype=torch.bfloat16, token=hf_token
        )
        self.pipe.to("cuda")

        try:
            # Minor speedup; skipped gracefully if this diffusers build lacks it
            self.pipe.transformer.fuse_qkv_projections()
            self.pipe.vae.fuse_qkv_projections()
            print("✅ QKV projections fused")
        except AttributeError:
            pass

        self.device = "cuda"
        print("✅ Model loaded!")

    @modal.method()
    def generate_image(self, prompt: str, width: int, height: int, steps: int, guidance: float, seed: int):
        print(f"🎨 Generating: {prompt}")
        start_time = time.time()
        generator = torch.Generator(device=self.device).manual_seed(seed)
        out = self.pipe(
            prompt=prompt, width=width, height=height,
            num_inference_steps=steps, guidance_scale=guidance,
            generator=generator,
        ).images[0]
        print(f"✅ Generated in {time.time() - start_time:.1f}s")

        byte_stream = BytesIO()
        out.save(byte_stream, format="JPEG", quality=95)
        return byte_stream.getvalue()
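
# Optional addition (not in the original guide, a hypothetical convenience):
# smoke-test generation with `modal run flux_2_api.py` before deploying.
@app.local_entrypoint()
def main(prompt: str = "A cat holding a sign that says 'Hello FLUX.2'"):
    image_bytes = Model().generate_image.remote(prompt, 1024, 1024, 28, 3.5, 42)
    Path("output.jpg").write_bytes(image_bytes)
    print("Saved output.jpg")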

# --- 3. Web UI & MCP Server ---
RESOLUTION_PRESETS = {
    "1:1 Square (1024×1024)": (1024, 1024),
    "16:9 Landscape (1360×768)": (1360, 768),
    "9:16 Portrait (768×1360)": (768, 1360),
    "4:3 Standard (1152×896)": (1152, 896),
    "3:4 Portrait (896×1152)": (896, 1152),
    "3:2 Photo (1216×832)": (1216, 832),
    "2:3 Portrait Photo (832×1216)": (832, 1216),
    "21:9 Ultrawide (1536×640)": (1536, 640),
    "2K HD (1920×1080)": (1920, 1080),
    "2K Vertical (1080×1920)": (1080, 1920),
}

QUALITY_PRESETS = {
    "⚡ Fast (20 steps)": 20,
    "🚀 Balanced (28 steps)": 28,
    "✨ Quality (35 steps)": 35,
    "🎨 Maximum (50 steps)": 50,
}

@app.function(image=web_image, max_containers=1)
@modal.concurrent(max_inputs=100)  # one container multiplexes many UI/MCP requests
@modal.asgi_app()
def ui():
    os.environ["GRADIO_MCP_SERVER"] = "True"

    def generate_flux_image(
        prompt: str,
        aspect_ratio: str = "1:1 Square (1024×1024)",
        quality_preset: str = "🚀 Balanced (28 steps)",
        guidance: str = "3.5",
        seed: str = "42",
        progress=gr.Progress(),
    ):
        """
        Generate high-quality images using the FLUX.2-Dev model on Modal H200/H100.

        Args:
            prompt (str): Detailed text description of the image.
            aspect_ratio (str): Image aspect ratio preset.
            quality_preset (str): Quality/speed preset. Default: Balanced.
            guidance (str): Guidance scale (1.0-10.0). Default: 3.5.
            seed (str): Random seed for reproducibility. Default: 42.
        """
        sd = int(seed)
        g = float(guidance)
        s = QUALITY_PRESETS.get(quality_preset, 28)
        w, h = RESOLUTION_PRESETS.get(aspect_ratio, (1024, 1024))

        progress(0.2, desc=f"Generating ({s} steps)...")
        image_bytes = Model().generate_image.remote(prompt, w, h, s, g, sd)
        progress(1.0, desc="Done!")
        return Image.open(BytesIO(image_bytes))

    demo = gr.Interface(
        fn=generate_flux_image,
        inputs=[
            gr.Textbox(label="Prompt", lines=3, placeholder="A cat holding a sign that says 'Hello FLUX.2'"),
            gr.Dropdown(choices=list(RESOLUTION_PRESETS.keys()), value="1:1 Square (1024×1024)", label="Aspect Ratio"),
            gr.Dropdown(choices=list(QUALITY_PRESETS.keys()), value="🚀 Balanced (28 steps)", label="Quality"),
            gr.Slider(1.0, 10.0, 3.5, step=0.5, label="Guidance Scale"),
            gr.Number(42, label="Seed"),
        ],
        outputs=gr.Image(label="Result"),
        title="🎨 FLUX.2-Dev MCP Server",
        description="Generate images with FLUX.2-Dev on H200/H100. MCP enabled for AI agents.",
        api_name="generate",
    )
    demo.queue()
    return gr.mount_gradio_app(FastAPI(), demo, path="/")
```

Deploy it:

```bash
modal deploy flux_2_api.py
```

You'll see output like:

```
✓ Created objects.
├── 🔨 Created web function ui => https://YOUR-USERNAME--flux-mcp-app-ui.modal.run
└── 🔨 Created function Model.*.
✓ App deployed! 🎉
```

Your app is now live at `https://YOUR-USERNAME--flux-mcp-app-ui.modal.run`.
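
While the first request warms up, you can tail the app's logs from another terminal:

```bash
modal app logs flux-mcp-app
```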

Test the web UI:
- Open https://YOUR-USERNAME--flux-mcp-app-ui.modal.run in your browser
- Enter a prompt: `A cat holding a sign that says "Hello World"`
- Click Submit
- Wait ~30-60s for the first image (cold start), then ~15-20s for subsequent images

Verify the MCP endpoint:

```bash
curl https://YOUR-USERNAME--flux-mcp-app-ui.modal.run/gradio_api/mcp/schema
```

It should return JSON with the tool definitions.
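
You can also call the same endpoint programmatically with `gradio_client` (a sketch; the positional arguments follow the `generate_flux_image` signature above, and the preset strings must match the dictionary keys exactly):

```python
from gradio_client import Client

client = Client("https://YOUR-USERNAME--flux-mcp-app-ui.modal.run/")
result = client.predict(
    "A cat holding a sign that says 'Hello FLUX.2'",  # prompt
    "1:1 Square (1024×1024)",                         # aspect_ratio
    "🚀 Balanced (28 steps)",                         # quality_preset
    "3.5",                                            # guidance
    "42",                                             # seed
    api_name="/generate",
)
print(result)  # local file path of the downloaded image
```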

Connect Claude Desktop:
- Find the config file:
  - Mac: `~/Library/Application Support/Claude/claude_desktop_config.json`
  - Windows: `%APPDATA%\Claude\claude_desktop_config.json`
  - Linux: `~/.config/Claude/claude_desktop_config.json`
- Add this (replace YOUR-USERNAME):

  ```json
  {
    "mcpServers": {
      "flux-generator": {
        "url": "https://YOUR-USERNAME--flux-mcp-app-ui.modal.run/gradio_api/mcp/sse"
      }
    }
  }
  ```

- Restart Claude Desktop
- Ask Claude: "Generate an image of a sunset over mountains using the flux generator"

Connect VS Code Copilot by adding the server to your MCP settings (`.vscode/mcp.json` or settings):

```json
{
  "servers": {
    "flux-generator": {
      "url": "https://YOUR-USERNAME--flux-mcp-app-ui.modal.run/gradio_api/mcp/sse"
    }
  }
}
```

FLUX.2-Dev excels at text in images! Try these prompts:
- A coffee shop storefront with a neon sign that says "OPEN 24/7"
- A movie poster for "The Last Galaxy" featuring a spaceship and stars
- A birthday card with "Happy Birthday Sarah!" in elegant script
- Professional headshot of a businesswoman, studio lighting
- Anime-style character holding a sword with "HERO" written on it

| GPU  | Price/hour | Typical cost per image |
|------|------------|------------------------|
| H100 | ~$4/hr     | ~$0.02/image           |
| H200 | ~$5/hr     | ~$0.02/image           |

The container stays warm for 20 minutes after the last request, so back-to-back generations are cheap!
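
As a rough sanity check: at ~$4/hr, one ~18 s generation costs about $4 × 18 / 3600 ≈ $0.02, which is where the per-image figure above comes from (a cold start adds a bit to the first image).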

Troubleshooting:

**Hugging Face access errors**
- Your HF token doesn't have access to FLUX.2-dev
- Make sure you accepted the license at huggingface.co/black-forest-labs/FLUX.2-dev
- Regenerate your token and update the Modal secret

**CUDA out of memory**
- This shouldn't happen on H100/H200
- If you're using a smaller GPU, add `self.pipe.enable_model_cpu_offload()` after loading, as sketched below
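
A minimal sketch of that change inside `Model.enter()` (hypothetical; only relevant if you swap in a smaller GPU):

```python
# Replace self.pipe.to("cuda") with CPU offload: weights stay in CPU RAM and
# each submodule is moved to the GPU only while it runs (slower, but it fits).
self.pipe = Flux2Pipeline.from_pretrained(
    repo_id, torch_dtype=torch.bfloat16, token=hf_token
)
self.pipe.enable_model_cpu_offload()
```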

**MCP client can't connect**
- Make sure the URL ends with `/gradio_api/mcp/sse`
- Check that `GRADIO_MCP_SERVER=True` is set
- Restart your AI agent after changing its config

**Slow generation**
- The first request takes ~60s (cold start + model loading)
- Subsequent requests: ~15-20s
- Keep the container warm by generating regularly, or pin a warm container (see the sketch below)
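
If you'd rather pay to avoid cold starts entirely, Modal can pin a warm container. A sketch, assuming your Modal version supports `min_containers` on the class decorator:

```python
@app.cls(
    image=flux_image,
    gpu=["H200", "H100"],
    min_containers=1,  # assumption: keep one GPU container warm at all times (billed while idle)
    scaledown_window=20 * MINUTES,
    timeout=60 * MINUTES,
    secrets=[modal.Secret.from_name("huggingface")],
    volumes={"/cache": modal.Volume.from_name("hf-hub-cache", create_if_missing=True)},
)
class Model: ...
```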
You now have:
- ✅ FLUX.2-Dev running on H200/H100
- ✅ A web UI for manual generation
- ✅ An MCP server for AI agents
- ✅ 10 resolution presets
- ✅ 4 quality presets
Your URLs:
- Web UI: `https://YOUR-USERNAME--flux-mcp-app-ui.modal.run`
- MCP: `https://YOUR-USERNAME--flux-mcp-app-ui.modal.run/gradio_api/mcp/sse`
Happy generating! 🎨