GGUF image generation via Python (FLUX) - example
from diffusers import FluxPipeline, FluxTransformer2DModel, GGUFQuantizationConfig
import torch
import gc
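# Assumed dependencies (not pinned in the original gist): a recent diffusers
# release with GGUF support plus the `gguf` package itself, and likely
# transformers, accelerate, sentencepiece, and protobuf for the text encoders.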
# Example for FLUX; it can be adapted to other models as long as diffusers
# supports GGUF loading for them.
ckpt_id = "black-forest-labs/FLUX.1-dev"
ckpt_path = "/absolute/path/to/gguf/eg/flux1-dev-Q4_K_S.gguf"
prompt = "PROMPT_HERE"
prompt_2 = prompt  # Secondary prompt (fed to the T5 encoder); change if desired.
max_sequence_length = 512
height, width = 512, 512
num_inference_steps = 30
guidance_scale = 10.0
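# Note (added here, not in the original gist): FLUX.1-dev uses distilled
# guidance, and the diffusers default for guidance_scale is 3.5; 10.0 pushes
# prompt adherence harder, possibly at some cost to variety.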
# First, encode the prompt to get the embeddings.
pipeline = FluxPipeline.from_pretrained(
    ckpt_id,
    transformer=None,
    vae=None,
    torch_dtype=torch.bfloat16,
)
pipeline.enable_sequential_cpu_offload()

print("Encoding prompts.")
with torch.no_grad():
    prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt(
        prompt=prompt, prompt_2=prompt_2, max_sequence_length=max_sequence_length
    )

del pipeline
gc.collect()
torch.cuda.empty_cache()
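# Why two phases: the text encoders and the transformer never share the GPU,
# so peak VRAM stays close to the largest single component rather than the
# sum of all of them.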
# Now load the quantized model and run the denoising process.
print("Loading quantized model.")
transformer = FluxTransformer2DModel.from_single_file(
    ckpt_path,
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
    dtype=torch.bfloat16,
)
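# The GGUF weights stay quantized in memory and are dequantized to
# compute_dtype (bf16 here) on the fly during the forward pass.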
# Create the pipeline with the quantized transformer.
print("Creating pipeline with quantized model.")
pipeline = FluxPipeline.from_pretrained(
    ckpt_id,
    text_encoder=None,
    text_encoder_2=None,
    tokenizer=None,
    tokenizer_2=None,
    transformer=transformer,
    torch_dtype=torch.bfloat16,
).to("cuda")
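# If the quantized transformer still does not fit in VRAM, a possible
# alternative to .to("cuda") is pipeline.enable_model_cpu_offload(), which
# moves each component to the GPU only while it runs.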
| print("Running denoising.") | |
| # No need to wrap it up under `torch.no_grad()` as pipeline call method | |
| # is already wrapped under that. | |
| images = pipeline( | |
| prompt_embeds=prompt_embeds, | |
| pooled_prompt_embeds=pooled_prompt_embeds, | |
| num_inference_steps=num_inference_steps, | |
| guidance_scale=guidance_scale, | |
| height=height, | |
| width=width, | |
| ).images[0] | |
| images.save("image.png") |
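# Optional, not part of the original gist: pass a seeded generator to make
# runs reproducible, e.g.
#   image = pipeline(..., generator=torch.Generator("cuda").manual_seed(0)).images[0]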