GGUF image generation via Python (FLUX) - example
from diffusers import FluxPipeline, FluxTransformer2DModel, GGUFQuantizationConfig
import torch
import gc
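# Assumed dependencies (not pinned in the original gist): a recent diffusers
# release with GGUF support plus the `gguf` package itself, and likely
# transformers, accelerate, sentencepiece, and protobuf for the text encoders.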
# Example for FLUX; it can be adapted to other models as long as diffusers
# supports GGUF loading for them.
ckpt_id = "black-forest-labs/FLUX.1-dev"
ckpt_path = "/absolute/path/to/gguf/eg/flux1-dev-Q4_K_S.gguf"
prompt = "PROMPT_HERE"
prompt_2 = prompt  # Secondary prompt (fed to the T5 encoder); change if desired.
max_sequence_length = 512
height, width = 512, 512
num_inference_steps = 30
guidance_scale = 10.0
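# Note (added here, not in the original gist): FLUX.1-dev uses distilled
# guidance, and the diffusers default for guidance_scale is 3.5; 10.0 pushes
# prompt adherence harder, possibly at some cost to variety.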
# First, encode the prompt to get the embeddings.
pipeline = FluxPipeline.from_pretrained(
    ckpt_id,
    transformer=None,
    vae=None,
    torch_dtype=torch.bfloat16,
)
pipeline.enable_sequential_cpu_offload()

print("Encoding prompts.")
with torch.no_grad():
    prompt_embeds, pooled_prompt_embeds, text_ids = pipeline.encode_prompt(
        prompt=prompt, prompt_2=prompt_2, max_sequence_length=max_sequence_length
    )

del pipeline
gc.collect()
torch.cuda.empty_cache()
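# Why two phases: the text encoders and the transformer never share the GPU,
# so peak VRAM stays close to the largest single component rather than the
# sum of all of them.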
# Now load the quantized model and run the denoising process.
print("Loading quantized model.")
transformer = FluxTransformer2DModel.from_single_file(
    ckpt_path,
    quantization_config=GGUFQuantizationConfig(compute_dtype=torch.bfloat16),
    dtype=torch.bfloat16,
)
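# The GGUF weights stay quantized in memory and are dequantized to
# compute_dtype (bf16 here) on the fly during the forward pass.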
# Create the pipeline with the quantized transformer.
print("Creating pipeline with quantized model.")
pipeline = FluxPipeline.from_pretrained(
    ckpt_id,
    text_encoder=None,
    text_encoder_2=None,
    tokenizer=None,
    tokenizer_2=None,
    transformer=transformer,
    torch_dtype=torch.bfloat16,
).to("cuda")
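# If the quantized transformer still does not fit in VRAM, a possible
# alternative to .to("cuda") is pipeline.enable_model_cpu_offload(), which
# moves each component to the GPU only while it runs.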
| print("Running denoising.") | |
| # No need to wrap it up under `torch.no_grad()` as pipeline call method | |
| # is already wrapped under that. | |
| images = pipeline( | |
| prompt_embeds=prompt_embeds, | |
| pooled_prompt_embeds=pooled_prompt_embeds, | |
| num_inference_steps=num_inference_steps, | |
| guidance_scale=guidance_scale, | |
| height=height, | |
| width=width, | |
| ).images[0] | |
| images.save("image.png") |
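# Optional, not part of the original gist: pass a seeded generator to make
# runs reproducible, e.g.
#   image = pipeline(..., generator=torch.Generator("cuda").manual_seed(0)).images[0]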