@traversaro
Last active October 17, 2025 13:16
Stress GPU VRAM
#!/usr/bin/env python3
"""
allocate_vram.py — attempt to allocate ~80 GiB of VRAM with PyTorch.
Usage examples:
python allocate_vram.py # try for 80 GiB on cuda:0
python allocate_vram.py --target-gib 78 # pick a different target
python allocate_vram.py --chunk-gib 2 # bigger allocation chunks
python allocate_vram.py --device cuda:1 # choose another GPU
"""
import os
# Helps PyTorch reduce fragmentation for large allocations (set before torch import)
os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
import argparse
import time
import torch
def fmt_bytes(b: int) -> str:
    return f"{b/1024**3:.2f} GiB"


def main():
    parser = argparse.ArgumentParser(description="Allocate lots of VRAM with PyTorch.")
    parser.add_argument("--device", type=str, default="cuda:0",
                        help="CUDA device like cuda:0, cuda:1, etc.")
    parser.add_argument("--target-gib", type=float, default=80.0,
                        help="Target VRAM to allocate in GiB (binary, 1024^3).")
    parser.add_argument("--chunk-gib", type=float, default=1.0,
                        help="Allocate in chunks of this many GiB.")
    parser.add_argument("--hold-seconds", type=float, default=10.0,
                        help="How long to keep memory allocated before freeing.")
    parser.add_argument("--dtype", type=str, default="uint8",
                        choices=["uint8", "float16", "bfloat16", "float32", "float64"],
                        help="Tensor dtype to use for allocations (uint8 = 1 byte/element).")
    args = parser.parse_args()

    if not torch.cuda.is_available():
        raise SystemExit("CUDA is not available. Make sure you have a CUDA-capable GPU and drivers installed.")

    device = torch.device(args.device)
    torch.cuda.set_device(device)

    # Clear any cached allocator state for a clean start
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats(device)

    # Map dtype string to actual torch dtype
    dtype_map = {
        "uint8": torch.uint8,
        "float16": torch.float16,
        "bfloat16": torch.bfloat16,
        "float32": torch.float32,
        "float64": torch.float64,
    }
    dtype = dtype_map[args.dtype]
    elem_size = torch.empty((), dtype=dtype).element_size()

    # Show device info
    props = torch.cuda.get_device_properties(device)
    total_vram = props.total_memory  # bytes
    free_before, total_reported = torch.cuda.mem_get_info()  # free/total usable by allocator
    print(f"Device       : {props.name} ({args.device})")
    print(f"Total VRAM   : {fmt_bytes(total_vram)}")
    print(f"Free (start) : {fmt_bytes(free_before)} (allocator-reported)\n")

    target_bytes = int(args.target_gib * (1024 ** 3))
    chunk_bytes = int(args.chunk_gib * (1024 ** 3))
    if chunk_bytes <= 0:
        raise SystemExit("--chunk-gib must be positive")

    print(f"Target alloc : {fmt_bytes(target_bytes)} with chunks of {fmt_bytes(chunk_bytes)}; dtype={args.dtype} (elem={elem_size} B)")

    tensors = []
    allocated_bytes = 0
    num_chunks = 0

    try:
        while allocated_bytes < target_bytes:
            remaining = target_bytes - allocated_bytes
            this_bytes = min(chunk_bytes, remaining)
            # Convert bytes to element count (round up)
            numel = (this_bytes + elem_size - 1) // elem_size
            try:
                t = torch.empty(numel, dtype=dtype, device=device)
                # Access a small slice to ensure the allocation is realized
                t[:1] = 0
                tensors.append(t)
                num_chunks += 1
                added = t.numel() * elem_size
                allocated_bytes += added
                reserved_now = torch.cuda.memory_reserved(device)
                allocated_now = torch.cuda.memory_allocated(device)
                print(f"[{num_chunks:03d}] Allocated +{fmt_bytes(added)} "
                      f"(total allocated={fmt_bytes(allocated_now)}, reserved={fmt_bytes(reserved_now)})")
            except RuntimeError as e:
                # Typical PyTorch OOM is RuntimeError with "out of memory"
                if "out of memory" in str(e).lower():
                    print(f"OOM reached after {fmt_bytes(allocated_bytes)}. "
                          f"Cannot allocate next chunk of {fmt_bytes(this_bytes)}.")
                    break
                else:
                    raise

        torch.cuda.synchronize()
        peak_alloc = torch.cuda.max_memory_allocated(device)
        peak_res = torch.cuda.max_memory_reserved(device)
        free_after, _ = torch.cuda.mem_get_info()

        print("\n=== Summary ===")
        print(f"Requested      : {fmt_bytes(target_bytes)}")
        print(f"Allocated      : {fmt_bytes(allocated_bytes)} across {num_chunks} chunk(s)")
        print(f"Peak allocated : {fmt_bytes(peak_alloc)}")
        print(f"Peak reserved  : {fmt_bytes(peak_res)}")
        print(f"Free (end)     : {fmt_bytes(free_after)} (allocator-reported)")

        if allocated_bytes >= target_bytes:
            print("\nSUCCESS: Target met or exceeded. Holding allocation briefly...")
        else:
            print("\nPARTIAL: Could not reach the full target (likely device limit or fragmentation).")

        time.sleep(max(0.0, args.hold_seconds))
    finally:
        # Cleanup
        tensors.clear()
        torch.cuda.empty_cache()
        torch.cuda.synchronize()
        print("Freed allocations and cleared CUDA cache.")


if __name__ == "__main__":
    main()
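The 80 GiB default only fits on 80 GB-class cards; on anything smaller the run ends in the PARTIAL branch. Below is a minimal sketch of how one might size --target-gib from the free memory reported by torch.cuda.mem_get_info() before launching the script. The file name, helper name, the 1 GiB headroom, and the subprocess call are illustrative assumptions, not part of the gist (it only assumes the script above is saved locally as allocate_vram.py).

#!/usr/bin/env python3
"""size_target.py — sketch: derive a --target-gib that fits the current GPU."""
import subprocess
import torch


def suggested_target_gib(device: str = "cuda:0", headroom_gib: float = 1.0) -> float:
    """Return the free VRAM on `device` minus a small headroom, in GiB."""
    free_bytes, _total_bytes = torch.cuda.mem_get_info(torch.device(device))
    return max(0.0, free_bytes / 1024**3 - headroom_gib)


if __name__ == "__main__":
    if not torch.cuda.is_available():
        raise SystemExit("CUDA is not available.")
    target = suggested_target_gib()
    print(f"Suggested --target-gib: {target:.1f}")
    # Hand the value to the stress script (assumes it is saved as allocate_vram.py).
    subprocess.run(
        ["python", "allocate_vram.py", "--target-gib", f"{target:.1f}"],
        check=True,
    )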