# Gist by @ehzawad, created September 26, 2025 13:20
import os
import sys

import numpy as np
import psutil
import torch


def get_memory_info():
    """Get current memory usage of the process."""
    process = psutil.Process(os.getpid())
    return process.memory_info()


def select_device():
    """Pick the best available accelerator device, preferring Apple MPS."""
    mps_available = (
        hasattr(torch.backends, "mps")
        and torch.backends.mps.is_available()
        and torch.backends.mps.is_built()
    )
    if mps_available:
        return torch.device("mps")
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")


def prompt_next(message):
    """Wait for the user to press Enter before continuing."""
    input(f"\n{message}\n")


def print_memory_usage(obj, name):
    """Print detailed memory information for an object."""
    print(f"\n=== Memory Analysis for {name} ===")
    size_bytes = sys.getsizeof(obj)
    print(f"Object size: {size_bytes} bytes ({size_bytes / 1024:.2f} KB)")
    print(f"Memory address (id): {id(obj)} (0x{id(obj):x})")
    if isinstance(obj, torch.Tensor):
        print(f"Tensor shape: {obj.shape}")
        print(f"Tensor dtype: {obj.dtype}")
        print(f"Tensor device: {obj.device}")
        storage = obj.untyped_storage()  # Per PyTorch deprecation notice, use untyped storage APIs
        storage_nbytes = storage.nbytes()
        element_size = obj.element_size()
        print(f"Tensor storage bytes: {storage_nbytes}")
        print(f"Tensor element size: {element_size} bytes")
        print(f"Tensor storage size (elements): {storage_nbytes // element_size}")
        print(f"Tensor total memory: {obj.numel() * element_size} bytes")
        print(f"Tensor storage address: {storage.data_ptr()}")
        print(f"Tensor is contiguous: {obj.is_contiguous()}")
    elif isinstance(obj, np.ndarray):
        print(f"Array shape: {obj.shape}")
        print(f"Array dtype: {obj.dtype}")
        print(f"Array size: {obj.size}")
        print(f"Array nbytes: {obj.nbytes} bytes")
        print(f"Array data address: {obj.ctypes.data}")
    elif isinstance(obj, list):
        print(f"List length: {len(obj)}")
        print(f"List items memory: {sum(sys.getsizeof(item) for item in obj)} bytes")


def main():
    device = select_device()
    device_label = device.type if device.index is None else f"{device.type}:{device.index}"
    print("Interactive memory walkthrough. Press Enter when you're ready for each section.")
    print(f"Using PyTorch device: {device_label}")
    prompt_next("Start with the raw Python data?")
    data = [[1, 2, 3], [4, 5, 6]]
    print("\n=== Original Data ===")
    print(f"Data: {data}")
    np_array = np.array(data)
    prompt_next("Inspect memory details for the original list?")
    print_memory_usage(data, "Original list data")
    prompt_next("Create tensors from the list data?")
    x_tensor_data_cpu = torch.tensor(data)
    x_tensor_list_cpu = torch.tensor([[1, 2, 3], [4, 5, 6]])
    x_tensor_numpy_copy_cpu = torch.tensor(np_array)
    x_tensor_numpy_view_cpu = torch.as_tensor(np_array)
    print("\nCreated CPU tensors using different constructors.")
prompt_next("See memory details for tensor created from the existing list?")
print_memory_usage(x_tensor_data_cpu, "x_tensor_data_cpu")
prompt_next("See memory details for tensor created from inline list literal?")
print_memory_usage(x_tensor_list_cpu, "x_tensor_list_cpu")
prompt_next("See memory details for tensor copied from NumPy data (torch.tensor)?")
print_memory_usage(x_tensor_numpy_copy_cpu, "x_tensor_numpy_copy_cpu")
prompt_next("See memory details for tensor sharing NumPy storage (torch.as_tensor)?")
print_memory_usage(x_tensor_numpy_view_cpu, "x_tensor_numpy_view_cpu")
numpy_data_ptr = np_array.ctypes.data
tensor_storage_ptr_cpu = x_tensor_numpy_view_cpu.untyped_storage().data_ptr()
print(
"Shares storage with NumPy array (CPU view):",
tensor_storage_ptr_cpu == numpy_data_ptr,
)
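    # A minimal sketch of the sharing behavior: writing through the NumPy array
    # should be visible in the as_tensor view but not in the torch.tensor copy,
    # because only the view shares the underlying buffer.
    np_array[0, 0] = 99
    print("View sees NumPy write:", x_tensor_numpy_view_cpu[0, 0].item() == 99)
    print("Copy sees NumPy write:", x_tensor_numpy_copy_cpu[0, 0].item() == 99)
    np_array[0, 0] = 1  # restore the original value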
    if device.type != "cpu":
        prompt_next(f"Move tensors to {device_label}?")
        x_tensor_data_device = x_tensor_data_cpu.to(device)
        x_tensor_list_device = x_tensor_list_cpu.to(device)
        x_tensor_numpy_copy_device = x_tensor_numpy_copy_cpu.to(device)
        x_tensor_numpy_view_device = x_tensor_numpy_view_cpu.to(device)
        print(f"\nMoved tensors to {device_label} for accelerator inspection.")
        prompt_next(f"See memory details for tensor on {device_label} (from list data)?")
        print_memory_usage(x_tensor_data_device, f"x_tensor_data_{device.type}")
        prompt_next(f"See memory details for tensor on {device_label} (NumPy copy)?")
        print_memory_usage(x_tensor_numpy_copy_device, f"x_tensor_numpy_copy_{device.type}")
        prompt_next(f"See memory details for tensor on {device_label} (NumPy view converted)?")
        print_memory_usage(x_tensor_numpy_view_device, f"x_tensor_numpy_view_{device.type}")
        tensor_storage_ptr_device = x_tensor_numpy_view_device.untyped_storage().data_ptr()
        print(
            f"Shares storage with NumPy array on {device_label}:",
            tensor_storage_ptr_device == numpy_data_ptr,
        )
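        # Quick check (sketch): .to(device) copies across devices, so the
        # accelerator tensor can never alias the NumPy buffer; on a matching
        # device, .to() is a no-op that returns the same tensor object.
        print("CPU .to('cpu') returns same object:", x_tensor_data_cpu.to("cpu") is x_tensor_data_cpu)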
prompt_next("Compare the NumPy array used for tensor creation?")
print_memory_usage(np_array, "numpy array")
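    # Quick demonstration (sketch): a transposed tensor is a view over the same
    # storage, so its storage data_ptr() matches the original while
    # is_contiguous() becomes False.
    base = torch.arange(6).reshape(2, 3)
    transposed = base.t()
    print("\nTranspose shares storage:", transposed.untyped_storage().data_ptr() == base.untyped_storage().data_ptr())
    print("Transpose is contiguous:", transposed.is_contiguous())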
prompt_next("Check current Python process memory usage?")
print("\n=== Process Memory Info ===")
memory_info = get_memory_info()
print(f"RSS (Resident Set Size): {memory_info.rss / 1024 / 1024:.2f} MB")
print(f"VMS (Virtual Memory Size): {memory_info.vms / 1024 / 1024:.2f} MB")
prompt_next("Look at PyTorch-specific runtime memory info?")
print("\n=== PyTorch Memory Info ===")
if torch.cuda.is_available():
print("CUDA available: True")
print(f"CUDA memory allocated: {torch.cuda.memory_allocated() / 1024 / 1024:.2f} MB")
print(f"CUDA memory cached: {torch.cuda.memory_reserved() / 1024 / 1024:.2f} MB")
    elif (
        hasattr(torch.backends, "mps")
        and torch.backends.mps.is_available()
        and torch.backends.mps.is_built()
        and hasattr(torch, "mps")
        and hasattr(torch.mps, "current_allocated_memory")
    ):
        print("MPS available: True")
        print(f"MPS current allocated: {torch.mps.current_allocated_memory() / 1024 / 1024:.2f} MB")
        if hasattr(torch.mps, "driver_allocated_memory"):
            print(f"MPS driver allocated: {torch.mps.driver_allocated_memory() / 1024 / 1024:.2f} MB")
    else:
        print("CUDA/MPS not available - using CPU")
    print("\nAll sections complete. Restart the script to review again.")


if __name__ == "__main__":
    main()