Skip to content

Instantly share code, notes, and snippets.

@hvaara
Created May 27, 2025 17:01
Show Gist options
  • Select an option

  • Save hvaara/34afb8fb3fc1422c319a5fd972f8fc3a to your computer and use it in GitHub Desktop.

Select an option

Save hvaara/34afb8fb3fc1422c319a5fd972f8fc3a to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[2m2025-05-27T16:50:07.642175Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mtorch.__version__ = '2.8.0a0+gitc52a002'\u001b[0m\n"
]
}
],
"source": [
"import time\n",
"import torch\n",
"import logging\n",
"import datetime\n",
"import structlog\n",
"import os\n",
"\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
"OPERATION_INFO = (1 << 0)\n",
"COPY_INFO = (1 << 1)\n",
"CPU_FALLBACK_INFO = (1 << 2)\n",
"\n",
"ALL_STATS = (1 << 3)\n",
"OPERATION_STATS = (1 << 4)\n",
"COPY_STATS = (1 << 5)\n",
"CPU_FALLBACK_STATS = (1 << 6)\n",
"\n",
"INCLUDE_GPU_TIME = (1 << 7)\n",
"INCLUDE_KERNEL_TIME = (1 << 8)\n",
"INCLUDE_BUFFER_ID = (1 << 9)\n",
"\n",
"LOG_COUNT = (INCLUDE_BUFFER_ID << 1) - 1\n",
"\n",
"\n",
"ALL_FLAGS = OPERATION_INFO+COPY_INFO+CPU_FALLBACK_INFO+INCLUDE_GPU_TIME+INCLUDE_KERNEL_TIME+INCLUDE_BUFFER_ID\n",
"\n",
"# os.environ[\"PYTORCH_MPS_LOG_PROFILE_INFO\"] = str(ALL_FLAGS)\n",
"# os.environ[\"PYTORCH_DEBUG_MPS_ALLOCATOR\"] = \"1\"\n",
"\n",
"DEVICE = \"mps\"\n",
"\n",
"MEMLEAK_DETECTED = 1\n",
"NO_MEMLEAK_DETECTED = 2\n",
"\n",
"structlog.configure(\n",
" processors=[\n",
" structlog.contextvars.merge_contextvars,\n",
" structlog.processors.add_log_level,\n",
" structlog.processors.StackInfoRenderer(),\n",
" structlog.dev.set_exc_info,\n",
" structlog.processors.TimeStamper(fmt=\"iso\", utc=True),\n",
" structlog.dev.ConsoleRenderer()\n",
" ],\n",
" wrapper_class=structlog.make_filtering_bound_logger(logging.NOTSET),\n",
" context_class=dict,\n",
" logger_factory=structlog.PrintLoggerFactory(),\n",
" cache_logger_on_first_use=False\n",
")\n",
"logger = structlog.get_logger()\n",
"\n",
"logger.info(f\"{torch.__version__ = }\")"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"def print_allocated_memory(i=None, iters=None):\n",
" log_line = \"Memory info\"\n",
" if i is not None and iters is not None:\n",
" log_line += f\" ({i}/{iters})\"\n",
" logger.info(log_line, current_allocated_memory=torch.mps.current_allocated_memory(), driver_allocated_memory=torch.mps.driver_allocated_memory())"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"def empty_cache():\n",
" print_allocated_memory()\n",
" torch.mps.empty_cache()\n",
" time.sleep(5)\n",
" logger.info(f\"MPS cache cleared.\")\n",
" print_allocated_memory()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
"def benchmark(\n",
" model,\n",
" input,\n",
" should_backward=False,\n",
" should_print_model=True,\n",
" should_empty_cache=True,\n",
" debug_n_iters=100,\n",
" memory_threshold=50 * 1024**3,\n",
" iters=1000,\n",
" is_torch_model=True):\n",
" ret = NO_MEMLEAK_DETECTED\n",
"\n",
" timings = []\n",
" \n",
" if is_torch_model:\n",
" model = model.to(DEVICE)\n",
" input = input.to(DEVICE)\n",
" \n",
" if should_print_model:\n",
" print(model)\n",
"\n",
" if should_empty_cache:\n",
" empty_cache()\n",
"\n",
" logger.info(\"Entering benchmark loop.\")\n",
" for i in range(1, iters+1):\n",
" start_time = time.time()\n",
" output = model(input)\n",
" if should_backward:\n",
" loss = output.sum()\n",
" loss.backward()\n",
" end_time = time.time()\n",
" elapsed_time = end_time - start_time\n",
" timings.append(elapsed_time)\n",
" \n",
" if torch.mps.driver_allocated_memory() > memory_threshold:\n",
" logger.warning(f\"torch.mps.driver_allocated_memory() > {memory_threshold/1024**3} GiB threshold reached.\")\n",
" ret = MEMLEAK_DETECTED\n",
" break\n",
" if (i == 1 or i%debug_n_iters == 0) and i != iters:\n",
" print_allocated_memory(i, iters)\n",
" print_allocated_memory(i, iters)\n",
" logger.info(\"Exited benchmark loop.\")\n",
" timings = torch.Tensor(timings)\n",
" logger.info(f\"Timings\", sum=torch.sum(timings).item(), mean=torch.mean(timings).item(), std=torch.std(timings).item())\n",
" return ret"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Our familiar example"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sequential(\n",
" (0): Linear(in_features=256, out_features=256, bias=True)\n",
")\n",
"\u001b[2m2025-05-27T16:50:09.102338Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m537134080\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m1619476480\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.120981Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.121409Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m537134080\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m1619476480\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.121662Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.122249Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.127254Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.132191Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.137162Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.141286Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.145670Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.150201Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.154693Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.158882Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.162828Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.167163Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.167516Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:14.168109Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m4.1001796489581466e-05\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m1.9848786905640736e-05\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.04100179672241211\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"N, C, H, W = 64, 32, 256, 256\n",
"\n",
"model = torch.nn.Sequential(\n",
" torch.nn.Linear(H, W),\n",
")\n",
"\n",
"inputs = torch.rand(N, C, H, W).to(DEVICE)\n",
"model.to(DEVICE)\n",
"\n",
"benchmark(model, inputs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Reduce the channels and issue goes away"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sequential(\n",
" (0): Linear(in_features=256, out_features=256, bias=True)\n",
")\n",
"\u001b[2m2025-05-27T16:50:22.411000Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m537134080\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.434869Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.437309Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m537134080\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m1619476480\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.438837Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.442094Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.451211Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.458664Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.465347Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.470943Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.477112Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.482423Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.487837Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.493330Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.497866Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.502322Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.502605Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:27.503055Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m5.717253588954918e-05\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m7.370045932475477e-05\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.057172536849975586\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"N, C, H, W = 64, 2, 256, 256\n",
"\n",
"model = torch.nn.Sequential(\n",
" torch.nn.Linear(H, W),\n",
")\n",
"\n",
"inputs = torch.rand(N, C, H, W).to(DEVICE)\n",
"model.to(DEVICE)\n",
"\n",
"benchmark(model, inputs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Increase the channels by 1 and we start seeing leaked bytes\n",
"\n",
"This section contains multiple examples, each increasing the channel count by 1."
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sequential(\n",
" (0): Linear(in_features=256, out_features=256, bias=True)\n",
")\n",
"\u001b[2m2025-05-27T16:50:28.051557Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m50594816\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.063814Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.065323Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m50594816\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.065999Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.070187Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.081306Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.089523Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.096595Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.102586Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.108522Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.114268Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.119192Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.123973Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.128554Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.133161Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.133403Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:33.133985Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m5.971503196633421e-05\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m9.584465442458168e-05\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.05971503257751465\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"N, C, H, W = 64, 3, 256, 256\n",
"\n",
"model = torch.nn.Sequential(\n",
" torch.nn.Linear(H, W),\n",
")\n",
"\n",
"inputs = torch.rand(N, C, H, W).to(DEVICE)\n",
"model.to(DEVICE)\n",
"\n",
"benchmark(model, inputs)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sequential(\n",
" (0): Linear(in_features=256, out_features=256, bias=True)\n",
")\n",
"\u001b[2m2025-05-27T16:50:33.967250Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m67372032\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:38.972764Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:38.975176Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m67372032\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:38.975739Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:38.976712Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:38.985566Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:38.992857Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:38.998849Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:39.004684Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:39.010144Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:39.015467Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:39.020436Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:39.025351Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:39.030190Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:39.035078Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:39.035373Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:39.035829Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m5.461335240397602e-05\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m2.6176043320447206e-05\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.054613351821899414\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"N, C, H, W = 64, 4, 256, 256\n",
"\n",
"model = torch.nn.Sequential(\n",
" torch.nn.Linear(H, W),\n",
")\n",
"\n",
"inputs = torch.rand(N, C, H, W).to(DEVICE)\n",
"model.to(DEVICE)\n",
"\n",
"benchmark(model, inputs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Eventually we hit our 75 GiB leak threshold as before."
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sequential(\n",
" (0): Linear(in_features=256, out_features=256, bias=True)\n",
")\n",
"\u001b[2m2025-05-27T16:50:40.121548Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m84149248\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.125972Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.126351Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m84149248\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.126542Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.126957Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.132154Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.136570Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.141372Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.145519Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.149451Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.153962Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.158704Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.162878Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.167162Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.171428Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.171790Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:45.172456Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m4.0439128497382626e-05\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m2.223329465778079e-05\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.04043912887573242\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"N, C, H, W = 64, 5, 256, 256\n",
"\n",
"model = torch.nn.Sequential(\n",
" torch.nn.Linear(H, W),\n",
")\n",
"\n",
"inputs = torch.rand(N, C, H, W).to(DEVICE)\n",
"model.to(DEVICE)\n",
"\n",
"benchmark(model, inputs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Different topology again\n",
"\n",
"1st example produces no leaks"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sequential(\n",
" (0): Linear(in_features=512, out_features=512, bias=True)\n",
")\n",
"\u001b[2m2025-05-27T16:50:46.498332Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m85196800\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.500684Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.501809Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m85196800\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.502719Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.593059Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.597061Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.600685Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.604214Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.608266Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.612165Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.615970Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.619549Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.623229Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.626759Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.630374Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.630559Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:51.631060Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m0.0001243672304553911\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m0.0028319021221250296\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.12436723709106445\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"N, C, H, W = 64, 1, 512, 512\n",
"\n",
"model = torch.nn.Sequential(\n",
" torch.nn.Linear(H, W),\n",
")\n",
"\n",
"inputs = torch.rand(N, C, H, W).to(DEVICE)\n",
"model.to(DEVICE)\n",
"\n",
"benchmark(model, inputs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Increase H and W by 1 from the example above and we start seeing leaks again."
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sequential(\n",
" (0): Linear(in_features=513, out_features=513, bias=True)\n",
")\n",
"\u001b[2m2025-05-27T16:50:53.541760Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m68686336\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.547258Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.549233Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m68686336\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.550357Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.657237Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.661368Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.665148Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.669004Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.672582Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.676341Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.679970Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.683724Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.687608Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.691326Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.694962Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.695133Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:50:58.695635Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m0.00014083004498388618\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m0.003342921379953623\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.14083003997802734\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"N, C, H, W = 64, 1, 513, 513\n",
"\n",
"model = torch.nn.Sequential(\n",
" torch.nn.Linear(H, W),\n",
")\n",
"\n",
"inputs = torch.rand(N, C, H, W).to(DEVICE)\n",
"model.to(DEVICE)\n",
"\n",
"benchmark(model, inputs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Different topology again\n",
"\n",
"Increasing H and W in this case does not produce leaks"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Sequential(\n",
" (0): Linear(in_features=8192, out_features=8192, bias=True)\n",
")\n",
"\u001b[2m2025-05-27T16:51:01.217889Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m537133056\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.221584Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.223512Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m537133056\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.224430Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.228886Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.242050Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.249042Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.255334Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.261302Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.266904Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.272457Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.277428Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.281962Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.286430Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.290771Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.291093Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n",
"\u001b[2m2025-05-27T16:51:06.291607Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m5.893349589314312e-05\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m0.00011335179442539811\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.05893349647521973\u001b[0m\n"
]
},
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"N, C, H, W = 1, 1, 8192, 8192\n",
"\n",
"model = torch.nn.Sequential(\n",
" torch.nn.Linear(H, W),\n",
")\n",
"\n",
"inputs = torch.rand(N, C, H, W).to(DEVICE)\n",
"model.to(DEVICE)\n",
"\n",
"benchmark(model, inputs)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Even with a 2 GiB tensor we don't see any leaks this time. The failure mode does not seem to be directly related to the tensor size, rather the tensor topology and size is what causes it."
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"# N, C, H, W = 1, 1, 2 * 8192, 2 * 8192\n",
"\n",
"# model = torch.nn.Sequential(\n",
"# torch.nn.Linear(H, W),\n",
"# )\n",
"\n",
"# inputs = torch.rand(N, C, H, W).to(DEVICE)\n",
"# model.to(DEVICE)\n",
"\n",
"# benchmark(model, inputs, iters=500)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "pytorchdev",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment