Created
May 27, 2025 17:01
-
-
Save hvaara/34afb8fb3fc1422c319a5fd972f8fc3a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 25, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\u001b[2m2025-05-27T16:50:07.642175Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mtorch.__version__ = '2.8.0a0+gitc52a002'\u001b[0m\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "import time\n", | |
| "import torch\n", | |
| "import logging\n", | |
| "import datetime\n", | |
| "import structlog\n", | |
| "import os\n", | |
| "\n", | |
| "import torch.nn as nn\n", | |
| "import torch.nn.functional as F\n", | |
| "\n", | |
| "OPERATION_INFO = (1 << 0)\n", | |
| "COPY_INFO = (1 << 1)\n", | |
| "CPU_FALLBACK_INFO = (1 << 2)\n", | |
| "\n", | |
| "ALL_STATS = (1 << 3)\n", | |
| "OPERATION_STATS = (1 << 4)\n", | |
| "COPY_STATS = (1 << 5)\n", | |
| "CPU_FALLBACK_STATS = (1 << 6)\n", | |
| "\n", | |
| "INCLUDE_GPU_TIME = (1 << 7)\n", | |
| "INCLUDE_KERNEL_TIME = (1 << 8)\n", | |
| "INCLUDE_BUFFER_ID = (1 << 9)\n", | |
| "\n", | |
| "LOG_COUNT = (INCLUDE_BUFFER_ID << 1) - 1\n", | |
| "\n", | |
| "\n", | |
| "ALL_FLAGS = OPERATION_INFO+COPY_INFO+CPU_FALLBACK_INFO+INCLUDE_GPU_TIME+INCLUDE_KERNEL_TIME+INCLUDE_BUFFER_ID\n", | |
| "\n", | |
| "# os.environ[\"PYTORCH_MPS_LOG_PROFILE_INFO\"] = str(ALL_FLAGS)\n", | |
| "# os.environ[\"PYTORCH_DEBUG_MPS_ALLOCATOR\"] = \"1\"\n", | |
| "\n", | |
| "DEVICE = \"mps\"\n", | |
| "\n", | |
| "MEMLEAK_DETECTED = 1\n", | |
| "NO_MEMLEAK_DETECTED = 2\n", | |
| "\n", | |
| "structlog.configure(\n", | |
| " processors=[\n", | |
| " structlog.contextvars.merge_contextvars,\n", | |
| " structlog.processors.add_log_level,\n", | |
| " structlog.processors.StackInfoRenderer(),\n", | |
| " structlog.dev.set_exc_info,\n", | |
| " structlog.processors.TimeStamper(fmt=\"iso\", utc=True),\n", | |
| " structlog.dev.ConsoleRenderer()\n", | |
| " ],\n", | |
| " wrapper_class=structlog.make_filtering_bound_logger(logging.NOTSET),\n", | |
| " context_class=dict,\n", | |
| " logger_factory=structlog.PrintLoggerFactory(),\n", | |
| " cache_logger_on_first_use=False\n", | |
| ")\n", | |
| "logger = structlog.get_logger()\n", | |
| "\n", | |
| "logger.info(f\"{torch.__version__ = }\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 26, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def print_allocated_memory(i=None, iters=None):\n", | |
| " log_line = \"Memory info\"\n", | |
| " if i is not None and iters is not None:\n", | |
| " log_line += f\" ({i}/{iters})\"\n", | |
| " logger.info(log_line, current_allocated_memory=torch.mps.current_allocated_memory(), driver_allocated_memory=torch.mps.driver_allocated_memory())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 27, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def empty_cache():\n", | |
| " print_allocated_memory()\n", | |
| " torch.mps.empty_cache()\n", | |
| " time.sleep(5)\n", | |
| " logger.info(f\"MPS cache cleared.\")\n", | |
| " print_allocated_memory()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 28, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def benchmark(\n", | |
| " model,\n", | |
| " input,\n", | |
| " should_backward=False,\n", | |
| " should_print_model=True,\n", | |
| " should_empty_cache=True,\n", | |
| " debug_n_iters=100,\n", | |
| " memory_threshold=50 * 1024**3,\n", | |
| " iters=1000,\n", | |
| " is_torch_model=True):\n", | |
| " ret = NO_MEMLEAK_DETECTED\n", | |
| "\n", | |
| " timings = []\n", | |
| " \n", | |
| " if is_torch_model:\n", | |
| " model = model.to(DEVICE)\n", | |
| " input = input.to(DEVICE)\n", | |
| " \n", | |
| " if should_print_model:\n", | |
| " print(model)\n", | |
| "\n", | |
| " if should_empty_cache:\n", | |
| " empty_cache()\n", | |
| "\n", | |
| " logger.info(\"Entering benchmark loop.\")\n", | |
| " for i in range(1, iters+1):\n", | |
| " start_time = time.time()\n", | |
| " output = model(input)\n", | |
| " if should_backward:\n", | |
| " loss = output.sum()\n", | |
| " loss.backward()\n", | |
| " end_time = time.time()\n", | |
| " elapsed_time = end_time - start_time\n", | |
| " timings.append(elapsed_time)\n", | |
| " \n", | |
| " if torch.mps.driver_allocated_memory() > memory_threshold:\n", | |
| " logger.warning(f\"torch.mps.driver_allocated_memory() > {memory_threshold/1024**3} GiB threshold reached.\")\n", | |
| " ret = MEMLEAK_DETECTED\n", | |
| " break\n", | |
| " if (i == 1 or i%debug_n_iters == 0) and i != iters:\n", | |
| " print_allocated_memory(i, iters)\n", | |
| " print_allocated_memory(i, iters)\n", | |
| " logger.info(\"Exited benchmark loop.\")\n", | |
| " timings = torch.Tensor(timings)\n", | |
| " logger.info(f\"Timings\", sum=torch.sum(timings).item(), mean=torch.mean(timings).item(), std=torch.std(timings).item())\n", | |
| " return ret" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Our familiar example" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 29, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Sequential(\n", | |
| " (0): Linear(in_features=256, out_features=256, bias=True)\n", | |
| ")\n", | |
| "\u001b[2m2025-05-27T16:50:09.102338Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m537134080\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m1619476480\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.120981Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.121409Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m537134080\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m1619476480\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.121662Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.122249Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.127254Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.132191Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.137162Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.141286Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.145670Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.150201Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.154693Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.158882Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.162828Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.167163Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m1074004992\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.167516Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:14.168109Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m4.1001796489581466e-05\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m1.9848786905640736e-05\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.04100179672241211\u001b[0m\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2" | |
| ] | |
| }, | |
| "execution_count": 29, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "N, C, H, W = 64, 32, 256, 256\n", | |
| "\n", | |
| "model = torch.nn.Sequential(\n", | |
| " torch.nn.Linear(H, W),\n", | |
| ")\n", | |
| "\n", | |
| "inputs = torch.rand(N, C, H, W).to(DEVICE)\n", | |
| "model.to(DEVICE)\n", | |
| "\n", | |
| "benchmark(model, inputs)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Reduce the channels and the issue goes away" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Sequential(\n", | |
| " (0): Linear(in_features=256, out_features=256, bias=True)\n", | |
| ")\n", | |
| "\u001b[2m2025-05-27T16:50:22.411000Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m537134080\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.434869Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.437309Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m537134080\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m1619476480\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.438837Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.442094Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.451211Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.458664Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.465347Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.470943Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.477112Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.482423Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.487837Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.493330Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.497866Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.502322Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m570688512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.502605Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:27.503055Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m5.717253588954918e-05\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m7.370045932475477e-05\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.057172536849975586\u001b[0m\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2" | |
| ] | |
| }, | |
| "execution_count": 30, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "N, C, H, W = 64, 2, 256, 256\n", | |
| "\n", | |
| "model = torch.nn.Sequential(\n", | |
| " torch.nn.Linear(H, W),\n", | |
| ")\n", | |
| "\n", | |
| "inputs = torch.rand(N, C, H, W).to(DEVICE)\n", | |
| "model.to(DEVICE)\n", | |
| "\n", | |
| "benchmark(model, inputs)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Increase the channels by 1 and we start seeing leaked bytes\n", | |
| "\n", | |
| "This section contains multiple examples, each increasing the channel count by 1." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 31, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Sequential(\n", | |
| " (0): Linear(in_features=256, out_features=256, bias=True)\n", | |
| ")\n", | |
| "\u001b[2m2025-05-27T16:50:28.051557Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m50594816\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2693218304\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.063814Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.065323Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m50594816\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.065999Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.070187Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.081306Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.089523Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.096595Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.102586Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.108522Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.114268Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.119192Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.123973Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.128554Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.133161Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m100926464\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.133403Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:33.133985Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m5.971503196633421e-05\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m9.584465442458168e-05\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.05971503257751465\u001b[0m\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2" | |
| ] | |
| }, | |
| "execution_count": 31, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "N, C, H, W = 64, 3, 256, 256\n", | |
| "\n", | |
| "model = torch.nn.Sequential(\n", | |
| " torch.nn.Linear(H, W),\n", | |
| ")\n", | |
| "\n", | |
| "inputs = torch.rand(N, C, H, W).to(DEVICE)\n", | |
| "model.to(DEVICE)\n", | |
| "\n", | |
| "benchmark(model, inputs)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 32, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Sequential(\n", | |
| " (0): Linear(in_features=256, out_features=256, bias=True)\n", | |
| ")\n", | |
| "\u001b[2m2025-05-27T16:50:33.967250Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m67372032\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:38.972764Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:38.975176Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m67372032\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:38.975739Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:38.976712Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:38.985566Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:38.992857Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:38.998849Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:39.004684Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:39.010144Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:39.015467Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:39.020436Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:39.025351Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:39.030190Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:39.035078Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m134480896\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:39.035373Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:39.035829Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m5.461335240397602e-05\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m2.6176043320447206e-05\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.054613351821899414\u001b[0m\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2" | |
| ] | |
| }, | |
| "execution_count": 32, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "N, C, H, W = 64, 4, 256, 256\n", | |
| "\n", | |
| "model = torch.nn.Sequential(\n", | |
| " torch.nn.Linear(H, W),\n", | |
| ")\n", | |
| "\n", | |
| "inputs = torch.rand(N, C, H, W).to(DEVICE)\n", | |
| "model.to(DEVICE)\n", | |
| "\n", | |
| "benchmark(model, inputs)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Eventually we hit our leak threshold (`memory_threshold`, 50 GiB by default in `benchmark`) as before." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Sequential(\n", | |
| " (0): Linear(in_features=256, out_features=256, bias=True)\n", | |
| ")\n", | |
| "\u001b[2m2025-05-27T16:50:40.121548Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m84149248\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.125972Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.126351Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m84149248\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.126542Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.126957Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.132154Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.136570Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.141372Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.145519Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.149451Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.153962Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.158704Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.162878Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.167162Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.171428Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m168035328\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.171790Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:45.172456Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m4.0439128497382626e-05\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m2.223329465778079e-05\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.04043912887573242\u001b[0m\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2" | |
| ] | |
| }, | |
| "execution_count": 33, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "N, C, H, W = 64, 5, 256, 256\n", | |
| "\n", | |
| "model = torch.nn.Sequential(\n", | |
| " torch.nn.Linear(H, W),\n", | |
| ")\n", | |
| "\n", | |
| "inputs = torch.rand(N, C, H, W).to(DEVICE)\n", | |
| "model.to(DEVICE)\n", | |
| "\n", | |
| "benchmark(model, inputs)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Different topology again\n", | |
| "\n", | |
| "The first example produces no leaks." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 34, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Sequential(\n", | |
| " (0): Linear(in_features=512, out_features=512, bias=True)\n", | |
| ")\n", | |
| "\u001b[2m2025-05-27T16:50:46.498332Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m85196800\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.500684Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.501809Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m85196800\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.502719Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.593059Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.597061Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.600685Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.604214Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.608266Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.612165Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.615970Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.619549Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.623229Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.626759Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.630374Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m152305664\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.630559Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:51.631060Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m0.0001243672304553911\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m0.0028319021221250296\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.12436723709106445\u001b[0m\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2" | |
| ] | |
| }, | |
| "execution_count": 34, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "N, C, H, W = 64, 1, 512, 512\n", | |
| "\n", | |
| "model = torch.nn.Sequential(\n", | |
| " torch.nn.Linear(H, W),\n", | |
| ")\n", | |
| "\n", | |
| "inputs = torch.rand(N, C, H, W).to(DEVICE)\n", | |
| "model.to(DEVICE)\n", | |
| "\n", | |
| "benchmark(model, inputs)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
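| "Increase H and W by 1 from the example above and we start seeing leaks again. Note that in the run recorded below, `current_allocated_memory` stays constant across all 1000 iterations." | |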
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Sequential(\n", | |
| " (0): Linear(in_features=513, out_features=513, bias=True)\n", | |
| ")\n", | |
| "\u001b[2m2025-05-27T16:50:53.541760Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m68686336\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.547258Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.549233Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m68686336\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.550357Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.657237Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.661368Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.665148Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.669004Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.672582Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.676341Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.679970Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.683724Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.687608Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.691326Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.694962Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m136057600\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.695133Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:50:58.695635Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m0.00014083004498388618\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m0.003342921379953623\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.14083003997802734\u001b[0m\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2" | |
| ] | |
| }, | |
| "execution_count": 35, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "N, C, H, W = 64, 1, 513, 513\n", | |
| "\n", | |
| "model = torch.nn.Sequential(\n", | |
| " torch.nn.Linear(H, W),\n", | |
| ")\n", | |
| "\n", | |
| "inputs = torch.rand(N, C, H, W).to(DEVICE)\n", | |
| "model.to(DEVICE)\n", | |
| "\n", | |
| "benchmark(model, inputs)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "#### Different topology again\n", | |
| "\n", | |
| "Increasing H and W in this case does not produce leaks." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 36, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Sequential(\n", | |
| " (0): Linear(in_features=8192, out_features=8192, bias=True)\n", | |
| ")\n", | |
| "\u001b[2m2025-05-27T16:51:01.217889Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m537133056\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.221584Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMPS cache cleared. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.223512Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m537133056\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m2156347392\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.224430Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mEntering benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.228886Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.242050Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (100/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.249042Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (200/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.255334Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (300/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.261302Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (400/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.266904Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (500/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.272457Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (600/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.277428Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (700/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.281962Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (800/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.286430Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (900/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.290771Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mMemory info (1000/1000) \u001b[0m \u001b[36mcurrent_allocated_memory\u001b[0m=\u001b[35m805568512\u001b[0m \u001b[36mdriver_allocated_memory\u001b[0m=\u001b[35m3230089216\u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.291093Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mExited benchmark loop. \u001b[0m\n", | |
| "\u001b[2m2025-05-27T16:51:06.291607Z\u001b[0m [\u001b[32m\u001b[1minfo \u001b[0m] \u001b[1mTimings \u001b[0m \u001b[36mmean\u001b[0m=\u001b[35m5.893349589314312e-05\u001b[0m \u001b[36mstd\u001b[0m=\u001b[35m0.00011335179442539811\u001b[0m \u001b[36msum\u001b[0m=\u001b[35m0.05893349647521973\u001b[0m\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "2" | |
| ] | |
| }, | |
| "execution_count": 36, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "N, C, H, W = 1, 1, 8192, 8192\n", | |
| "\n", | |
| "model = torch.nn.Sequential(\n", | |
| " torch.nn.Linear(H, W),\n", | |
| ")\n", | |
| "\n", | |
| "inputs = torch.rand(N, C, H, W).to(DEVICE)\n", | |
| "model.to(DEVICE)\n", | |
| "\n", | |
| "benchmark(model, inputs)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Even with a 2 GiB tensor we don't see any leaks this time. The failure mode does not seem to be directly related to tensor size alone; rather, it is the combination of tensor topology and size that triggers it." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 37, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# N, C, H, W = 1, 1, 2 * 8192, 2 * 8192\n", | |
| "\n", | |
| "# model = torch.nn.Sequential(\n", | |
| "# torch.nn.Linear(H, W),\n", | |
| "# )\n", | |
| "\n", | |
| "# inputs = torch.rand(N, C, H, W).to(DEVICE)\n", | |
| "# model.to(DEVICE)\n", | |
| "\n", | |
| "# benchmark(model, inputs, iters=500)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "pytorchdev", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.12.4" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment