jvlmdr · September 7, 2022 08:53
diff --git a/power_method_grad.ipynb b/power_method_grad.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 37,
   "id": "4cfe585b",
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "from torch.nn import functional as F"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "id": "f67d97a4",
   "metadata": {},
   "outputs": [],
   "source": [
    "def baseline(a):\n",
    "    \"\"\"Reference value: the matrix 2-norm of `a` computed by torch.linalg.\"\"\"\n",
    "    reference_norm = torch.linalg.matrix_norm(a, ord=2)\n",
    "    return reference_norm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "id": "3c0c6cd5",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Seed the global RNG so that a fresh-kernel run draws the same test matrix.\n",
    "torch.manual_seed(0)\n",
    "\n",
    "shape = (2048, 512)\n",
    "x_cpu = torch.randn(shape).cpu()\n",
    "# Same values copied to the CUDA device for the GPU timings.\n",
    "x_gpu = x_cpu.clone().cuda()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "id": "2065c2ab",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "15.5 ms ± 216 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "baseline(x_cpu)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "id": "5da72f3b",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "25.4 ms ± 469 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "baseline(x_gpu)\n",
    "torch.cuda.synchronize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "id": "9580ab91",
   "metadata": {},
   "outputs": [],
   "source": [
    "def power_iter(a, u):\n",
    "    \"\"\"Apply one power-iteration step to `u`.\n",
    "\n",
    "    `u` is scaled to unit 2-norm, contracted with `a` and then with `a.T`\n",
    "    (for a 2-D `a` and 1-D `u` this is `a.T @ (a @ u)`). The result is\n",
    "    returned without being re-normalized.\n",
    "    \"\"\"\n",
    "    unit = u / torch.linalg.norm(u, 2)\n",
    "    projected = torch.tensordot(a, unit, dims=1)\n",
    "    return torch.tensordot(a.T, projected, dims=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "id": "b0c1228c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def power_method(n, a, u):\n",
    "    \"\"\"Estimate the largest singular value of `a` with `n` power iterations.\n",
    "\n",
    "    Each step replaces `u` by `power_iter(a, u)`. The square root of the\n",
    "    norm of the final iterate is returned; this is the value compared\n",
    "    against `baseline(a)` in this notebook.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `n < 1`. With no iterations the result would only be\n",
    "            the square root of the norm of the start vector.\n",
    "    \"\"\"\n",
    "    if n < 1:\n",
    "        raise ValueError(f'power_method needs n >= 1 iterations, got {n}')\n",
    "    for _ in range(n):\n",
    "        u = power_iter(a, u)\n",
    "    return torch.sqrt(torch.linalg.norm(u))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "id": "3cbc7919",
   "metadata": {},
   "outputs": [],
   "source": [
    "def power_iter_lr(a, u, v):\n",
    "    \"\"\"One left/right power-iteration step; returns the pair `(u, v)`.\n",
    "\n",
    "    The incoming `u` is never read: it is recomputed from `v`. The returned\n",
    "    `u` has unit 2-norm, whereas the returned `v` is left unnormalized.\n",
    "    \"\"\"\n",
    "    right = v / torch.linalg.norm(v, 2)\n",
    "    left = torch.tensordot(a, right, dims=1)\n",
    "    left = left / torch.linalg.norm(left, 2)\n",
    "    right = torch.tensordot(a.T, left, dims=1)\n",
    "    return left, right"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "id": "ba530707",
   "metadata": {},
   "outputs": [],
   "source": [
    "def power_method_lr(n, a, u, v):\n",
    "    \"\"\"Estimate the largest singular value of `a` from left/right iterates.\n",
    "\n",
    "    Runs `n` calls of `power_iter_lr`, normalizes the final `v`, and returns\n",
    "    the dot product of `u` with `a` applied to `v`. Since `power_iter_lr`\n",
    "    recomputes `u` from `v`, the `u` passed in does not affect the result\n",
    "    once at least one iteration has run.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `n < 1`. With no iterations the un-normalized start\n",
    "            vector `u` would be used directly in the final dot product.\n",
    "    \"\"\"\n",
    "    if n < 1:\n",
    "        raise ValueError(f'power_method_lr needs n >= 1 iterations, got {n}')\n",
    "    for _ in range(n):\n",
    "        u, v = power_iter_lr(a, u, v)\n",
    "    v = v / torch.linalg.norm(v)\n",
    "    return torch.dot(u, torch.tensordot(a, v, dims=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "id": "d8931a08",
   "metadata": {},
   "outputs": [],
   "source": [
    "u_cpu = torch.randn(shape[0]).cpu()\n",
    "u_gpu = u_cpu.clone().cuda()\n",
    "\n",
    "v_cpu = torch.randn(shape[1]).cpu()\n",
    "v_gpu = v_cpu.clone().cuda()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "id": "2d756ca2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(67.7760)"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "baseline(x_cpu)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b64c09f7",
   "metadata": {},
   "source": [
    "## Check accuracy of power method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "id": "0203270e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(65.2768)"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "power_method(10, x_cpu, v_cpu)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "id": "16076001",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(66.9108)"
      ]
     },
     "execution_count": 49,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "power_method(20, x_cpu, v_cpu)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "id": "26310ab2",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(65.3707)"
      ]
     },
     "execution_count": 50,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "power_method_lr(10, x_cpu, u_cpu, v_cpu)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "id": "54ee3967",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(66.9255)"
      ]
     },
     "execution_count": 51,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "power_method_lr(20, x_cpu, u_cpu, v_cpu)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "97ed0d53",
   "metadata": {},
   "source": [
    "## Check speed of power method"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "id": "6212b50c",
   "metadata": {},
   "outputs": [],
   "source": [
    "num_iters = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "id": "c7c285bb",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "454 µs ± 5.63 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "power_method(num_iters, x_cpu, v_cpu)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "id": "ea7adf07",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "446 µs ± 6.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "power_method(num_iters, x_gpu, v_gpu)\n",
    "torch.cuda.synchronize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "id": "9d8f9d04",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "542 µs ± 8.77 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "power_method_lr(num_iters, x_cpu, u_cpu, v_cpu)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 56,
   "id": "f3c95486",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "599 µs ± 6.15 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "power_method_lr(num_iters, x_gpu, u_gpu, v_gpu)\n",
    "torch.cuda.synchronize()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "987aae4f",
   "metadata": {},
   "source": [
    "## Check accuracy of gradients"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 57,
   "id": "ef2c2e5d",
   "metadata": {},
   "outputs": [],
   "source": [
    "x_cpu.requires_grad = True\n",
    "x_gpu.requires_grad = True\n",
    "tol = 1e-6"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 58,
   "id": "906b771a",
   "metadata": {},
   "outputs": [],
   "source": [
    "x_cpu.sum().backward()  # Ensure that x_cpu.grad is not None."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 59,
   "id": "7203ae4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "x_cpu.grad.zero_()\n",
    "baseline(x_cpu).backward()\n",
    "grad_baseline = x_cpu.grad.clone()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "id": "19154881",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(20.9058)"
      ]
     },
     "execution_count": 60,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_cpu.grad.zero_()\n",
    "power_method(10, x_cpu, v_cpu).backward()\n",
    "grad_power = x_cpu.grad.clone()\n",
    "torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "id": "3de72778",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(6.3677)"
      ]
     },
     "execution_count": 61,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_cpu.grad.zero_()\n",
    "power_method(100, x_cpu, v_cpu).backward()\n",
    "grad_power = x_cpu.grad.clone()\n",
    "torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 62,
   "id": "17c6ee80",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(7.1975e-05)"
      ]
     },
     "execution_count": 62,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_cpu.grad.zero_()\n",
    "power_method(1000, x_cpu, v_cpu).backward()\n",
    "grad_power = x_cpu.grad.clone()\n",
    "torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "id": "84c2f0a7",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(20.7808)"
      ]
     },
     "execution_count": 63,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_cpu.grad.zero_()\n",
    "power_method_lr(10, x_cpu, u_cpu, v_cpu).backward()\n",
    "grad_power = x_cpu.grad.clone()\n",
    "torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 64,
   "id": "e6235bb0",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(6.3527)"
      ]
     },
     "execution_count": 64,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_cpu.grad.zero_()\n",
    "power_method_lr(100, x_cpu, u_cpu, v_cpu).backward()\n",
    "grad_power = x_cpu.grad.clone()\n",
    "torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 65,
   "id": "f8367c46",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "tensor(7.0777e-05)"
      ]
     },
     "execution_count": 65,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x_cpu.grad.zero_()\n",
    "power_method_lr(1000, x_cpu, u_cpu, v_cpu).backward()\n",
    "grad_power = x_cpu.grad.clone()\n",
    "torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "575f9e70",
   "metadata": {},
   "source": [
    "## Check speed of gradients"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 66,
   "id": "4e4e8983",
   "metadata": {},
   "outputs": [],
   "source": [
    "num_iters = 100  # Still seems too low for gradient accuracy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 67,
   "id": "ed86752f",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "25.9 ms ± 1.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "baseline(x_cpu).backward()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 68,
   "id": "324ecf47",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "25 ms ± 990 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "baseline(x_gpu).backward()\n",
    "torch.cuda.synchronize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 69,
   "id": "536eeaf9",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "93.1 ms ± 7.64 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "power_method(num_iters, x_cpu, v_cpu).backward()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 70,
   "id": "8e90e3e4",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "17.4 ms ± 248 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "power_method(num_iters, x_gpu, v_gpu).backward()\n",
    "torch.cuda.synchronize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 71,
   "id": "1cbae1bd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "93.1 ms ± 12.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "power_method_lr(num_iters, x_cpu, u_cpu, v_cpu).backward()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 72,
   "id": "b7eb9901",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "23.2 ms ± 70.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
     ]
    }
   ],
   "source": [
    "%%timeit\n",
    "power_method_lr(num_iters, x_gpu, u_gpu, v_gpu).backward()\n",
    "torch.cuda.synchronize()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3c2843e0",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 37,
	"id": "4cfe585b",
	"metadata": {},
	"outputs": [],
	"source": [
	"import torch\n",
	"from torch.nn import functional as F"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 38,
	"id": "f67d97a4",
	"metadata": {},
	"outputs": [],
	"source": [
	"def baseline(a):\n",
	"    \"\"\"Reference value: the matrix 2-norm of `a` computed by torch.linalg.\"\"\"\n",
	"    reference_norm = torch.linalg.matrix_norm(a, ord=2)\n",
	"    return reference_norm"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 39,
	"id": "3c0c6cd5",
	"metadata": {},
	"outputs": [],
	"source": [
	"# Seed the global RNG so that a fresh-kernel run draws the same test matrix.\n",
	"torch.manual_seed(0)\n",
	"\n",
	"shape = (2048, 512)\n",
	"x_cpu = torch.randn(shape).cpu()\n",
	"# Same values copied to the CUDA device for the GPU timings.\n",
	"x_gpu = x_cpu.clone().cuda()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 40,
	"id": "2065c2ab",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"15.5 ms ± 216 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"baseline(x_cpu)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 41,
	"id": "5da72f3b",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"25.4 ms ± 469 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"baseline(x_gpu)\n",
	"torch.cuda.synchronize()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 42,
	"id": "9580ab91",
	"metadata": {},
	"outputs": [],
	"source": [
	"def power_iter(a, u):\n",
	"    \"\"\"Apply one power-iteration step to `u`.\n",
	"\n",
	"    `u` is scaled to unit 2-norm, contracted with `a` and then with `a.T`\n",
	"    (for a 2-D `a` and 1-D `u` this is `a.T @ (a @ u)`). The result is\n",
	"    returned without being re-normalized.\n",
	"    \"\"\"\n",
	"    unit = u / torch.linalg.norm(u, 2)\n",
	"    projected = torch.tensordot(a, unit, dims=1)\n",
	"    return torch.tensordot(a.T, projected, dims=1)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 43,
	"id": "b0c1228c",
	"metadata": {},
	"outputs": [],
	"source": [
	"def power_method(n, a, u):\n",
	"    \"\"\"Estimate the largest singular value of `a` with `n` power iterations.\n",
	"\n",
	"    Each step replaces `u` by `power_iter(a, u)`. The square root of the\n",
	"    norm of the final iterate is returned; this is the value compared\n",
	"    against `baseline(a)` in this notebook.\n",
	"\n",
	"    Raises:\n",
	"        ValueError: if `n < 1`. With no iterations the result would only be\n",
	"            the square root of the norm of the start vector.\n",
	"    \"\"\"\n",
	"    if n < 1:\n",
	"        raise ValueError(f'power_method needs n >= 1 iterations, got {n}')\n",
	"    for _ in range(n):\n",
	"        u = power_iter(a, u)\n",
	"    return torch.sqrt(torch.linalg.norm(u))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 44,
	"id": "3cbc7919",
	"metadata": {},
	"outputs": [],
	"source": [
	"def power_iter_lr(a, u, v):\n",
	"    \"\"\"One left/right power-iteration step; returns the pair `(u, v)`.\n",
	"\n",
	"    The incoming `u` is never read: it is recomputed from `v`. The returned\n",
	"    `u` has unit 2-norm, whereas the returned `v` is left unnormalized.\n",
	"    \"\"\"\n",
	"    right = v / torch.linalg.norm(v, 2)\n",
	"    left = torch.tensordot(a, right, dims=1)\n",
	"    left = left / torch.linalg.norm(left, 2)\n",
	"    right = torch.tensordot(a.T, left, dims=1)\n",
	"    return left, right"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 45,
	"id": "ba530707",
	"metadata": {},
	"outputs": [],
	"source": [
	"def power_method_lr(n, a, u, v):\n",
	"    \"\"\"Estimate the largest singular value of `a` from left/right iterates.\n",
	"\n",
	"    Runs `n` calls of `power_iter_lr`, normalizes the final `v`, and returns\n",
	"    the dot product of `u` with `a` applied to `v`. Since `power_iter_lr`\n",
	"    recomputes `u` from `v`, the `u` passed in does not affect the result\n",
	"    once at least one iteration has run.\n",
	"\n",
	"    Raises:\n",
	"        ValueError: if `n < 1`. With no iterations the un-normalized start\n",
	"            vector `u` would be used directly in the final dot product.\n",
	"    \"\"\"\n",
	"    if n < 1:\n",
	"        raise ValueError(f'power_method_lr needs n >= 1 iterations, got {n}')\n",
	"    for _ in range(n):\n",
	"        u, v = power_iter_lr(a, u, v)\n",
	"    v = v / torch.linalg.norm(v)\n",
	"    return torch.dot(u, torch.tensordot(a, v, dims=1))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 46,
	"id": "d8931a08",
	"metadata": {},
	"outputs": [],
	"source": [
	"u_cpu = torch.randn(shape[0]).cpu()\n",
	"u_gpu = u_cpu.clone().cuda()\n",
	"\n",
	"v_cpu = torch.randn(shape[1]).cpu()\n",
	"v_gpu = v_cpu.clone().cuda()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 47,
	"id": "2d756ca2",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"tensor(67.7760)"
	]
	},
	"execution_count": 47,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"baseline(x_cpu)"
	]
	},
	{
	"cell_type": "markdown",
	"id": "b64c09f7",
	"metadata": {},
	"source": [
	"## Check accuracy of power method"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 48,
	"id": "0203270e",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"tensor(65.2768)"
	]
	},
	"execution_count": 48,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"power_method(10, x_cpu, v_cpu)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 49,
	"id": "16076001",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"tensor(66.9108)"
	]
	},
	"execution_count": 49,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"power_method(20, x_cpu, v_cpu)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 50,
	"id": "26310ab2",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"tensor(65.3707)"
	]
	},
	"execution_count": 50,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"power_method_lr(10, x_cpu, u_cpu, v_cpu)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 51,
	"id": "54ee3967",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"tensor(66.9255)"
	]
	},
	"execution_count": 51,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"power_method_lr(20, x_cpu, u_cpu, v_cpu)"
	]
	},
	{
	"cell_type": "markdown",
	"id": "97ed0d53",
	"metadata": {},
	"source": [
	"## Check speed of power method"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 52,
	"id": "6212b50c",
	"metadata": {},
	"outputs": [],
	"source": [
	"num_iters = 10"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 53,
	"id": "c7c285bb",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"454 µs ± 5.63 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"power_method(num_iters, x_cpu, v_cpu)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 54,
	"id": "ea7adf07",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"446 µs ± 6.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"power_method(num_iters, x_gpu, v_gpu)\n",
	"torch.cuda.synchronize()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 55,
	"id": "9d8f9d04",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"542 µs ± 8.77 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"power_method_lr(num_iters, x_cpu, u_cpu, v_cpu)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 56,
	"id": "f3c95486",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"599 µs ± 6.15 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"power_method_lr(num_iters, x_gpu, u_gpu, v_gpu)\n",
	"torch.cuda.synchronize()"
	]
	},
	{
	"cell_type": "markdown",
	"id": "987aae4f",
	"metadata": {},
	"source": [
	"## Check accuracy of gradients"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 57,
	"id": "ef2c2e5d",
	"metadata": {},
	"outputs": [],
	"source": [
	"x_cpu.requires_grad = True\n",
	"x_gpu.requires_grad = True\n",
	"tol = 1e-6"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 58,
	"id": "906b771a",
	"metadata": {},
	"outputs": [],
	"source": [
	"x_cpu.sum().backward() # Ensure that x_cpu.grad is not None."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 59,
	"id": "7203ae4c",
	"metadata": {},
	"outputs": [],
	"source": [
	"x_cpu.grad.zero_()\n",
	"baseline(x_cpu).backward()\n",
	"grad_baseline = x_cpu.grad.clone()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 60,
	"id": "19154881",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"tensor(20.9058)"
	]
	},
	"execution_count": 60,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"x_cpu.grad.zero_()\n",
	"power_method(10, x_cpu, v_cpu).backward()\n",
	"grad_power = x_cpu.grad.clone()\n",
	"torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 61,
	"id": "3de72778",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"tensor(6.3677)"
	]
	},
	"execution_count": 61,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"x_cpu.grad.zero_()\n",
	"power_method(100, x_cpu, v_cpu).backward()\n",
	"grad_power = x_cpu.grad.clone()\n",
	"torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 62,
	"id": "17c6ee80",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"tensor(7.1975e-05)"
	]
	},
	"execution_count": 62,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"x_cpu.grad.zero_()\n",
	"power_method(1000, x_cpu, v_cpu).backward()\n",
	"grad_power = x_cpu.grad.clone()\n",
	"torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 63,
	"id": "84c2f0a7",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"tensor(20.7808)"
	]
	},
	"execution_count": 63,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"x_cpu.grad.zero_()\n",
	"power_method_lr(10, x_cpu, u_cpu, v_cpu).backward()\n",
	"grad_power = x_cpu.grad.clone()\n",
	"torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 64,
	"id": "e6235bb0",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"tensor(6.3527)"
	]
	},
	"execution_count": 64,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"x_cpu.grad.zero_()\n",
	"power_method_lr(100, x_cpu, u_cpu, v_cpu).backward()\n",
	"grad_power = x_cpu.grad.clone()\n",
	"torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 65,
	"id": "f8367c46",
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"tensor(7.0777e-05)"
	]
	},
	"execution_count": 65,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"x_cpu.grad.zero_()\n",
	"power_method_lr(1000, x_cpu, u_cpu, v_cpu).backward()\n",
	"grad_power = x_cpu.grad.clone()\n",
	"torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
	]
	},
	{
	"cell_type": "markdown",
	"id": "575f9e70",
	"metadata": {},
	"source": [
	"## Check speed of gradients"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 66,
	"id": "4e4e8983",
	"metadata": {},
	"outputs": [],
	"source": [
	"num_iters = 100 # Still seems too low for gradient accuracy."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 67,
	"id": "ed86752f",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"25.9 ms ± 1.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"baseline(x_cpu).backward()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 68,
	"id": "324ecf47",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"25 ms ± 990 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"baseline(x_gpu).backward()\n",
	"torch.cuda.synchronize()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 69,
	"id": "536eeaf9",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"93.1 ms ± 7.64 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"power_method(num_iters, x_cpu, v_cpu).backward()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 70,
	"id": "8e90e3e4",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"17.4 ms ± 248 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"power_method(num_iters, x_gpu, v_gpu).backward()\n",
	"torch.cuda.synchronize()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 71,
	"id": "1cbae1bd",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"93.1 ms ± 12.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"power_method_lr(num_iters, x_cpu, u_cpu, v_cpu).backward()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 72,
	"id": "b7eb9901",
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"23.2 ms ± 70.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
	]
	}
	],
	"source": [
	"%%timeit\n",
	"power_method_lr(num_iters, x_gpu, u_gpu, v_gpu).backward()\n",
	"torch.cuda.synchronize()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"id": "3c2843e0",
	"metadata": {},
	"outputs": [],
	"source": []
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3 (ipykernel)",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.8.10"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 5
	}
No results found