Skip to content

Instantly share code, notes, and snippets.

@jvlmdr
Last active September 7, 2022 08:53
Show Gist options
  • Select an option

  • Save jvlmdr/619ace0facd2995ef27fd57c54510d24 to your computer and use it in GitHub Desktop.

Select an option

Save jvlmdr/619ace0facd2995ef27fd57c54510d24 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 37,
"id": "4cfe585b",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"from torch.nn import functional as F"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "f67d97a4",
"metadata": {},
"outputs": [],
"source": [
"def baseline(a):\n",
" return torch.linalg.matrix_norm(a, ord=2)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "3c0c6cd5",
"metadata": {},
"outputs": [],
"source": [
"shape = (2048, 512)\n",
"x_cpu = torch.randn(shape).cpu()\n",
"x_gpu = x_cpu.clone().cuda()"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "2065c2ab",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"15.5 ms ± 216 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"baseline(x_cpu)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "5da72f3b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"25.4 ms ± 469 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"baseline(x_gpu)\n",
"torch.cuda.synchronize()"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "9580ab91",
"metadata": {},
"outputs": [],
"source": [
"def power_iter(a, u):\n",
" u = u / torch.linalg.norm(u, 2)\n",
" au = torch.tensordot(a, u, dims=1)\n",
" aau = torch.tensordot(a.T, au, dims=1)\n",
" return aau"
]
},
{
"cell_type": "code",
"execution_count": 43,
"id": "b0c1228c",
"metadata": {},
"outputs": [],
"source": [
"def power_method(n, a, u):\n",
" for i in range(n):\n",
" u = power_iter(a, u)\n",
" return torch.sqrt(torch.linalg.norm(u))"
]
},
{
"cell_type": "code",
"execution_count": 44,
"id": "3cbc7919",
"metadata": {},
"outputs": [],
"source": [
"def power_iter_lr(a, u, v):\n",
" v = v / torch.linalg.norm(v, 2)\n",
" u = torch.tensordot(a, v, dims=1)\n",
" u = u / torch.linalg.norm(u, 2)\n",
" v = torch.tensordot(a.T, u, dims=1)\n",
" return u, v"
]
},
{
"cell_type": "code",
"execution_count": 45,
"id": "ba530707",
"metadata": {},
"outputs": [],
"source": [
"def power_method_lr(n, a, u, v):\n",
" for i in range(n):\n",
" u, v = power_iter_lr(a, u, v)\n",
" v = v / torch.linalg.norm(v)\n",
" return torch.dot(u, torch.tensordot(a, v, dims=1))"
]
},
{
"cell_type": "code",
"execution_count": 46,
"id": "d8931a08",
"metadata": {},
"outputs": [],
"source": [
"u_cpu = torch.randn(shape[0]).cpu()\n",
"u_gpu = u_cpu.clone().cuda()\n",
"\n",
"v_cpu = torch.randn(shape[1]).cpu()\n",
"v_gpu = v_cpu.clone().cuda()"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "2d756ca2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(67.7760)"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"baseline(x_cpu)"
]
},
{
"cell_type": "markdown",
"id": "b64c09f7",
"metadata": {},
"source": [
"## Check accuracy of power method"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "0203270e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(65.2768)"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"power_method(10, x_cpu, v_cpu)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "16076001",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(66.9108)"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"power_method(20, x_cpu, v_cpu)"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "26310ab2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(65.3707)"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"power_method_lr(10, x_cpu, u_cpu, v_cpu)"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "54ee3967",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(66.9255)"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"power_method_lr(20, x_cpu, u_cpu, v_cpu)"
]
},
{
"cell_type": "markdown",
"id": "97ed0d53",
"metadata": {},
"source": [
"## Check speed of power method"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "6212b50c",
"metadata": {},
"outputs": [],
"source": [
"num_iters = 10"
]
},
{
"cell_type": "code",
"execution_count": 53,
"id": "c7c285bb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"454 µs ± 5.63 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"power_method(num_iters, x_cpu, v_cpu)"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "ea7adf07",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"446 µs ± 6.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"power_method(num_iters, x_gpu, v_gpu)\n",
"torch.cuda.synchronize()"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "9d8f9d04",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"542 µs ± 8.77 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"power_method_lr(num_iters, x_cpu, u_cpu, v_cpu)"
]
},
{
"cell_type": "code",
"execution_count": 56,
"id": "f3c95486",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"599 µs ± 6.15 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"power_method_lr(num_iters, x_gpu, u_gpu, v_gpu)\n",
"torch.cuda.synchronize()"
]
},
{
"cell_type": "markdown",
"id": "987aae4f",
"metadata": {},
"source": [
"## Check accuracy of gradients"
]
},
{
"cell_type": "code",
"execution_count": 57,
"id": "ef2c2e5d",
"metadata": {},
"outputs": [],
"source": [
"x_cpu.requires_grad = True\n",
"x_gpu.requires_grad = True\n",
"tol = 1e-6"
]
},
{
"cell_type": "code",
"execution_count": 58,
"id": "906b771a",
"metadata": {},
"outputs": [],
"source": [
"x_cpu.sum().backward() # Ensure that x_cpu.grad is not None."
]
},
{
"cell_type": "code",
"execution_count": 59,
"id": "7203ae4c",
"metadata": {},
"outputs": [],
"source": [
"x_cpu.grad.zero_()\n",
"baseline(x_cpu).backward()\n",
"grad_baseline = x_cpu.grad.clone()"
]
},
{
"cell_type": "code",
"execution_count": 60,
"id": "19154881",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(20.9058)"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x_cpu.grad.zero_()\n",
"power_method(10, x_cpu, v_cpu).backward()\n",
"grad_power = x_cpu.grad.clone()\n",
"torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
]
},
{
"cell_type": "code",
"execution_count": 61,
"id": "3de72778",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(6.3677)"
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x_cpu.grad.zero_()\n",
"power_method(100, x_cpu, v_cpu).backward()\n",
"grad_power = x_cpu.grad.clone()\n",
"torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
]
},
{
"cell_type": "code",
"execution_count": 62,
"id": "17c6ee80",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(7.1975e-05)"
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x_cpu.grad.zero_()\n",
"power_method(1000, x_cpu, v_cpu).backward()\n",
"grad_power = x_cpu.grad.clone()\n",
"torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
]
},
{
"cell_type": "code",
"execution_count": 63,
"id": "84c2f0a7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(20.7808)"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x_cpu.grad.zero_()\n",
"power_method_lr(10, x_cpu, u_cpu, v_cpu).backward()\n",
"grad_power = x_cpu.grad.clone()\n",
"torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
]
},
{
"cell_type": "code",
"execution_count": 64,
"id": "e6235bb0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(6.3527)"
]
},
"execution_count": 64,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x_cpu.grad.zero_()\n",
"power_method_lr(100, x_cpu, u_cpu, v_cpu).backward()\n",
"grad_power = x_cpu.grad.clone()\n",
"torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
]
},
{
"cell_type": "code",
"execution_count": 65,
"id": "f8367c46",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"tensor(7.0777e-05)"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"x_cpu.grad.zero_()\n",
"power_method_lr(1000, x_cpu, u_cpu, v_cpu).backward()\n",
"grad_power = x_cpu.grad.clone()\n",
"torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))"
]
},
{
"cell_type": "markdown",
"id": "575f9e70",
"metadata": {},
"source": [
"## Check speed of gradients"
]
},
{
"cell_type": "code",
"execution_count": 66,
"id": "4e4e8983",
"metadata": {},
"outputs": [],
"source": [
"num_iters = 100 # Still seems too low for gradient accuracy."
]
},
{
"cell_type": "code",
"execution_count": 67,
"id": "ed86752f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"25.9 ms ± 1.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"baseline(x_cpu).backward()"
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "324ecf47",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"25 ms ± 990 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"baseline(x_gpu).backward()\n",
"torch.cuda.synchronize()"
]
},
{
"cell_type": "code",
"execution_count": 69,
"id": "536eeaf9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"93.1 ms ± 7.64 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"power_method(num_iters, x_cpu, v_cpu).backward()"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "8e90e3e4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"17.4 ms ± 248 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"power_method(num_iters, x_gpu, v_gpu).backward()\n",
"torch.cuda.synchronize()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"id": "1cbae1bd",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"93.1 ms ± 12.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"power_method_lr(num_iters, x_cpu, u_cpu, v_cpu).backward()"
]
},
{
"cell_type": "code",
"execution_count": 72,
"id": "b7eb9901",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"23.2 ms ± 70.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
]
}
],
"source": [
"%%timeit\n",
"power_method_lr(num_iters, x_gpu, u_gpu, v_gpu).backward()\n",
"torch.cuda.synchronize()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3c2843e0",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment