Last active
September 7, 2022 08:53
-
-
Save jvlmdr/619ace0facd2995ef27fd57c54510d24 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 37, | |
| "id": "4cfe585b", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import torch\n", | |
| "from torch.nn import functional as F" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 38, | |
| "id": "f67d97a4", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def baseline(a):\n", | |
| "    \"\"\"Reference value: matrix 2-norm of `a` from torch.linalg.matrix_norm (ord=2).\"\"\"\n", | |
| "    spectral_norm = torch.linalg.matrix_norm(a, ord=2)\n", | |
| "    return spectral_norm" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 39, | |
| "id": "3c0c6cd5", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "torch.manual_seed(0)  # Fix the RNG so Restart & Run All regenerates the same matrix.\n", | |
| "shape = (2048, 512)\n", | |
| "x_cpu = torch.randn(shape).cpu()\n", | |
| "x_gpu = x_cpu.clone().cuda()  # Needs a CUDA device; the x_gpu cells further down use this copy." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 40, | |
| "id": "2065c2ab", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "15.5 ms ± 216 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "baseline(x_cpu)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 41, | |
| "id": "5da72f3b", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "25.4 ms ± 469 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "baseline(x_gpu)\n", | |
| "torch.cuda.synchronize()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 42, | |
| "id": "9580ab91", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def power_iter(a, u):\n", | |
| "    \"\"\"Run one power step: return a.T @ (a @ u_hat), where u_hat = u / ||u||_2.\n", | |
| "\n", | |
| "    The result is returned unscaled; normalisation happens on entry.\n", | |
| "    \"\"\"\n", | |
| "    unit = u / torch.linalg.norm(u, ord=2)\n", | |
| "    projected = torch.tensordot(a, unit, dims=1)\n", | |
| "    return torch.tensordot(a.T, projected, dims=1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 43, | |
| "id": "b0c1228c", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def power_method(n, a, u):\n", | |
| "    \"\"\"Estimate the largest singular value of `a` by power iteration on a.T @ a.\n", | |
| "\n", | |
| "    Args:\n", | |
| "        n: Number of power_iter steps to run; must be at least 1.\n", | |
| "        a: Matrix (a 2-D tensor in this notebook).\n", | |
| "        u: Starting vector with a.shape[1] entries (it is contracted with the last axis of `a`).\n", | |
| "\n", | |
| "    Returns:\n", | |
| "        0-dim tensor sqrt(||u_n||), where u_n is the output of the final power_iter step.\n", | |
| "\n", | |
| "    Raises:\n", | |
| "        ValueError: If n < 1; without any step the result would only measure the start vector.\n", | |
| "    \"\"\"\n", | |
| "    if n < 1:\n", | |
| "        raise ValueError('power_method needs at least one iteration (n >= 1)')\n", | |
| "    for _ in range(n):\n", | |
| "        u = power_iter(a, u)\n", | |
| "    # power_iter returns a.T @ a applied to a unit vector, so its norm tends to sigma_max**2.\n", | |
| "    return torch.sqrt(torch.linalg.norm(u))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 44, | |
| "id": "3cbc7919", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def power_iter_lr(a, u, v):\n", | |
| "    \"\"\"Run one alternating (left/right) power step.\n", | |
| "\n", | |
| "    The incoming `u` is never read; the left vector is recomputed from `v`.\n", | |
| "\n", | |
| "    Returns:\n", | |
| "        (u, v): u = a @ v_hat scaled to unit 2-norm, and v = a.T @ u left unscaled.\n", | |
| "    \"\"\"\n", | |
| "    right = v / torch.linalg.norm(v, ord=2)\n", | |
| "    left = torch.tensordot(a, right, dims=1)\n", | |
| "    left = left / torch.linalg.norm(left, ord=2)\n", | |
| "    return left, torch.tensordot(a.T, left, dims=1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 45, | |
| "id": "ba530707", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def power_method_lr(n, a, u, v):\n", | |
| "    \"\"\"Estimate the largest singular value of `a` with alternating left/right power steps.\n", | |
| "\n", | |
| "    Args:\n", | |
| "        n: Number of power_iter_lr steps to run; must be at least 1.\n", | |
| "        a: Matrix (a 2-D tensor in this notebook).\n", | |
| "        u: Left vector; power_iter_lr overwrites it before reading it, so its value is unused.\n", | |
| "        v: Starting right vector with a.shape[1] entries.\n", | |
| "\n", | |
| "    Returns:\n", | |
| "        0-dim tensor u . (a @ v_hat), using the final u and the re-normalised final v.\n", | |
| "\n", | |
| "    Raises:\n", | |
| "        ValueError: If n < 1; the estimate is only meaningful after at least one step.\n", | |
| "    \"\"\"\n", | |
| "    if n < 1:\n", | |
| "        raise ValueError('power_method_lr needs at least one iteration (n >= 1)')\n", | |
| "    for _ in range(n):\n", | |
| "        u, v = power_iter_lr(a, u, v)\n", | |
| "    # power_iter_lr hands back v unscaled, so normalise it before forming u^T a v.\n", | |
| "    v = v / torch.linalg.norm(v)\n", | |
| "    return torch.dot(u, torch.tensordot(a, v, dims=1))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 46, | |
| "id": "d8931a08", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Random starting vectors: u has shape[0] entries, v has shape[1] entries (drawn in that order).\n", | |
| "u_cpu, v_cpu = (torch.randn(dim).cpu() for dim in shape)\n", | |
| "u_gpu, v_gpu = u_cpu.clone().cuda(), v_cpu.clone().cuda()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 47, | |
| "id": "2d756ca2", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor(67.7760)" | |
| ] | |
| }, | |
| "execution_count": 47, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "baseline(x_cpu)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "b64c09f7", | |
| "metadata": {}, | |
| "source": [ | |
| "## Check accuracy of power method" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 48, | |
| "id": "0203270e", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor(65.2768)" | |
| ] | |
| }, | |
| "execution_count": 48, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "power_method(10, x_cpu, v_cpu)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 49, | |
| "id": "16076001", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor(66.9108)" | |
| ] | |
| }, | |
| "execution_count": 49, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "power_method(20, x_cpu, v_cpu)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 50, | |
| "id": "26310ab2", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor(65.3707)" | |
| ] | |
| }, | |
| "execution_count": 50, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "power_method_lr(10, x_cpu, u_cpu, v_cpu)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 51, | |
| "id": "54ee3967", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor(66.9255)" | |
| ] | |
| }, | |
| "execution_count": 51, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "power_method_lr(20, x_cpu, u_cpu, v_cpu)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "97ed0d53", | |
| "metadata": {}, | |
| "source": [ | |
| "## Check speed of power method" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 52, | |
| "id": "6212b50c", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "num_iters = 10" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 53, | |
| "id": "c7c285bb", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "454 µs ± 5.63 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "power_method(num_iters, x_cpu, v_cpu)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 54, | |
| "id": "ea7adf07", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "446 µs ± 6.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "power_method(num_iters, x_gpu, v_gpu)\n", | |
| "torch.cuda.synchronize()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 55, | |
| "id": "9d8f9d04", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "542 µs ± 8.77 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "power_method_lr(num_iters, x_cpu, u_cpu, v_cpu)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 56, | |
| "id": "f3c95486", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "599 µs ± 6.15 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "power_method_lr(num_iters, x_gpu, u_gpu, v_gpu)\n", | |
| "torch.cuda.synchronize()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "987aae4f", | |
| "metadata": {}, | |
| "source": [ | |
| "## Check accuracy of gradients" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 57, | |
| "id": "ef2c2e5d", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "x_cpu.requires_grad = True\n", | |
| "x_gpu.requires_grad = True\n", | |
| "tol = 1e-6" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 58, | |
| "id": "906b771a", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "x_cpu.sum().backward() # Ensure that x_cpu.grad is not None." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 59, | |
| "id": "7203ae4c", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "x_cpu.grad.zero_()\n", | |
| "baseline(x_cpu).backward()\n", | |
| "grad_baseline = x_cpu.grad.clone()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 60, | |
| "id": "19154881", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor(20.9058)" | |
| ] | |
| }, | |
| "execution_count": 60, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "x_cpu.grad.zero_()\n", | |
| "power_method(10, x_cpu, v_cpu).backward()\n", | |
| "grad_power = x_cpu.grad.clone()\n", | |
| "torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 61, | |
| "id": "3de72778", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor(6.3677)" | |
| ] | |
| }, | |
| "execution_count": 61, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "x_cpu.grad.zero_()\n", | |
| "power_method(100, x_cpu, v_cpu).backward()\n", | |
| "grad_power = x_cpu.grad.clone()\n", | |
| "torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 62, | |
| "id": "17c6ee80", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor(7.1975e-05)" | |
| ] | |
| }, | |
| "execution_count": 62, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "x_cpu.grad.zero_()\n", | |
| "power_method(1000, x_cpu, v_cpu).backward()\n", | |
| "grad_power = x_cpu.grad.clone()\n", | |
| "torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 63, | |
| "id": "84c2f0a7", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor(20.7808)" | |
| ] | |
| }, | |
| "execution_count": 63, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "x_cpu.grad.zero_()\n", | |
| "power_method_lr(10, x_cpu, u_cpu, v_cpu).backward()\n", | |
| "grad_power = x_cpu.grad.clone()\n", | |
| "torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 64, | |
| "id": "e6235bb0", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor(6.3527)" | |
| ] | |
| }, | |
| "execution_count": 64, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "x_cpu.grad.zero_()\n", | |
| "power_method_lr(100, x_cpu, u_cpu, v_cpu).backward()\n", | |
| "grad_power = x_cpu.grad.clone()\n", | |
| "torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 65, | |
| "id": "f8367c46", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor(7.0777e-05)" | |
| ] | |
| }, | |
| "execution_count": 65, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "x_cpu.grad.zero_()\n", | |
| "power_method_lr(1000, x_cpu, u_cpu, v_cpu).backward()\n", | |
| "grad_power = x_cpu.grad.clone()\n", | |
| "torch.mean(torch.abs(grad_baseline - grad_power) / (torch.abs(grad_baseline) + tol))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "575f9e70", | |
| "metadata": {}, | |
| "source": [ | |
| "## Check speed of gradients" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 66, | |
| "id": "4e4e8983", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "num_iters = 100 # Still seems too low for gradient accuracy." | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 67, | |
| "id": "ed86752f", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "25.9 ms ± 1.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "baseline(x_cpu).backward()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 68, | |
| "id": "324ecf47", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "25 ms ± 990 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "baseline(x_gpu).backward()\n", | |
| "torch.cuda.synchronize()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 69, | |
| "id": "536eeaf9", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "93.1 ms ± 7.64 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "power_method(num_iters, x_cpu, v_cpu).backward()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 70, | |
| "id": "8e90e3e4", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "17.4 ms ± 248 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "power_method(num_iters, x_gpu, v_gpu).backward()\n", | |
| "torch.cuda.synchronize()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 71, | |
| "id": "1cbae1bd", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "93.1 ms ± 12.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "power_method_lr(num_iters, x_cpu, u_cpu, v_cpu).backward()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 72, | |
| "id": "b7eb9901", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "23.2 ms ± 70.7 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "%%timeit\n", | |
| "power_method_lr(num_iters, x_gpu, u_gpu, v_gpu).backward()\n", | |
| "torch.cuda.synchronize()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "id": "3c2843e0", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.8.10" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment