Created
March 20, 2020 16:55
-
-
Save rsk2327/d3ceb4e30c6c06092c6e7ddfa4691e3a to your computer and use it in GitHub Desktop.
Stacked RNNs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab": {}, | |
| "colab_type": "code", | |
| "id": "cDur3CYiI0mP" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import torch\n", | |
| "import torchtext\n", | |
| "from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence, PackedSequence\n", | |
| "from torch import nn\n", | |
| "\n", | |
| "from torch import Tensor, dot, matmul\n", | |
| "\n", | |
| "import torch.nn.functional as F" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab_type": "text", | |
| "id": "gxiu3OeWy82W" | |
| }, | |
| "source": [ | |
| "## **Stacked RNNs**" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab": {}, | |
| "colab_type": "code", | |
| "id": "5X-sRoHRwT_t" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "# Defining a stacked RNN: num_layers=2 feeds the first layer's hidden states into a second RNN layer\n", | |
| "rnn= nn.RNN(input_size=3, hidden_size=3, num_layers = 2, bias = True, batch_first=True, bidirectional=False)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 230, | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 51 | |
| }, | |
| "colab_type": "code", | |
| "id": "Uw_LFS22zIdJ", | |
| "outputId": "23eeb0a9-876b-4587-cdab-64f8fcc49284" | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Out all shape : torch.Size([1, 4, 3])\n", | |
| "Out last shape : torch.Size([2, 1, 3])\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "out_all, out_last = rnn(seq)\n", | |
| "\n", | |
| "print(f\"Out all shape : {out_all.shape}\")\n", | |
| "\n", | |
| "print(f\"Out last shape : {out_last.shape}\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 231, | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 85 | |
| }, | |
| "colab_type": "code", | |
| "id": "L1IJ49-3zOC0", | |
| "outputId": "3db5077a-4dd6-476f-f200-b8b9f0c78f6d" | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor([[[ 0.0348, -0.2825, 0.5068],\n", | |
| " [-0.0356, -0.2683, 0.6636],\n", | |
| " [ 0.0149, -0.2029, 0.6910],\n", | |
| " [ 0.0190, -0.2103, 0.6927]]], grad_fn=<TransposeBackward1>)" | |
| ] | |
| }, | |
| "execution_count": 231, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "out_all" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 232, | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 68 | |
| }, | |
| "colab_type": "code", | |
| "id": "hfw_S7AEzTYS", | |
| "outputId": "793109ba-e910-4e8a-f090-b1d5354bd6a7" | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor([[[ 0.7414, 0.9353, 0.8858]],\n", | |
| "\n", | |
| " [[ 0.0190, -0.2103, 0.6927]]], grad_fn=<StackBackward>)" | |
| ] | |
| }, | |
| "execution_count": 232, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "out_last" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 233, | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 289 | |
| }, | |
| "colab_type": "code", | |
| "id": "nWfdQczczVBs", | |
| "outputId": "56fe11ac-2684-4ae8-c7f9-0f84ad563e8f" | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "OrderedDict([('weight_ih_l0', tensor([[ 0.0130, 0.2709, -0.3058],\n", | |
| " [-0.4475, 0.3193, 0.4943],\n", | |
| " [ 0.2080, 0.0944, -0.2989]])),\n", | |
| " ('weight_hh_l0', tensor([[ 0.4855, 0.0833, -0.1622],\n", | |
| " [-0.5323, -0.1938, 0.3005],\n", | |
| " [ 0.1978, 0.5410, 0.3437]])),\n", | |
| " ('bias_ih_l0', tensor([-0.1454, 0.4145, -0.2442])),\n", | |
| " ('bias_hh_l0', tensor([0.3113, 0.5509, 0.5318])),\n", | |
| " ('weight_ih_l1', tensor([[ 0.0108, 0.3439, 0.4292],\n", | |
| " [-0.2595, -0.0296, 0.3443],\n", | |
| " [ 0.4159, 0.0034, 0.0543]])),\n", | |
| " ('weight_hh_l1', tensor([[-0.5193, 0.3936, -0.3125],\n", | |
| " [-0.5299, 0.5054, 0.1984],\n", | |
| " [ 0.4348, -0.4416, -0.1205]])),\n", | |
| " ('bias_ih_l1', tensor([-0.1728, -0.0305, 0.5360])),\n", | |
| " ('bias_hh_l1', tensor([-0.2146, -0.2945, -0.0553]))])" | |
| ] | |
| }, | |
| "execution_count": 233, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "rnn.state_dict()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab_type": "text", | |
| "id": "eriCaO5VzjPe" | |
| }, | |
| "source": [ | |
| "### **Computing Outputs - Layer 1**" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 245, | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 85 | |
| }, | |
| "colab_type": "code", | |
| "id": "e5PpKyp7zceP", | |
| "outputId": "1d0f777f-1b4b-4499-e6c8-866b05d711a4" | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[tensor([[0.1430, 0.8696, 0.2832]], grad_fn=<TanhBackward>),\n", | |
| " tensor([[0.4706, 0.9036, 0.7538]], grad_fn=<TanhBackward>),\n", | |
| " tensor([[0.7066, 0.8677, 0.9102]], grad_fn=<TanhBackward>),\n", | |
| " tensor([[0.7414, 0.9353, 0.8858]], grad_fn=<TanhBackward>)]" | |
| ] | |
| }, | |
| "execution_count": 245, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Extracting the weights for RNN Layer 1\n", | |
| "wih_10 = rnn.weight_ih_l0\n", | |
| "whh_10 = rnn.weight_hh_l0\n", | |
| "\n", | |
| "bih_10 = rnn.bias_ih_l0\n", | |
| "bhh_10 = rnn.bias_hh_l0\n", | |
| "\n", | |
| "output_1 = []\n", | |
| "\n", | |
| "h_previous = torch.zeros([1,3]) # Since the hidden_size parameter is 3, all hidden states will have a shape of [1,3]\n", | |
| "\n", | |
| "for i in range(seq.shape[1]):\n", | |
| "\n", | |
| " x = seq[0][i]\n", | |
| " h_current = torch.tanh(Tensor(matmul(x,wih_10.T) + bih_10 + matmul(h_previous,whh_10.T) + bhh_10))\n", | |
| " h_previous = h_current\n", | |
| " output_1.append(h_current)\n", | |
| "\n", | |
| "output_1" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab_type": "text", | |
| "id": "7_V4L8jX0iAS" | |
| }, | |
| "source": [ | |
| "### **Computing Outputs - Layer 2**" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 252, | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 85 | |
| }, | |
| "colab_type": "code", | |
| "id": "NDiktTrC0MeV", | |
| "outputId": "dcef8a79-b7a8-4b9c-dd79-13455120e191" | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[tensor([[ 0.0348, -0.2825, 0.5068]], grad_fn=<TanhBackward>),\n", | |
| " tensor([[-0.0356, -0.2683, 0.6636]], grad_fn=<TanhBackward>),\n", | |
| " tensor([[ 0.0149, -0.2029, 0.6910]], grad_fn=<TanhBackward>),\n", | |
| " tensor([[ 0.0190, -0.2103, 0.6927]], grad_fn=<TanhBackward>)]" | |
| ] | |
| }, | |
| "execution_count": 252, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Extracting the weights for RNN Layer 2\n", | |
| "wih_11 = rnn.weight_ih_l1\n", | |
| "whh_11 = rnn.weight_hh_l1\n", | |
| "\n", | |
| "bih_11 = rnn.bias_ih_l1\n", | |
| "bhh_11 = rnn.bias_hh_l1\n", | |
| "\n", | |
| "output_2 = []\n", | |
| "\n", | |
| "h_previous = torch.zeros([1,3]) # Since the hidden_size parameter is 3, all hidden states will have a shape of [1,3]\n", | |
| "\n", | |
| "for i in range(seq.shape[1]):\n", | |
| " \n", | |
| " x = seq[0][i]\n", | |
| " h_current = torch.tanh(Tensor(matmul(output_1[i],wih_11.T) + bih_11 + matmul(h_previous,whh_11.T) + bhh_11))\n", | |
| " h_previous = h_current\n", | |
| " output_2.append(h_current)\n", | |
| "\n", | |
| "output_2" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 238, | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 85 | |
| }, | |
| "colab_type": "code", | |
| "id": "_nx4VyeH0Ps2", | |
| "outputId": "cc562e28-0d56-49ba-c6ba-8aa750977e8c" | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor([[[ 0.0348, -0.2825, 0.5068],\n", | |
| " [-0.0356, -0.2683, 0.6636],\n", | |
| " [ 0.0149, -0.2029, 0.6910],\n", | |
| " [ 0.0190, -0.2103, 0.6927]]], grad_fn=<TransposeBackward1>)" | |
| ] | |
| }, | |
| "execution_count": 238, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "out_all" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 239, | |
| "metadata": { | |
| "Collapsed": "false", | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 68 | |
| }, | |
| "colab_type": "code", | |
| "id": "wAk-XTe609qc", | |
| "outputId": "2e3cbbc5-b842-4291-af60-9d4abc5d102f" | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "tensor([[[ 0.7414, 0.9353, 0.8858]],\n", | |
| "\n", | |
| " [[ 0.0190, -0.2103, 0.6927]]], grad_fn=<StackBackward>)" | |
| ] | |
| }, | |
| "execution_count": 239, | |
| "metadata": { | |
| "tags": [] | |
| }, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "out_last" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "name": "Understanding RNNs.ipynb", | |
| "provenance": [] | |
| }, | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.4" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment