Created
November 18, 2025 12:44
-
-
Save AhmedCoolProjects/09c3bb6b1950f744047585e2ed629101 to your computer and use it in GitHub Desktop.
GRU application.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "authorship_tag": "ABX9TyO/9u22khfPwSifq4TSYWNn", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/AhmedCoolProjects/09c3bb6b1950f744047585e2ed629101/gru-application.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "id": "LN11Q64X9vTo" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np\n", | |
| "import matplotlib.pyplot as plt" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "class GRU:\n", | |
| "    \"\"\"Single-layer character-level GRU with a softmax output, written in NumPy.\n", | |
| "\n", | |
| "    Wr, Wz and Wh have shape (H, H + V) and multiply the stacked column vector\n", | |
| "    [h_prev; x]; Wy has shape (V, H) and maps the hidden state to logits.\n", | |
| "    \"\"\"\n", | |
| "\n", | |
| "    def __init__(self, hidden_size, vocab_size, learning_rate=0.01):\n", | |
| "        \"\"\"Draw small random weights, zero the biases and leave the gradients unset.\"\"\"\n", | |
| "        self.H = hidden_size\n", | |
| "        self.V = vocab_size\n", | |
| "        self.lr = learning_rate\n", | |
| "\n", | |
| "        # Weights Initialization: each gate sees the stacked vector [h_prev; x].\n", | |
| "        W_shape = (self.H, self.H + self.V)\n", | |
| "\n", | |
| "        self.Wr = np.random.randn(*W_shape) * 0.01\n", | |
| "        self.br = np.zeros((self.H, 1))\n", | |
| "\n", | |
| "        self.Wz = np.random.randn(*W_shape) * 0.01\n", | |
| "        self.bz = np.zeros((self.H, 1))\n", | |
| "\n", | |
| "        self.Wh = np.random.randn(*W_shape) * 0.01\n", | |
| "        self.bh = np.zeros((self.H, 1))\n", | |
| "\n", | |
| "        self.Wy = np.random.randn(self.V, self.H) * 0.01\n", | |
| "        self.by = np.zeros((self.V, 1))\n", | |
| "\n", | |
| "        # Gradients (allocated and filled by backpropagation)\n", | |
| "        self.dWr, self.dWz, self.dWh, self.dWy = [None]*4\n", | |
| "        self.dbr, self.dbz, self.dbh, self.dby = [None]*4\n", | |
| "\n", | |
| "    def _sigmoid(self, z):\n", | |
| "        return 1 / (1 + np.exp(-z))\n", | |
| "\n", | |
| "    def _sigmoid_derivative(self, a):\n", | |
| "        # Expects the sigmoid output a = sigmoid(z), not the pre-activation z.\n", | |
| "        return a * (1 - a)\n", | |
| "\n", | |
| "    def _tanh(self, z):\n", | |
| "        return np.tanh(z)\n", | |
| "\n", | |
| "    def _tanh_derivative(self, a):\n", | |
| "        # Expects the tanh output a = tanh(z), not the pre-activation z.\n", | |
| "        return 1 - a ** 2\n", | |
| "\n", | |
| "    def _softmax(self, z):\n", | |
| "        # Subtract the maximum before exponentiating so np.exp cannot overflow.\n", | |
| "        exp_z = np.exp(z - np.max(z))\n", | |
| "        return exp_z / exp_z.sum(axis=0)\n", | |
| "\n", | |
| "    def forward(self, inputs, h_prev):\n", | |
| "        \"\"\"Run the GRU over a sequence of vocabulary indices.\n", | |
| "\n", | |
| "        Args:\n", | |
| "            inputs: sequence of integer indices into the vocabulary.\n", | |
| "            h_prev: initial hidden state, shape (H, 1).\n", | |
| "\n", | |
| "        Returns:\n", | |
| "            (ys, h_last, cache): ys maps each time step to its (V, 1) softmax\n", | |
| "            output, h_last is the hidden state after the final step, and cache\n", | |
| "            is the tuple of per-step dictionaries read by backpropagation().\n", | |
| "        \"\"\"\n", | |
| "        xs, hs, zs, ys = {}, {}, {}, {}\n", | |
| "        rs_g, zs_g, h_cands = {}, {}, {}\n", | |
| "        concat_inputs = {}\n", | |
| "\n", | |
| "        hs[-1] = np.copy(h_prev)\n", | |
| "\n", | |
| "        for t in range(len(inputs)):\n", | |
| "            # One-hot encode the input index.\n", | |
| "            xs[t] = np.zeros((self.V, 1))\n", | |
| "            xs[t][inputs[t]] = 1\n", | |
| "\n", | |
| "            concat_inputs[t] = np.vstack((hs[t-1], xs[t]))\n", | |
| "\n", | |
| "            rs_g[t] = self._sigmoid(np.dot(self.Wr, concat_inputs[t]) + self.br)\n", | |
| "            zs_g[t] = self._sigmoid(np.dot(self.Wz, concat_inputs[t]) + self.bz)\n", | |
| "            # Keep the candidate of every step: backpropagation() reads h_cands[t].\n", | |
| "            # Rebinding h_cands to the array here would leave only the last step in the cache.\n", | |
| "            h_cands[t] = self._tanh(np.dot(self.Wh, np.vstack((rs_g[t] * hs[t-1], xs[t]))) + self.bh)\n", | |
| "\n", | |
| "            hs[t] = (1 - zs_g[t]) * hs[t-1] + zs_g[t] * h_cands[t]\n", | |
| "\n", | |
| "            # Final Prediction\n", | |
| "            zs[t] = np.dot(self.Wy, hs[t]) + self.by\n", | |
| "            ys[t] = self._softmax(zs[t])\n", | |
| "\n", | |
| "        cache = (xs, hs, zs, ys, rs_g, zs_g, h_cands, concat_inputs)\n", | |
| "        return ys, hs[len(inputs)-1], cache\n", | |
| "\n", | |
| "    def compute_cost(self, ys, targets):\n", | |
| "        \"\"\"Return the cross-entropy of the targets under ys, averaged over time steps.\"\"\"\n", | |
| "        total_cost = 0\n", | |
| "        for t in range(len(targets)):\n", | |
| "            prob = ys[t][targets[t], 0]\n", | |
| "            # The small constant keeps np.log finite when prob is 0.\n", | |
| "            total_cost += -np.log(prob + 1e-9)\n", | |
| "        return total_cost / len(targets)\n", | |
| "\n", | |
| "    def backpropagation(self, targets, cache):\n", | |
| "        \"\"\"Backpropagate the cross-entropy loss through time.\n", | |
| "\n", | |
| "        Gradients are accumulated over all time steps into self.dW* / self.db*\n", | |
| "        and then clipped element-wise to [-5, 5]. They are gradients of the loss\n", | |
| "        summed over steps, whereas compute_cost() reports the per-step average.\n", | |
| "\n", | |
| "        Args:\n", | |
| "            targets: sequence of target vocabulary indices, one per time step.\n", | |
| "            cache: third value returned by forward() for the same sequence.\n", | |
| "        \"\"\"\n", | |
| "        xs, hs, zs, ys, rs_g, zs_g, h_cands, concat_inputs = cache\n", | |
| "        T = len(targets)\n", | |
| "\n", | |
| "        self.dWr = np.zeros_like(self.Wr)\n", | |
| "        self.dWz = np.zeros_like(self.Wz)\n", | |
| "        self.dWh = np.zeros_like(self.Wh)\n", | |
| "        self.dWy = np.zeros_like(self.Wy)\n", | |
| "        self.dbr = np.zeros_like(self.br)\n", | |
| "        self.dbz = np.zeros_like(self.bz)\n", | |
| "        self.dbh = np.zeros_like(self.bh)\n", | |
| "        self.dby = np.zeros_like(self.by)\n", | |
| "        # hs[-1] (the initial state) is always present, even for an empty sequence.\n", | |
| "        dh_next = np.zeros_like(hs[-1])\n", | |
| "\n", | |
| "        # Columns of each gate matrix that multiply the hidden part of [h_prev; x].\n", | |
| "        Wr_h = self.Wr[:, :self.H]\n", | |
| "        Wz_h = self.Wz[:, :self.H]\n", | |
| "        Wh_h = self.Wh[:, :self.H]\n", | |
| "\n", | |
| "        for t in reversed(range(T)):\n", | |
| "            # Softmax + cross-entropy: gradient w.r.t. the logits is y - one_hot(target).\n", | |
| "            dy = np.copy(ys[t])\n", | |
| "            dy[targets[t]] -= 1\n", | |
| "\n", | |
| "            self.dWy += np.dot(dy, hs[t].T)\n", | |
| "            self.dby += dy\n", | |
| "\n", | |
| "            # Gradient from this step's output plus what flows back from step t + 1.\n", | |
| "            dh = np.dot(self.Wy.T, dy) + dh_next\n", | |
| "\n", | |
| "            # Update gate pre-activation, from h = (1 - z) * h_prev + z * h_cand.\n", | |
| "            dz = dh * (h_cands[t] - hs[t-1]) * self._sigmoid_derivative(zs_g[t])\n", | |
| "            self.dWz += np.dot(dz, concat_inputs[t].T)\n", | |
| "            self.dbz += dz\n", | |
| "\n", | |
| "            # Candidate state pre-activation.\n", | |
| "            dh_cand = dh * zs_g[t] * self._tanh_derivative(h_cands[t])\n", | |
| "            self.dWh += np.dot(dh_cand, np.vstack((rs_g[t] * hs[t-1], xs[t])).T)\n", | |
| "            self.dbh += dh_cand\n", | |
| "\n", | |
| "            # Gradient w.r.t. the gated state r * h_prev that feeds the candidate.\n", | |
| "            d_gated = np.dot(Wh_h.T, dh_cand)\n", | |
| "\n", | |
| "            # Reset gate pre-activation.\n", | |
| "            dr = d_gated * hs[t-1] * self._sigmoid_derivative(rs_g[t])\n", | |
| "            self.dWr += np.dot(dr, concat_inputs[t].T)\n", | |
| "            self.dbr += dr\n", | |
| "\n", | |
| "            # Gradient for the previous hidden state (used as dh_next at time t-1)\n", | |
| "            # a. Through the linear interpolation: dh * (1 - z)\n", | |
| "            d_from_linear = dh * (1 - zs_g[t])\n", | |
| "            # b. Through the candidate: (Wh[:, :H].T @ dh_cand) * r\n", | |
| "            d_from_cand = d_gated * rs_g[t]\n", | |
| "            # c. Through the update gate: Wz[:, :H].T @ dz\n", | |
| "            d_from_update = np.dot(Wz_h.T, dz)\n", | |
| "            # d. Through the reset gate: Wr[:, :H].T @ dr\n", | |
| "            d_from_reset = np.dot(Wr_h.T, dr)\n", | |
| "\n", | |
| "            dh_next = d_from_linear + d_from_cand + d_from_update + d_from_reset\n", | |
| "\n", | |
| "        # Gradient Clipping (in place)\n", | |
| "        for grad in [self.dWr, self.dWz, self.dWh, self.dWy, self.dbr, self.dbz, self.dbh, self.dby]:\n", | |
| "            np.clip(grad, -5, 5, out=grad)\n", | |
| "\n", | |
| "    def update_parameters(self):\n", | |
| "        \"\"\"Apply one plain gradient-descent step using the stored gradients.\"\"\"\n", | |
| "        self.Wr -= self.lr * self.dWr\n", | |
| "        self.Wz -= self.lr * self.dWz\n", | |
| "        self.Wh -= self.lr * self.dWh\n", | |
| "        self.Wy -= self.lr * self.dWy\n", | |
| "        self.br -= self.lr * self.dbr\n", | |
| "        self.bz -= self.lr * self.dbz\n", | |
| "        self.bh -= self.lr * self.dbh\n", | |
| "        self.by -= self.lr * self.dby\n", | |
| "\n", | |
| "    def sample(self, seed_idx, h_prev, length=20):\n", | |
| "        \"\"\"Generate `length` indices by feeding each sampled index back as the next input.\n", | |
| "\n", | |
| "        Args:\n", | |
| "            seed_idx: vocabulary index of the first input character.\n", | |
| "            h_prev: initial hidden state, shape (H, 1).\n", | |
| "            length: number of indices to generate.\n", | |
| "\n", | |
| "        Returns:\n", | |
| "            List of sampled vocabulary indices (the seed itself is not included).\n", | |
| "        \"\"\"\n", | |
| "        x = np.zeros((self.V, 1))\n", | |
| "        x[seed_idx] = 1\n", | |
| "        indices = []\n", | |
| "\n", | |
| "        for _ in range(length):\n", | |
| "            concat_input = np.vstack((h_prev, x))\n", | |
| "\n", | |
| "            r = self._sigmoid(np.dot(self.Wr, concat_input) + self.br)\n", | |
| "            z = self._sigmoid(np.dot(self.Wz, concat_input) + self.bz)\n", | |
| "            h_cand = self._tanh(np.dot(self.Wh, np.vstack((r * h_prev, x))) + self.bh)\n", | |
| "            h = (1 - z) * h_prev + z * h_cand\n", | |
| "            y = self._softmax(np.dot(self.Wy, h) + self.by)\n", | |
| "            # Draw from the predicted distribution using NumPy's global RNG.\n", | |
| "            idx = np.random.choice(self.V, p=y.ravel())\n", | |
| "            indices.append(idx)\n", | |
| "            x = np.zeros((self.V, 1))\n", | |
| "            x[idx] = 1\n", | |
| "            h_prev = h\n", | |
| "        return indices\n" | |
| ], | |
| "metadata": { | |
| "id": "2c5tXzqu9xko" | |
| }, | |
| "execution_count": 2, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# 1. prepare data\n", | |
| "data = \"helloahmed\"\n", | |
| "# Sort the unique characters: iterating a set of strings can give a different order\n", | |
| "# in each Python process, which would change the character/index mapping between runs.\n", | |
| "chars = sorted(set(data))\n", | |
| "vocab_size = len(chars)\n", | |
| "char_to_idx = {ch: i for i, ch in enumerate(chars)}\n", | |
| "idx_to_char = {i: ch for i, ch in enumerate(chars)}\n", | |
| "\n", | |
| "print(f\"Data: {data}\")\n", | |
| "print(f\"Vocabulary: {chars}\")\n", | |
| "print(f\"Vocab Size: {vocab_size}\")" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "RMdNRx2R91mH", | |
| "outputId": "1b8f8c91-74c9-45ef-fdcc-048d47a03380" | |
| }, | |
| "execution_count": 18, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Data: helloahmed\n", | |
| "Vocabulary: ['h', 'e', 'o', 'd', 'l', 'a', 'm']\n", | |
| "Vocab Size: 7\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# 2. create model\n", | |
| "# Seed NumPy's global RNG, which GRU uses for weight initialisation and sampling.\n", | |
| "SEED = 42\n", | |
| "np.random.seed(SEED)\n", | |
| "\n", | |
| "hidden_size = 25\n", | |
| "epochs = 3000\n", | |
| "learning_rate = 0.01\n", | |
| "\n", | |
| "gru = GRU(hidden_size, vocab_size, learning_rate)" | |
| ], | |
| "metadata": { | |
| "id": "yqGicNYX939O" | |
| }, | |
| "execution_count": 19, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# 3. training loop\n", | |
| "print(\"Training GRU...\")\n", | |
| "costs = []\n", | |
| "\n", | |
| "# Next-character prediction: the target at each position is the character that follows the input.\n", | |
| "inputs = [char_to_idx[ch] for ch in data[:-1]]\n", | |
| "targets = [char_to_idx[ch] for ch in data[1:]]\n", | |
| "\n", | |
| "for epoch in range(epochs):\n", | |
| "    # Every epoch starts from a zeroed hidden state.\n", | |
| "    h_prev = np.zeros((hidden_size, 1))\n", | |
| "\n", | |
| "    # Forward pass over the whole sequence, then measure the cost.\n", | |
| "    y_preds, h_final, cache = gru.forward(inputs, h_prev)\n", | |
| "    cost = gru.compute_cost(y_preds, targets)\n", | |
| "\n", | |
| "    # Backward pass and gradient-descent step.\n", | |
| "    gru.backpropagation(targets, cache)\n", | |
| "    gru.update_parameters()\n", | |
| "\n", | |
| "    # Report and record the cost only on every 200th epoch.\n", | |
| "    if epoch % 200 != 0:\n", | |
| "        continue\n", | |
| "    print(f\"Epoch {epoch}, Cost: {cost}\")\n", | |
| "    costs.append(cost)\n", | |
| "\n", | |
| "print(\"Training complete.\")" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "9TQ5BDfa95_C", | |
| "outputId": "3fb3042d-f031-4b8c-bb68-cf5726b514f9" | |
| }, | |
| "execution_count": 20, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Training GRU...\n", | |
| "Epoch 0, Cost: 1.9459141102092898\n", | |
| "Epoch 200, Cost: 1.8820941376017426\n", | |
| "Epoch 400, Cost: 1.7173890355162649\n", | |
| "Epoch 600, Cost: 0.8028859557757452\n", | |
| "Epoch 800, Cost: 0.3321508924395241\n", | |
| "Epoch 1000, Cost: 0.1979177453568896\n", | |
| "Epoch 1200, Cost: 0.11995815684712927\n", | |
| "Epoch 1400, Cost: 0.18499005840229943\n", | |
| "Epoch 1600, Cost: 0.18629243670532064\n", | |
| "Epoch 1800, Cost: 0.17070964217578966\n", | |
| "Epoch 2000, Cost: 0.11924944862850778\n", | |
| "Epoch 2200, Cost: 0.0873657902096159\n", | |
| "Epoch 2400, Cost: 0.05842180501814652\n", | |
| "Epoch 2600, Cost: 0.04516581113149621\n", | |
| "Epoch 2800, Cost: 0.03725497325100427\n", | |
| "Training complete.\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Plot the cost values recorded during training (one point per 200 epochs).\n", | |
| "fig, ax = plt.subplots()\n", | |
| "ax.plot(np.squeeze(costs))\n", | |
| "ax.set_ylabel('Cost')\n", | |
| "ax.set_xlabel('Epochs (per 200)')\n", | |
| "# Read the rate from the configuration so the title cannot drift from the value actually used.\n", | |
| "ax.set_title(f\"Learning rate = {learning_rate}\")\n", | |
| "plt.show()" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 472 | |
| }, | |
| "id": "a3vEAjW699q3", | |
| "outputId": "2ddaa498-c8b3-4147-8bd1-fc04a024df10" | |
| }, | |
| "execution_count": 21, | |
| "outputs": [ | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<Figure size 640x480 with 1 Axes>" | |
| ], | |
| "image/png": "\n" | |
| }, | |
| "metadata": {} | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Test the model (sampling)\n", | |
| "print(\"\\nSampling from the model:\")\n", | |
| "# One seed character drives both the first model input and the prefix of the printed text.\n", | |
| "seed_char = 'h'\n", | |
| "seed_char_idx = char_to_idx[seed_char]\n", | |
| "\n", | |
| "# Sample from a zeroed hidden state, the same starting point each training epoch uses.\n", | |
| "h_sample = np.zeros((hidden_size, 1))\n", | |
| "\n", | |
| "generated_indices = gru.sample(seed_char_idx, h_sample, length=10)\n", | |
| "generated_text = seed_char + ''.join(idx_to_char[idx] for idx in generated_indices)\n", | |
| "\n", | |
| "print(f\"Generated text: '{generated_text}'\")" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "-3i1shi_9__H", | |
| "outputId": "ae0c28bc-ecfc-45bd-93cd-2ff48b785b5d" | |
| }, | |
| "execution_count": 22, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "\n", | |
| "Sampling from the model:\n", | |
| "Generated text: 'helloahmedd'\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "nmu8yfiu-Ul5" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment