Created
November 18, 2025 00:09
-
-
Save AhmedCoolProjects/a0fc5bc563c2ca38dc5437c3e6a27932 to your computer and use it in GitHub Desktop.
LSTM Application.ipynb
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [], | |
| "authorship_tag": "ABX9TyMqNwi7Ib84ZzLhFju2We1M", | |
| "include_colab_link": true | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": { | |
| "id": "view-in-github", | |
| "colab_type": "text" | |
| }, | |
| "source": [ | |
| "<a href=\"https://colab.research.google.com/gist/AhmedCoolProjects/a0fc5bc563c2ca38dc5437c3e6a27932/lstm-application.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": { | |
| "id": "2X8dfAMXRS-N" | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import numpy as np\n", | |
| "import matplotlib.pyplot as plt" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "class LSTM:\n", | |
| "    '''\n", | |
| "    Character-level LSTM with a softmax output layer, written in plain NumPy.\n", | |
| "\n", | |
| "    H is the hidden size and V the vocabulary size. Every gate reads the\n", | |
| "    stacked column vector [h_(t-1); x_t] of shape (H + V) x 1.\n", | |
| "    '''\n", | |
| "\n", | |
| "    def __init__(self, hidden_size, vocab_size):\n", | |
| "        '''\n", | |
| "        hidden_size: number of hidden / cell units (H)\n", | |
| "        vocab_size: number of distinct symbols (V)\n", | |
| "        '''\n", | |
| "        self.H = hidden_size\n", | |
| "        self.V = vocab_size\n", | |
| "\n", | |
| "        # Initialize weights (4 sets)\n", | |
| "        # f: Forget, i: Input, c: Candidate, o: Output\n", | |
| "        self._W_size = (self.H, self.H + self.V) # weights shape for all gates\n", | |
| "        # Forget Gate\n", | |
| "        self.Wf = np.random.randn(*self._W_size) * 0.01\n", | |
| "        self.bf = np.zeros((self.H, 1))\n", | |
| "\n", | |
| "        # Input Gate\n", | |
| "        self.Wi = np.random.randn(*self._W_size) * 0.01\n", | |
| "        self.bi = np.zeros((self.H, 1))\n", | |
| "\n", | |
| "        # Candidate Layer\n", | |
| "        self.Wc = np.random.randn(*self._W_size) * 0.01\n", | |
| "        self.bc = np.zeros((self.H, 1))\n", | |
| "\n", | |
| "        # Output Gate\n", | |
| "        self.Wo = np.random.randn(*self._W_size) * 0.01\n", | |
| "        self.bo = np.zeros((self.H, 1))\n", | |
| "\n", | |
| "        # Final Output Layer\n", | |
| "        self.Why = np.random.randn(self.V, self.H) * 0.01\n", | |
| "        self.by = np.zeros((self.V, 1))\n", | |
| "\n", | |
| "    def _sigmoid(self, z):\n", | |
| "        '''Element-wise logistic function.'''\n", | |
| "        return 1 / (1 + np.exp(-z))\n", | |
| "\n", | |
| "    def _tanh(self, z):\n", | |
| "        '''Element-wise hyperbolic tangent.'''\n", | |
| "        return np.tanh(z)\n", | |
| "\n", | |
| "    def _softmax(self, z):\n", | |
| "        '''Column-wise softmax; the per-column max is subtracted for numerical stability.'''\n", | |
| "        e_z = np.exp(z - np.max(z, axis=0, keepdims=True))\n", | |
| "        return e_z / np.sum(e_z, axis=0, keepdims=True)\n", | |
| "\n", | |
| "    def _one_hot(self, idx):\n", | |
| "        '''Return the (V x 1) one-hot column vector for symbol index idx.'''\n", | |
| "        x = np.zeros((self.V, 1))\n", | |
| "        x[idx] = 1\n", | |
| "        return x\n", | |
| "\n", | |
| "    def _step(self, x, h_prev, c_prev):\n", | |
| "        '''\n", | |
| "        Run a single time step; shared by forward() and sample().\n", | |
| "\n", | |
| "        x: one-hot input (V x 1)\n", | |
| "        h_prev, c_prev: previous hidden / cell state (H x 1)\n", | |
| "\n", | |
| "        Returns (concat, f, i, c_tilde, o, c, h, z, y).\n", | |
| "        '''\n", | |
| "        # Concatenate h_prev and x_t\n", | |
| "        concat = np.vstack((h_prev, x)) # (H + V) x 1\n", | |
| "\n", | |
| "        # Gate activations\n", | |
| "        f = self._sigmoid(np.dot(self.Wf, concat) + self.bf)\n", | |
| "        i = self._sigmoid(np.dot(self.Wi, concat) + self.bi)\n", | |
| "        c_tilde = self._tanh(np.dot(self.Wc, concat) + self.bc)\n", | |
| "        o = self._sigmoid(np.dot(self.Wo, concat) + self.bo)\n", | |
| "\n", | |
| "        # Update cell state, then derive the hidden state from it\n", | |
| "        c = f * c_prev + i * c_tilde # (H x 1)\n", | |
| "        h = o * self._tanh(c) # (H x 1)\n", | |
| "\n", | |
| "        # Output distribution over the vocabulary\n", | |
| "        z = np.dot(self.Why, h) + self.by\n", | |
| "        y = self._softmax(z) # (V x 1)\n", | |
| "        return concat, f, i, c_tilde, o, c, h, z, y\n", | |
| "\n", | |
| "    def forward(self, inputs, h_prev, c_prev):\n", | |
| "        '''\n", | |
| "        inputs: list of input indices\n", | |
| "        h_prev: previous hidden state (H x 1)\n", | |
| "        c_prev: previous cell state (H x 1)\n", | |
| "\n", | |
| "        Returns (ys, h_last, c_last, cache). ys maps each time step to its\n", | |
| "        (V x 1) softmax output, h_last / c_last are the states after the final\n", | |
| "        step (copies of h_prev / c_prev when inputs is empty) and cache holds\n", | |
| "        the per-step values that backpropagation() consumes.\n", | |
| "        '''\n", | |
| "        xs, hs, cs, zs, ys = {}, {}, {}, {}, {} # store values for each time step\n", | |
| "        fs, is_, cs_tilde, os = {}, {}, {}, {} # gate activations\n", | |
| "        concat_inputs = {}\n", | |
| "\n", | |
| "        # Index -1 holds the incoming state so that step 0 can read hs[t-1] / cs[t-1]\n", | |
| "        hs[-1] = np.copy(h_prev)\n", | |
| "        cs[-1] = np.copy(c_prev)\n", | |
| "\n", | |
| "        for t in range(len(inputs)):\n", | |
| "            xs[t] = self._one_hot(inputs[t])\n", | |
| "            (concat_inputs[t], fs[t], is_[t], cs_tilde[t], os[t],\n", | |
| "             cs[t], hs[t], zs[t], ys[t]) = self._step(xs[t], hs[t-1], cs[t-1])\n", | |
| "\n", | |
| "        cache = (xs, hs, cs, fs, is_, cs_tilde, os, zs, ys, concat_inputs)\n", | |
| "\n", | |
| "        return ys, hs[len(inputs)-1], cs[len(inputs)-1], cache\n", | |
| "\n", | |
| "    def compute_cost(self, y_preds, targets):\n", | |
| "        '''\n", | |
| "        Mean cross-entropy of the target indices under the predictions.\n", | |
| "\n", | |
| "        y_preds: per-step (V x 1) probability vectors, indexable by time step\n", | |
| "        targets: list of target indices, one per time step\n", | |
| "\n", | |
| "        Raises ValueError when targets is empty (the mean would be undefined).\n", | |
| "        '''\n", | |
| "        if len(targets) == 0:\n", | |
| "            raise ValueError('targets must contain at least one index')\n", | |
| "\n", | |
| "        total_cost = 0\n", | |
| "        for t in range(len(targets)):\n", | |
| "            prob_of_target = y_preds[t][targets[t], 0] # scalar probability assigned to the target\n", | |
| "            total_cost += -np.log(prob_of_target + 1e-9) # add small value to avoid log(0)\n", | |
| "        return total_cost / len(targets)\n", | |
| "\n", | |
| "    def backpropagation(self, targets, cache):\n", | |
| "        '''\n", | |
| "        Backpropagation through time over the steps stored in cache.\n", | |
| "\n", | |
| "        Stores in self.dW* / self.db* the gradients of the cross-entropy summed\n", | |
| "        over all time steps (compute_cost reports that sum divided by\n", | |
| "        len(targets)), then clips every gradient element to [-5, 5].\n", | |
| "        An empty targets list leaves all gradients at zero.\n", | |
| "        '''\n", | |
| "        xs, hs, cs, fs, is_, cs_tilde, os, zs, ys, concat_inputs = cache\n", | |
| "        # Initialize gradients\n", | |
| "        self.dWf = np.zeros_like(self.Wf)\n", | |
| "        self.dbf = np.zeros_like(self.bf)\n", | |
| "        self.dWi = np.zeros_like(self.Wi)\n", | |
| "        self.dbi = np.zeros_like(self.bi)\n", | |
| "        self.dWc = np.zeros_like(self.Wc)\n", | |
| "        self.dbc = np.zeros_like(self.bc)\n", | |
| "        self.dWo = np.zeros_like(self.Wo)\n", | |
| "        self.dbo = np.zeros_like(self.bo)\n", | |
| "        self.dWhy = np.zeros_like(self.Why)\n", | |
| "        self.dby = np.zeros_like(self.by)\n", | |
| "\n", | |
| "        # Sized from H rather than hs[0], which does not exist for an empty sequence\n", | |
| "        dh_next = np.zeros((self.H, 1))\n", | |
| "        dc_next = np.zeros((self.H, 1))\n", | |
| "\n", | |
| "        for t in reversed(range(len(targets))):\n", | |
| "            # 1. Output layer\n", | |
| "            dy = np.copy(ys[t])\n", | |
| "            dy[targets[t]] -= 1 # y_pred - y_true\n", | |
| "            self.dWhy += np.dot(dy, hs[t].T)\n", | |
| "            self.dby += dy\n", | |
| "\n", | |
| "            # 2. Gradient for hidden state dh_t\n", | |
| "            dh = np.dot(self.Why.T, dy) + dh_next\n", | |
| "\n", | |
| "            # 3. Gradient for output gate\n", | |
| "            do = dh * self._tanh(cs[t]) * os[t] * (1 - os[t])\n", | |
| "            self.dWo += np.dot(do, concat_inputs[t].T)\n", | |
| "            self.dbo += do\n", | |
| "\n", | |
| "            # 4. Gradient for cell state\n", | |
| "            dc = dh * os[t] * (1 - self._tanh(cs[t])**2) + dc_next\n", | |
| "\n", | |
| "            # 5. Gradient for forget gate\n", | |
| "            df = dc * cs[t-1] * fs[t] * (1 - fs[t])\n", | |
| "            self.dWf += np.dot(df, concat_inputs[t].T)\n", | |
| "            self.dbf += df\n", | |
| "\n", | |
| "            # 6. Gradient for input gate\n", | |
| "            di = dc * cs_tilde[t] * is_[t] * (1 - is_[t])\n", | |
| "            self.dWi += np.dot(di, concat_inputs[t].T)\n", | |
| "            self.dbi += di\n", | |
| "\n", | |
| "            # 7. Gradient for candidate layer\n", | |
| "            dc_tilde = dc * is_[t] * (1 - cs_tilde[t]**2)\n", | |
| "            self.dWc += np.dot(dc_tilde, concat_inputs[t].T)\n", | |
| "            self.dbc += dc_tilde\n", | |
| "\n", | |
| "            # 8. Gradient for concatenated input\n", | |
| "            dconcat = (np.dot(self.Wf.T, df) +\n", | |
| "                       np.dot(self.Wi.T, di) +\n", | |
| "                       np.dot(self.Wc.T, dc_tilde) +\n", | |
| "                       np.dot(self.Wo.T, do))\n", | |
| "            dh_next = dconcat[:self.H, :] # Gradient for h_(t-1)\n", | |
| "            dc_next = dc * fs[t] # Gradient for C_(t-1)\n", | |
| "\n", | |
| "        # Gradient clipping to prevent exploding gradients\n", | |
| "        for grad in [self.dWf, self.dbf, self.dWi, self.dbi,\n", | |
| "                     self.dWc, self.dbc, self.dWo, self.dbo,\n", | |
| "                     self.dWhy, self.dby]:\n", | |
| "            np.clip(grad, -5, 5, out=grad)\n", | |
| "\n", | |
| "    def update_parameters(self, learning_rate=0.01):\n", | |
| "        '''\n", | |
| "        Plain gradient-descent step using the gradients stored by\n", | |
| "        backpropagation(), which must have been called first.\n", | |
| "        '''\n", | |
| "        self.Wf -= learning_rate * self.dWf\n", | |
| "        self.bf -= learning_rate * self.dbf\n", | |
| "        self.Wi -= learning_rate * self.dWi\n", | |
| "        self.bi -= learning_rate * self.dbi\n", | |
| "        self.Wc -= learning_rate * self.dWc\n", | |
| "        self.bc -= learning_rate * self.dbc\n", | |
| "        self.Wo -= learning_rate * self.dWo\n", | |
| "        self.bo -= learning_rate * self.dbo\n", | |
| "        self.Why -= learning_rate * self.dWhy\n", | |
| "        self.by -= learning_rate * self.dby\n", | |
| "\n", | |
| "    def sample(self, seed_idx, h_prev, c_prev, length=20):\n", | |
| "        '''\n", | |
| "        Generate length symbol indices, starting from seed_idx and the given state.\n", | |
| "\n", | |
| "        Each step draws the next index from the softmax output with\n", | |
| "        np.random.choice and feeds it back in as the next input.\n", | |
| "        Returns the drawn indices as a list of Python ints (seed not included).\n", | |
| "        '''\n", | |
| "        x = self._one_hot(seed_idx)\n", | |
| "        indices = []\n", | |
| "\n", | |
| "        for _ in range(length):\n", | |
| "            # Only the new states and the output distribution are needed here\n", | |
| "            _, _, _, _, _, c_prev, h_prev, _, y = self._step(x, h_prev, c_prev)\n", | |
| "\n", | |
| "            # Sample from the probability distribution; .ravel() turns (V, 1) into the (V,) shape np.random.choice requires\n", | |
| "            idx = int(np.random.choice(self.V, p=y.ravel()))\n", | |
| "            x = self._one_hot(idx)\n", | |
| "            indices.append(idx)\n", | |
| "\n", | |
| "        return indices" | |
| ], | |
| "metadata": { | |
| "id": "KUqVTeIUR1Hr" | |
| }, | |
| "execution_count": 2, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# 1. prepare data\n", | |
| "data = \"helloahmed\"\n", | |
| "# sorted() pins every character to a fixed index; a bare set has no stable order between kernel sessions\n", | |
| "chars = sorted(set(data))\n", | |
| "vocab_size = len(chars)\n", | |
| "char_to_idx = {ch: i for i, ch in enumerate(chars)}\n", | |
| "idx_to_char = {i: ch for i, ch in enumerate(chars)}\n", | |
| "\n", | |
| "print(f\"Data: {data}\")\n", | |
| "print(f\"Vocabulary: {chars}\")\n", | |
| "print(f\"Vocab Size: {vocab_size}\")" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "W1FFDodJR351", | |
| "outputId": "a65af114-3d62-47a1-ccd8-20234e1bb656" | |
| }, | |
| "execution_count": 3, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Data: helloahmed\n", | |
| "Vocabulary: ['l', 'a', 'e', 'd', 'o', 'h', 'm']\n", | |
| "Vocab Size: 7\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# 2. create model\n", | |
| "hidden_size = 25\n", | |
| "epochs = 3000\n", | |
| "\n", | |
| "# Seed NumPy's global RNG so the random weight initialisation (and any later\n", | |
| "# np.random draws, such as sampling) repeat on Restart & Run All\n", | |
| "np.random.seed(42)\n", | |
| "\n", | |
| "lstm = LSTM(hidden_size, vocab_size)" | |
| ], | |
| "metadata": { | |
| "id": "3WtLKbh8R8GZ" | |
| }, | |
| "execution_count": 5, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# 3. training loop\n", | |
| "# 3. training loop\n", | |
| "print(\"Training LSTM...\")\n", | |
| "costs = []\n", | |
| "\n", | |
| "# Next-character prediction: the target at step t is the input at step t + 1\n", | |
| "inputs = [char_to_idx[ch] for ch in data[:-1]] # all chars except last\n", | |
| "targets = [char_to_idx[ch] for ch in data[1:]] # all chars except first\n", | |
| "\n", | |
| "for epoch in range(epochs):\n", | |
| "    # We reset the memory at the start of each epoch\n", | |
| "    h_prev = np.zeros((hidden_size, 1))\n", | |
| "    c_prev = np.zeros((hidden_size, 1))\n", | |
| "\n", | |
| "    # Forward pass\n", | |
| "    y_preds, h_final, c_final, cache = lstm.forward(inputs, h_prev, c_prev)\n", | |
| "\n", | |
| "    # Cost of this epoch's predictions, measured before the weights are updated\n", | |
| "    cost = lstm.compute_cost(y_preds, targets)\n", | |
| "\n", | |
| "    # Backward pass, then one gradient-descent step at the default learning rate\n", | |
| "    lstm.backpropagation(targets, cache)\n", | |
| "\n", | |
| "    lstm.update_parameters()\n", | |
| "\n", | |
| "    # Log and record the cost once every 200 epochs\n", | |
| "    if epoch % 200 == 0:\n", | |
| "        print(f\"Epoch {epoch}, Cost: {cost}\")\n", | |
| "        costs.append(cost) # Append cost for plotting\n", | |
| "\n", | |
| "print(\"Training complete.\")" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "U3DNz_TtSEBF", | |
| "outputId": "21f2a66f-3142-466b-cd74-864fc3b99e0d" | |
| }, | |
| "execution_count": 7, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "Training LSTM...\n", | |
| "Epoch 0, Cost: 1.9459308553119037\n", | |
| "Epoch 200, Cost: 1.888694596930537\n", | |
| "Epoch 400, Cost: 1.885747141311822\n", | |
| "Epoch 600, Cost: 1.869165401158513\n", | |
| "Epoch 800, Cost: 1.7772037343293763\n", | |
| "Epoch 1000, Cost: 1.4138688219022517\n", | |
| "Epoch 1200, Cost: 0.7320503793476106\n", | |
| "Epoch 1400, Cost: 0.3883764333872252\n", | |
| "Epoch 1600, Cost: 0.21691620735344425\n", | |
| "Epoch 1800, Cost: 0.12986328517492585\n", | |
| "Epoch 2000, Cost: 0.08541260344701034\n", | |
| "Epoch 2200, Cost: 0.06066953628544073\n", | |
| "Epoch 2400, Cost: 0.04568582684125432\n", | |
| "Epoch 2600, Cost: 0.035955796413271686\n", | |
| "Epoch 2800, Cost: 0.029270449962368588\n", | |
| "Training complete.\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Cost recorded every 200 epochs, drawn on an explicit figure / axes pair\n", | |
| "fig, ax = plt.subplots()\n", | |
| "ax.plot(np.squeeze(costs))\n", | |
| "ax.set_ylabel('Cost')\n", | |
| "ax.set_xlabel('Epochs (per 200)')\n", | |
| "ax.set_title(f\"Learning rate = {0.01}\")\n", | |
| "plt.show()" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/", | |
| "height": 472 | |
| }, | |
| "id": "hiBCQMBKSQ0I", | |
| "outputId": "78d5db12-5c69-4884-bc82-25d38017481f" | |
| }, | |
| "execution_count": 9, | |
| "outputs": [ | |
| { | |
| "output_type": "display_data", | |
| "data": { | |
| "text/plain": [ | |
| "<Figure size 640x480 with 1 Axes>" | |
| ], | |
| "image/png": "\n" | |
| }, | |
| "metadata": {} | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Test the model (sampling)\n", | |
| "print(\"\\nSampling from the model:\")\n", | |
| "# The seed character starts the generated string and is looked up once for its index\n", | |
| "seed_char = 'h'\n", | |
| "seed_char_idx = char_to_idx[seed_char]\n", | |
| "\n", | |
| "# Sampling starts from an all-zero hidden and cell state\n", | |
| "h_sample = np.zeros((hidden_size, 1))\n", | |
| "c_sample = np.zeros((hidden_size, 1))\n", | |
| "\n", | |
| "generated_indices = lstm.sample(seed_char_idx, h_sample, c_sample, length=10)\n", | |
| "generated_chars = [idx_to_char[idx] for idx in generated_indices]\n", | |
| "generated_text = seed_char + ''.join(generated_chars)\n", | |
| "\n", | |
| "print(f\"Generated text: '{generated_text}'\")" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "44jjYmw5Snwz", | |
| "outputId": "d9cecd0a-3012-4bae-8e35-6665dd112154" | |
| }, | |
| "execution_count": 10, | |
| "outputs": [ | |
| { | |
| "output_type": "stream", | |
| "name": "stdout", | |
| "text": [ | |
| "\n", | |
| "Sampling from the model:\n", | |
| "Generated text: 'helloahmedl'\n" | |
| ] | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [], | |
| "metadata": { | |
| "id": "qgZo2hekSxAH" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment