Created
May 20, 2025 17:38
-
-
Save romanmichaelpaolucci/478f3ed921188dba54e644f649266c07 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "nbformat": 4, | |
| "nbformat_minor": 0, | |
| "metadata": { | |
| "colab": { | |
| "provenance": [] | |
| }, | |
| "kernelspec": { | |
| "name": "python3", | |
| "display_name": "Python 3" | |
| }, | |
| "language_info": { | |
| "name": "python" | |
| } | |
| }, | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### Generating a Data Matrix" | |
| ], | |
| "metadata": { | |
| "id": "Mr6rWz6nv5Af" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "import numpy as np\n", | |
| "\n", | |
| "# Set the random seed for reproducibility\n", | |
| "np.random.seed(42)\n", | |
| "\n", | |
| "# Define the desired number of features (columns)\n", | |
| "n_features = 3\n", | |
| "\n", | |
| "# Define the desired number of samples (rows)\n", | |
| "n_samples = 100\n", | |
| "\n", | |
| "# Generate a random positive semi-definite covariance matrix\n", | |
| "# Start with a random matrix\n", | |
| "random_matrix = np.random.rand(n_features, n_features)\n", | |
| "\n", | |
| "# Multiply by its transpose: A @ A.T is symmetric and positive semi-definite\n", | |
| "cov_matrix = np.dot(random_matrix, random_matrix.T)\n", | |
| "\n", | |
| "# Add a small value to the diagonal so the matrix is strictly positive definite (A @ A.T alone is only guaranteed to be positive semi-definite)\n", | |
| "cov_matrix += np.eye(n_features) * 1e-6\n", | |
| "\n", | |
| "# Generate data points from a multivariate normal distribution with the specified covariance matrix\n", | |
| "mean_vector = np.zeros(n_features) # Assuming a mean of zero for simplicity\n", | |
| "\n", | |
| "X = np.random.multivariate_normal(mean_vector, cov_matrix, size=n_samples)\n", | |
| "X.shape" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "hrhNukDev67H", | |
| "outputId": "d08844c5-1d47-4bb3-8771-4481f22ba275" | |
| }, | |
| "execution_count": 2, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "(100, 3)" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "execution_count": 2 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### $X^T X$" | |
| ], | |
| "metadata": { | |
| "id": "AX3ua07NwJMK" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "X.T @ X" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "Doanox_GwLx7", | |
| "outputId": "46d9cedb-9962-468c-a71e-1b04b3497567" | |
| }, | |
| "execution_count": 3, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "array([[146.34187596, 52.18547078, 115.66137461],\n", | |
| " [ 52.18547078, 42.30274569, 28.7360541 ],\n", | |
| " [115.66137461, 28.7360541 , 98.17212993]])" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "execution_count": 3 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### Covariance Matrix from NumPy ($\\Sigma$)\n", | |
| "\n", | |
| "Notice that this is not equal to $X^T X$ above, and it is not exactly proportional to it either, because the columns of $X$ have not been mean-centered!" | |
| ], | |
| "metadata": { | |
| "id": "AdOVy2lOwNqW" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "np.cov(X.T)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "9X2AkTAWwQ1e", | |
| "outputId": "f50fc563-207b-40d3-9595-8e37fa4e4e56" | |
| }, | |
| "execution_count": 4, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "array([[1.47788725, 0.52838454, 1.16726139],\n", | |
| " [0.52838454, 0.42224816, 0.29441931],\n", | |
| " [1.16726139, 0.29441931, 0.98821875]])" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "execution_count": 4 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### $\\tilde{X} = (X - \\bar{X})$" | |
| ], | |
| "metadata": { | |
| "id": "NqdadYHawVy7" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "X_tilde = X - np.mean(X, axis=0)" | |
| ], | |
| "metadata": { | |
| "id": "CK7blcejwccm" | |
| }, | |
| "execution_count": 6, | |
| "outputs": [] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### $\\tilde{X}^T\\tilde{X} \\propto \\Sigma$" | |
| ], | |
| "metadata": { | |
| "id": "15ErQgvlwfHR" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "X_tilde.T @ X_tilde" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "vNSrg6wBwlRu", | |
| "outputId": "fd907d94-9fad-49fe-e5b5-fc8af4756031" | |
| }, | |
| "execution_count": 7, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "array([[146.31083755, 52.31006904, 115.55887737],\n", | |
| " [ 52.31006904, 41.80256773, 29.14751134],\n", | |
| " [115.55887737, 29.14751134, 97.83365627]])" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "execution_count": 7 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "source": [ | |
| "### $\\frac{\\tilde{X}^T\\tilde{X}}{n-1} = \\Sigma$" | |
| ], | |
| "metadata": { | |
| "id": "yJXx4LxqwpB6" | |
| } | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "X_tilde.T @ X_tilde / (n_samples - 1)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "pNEZoQcNwosT", | |
| "outputId": "f105f135-5579-4f0d-d749-7af2c1398c6b" | |
| }, | |
| "execution_count": 8, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "array([[1.47788725, 0.52838454, 1.16726139],\n", | |
| " [0.52838454, 0.42224816, 0.29441931],\n", | |
| " [1.16726139, 0.29441931, 0.98821875]])" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "execution_count": 8 | |
| } | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "np.cov(X.T)" | |
| ], | |
| "metadata": { | |
| "colab": { | |
| "base_uri": "https://localhost:8080/" | |
| }, | |
| "id": "GL8eD0LswyXV", | |
| "outputId": "42455ad9-95ed-4843-9eca-2f23ece16db2" | |
| }, | |
| "execution_count": 9, | |
| "outputs": [ | |
| { | |
| "output_type": "execute_result", | |
| "data": { | |
| "text/plain": [ | |
| "array([[1.47788725, 0.52838454, 1.16726139],\n", | |
| " [0.52838454, 0.42224816, 0.29441931],\n", | |
| " [1.16726139, 0.29441931, 0.98821875]])" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "execution_count": 9 | |
| } | |
| ] | |
| } | |
| ] | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment