romanmichaelpaolucci · May 20, 2025 17:38
diff --git a/matrix_product_equals_covariance_matrix.ipynb b/matrix_product_equals_covariance_matrix.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "### Generating a Data Matrix"
      ],
      "metadata": {
        "id": "Mr6rWz6nv5Af"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "\n",
        "# Set the random seed for reproducibility\n",
        "np.random.seed(42)\n",
        "\n",
        "# Define the desired number of features (columns)\n",
        "n_features = 3\n",
        "\n",
        "# Define the desired number of samples (rows)\n",
        "n_samples = 100\n",
        "\n",
        "# Generate a random positive semi-definite covariance matrix\n",
        "# Start with a random matrix\n",
        "random_matrix = np.random.rand(n_features, n_features)\n",
        "\n",
        "# Make it symmetric\n",
        "cov_matrix = np.dot(random_matrix, random_matrix.T)\n",
        "\n",
        "# Ensure it's positive semi-definite (by adding a small diagonal value if needed, though the dot product already helps)\n",
        "cov_matrix += np.eye(n_features) * 1e-6\n",
        "\n",
        "# Generate data points from a multivariate normal distribution with the specified covariance matrix\n",
        "mean_vector = np.zeros(n_features) # Assuming a mean of zero for simplicity\n",
        "\n",
        "X = np.random.multivariate_normal(mean_vector, cov_matrix, size=n_samples)\n",
        "X.shape"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "hrhNukDev67H",
        "outputId": "d08844c5-1d47-4bb3-8771-4481f22ba275"
      },
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(100, 3)"
            ]
          },
          "metadata": {},
          "execution_count": 2
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### $X^T X$"
      ],
      "metadata": {
        "id": "AX3ua07NwJMK"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X.T @ X"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Doanox_GwLx7",
        "outputId": "46d9cedb-9962-468c-a71e-1b04b3497567"
      },
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([[146.34187596,  52.18547078, 115.66137461],\n",
              "       [ 52.18547078,  42.30274569,  28.7360541 ],\n",
              "       [115.66137461,  28.7360541 ,  98.17212993]])"
            ]
          },
          "metadata": {},
          "execution_count": 3
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### Covariance Matrix from Numpy ($\\Sigma$)\n",
        "\n",
        "Notice this is not equal to the previous matrix and it is not proportional to it either!"
      ],
      "metadata": {
        "id": "AdOVy2lOwNqW"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "np.cov(X.T)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "9X2AkTAWwQ1e",
        "outputId": "f50fc563-207b-40d3-9595-8e37fa4e4e56"
      },
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([[1.47788725, 0.52838454, 1.16726139],\n",
              "       [0.52838454, 0.42224816, 0.29441931],\n",
              "       [1.16726139, 0.29441931, 0.98821875]])"
            ]
          },
          "metadata": {},
          "execution_count": 4
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### $\\tilde{X} = (X - \\bar{X})$"
      ],
      "metadata": {
        "id": "NqdadYHawVy7"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X_tilde = X - np.mean(X, axis=0)"
      ],
      "metadata": {
        "id": "CK7blcejwccm"
      },
      "execution_count": 6,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "### $\\tilde{X}^T\\tilde{X} \\propto \\Sigma$"
      ],
      "metadata": {
        "id": "15ErQgvlwfHR"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X_tilde.T @ X_tilde"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "vNSrg6wBwlRu",
        "outputId": "fd907d94-9fad-49fe-e5b5-fc8af4756031"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([[146.31083755,  52.31006904, 115.55887737],\n",
              "       [ 52.31006904,  41.80256773,  29.14751134],\n",
              "       [115.55887737,  29.14751134,  97.83365627]])"
            ]
          },
          "metadata": {},
          "execution_count": 7
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "### $\\frac{\\tilde{X}^T\\tilde{X}}{n-1} = \\Sigma$"
      ],
      "metadata": {
        "id": "yJXx4LxqwpB6"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X_tilde.T @ X_tilde / (n_samples - 1)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "pNEZoQcNwosT",
        "outputId": "f105f135-5579-4f0d-d749-7af2c1398c6b"
      },
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([[1.47788725, 0.52838454, 1.16726139],\n",
              "       [0.52838454, 0.42224816, 0.29441931],\n",
              "       [1.16726139, 0.29441931, 0.98821875]])"
            ]
          },
          "metadata": {},
          "execution_count": 8
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "np.cov(X.T)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "GL8eD0LswyXV",
        "outputId": "42455ad9-95ed-4843-9eca-2f23ece16db2"
      },
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "array([[1.47788725, 0.52838454, 1.16726139],\n",
              "       [0.52838454, 0.42224816, 0.29441931],\n",
              "       [1.16726139, 0.29441931, 0.98821875]])"
            ]
          },
          "metadata": {},
          "execution_count": 9
        }
      ]
    }
  ]
 }
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"provenance": []
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"source": [
	"### Generating a Data Matrix"
	],
	"metadata": {
	"id": "Mr6rWz6nv5Af"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"import numpy as np\n",
	"\n",
	"# Set the random seed for reproducibility\n",
	"np.random.seed(42)\n",
	"\n",
	"# Define the desired number of features (columns)\n",
	"n_features = 3\n",
	"\n",
	"# Define the desired number of samples (rows)\n",
	"n_samples = 100\n",
	"\n",
	"# Generate a random positive semi-definite covariance matrix\n",
	"# Start with a random matrix\n",
	"random_matrix = np.random.rand(n_features, n_features)\n",
	"\n",
	"# Make it symmetric\n",
	"cov_matrix = np.dot(random_matrix, random_matrix.T)\n",
	"\n",
	"# Ensure it's positive semi-definite (by adding a small diagonal value if needed, though the dot product already helps)\n",
	"cov_matrix += np.eye(n_features) * 1e-6\n",
	"\n",
	"# Generate data points from a multivariate normal distribution with the specified covariance matrix\n",
	"mean_vector = np.zeros(n_features) # Assuming a mean of zero for simplicity\n",
	"\n",
	"X = np.random.multivariate_normal(mean_vector, cov_matrix, size=n_samples)\n",
	"X.shape"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "hrhNukDev67H",
	"outputId": "d08844c5-1d47-4bb3-8771-4481f22ba275"
	},
	"execution_count": 2,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"(100, 3)"
	]
	},
	"metadata": {},
	"execution_count": 2
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"### $X^T X$"
	],
	"metadata": {
	"id": "AX3ua07NwJMK"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"X.T @ X"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "Doanox_GwLx7",
	"outputId": "46d9cedb-9962-468c-a71e-1b04b3497567"
	},
	"execution_count": 3,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"array([[146.34187596, 52.18547078, 115.66137461],\n",
	" [ 52.18547078, 42.30274569, 28.7360541 ],\n",
	" [115.66137461, 28.7360541 , 98.17212993]])"
	]
	},
	"metadata": {},
	"execution_count": 3
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"### Covariance Matrix from Numpy ($\\Sigma$)\n",
	"\n",
	"Notice this is not equal to the previous matrix and it is not proportional to it either!"
	],
	"metadata": {
	"id": "AdOVy2lOwNqW"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"np.cov(X.T)"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "9X2AkTAWwQ1e",
	"outputId": "f50fc563-207b-40d3-9595-8e37fa4e4e56"
	},
	"execution_count": 4,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"array([[1.47788725, 0.52838454, 1.16726139],\n",
	" [0.52838454, 0.42224816, 0.29441931],\n",
	" [1.16726139, 0.29441931, 0.98821875]])"
	]
	},
	"metadata": {},
	"execution_count": 4
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"### $\\tilde{X} = (X - \\bar{X})$"
	],
	"metadata": {
	"id": "NqdadYHawVy7"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"X_tilde = X - np.mean(X, axis=0)"
	],
	"metadata": {
	"id": "CK7blcejwccm"
	},
	"execution_count": 6,
	"outputs": []
	},
	{
	"cell_type": "markdown",
	"source": [
	"### $\\tilde{X}^T\\tilde{X} \\propto \\Sigma$"
	],
	"metadata": {
	"id": "15ErQgvlwfHR"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"X_tilde.T @ X_tilde"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "vNSrg6wBwlRu",
	"outputId": "fd907d94-9fad-49fe-e5b5-fc8af4756031"
	},
	"execution_count": 7,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"array([[146.31083755, 52.31006904, 115.55887737],\n",
	" [ 52.31006904, 41.80256773, 29.14751134],\n",
	" [115.55887737, 29.14751134, 97.83365627]])"
	]
	},
	"metadata": {},
	"execution_count": 7
	}
	]
	},
	{
	"cell_type": "markdown",
	"source": [
	"### $\\frac{\\tilde{X}^T\\tilde{X}}{n-1} = \\Sigma$"
	],
	"metadata": {
	"id": "yJXx4LxqwpB6"
	}
	},
	{
	"cell_type": "code",
	"source": [
	"X_tilde.T @ X_tilde / (n_samples - 1)"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "pNEZoQcNwosT",
	"outputId": "f105f135-5579-4f0d-d749-7af2c1398c6b"
	},
	"execution_count": 8,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"array([[1.47788725, 0.52838454, 1.16726139],\n",
	" [0.52838454, 0.42224816, 0.29441931],\n",
	" [1.16726139, 0.29441931, 0.98821875]])"
	]
	},
	"metadata": {},
	"execution_count": 8
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"np.cov(X.T)"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "GL8eD0LswyXV",
	"outputId": "42455ad9-95ed-4843-9eca-2f23ece16db2"
	},
	"execution_count": 9,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"array([[1.47788725, 0.52838454, 1.16726139],\n",
	" [0.52838454, 0.42224816, 0.29441931],\n",
	" [1.16726139, 0.29441931, 0.98821875]])"
	]
	},
	"metadata": {},
	"execution_count": 9
	}
	]
	}
	]
	}
No results found