Created
August 5, 2024 12:09
-
-
Save mikk-c/163b515067a1da624c53adb28fdd16f2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "690e3648-8352-4576-a76a-885edac5f8ef", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import networkx as nx\n", | |
| "from gensim.models import Word2Vec\n", | |
| "from sklearn.manifold import TSNE\n", | |
| "from sklearn.cluster import KMeans\n", | |
| "from sklearn.metrics import normalized_mutual_info_score" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "0405b4a3-e629-483c-b912-15954165156a", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "H = nx.read_edgelist(\"1/data.txt\", create_using = nx.Graph(), delimiter = \"\\t\", nodetype = int)\n", | |
| "G = nx.Graph()\n", | |
| "G.add_nodes_from(sorted(H.nodes))\n", | |
| "G.add_edges_from(H.edges)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "8623084e-dba5-49db-aa23-3582ec362681", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "array([2, 2, 2, 3, 1, 2, 0, 2, 3, 0, 2, 1, 1, 1, 2, 2, 1, 2, 2, 3, 1, 1,\n", | |
| " 0, 1, 1, 2, 3, 1, 2, 1, 2, 0, 2, 2, 0, 2, 2, 0, 3, 2, 1, 0, 0, 1,\n", | |
| " 3, 0, 0, 1, 2, 1, 3, 0, 3, 2, 3, 0, 3, 3, 0, 0, 3, 1, 3, 3, 2, 1,\n", | |
| " 1, 0, 3, 2, 3, 3, 3, 0, 1, 2, 2, 2, 3, 0, 0, 3, 2, 3, 1, 0, 1, 1,\n", | |
| " 0, 0, 0, 1, 2, 3, 0, 0, 2, 3, 2, 1, 2, 1, 3, 1, 2, 1, 1, 1, 0, 0,\n", | |
| " 2, 1, 1, 3, 2, 0, 3, 3, 3, 0, 2, 0, 2, 3, 2, 0, 1, 3, 3, 3, 2, 2,\n", | |
| " 1, 3, 3, 1, 1, 1, 1, 2, 1, 2, 3, 2, 2, 3, 1, 1, 0, 3, 0, 2, 0, 2,\n", | |
| " 3, 1, 1, 0, 3, 1, 2, 0, 0, 1, 0, 2, 2, 1, 0, 0, 2, 0, 0, 2, 0, 2,\n", | |
| " 2, 1, 1, 2, 1, 2, 1, 0, 0, 2, 2, 2, 0, 2, 0, 1, 0, 3, 3, 1, 0, 3,\n", | |
| " 2, 1, 0, 3, 1, 2, 3, 1, 2, 2, 3, 0, 2, 1, 1, 2, 2, 1, 0, 0, 2, 0,\n", | |
| " 2, 1, 3, 2, 1, 1, 1, 3, 1, 3, 3, 1, 2, 1, 2, 1, 1, 0, 3, 1, 0, 1,\n", | |
| " 1, 2, 0, 0, 1, 2, 2, 2, 3, 2, 3, 0, 3, 3, 3, 3, 3, 0, 3, 2, 1, 1,\n", | |
| " 0, 0, 1, 1, 1, 3, 3, 0, 3, 1, 1, 1, 2, 1, 1, 1, 2, 0, 2, 1, 3, 0,\n", | |
| " 0, 0, 0, 0, 3, 3, 3, 1, 3, 0, 2, 3, 0, 3, 1, 2, 0, 0, 1, 2, 2, 3,\n", | |
| " 0, 3, 1, 3, 1, 0, 3, 3, 1, 0, 1, 3, 0, 2, 2, 2, 1, 1, 1, 3, 3, 2,\n", | |
| " 1, 1, 2, 1, 0, 3, 1, 2, 1, 3, 1, 1, 3, 1, 2, 1, 1, 1, 3, 0, 2, 3,\n", | |
| " 0, 2, 0, 2, 1, 1, 2, 1, 3, 2, 0, 3, 1, 0, 3, 2, 1, 2, 2, 2, 3, 2,\n", | |
| " 1, 1, 0, 0, 3, 0, 2, 3, 0, 1, 2, 1, 2, 2, 2, 1, 2, 0, 3, 2, 0, 2,\n", | |
| " 1, 3, 0, 3, 1, 0, 1, 1, 0, 3, 2, 3, 3, 2, 3, 2, 0, 3, 3, 1, 0, 0,\n", | |
| " 3, 1, 1, 0, 3, 2, 3, 2, 3, 3, 0, 1, 3, 2, 0, 1, 1, 0, 3, 3, 1, 0,\n", | |
| " 3, 3, 2, 3, 1, 1, 2, 0, 0, 2, 2, 3, 2, 1, 0, 0, 3, 0, 1, 2, 0, 0,\n", | |
| " 3, 1, 3, 2, 3, 3, 0, 1, 0, 2, 1, 1, 1, 3, 1, 2, 2, 3, 3, 3, 0, 3,\n", | |
| " 1, 1, 2, 3, 2, 3, 1, 0, 1, 0, 1, 0, 3, 3, 3, 0, 3, 1, 2, 1, 3, 0,\n", | |
| " 2, 3, 2, 3, 1, 2, 1, 0, 0, 2, 2, 3, 3, 0, 2, 2, 1, 2, 1, 0, 2, 3,\n", | |
| " 1, 0, 1, 1, 3, 2, 2, 1, 2, 1, 0, 3, 1, 2, 0, 1, 3, 0, 3, 0, 0, 2,\n", | |
| " 0, 3, 3, 1, 0, 0, 3, 2, 3, 1, 3, 3, 1, 1, 2, 1, 0, 3, 1, 1, 2, 2,\n", | |
| " 2, 1, 3, 2, 3, 0, 2, 1, 3, 0, 1, 1, 2, 0, 3, 3, 3, 0, 3, 0, 3, 2,\n", | |
| " 1, 2, 2, 2, 1, 0, 1, 0, 3, 0, 1, 0, 2, 2, 3, 3, 0, 1, 1, 1, 1, 2,\n", | |
| " 3, 2, 0, 3, 1, 1, 3, 3, 2, 1, 2, 2, 2, 3, 0, 3, 2, 1, 1, 0, 3, 3,\n", | |
| " 0, 0, 1, 0, 1, 1, 1, 0, 0, 2, 1, 3, 3, 1, 1, 3, 1, 0, 3, 2, 0, 1,\n", | |
| " 2, 3, 1, 0, 3, 3, 3, 1, 2, 0, 3, 1, 1, 1, 2, 2, 1, 2, 1, 0, 3, 0,\n", | |
| " 1, 2, 3, 3, 3, 0, 2, 2, 0, 3, 1, 1, 2, 1, 1, 3, 3, 1, 1, 2, 3, 3,\n", | |
| " 2, 2, 2, 0, 2, 3, 2, 1, 2, 1, 0, 1, 2, 2, 1, 2, 1, 1, 1, 0, 0, 2,\n", | |
| " 1, 2, 3, 2, 0, 1, 1, 0, 3, 0, 1, 2, 1, 0, 2, 0, 3, 3, 3, 0, 2, 2,\n", | |
| " 2, 1, 1, 3, 1, 1, 2, 3, 1, 2, 1, 1, 3, 3, 2, 1, 2, 0, 3, 0, 1, 0,\n", | |
| " 1, 2, 0, 2, 1, 1, 2, 0, 0, 2, 3, 0, 1, 3, 3, 0, 1, 2, 1, 1, 0, 1,\n", | |
| " 1, 0, 1, 1, 2, 1, 0, 3, 1, 0, 0, 3, 2, 0, 3, 1, 3, 0, 3, 1, 0, 3,\n", | |
| " 0, 2, 1, 2, 2, 1, 2, 0, 2, 3, 2, 0, 1, 0, 1, 2, 3, 1, 3, 0, 0, 1,\n", | |
| " 3, 1, 0, 1, 2, 1, 0, 2, 1, 1, 3, 2, 1, 1, 0, 2, 0, 1, 3, 2, 0, 0,\n", | |
| " 1, 2, 1, 1, 2, 0, 1, 1, 0, 3, 0, 3, 0, 1, 1, 1, 2, 3, 1, 3, 3, 0,\n", | |
| " 1, 1, 1, 1, 1, 2, 1, 1, 0, 2, 2, 1, 0, 2, 2, 1, 2, 1, 1, 3, 3, 2,\n", | |
| " 2, 3, 1, 3, 3, 1, 1, 2, 3, 0, 1, 3, 3, 1, 2, 2, 0, 3, 1, 1, 3, 1,\n", | |
| " 0, 0, 1, 1, 3, 0, 0, 1, 1, 2, 2, 0, 2, 3, 2, 0, 3, 2, 2, 1, 0, 2,\n", | |
| " 3, 0, 0, 0, 1, 3, 1, 2, 0, 2, 2, 2, 1, 2, 1, 2, 1, 2, 3, 0, 0, 0,\n", | |
| " 3, 1, 1, 2, 1, 1, 2, 2, 1, 0, 2, 3, 1, 3, 0, 3, 0, 2, 1, 1, 3, 1,\n", | |
| " 3, 1, 1, 1, 0, 3, 3, 3, 2, 1], dtype=int32)" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "rndwalks = list(nx.generate_random_paths(G, 10000, path_length = 6))\n", | |
| "model = Word2Vec(sentences = rndwalks, vector_size = 32, min_count = 1, workers = 8)\n", | |
| "\n", | |
| "nodemap = [None] * len(G.nodes)\n", | |
| "for k in model.wv.key_to_index:\n", | |
| " nodemap[k] = model.wv.key_to_index[k]\n", | |
| "\n", | |
| "reducer = TSNE(n_components = 2, init = \"pca\")\n", | |
| "embeddings = reducer.fit_transform(model.wv.vectors[nodemap])\n", | |
| "reducer = KMeans(n_clusters = 4)\n", | |
| "clusters = reducer.fit(embeddings).labels_\n", | |
| "\n", | |
| "clusters" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "f00c1779-b5c2-4f40-8ca7-dbf9ed52464c", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "nodes = list(G.nodes)\n", | |
| "\n", | |
| "ground_truth = {}\n", | |
| "with open(\"1/nodes.txt\", 'r') as f:\n", | |
| " for line in f:\n", | |
| " fields = line.strip().split('\\t')\n", | |
| " ground_truth[int(fields[0])] = int(fields[1])\n", | |
| "\n", | |
| "ground_truth = [ground_truth[i] for i in nodes]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "id": "f4cc816f-e8fb-4186-b82e-e20edcb23d2c", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[0, 0, 0, 1, 0, 0, 2, 0, 1, 2]" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "lp = list(nx.algorithms.community.asyn_lpa_communities(G))\n", | |
| "lp = {n: c for c in range(len(lp)) for n in lp[c]}\n", | |
| "lp_array = []\n", | |
| "for n in G.nodes:\n", | |
| " lp_array.append(lp[n])\n", | |
| "\n", | |
| "lp_array[:10]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "id": "36cda6a2-2c92-497a-b237-59b47224fae0", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "0.9439976142363943\n", | |
| "0.8323939993712498\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print(normalized_mutual_info_score(clusters, ground_truth))\n", | |
| "print(normalized_mutual_info_score(lp_array, ground_truth))" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3 (ipykernel)", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.12.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment