Skip to content

Instantly share code, notes, and snippets.

@mikk-c
Created August 5, 2024 12:08
Show Gist options
  • Select an option

  • Save mikk-c/533380efd8ca05149f3bebf6e5c5f6f0 to your computer and use it in GitHub Desktop.

Select an option

Save mikk-c/533380efd8ca05149f3bebf6e5c5f6f0 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "0351f420-48f4-4b80-9187-eb4b8bc149b4",
"metadata": {},
"outputs": [],
"source": [
"import networkx as nx\n",
"from gensim.models import Word2Vec\n",
"from sklearn.manifold import TSNE\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.metrics import normalized_mutual_info_score"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "16f3caf7-48bc-406f-8833-17a24debbbee",
"metadata": {},
"outputs": [],
"source": [
"# Parse the tab-separated edge list; node ids are read as integers.\n",
"H = nx.read_edgelist(\n",
"    \"1/data.txt\",\n",
"    delimiter = \"\\t\",\n",
"    nodetype = int,\n",
"    create_using = nx.Graph(),\n",
")\n",
"\n",
"# Rebuild the graph with the nodes inserted in ascending id order first, so that\n",
"# iterating over G (and G.nodes) visits the nodes sorted by id.\n",
"G = nx.Graph()\n",
"G.add_nodes_from(sorted(H))\n",
"G.add_edges_from(H.edges())"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "715e1fed-4e9f-468e-97bd-97761f9a772a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 12.677454 , -21.878696 ],\n",
" [ 6.382581 , -7.3105035],\n",
" [ 6.96561 , -14.762308 ],\n",
" ...,\n",
" [ 38.403862 , 22.484592 ],\n",
" [ 21.083256 , -28.476921 ],\n",
" [-36.36369 , -10.843703 ]], dtype=float32)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fixed seed so the sampled walks and the 2-D projection can be reproduced.\n",
"SEED = 42\n",
"\n",
"# Sample 10,000 random walks of 6 steps each; every walk is one \"sentence\" of node ids.\n",
"rndwalks = list(nx.generate_random_paths(G, 10000, path_length = 6, seed = SEED))\n",
"\n",
"# NOTE(review): gensim documents that training is only fully deterministic with\n",
"# workers = 1; with 8 workers the vectors may still vary slightly between runs.\n",
"model = Word2Vec(sentences = rndwalks, vector_size = 32, min_count = 1, workers = 8)\n",
"\n",
"# Walk start nodes are drawn at random, so a node may never show up in any walk\n",
"# and would then have no vector. Fail loudly instead of indexing with None.\n",
"missing = [node for node in G.nodes if node not in model.wv.key_to_index]\n",
"if missing:\n",
"    raise ValueError(f\"{len(missing)} node(s) never appeared in a random walk; sample more walks\")\n",
"\n",
"# nodemap[i] is the row of model.wv.vectors holding the i-th node of G.nodes.\n",
"# Looking nodes up by id (rather than using the id as a list position) keeps this\n",
"# correct even when node ids are not exactly 0..n-1.\n",
"nodemap = [model.wv.key_to_index[node] for node in G.nodes]\n",
"\n",
"# Project the 32-dimensional node vectors to 2-D, rows ordered like G.nodes.\n",
"reducer = TSNE(n_components = 2, init = \"pca\", random_state = SEED)\n",
"embeddings = reducer.fit_transform(model.wv.vectors[nodemap])\n",
"\n",
"embeddings"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9b127f46-9660-44ee-a3de-9a3fb6077c92",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([2, 2, 2, 0, 1, 2, 3, 2, 0, 3, 2, 3, 1, 1, 2, 2, 3, 2, 2, 1, 1, 1,\n",
" 3, 1, 1, 2, 0, 1, 2, 1, 2, 3, 2, 2, 3, 2, 2, 3, 0, 2, 1, 3, 3, 1,\n",
" 0, 3, 3, 1, 2, 3, 0, 3, 0, 2, 3, 3, 0, 0, 3, 3, 0, 1, 0, 0, 2, 1,\n",
" 1, 3, 0, 2, 0, 1, 0, 3, 1, 2, 2, 2, 0, 3, 3, 0, 2, 0, 3, 3, 1, 1,\n",
" 3, 3, 3, 1, 2, 0, 3, 3, 2, 0, 2, 1, 2, 1, 0, 3, 2, 1, 1, 1, 3, 0,\n",
" 2, 1, 1, 0, 2, 3, 0, 0, 0, 3, 2, 3, 2, 0, 2, 3, 3, 0, 0, 0, 2, 2,\n",
" 1, 0, 0, 1, 1, 1, 1, 2, 1, 2, 0, 2, 2, 0, 1, 1, 3, 0, 3, 2, 3, 2,\n",
" 0, 1, 1, 3, 0, 1, 2, 3, 3, 1, 3, 2, 2, 1, 3, 3, 2, 3, 3, 2, 3, 2,\n",
" 2, 3, 1, 2, 3, 2, 1, 3, 3, 2, 2, 2, 3, 2, 3, 1, 3, 0, 0, 3, 3, 0,\n",
" 2, 1, 3, 3, 3, 2, 0, 1, 2, 2, 0, 3, 2, 1, 1, 2, 2, 1, 3, 3, 2, 3,\n",
" 2, 1, 3, 2, 1, 1, 3, 0, 1, 0, 0, 1, 2, 1, 2, 1, 1, 3, 0, 1, 3, 1,\n",
" 1, 2, 3, 3, 1, 2, 2, 2, 0, 2, 0, 3, 0, 0, 0, 0, 0, 3, 0, 2, 1, 1,\n",
" 3, 3, 1, 1, 3, 0, 0, 3, 0, 1, 1, 1, 2, 1, 1, 1, 2, 3, 2, 1, 1, 3,\n",
" 3, 3, 2, 3, 1, 0, 0, 1, 0, 3, 2, 0, 3, 3, 1, 2, 3, 3, 1, 2, 2, 0,\n",
" 3, 0, 1, 0, 3, 3, 0, 0, 1, 3, 1, 0, 3, 2, 2, 2, 1, 3, 1, 0, 0, 2,\n",
" 1, 1, 2, 1, 3, 0, 1, 2, 1, 0, 1, 1, 0, 1, 2, 1, 1, 1, 0, 3, 2, 0,\n",
" 3, 2, 3, 2, 1, 1, 2, 1, 0, 2, 3, 0, 1, 3, 0, 2, 1, 2, 2, 2, 0, 2,\n",
" 1, 1, 3, 3, 0, 3, 2, 3, 3, 1, 2, 3, 2, 2, 2, 1, 2, 3, 0, 2, 3, 2,\n",
" 1, 0, 1, 0, 1, 3, 1, 1, 3, 0, 2, 0, 0, 2, 0, 2, 1, 0, 0, 1, 3, 3,\n",
" 0, 1, 1, 3, 0, 1, 0, 2, 0, 0, 3, 1, 0, 2, 3, 3, 1, 3, 0, 0, 1, 3,\n",
" 0, 0, 2, 0, 1, 1, 2, 3, 3, 2, 2, 0, 2, 1, 3, 3, 0, 3, 1, 2, 3, 3,\n",
" 0, 1, 0, 2, 0, 0, 3, 1, 3, 2, 1, 1, 1, 0, 1, 2, 2, 0, 0, 0, 3, 0,\n",
" 1, 1, 2, 0, 2, 0, 1, 3, 1, 3, 1, 3, 0, 0, 0, 3, 0, 1, 2, 1, 0, 3,\n",
" 2, 3, 2, 0, 1, 2, 1, 3, 3, 2, 2, 0, 0, 3, 2, 2, 1, 2, 1, 3, 2, 0,\n",
" 1, 3, 1, 1, 0, 2, 2, 1, 2, 1, 3, 0, 1, 2, 3, 1, 0, 3, 0, 3, 3, 2,\n",
" 3, 0, 0, 1, 3, 3, 0, 2, 0, 1, 0, 0, 3, 1, 2, 1, 3, 0, 1, 1, 2, 2,\n",
" 2, 1, 0, 2, 0, 3, 2, 1, 0, 3, 1, 1, 2, 3, 0, 0, 0, 3, 0, 3, 0, 2,\n",
" 3, 2, 2, 2, 1, 3, 1, 3, 0, 3, 1, 3, 2, 2, 0, 0, 3, 1, 1, 1, 1, 2,\n",
" 0, 2, 3, 0, 1, 1, 1, 0, 2, 1, 2, 2, 2, 0, 3, 0, 2, 1, 1, 3, 0, 0,\n",
" 3, 3, 1, 3, 1, 1, 1, 3, 3, 2, 1, 0, 0, 1, 3, 0, 1, 3, 0, 2, 3, 1,\n",
" 2, 0, 1, 3, 0, 0, 0, 1, 2, 3, 0, 1, 1, 1, 2, 2, 1, 2, 1, 3, 0, 3,\n",
" 1, 2, 0, 0, 0, 3, 2, 2, 3, 0, 1, 1, 2, 1, 1, 0, 0, 1, 1, 2, 0, 0,\n",
" 2, 2, 2, 3, 2, 0, 2, 1, 2, 1, 3, 1, 2, 2, 1, 2, 1, 1, 1, 3, 3, 2,\n",
" 1, 2, 0, 2, 3, 1, 1, 3, 0, 3, 1, 2, 1, 3, 2, 3, 0, 0, 0, 3, 2, 2,\n",
" 2, 1, 1, 0, 1, 1, 2, 0, 1, 2, 1, 1, 0, 0, 2, 1, 2, 3, 0, 3, 1, 3,\n",
" 1, 2, 3, 2, 1, 1, 2, 3, 3, 2, 0, 3, 1, 0, 0, 3, 1, 2, 1, 1, 3, 1,\n",
" 1, 3, 1, 1, 2, 1, 3, 0, 1, 3, 3, 0, 2, 3, 0, 1, 0, 3, 0, 1, 3, 0,\n",
" 3, 2, 1, 2, 2, 1, 2, 3, 2, 0, 2, 3, 1, 3, 1, 2, 0, 1, 0, 3, 3, 1,\n",
" 0, 1, 3, 1, 2, 1, 3, 2, 1, 1, 0, 2, 1, 1, 3, 2, 3, 1, 0, 2, 3, 3,\n",
" 1, 2, 1, 1, 2, 3, 1, 1, 3, 0, 3, 0, 3, 1, 1, 1, 2, 0, 1, 0, 0, 3,\n",
" 1, 1, 1, 1, 1, 2, 1, 1, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0, 0, 2,\n",
" 2, 0, 1, 0, 0, 1, 1, 2, 0, 3, 1, 0, 0, 1, 2, 2, 3, 0, 1, 1, 0, 1,\n",
" 3, 3, 1, 1, 0, 3, 3, 1, 1, 2, 2, 3, 2, 0, 2, 3, 0, 2, 2, 1, 3, 2,\n",
" 0, 3, 3, 3, 1, 0, 1, 2, 3, 2, 2, 2, 1, 2, 1, 2, 1, 2, 0, 3, 3, 3,\n",
" 0, 1, 1, 2, 1, 1, 2, 2, 1, 3, 2, 0, 1, 0, 3, 0, 3, 2, 1, 1, 0, 1,\n",
" 0, 1, 1, 1, 3, 0, 0, 0, 2, 1], dtype=int32)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Cluster the 2-D embeddings into 4 groups. A dedicated name is used so the\n",
"# t-SNE `reducer` object is not overwritten by a clusterer, and the seed is\n",
"# fixed so the cluster assignment is reproducible across runs.\n",
"kmeans = KMeans(n_clusters = 4, random_state = 42)\n",
"\n",
"# clusters[i] is the cluster label assigned to row i of `embeddings`.\n",
"clusters = kmeans.fit_predict(embeddings)\n",
"\n",
"clusters"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f40c6d77-cb8c-4473-ad77-bc8103fe4622",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9352377213494278"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"nodes = list(G.nodes)\n",
"\n",
"# Read the tab-separated ground-truth file: first field is the node id,\n",
"# second field is its community label (both parsed as integers).\n",
"labels_by_node = {}\n",
"with open(\"1/nodes.txt\", 'r') as f:\n",
"    for line in f:\n",
"        if not line.strip():\n",
"            continue  # tolerate blank / trailing empty lines\n",
"        fields = line.strip().split('\\t')\n",
"        labels_by_node[int(fields[0])] = int(fields[1])\n",
"\n",
"# Order the labels like G.nodes; this is intended to match the row order of `clusters`.\n",
"ground_truth = [labels_by_node[i] for i in nodes]\n",
"\n",
"normalized_mutual_info_score(clusters, ground_truth)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment