Last active
December 25, 2024 05:25
-
-
Save dsignr/c3f7a67fcfb1fb93698a507f4cce8eef to your computer and use it in GitHub Desktop.
A Python script that extracts data from CSV files and converts it into Gephi-compatible GML.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "import sys, time\n", | |
| "import pandas as pd\n", | |
| "import datetime as dt\n", | |
| "from IPython.display import display\n", | |
| "\n", | |
| "import plotly.plotly as py # interactive graphing\n", | |
| "from plotly.graph_objs import Bar, Scatter, Marker, Layout " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "FILE_NAME = \"output.gml\"" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "authors = pd.read_csv('authors.csv', sep=' ')\n", | |
| "occurrence = pd.read_csv('occurrence.csv', sep=' ')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [], | |
| "source": [ | |
| "#Utility functions\n", | |
| "def progress(v):\n", | |
| " v = str(v)\n", | |
| " sys.stdout.flush()\n", | |
| " sys.stdout.write('\\r')\n", | |
| " sys.stdout.flush()\n", | |
| " sys.stdout.write(v)\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>AUTHOR_ID</th>\n", | |
| " <th>CO-AUTHOR_ID</th>\n", | |
| " <th>NO_OF_BOOKS</th>\n", | |
| " <th>AUTHOR</th>\n", | |
| " <th>CO-AUTHOR</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>1</td>\n", | |
| " <td>190</td>\n", | |
| " <td>7</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>TONER, J</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>66</th>\n", | |
| " <td>1</td>\n", | |
| " <td>2281</td>\n", | |
| " <td>1</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>FREY, E</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>93</th>\n", | |
| " <td>1</td>\n", | |
| " <td>3896</td>\n", | |
| " <td>1</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>GINZBURG, VV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>101</th>\n", | |
| " <td>1</td>\n", | |
| " <td>3897</td>\n", | |
| " <td>2</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>CLARK, NA</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>110</th>\n", | |
| " <td>1</td>\n", | |
| " <td>12347</td>\n", | |
| " <td>1</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>JACOBSEN, B</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>113</th>\n", | |
| " <td>1</td>\n", | |
| " <td>12348</td>\n", | |
| " <td>1</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>SAUNDERS, K</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>116</th>\n", | |
| " <td>1</td>\n", | |
| " <td>12700</td>\n", | |
| " <td>1</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>LINK, DR</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>123</th>\n", | |
| " <td>1</td>\n", | |
| " <td>12701</td>\n", | |
| " <td>1</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>NATALE, G</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>130</th>\n", | |
| " <td>1</td>\n", | |
| " <td>12702</td>\n", | |
| " <td>1</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>MACLENNAN, JE</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>137</th>\n", | |
| " <td>1</td>\n", | |
| " <td>12703</td>\n", | |
| " <td>1</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>WALSH, M</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>144</th>\n", | |
| " <td>1</td>\n", | |
| " <td>12704</td>\n", | |
| " <td>1</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>KEAST, SS</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>151</th>\n", | |
| " <td>1</td>\n", | |
| " <td>12705</td>\n", | |
| " <td>1</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>NEUBERT, ME</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>55</th>\n", | |
| " <td>1</td>\n", | |
| " <td>1075</td>\n", | |
| " <td>3</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>MARCHETTI, MC</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>35</th>\n", | |
| " <td>1</td>\n", | |
| " <td>562</td>\n", | |
| " <td>2</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>NELSON, DR</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>1</td>\n", | |
| " <td>201</td>\n", | |
| " <td>3</td>\n", | |
| " <td>RADZIHOVSKY, L</td>\n", | |
| " <td>BALENTS, L</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>159</th>\n", | |
| " <td>2</td>\n", | |
| " <td>1237</td>\n", | |
| " <td>2</td>\n", | |
| " <td>FRISCHAT, SD</td>\n", | |
| " <td>DORON, E</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>158</th>\n", | |
| " <td>2</td>\n", | |
| " <td>3</td>\n", | |
| " <td>1</td>\n", | |
| " <td>FRISCHAT, SD</td>\n", | |
| " <td>KUHN, R</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>160</th>\n", | |
| " <td>3</td>\n", | |
| " <td>2</td>\n", | |
| " <td>1</td>\n", | |
| " <td>KUHN, R</td>\n", | |
| " <td>FRISCHAT, SD</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>311</th>\n", | |
| " <td>4</td>\n", | |
| " <td>10757</td>\n", | |
| " <td>9</td>\n", | |
| " <td>BEENAKKER, CWJ</td>\n", | |
| " <td>PATRA, M</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>215</th>\n", | |
| " <td>4</td>\n", | |
| " <td>891</td>\n", | |
| " <td>2</td>\n", | |
| " <td>BEENAKKER, CWJ</td>\n", | |
| " <td>LEYRONAS, X</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>219</th>\n", | |
| " <td>4</td>\n", | |
| " <td>1785</td>\n", | |
| " <td>1</td>\n", | |
| " <td>BEENAKKER, CWJ</td>\n", | |
| " <td>BUTTIKER, M</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>243</th>\n", | |
| " <td>4</td>\n", | |
| " <td>2212</td>\n", | |
| " <td>5</td>\n", | |
| " <td>BEENAKKER, CWJ</td>\n", | |
| " <td>LANGEN, SAV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>200</th>\n", | |
| " <td>4</td>\n", | |
| " <td>722</td>\n", | |
| " <td>1</td>\n", | |
| " <td>BEENAKKER, CWJ</td>\n", | |
| " <td>FRAHM, K</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>255</th>\n", | |
| " <td>4</td>\n", | |
| " <td>2264</td>\n", | |
| " <td>1</td>\n", | |
| " <td>BEENAKKER, CWJ</td>\n", | |
| " <td>BLANTER, YM</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>196</th>\n", | |
| " <td>4</td>\n", | |
| " <td>7</td>\n", | |
| " <td>4</td>\n", | |
| " <td>BEENAKKER, CWJ</td>\n", | |
| " <td>JONG, MJMD</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>268</th>\n", | |
| " <td>4</td>\n", | |
| " <td>2876</td>\n", | |
| " <td>9</td>\n", | |
| " <td>BEENAKKER, CWJ</td>\n", | |
| " <td>SCHOMERUS, H</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>290</th>\n", | |
| " <td>4</td>\n", | |
| " <td>3232</td>\n", | |
| " <td>5</td>\n", | |
| " <td>BEENAKKER, CWJ</td>\n", | |
| " <td>MISIRPASHAEV, TS</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>306</th>\n", | |
| " <td>4</td>\n", | |
| " <td>8994</td>\n", | |
| " <td>2</td>\n", | |
| " <td>BEENAKKER, CWJ</td>\n", | |
| " <td>TWORZYDLO, J</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>293</th>\n", | |
| " <td>4</td>\n", | |
| " <td>3478</td>\n", | |
| " <td>7</td>\n", | |
| " <td>BEENAKKER, CWJ</td>\n", | |
| " <td>FRAHM, KM</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>162</th>\n", | |
| " <td>4</td>\n", | |
| " <td>5</td>\n", | |
| " <td>5</td>\n", | |
| " <td>BEENAKKER, CWJ</td>\n", | |
| " <td>MELSEN, JA</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>...</th>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " <td>...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>67404</th>\n", | |
| " <td>16718</td>\n", | |
| " <td>8621</td>\n", | |
| " <td>1</td>\n", | |
| " <td>RENZ, F</td>\n", | |
| " <td>JAKOB, G</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>87838</th>\n", | |
| " <td>16718</td>\n", | |
| " <td>16716</td>\n", | |
| " <td>1</td>\n", | |
| " <td>RENZ, F</td>\n", | |
| " <td>TREMEL, W</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>87566</th>\n", | |
| " <td>16719</td>\n", | |
| " <td>13396</td>\n", | |
| " <td>1</td>\n", | |
| " <td>GUETLICH, P</td>\n", | |
| " <td>RITTER, C</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>83218</th>\n", | |
| " <td>16719</td>\n", | |
| " <td>13170</td>\n", | |
| " <td>1</td>\n", | |
| " <td>GUETLICH, P</td>\n", | |
| " <td>WESTERBURG, W</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>87855</th>\n", | |
| " <td>16719</td>\n", | |
| " <td>16718</td>\n", | |
| " <td>1</td>\n", | |
| " <td>GUETLICH, P</td>\n", | |
| " <td>RENZ, F</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>87847</th>\n", | |
| " <td>16719</td>\n", | |
| " <td>16717</td>\n", | |
| " <td>1</td>\n", | |
| " <td>GUETLICH, P</td>\n", | |
| " <td>WALDECK, M</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>67405</th>\n", | |
| " <td>16719</td>\n", | |
| " <td>8621</td>\n", | |
| " <td>1</td>\n", | |
| " <td>GUETLICH, P</td>\n", | |
| " <td>JAKOB, G</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>87839</th>\n", | |
| " <td>16719</td>\n", | |
| " <td>16716</td>\n", | |
| " <td>1</td>\n", | |
| " <td>GUETLICH, P</td>\n", | |
| " <td>TREMEL, W</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>87831</th>\n", | |
| " <td>16719</td>\n", | |
| " <td>16715</td>\n", | |
| " <td>1</td>\n", | |
| " <td>GUETLICH, P</td>\n", | |
| " <td>FELSER, C</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>87823</th>\n", | |
| " <td>16719</td>\n", | |
| " <td>16714</td>\n", | |
| " <td>1</td>\n", | |
| " <td>GUETLICH, P</td>\n", | |
| " <td>LANG, O</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>80930</th>\n", | |
| " <td>16720</td>\n", | |
| " <td>11553</td>\n", | |
| " <td>1</td>\n", | |
| " <td>HAEUSSLER, R</td>\n", | |
| " <td>LOEHNEYSEN, HV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>51427</th>\n", | |
| " <td>16720</td>\n", | |
| " <td>15834</td>\n", | |
| " <td>1</td>\n", | |
| " <td>HAEUSSLER, R</td>\n", | |
| " <td>SCHEER, E</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>91849</th>\n", | |
| " <td>16720</td>\n", | |
| " <td>16721</td>\n", | |
| " <td>1</td>\n", | |
| " <td>HAEUSSLER, R</td>\n", | |
| " <td>WEBER, HB</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>51428</th>\n", | |
| " <td>16721</td>\n", | |
| " <td>15834</td>\n", | |
| " <td>1</td>\n", | |
| " <td>WEBER, HB</td>\n", | |
| " <td>SCHEER, E</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>80931</th>\n", | |
| " <td>16721</td>\n", | |
| " <td>11553</td>\n", | |
| " <td>1</td>\n", | |
| " <td>WEBER, HB</td>\n", | |
| " <td>LOEHNEYSEN, HV</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>91846</th>\n", | |
| " <td>16721</td>\n", | |
| " <td>16720</td>\n", | |
| " <td>1</td>\n", | |
| " <td>WEBER, HB</td>\n", | |
| " <td>HAEUSSLER, R</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>74577</th>\n", | |
| " <td>16723</td>\n", | |
| " <td>14983</td>\n", | |
| " <td>1</td>\n", | |
| " <td>LEUNG, MA</td>\n", | |
| " <td>CARR, LD</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>68050</th>\n", | |
| " <td>16723</td>\n", | |
| " <td>4256</td>\n", | |
| " <td>1</td>\n", | |
| " <td>LEUNG, MA</td>\n", | |
| " <td>REINHARDT, WP</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>41242</th>\n", | |
| " <td>16724</td>\n", | |
| " <td>8897</td>\n", | |
| " <td>1</td>\n", | |
| " <td>CORNISH, SL</td>\n", | |
| " <td>WIEMAN, CE</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>80425</th>\n", | |
| " <td>16724</td>\n", | |
| " <td>16725</td>\n", | |
| " <td>1</td>\n", | |
| " <td>CORNISH, SL</td>\n", | |
| " <td>CLAUSSEN, NR</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>80429</th>\n", | |
| " <td>16724</td>\n", | |
| " <td>16726</td>\n", | |
| " <td>1</td>\n", | |
| " <td>CORNISH, SL</td>\n", | |
| " <td>ROBERTS, JL</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>41176</th>\n", | |
| " <td>16724</td>\n", | |
| " <td>5350</td>\n", | |
| " <td>1</td>\n", | |
| " <td>CORNISH, SL</td>\n", | |
| " <td>CORNELL, EA</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>41177</th>\n", | |
| " <td>16725</td>\n", | |
| " <td>5350</td>\n", | |
| " <td>1</td>\n", | |
| " <td>CLAUSSEN, NR</td>\n", | |
| " <td>CORNELL, EA</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>80430</th>\n", | |
| " <td>16725</td>\n", | |
| " <td>16726</td>\n", | |
| " <td>1</td>\n", | |
| " <td>CLAUSSEN, NR</td>\n", | |
| " <td>ROBERTS, JL</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>41243</th>\n", | |
| " <td>16725</td>\n", | |
| " <td>8897</td>\n", | |
| " <td>1</td>\n", | |
| " <td>CLAUSSEN, NR</td>\n", | |
| " <td>WIEMAN, CE</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>80421</th>\n", | |
| " <td>16725</td>\n", | |
| " <td>16724</td>\n", | |
| " <td>1</td>\n", | |
| " <td>CLAUSSEN, NR</td>\n", | |
| " <td>CORNISH, SL</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>41244</th>\n", | |
| " <td>16726</td>\n", | |
| " <td>8897</td>\n", | |
| " <td>1</td>\n", | |
| " <td>ROBERTS, JL</td>\n", | |
| " <td>WIEMAN, CE</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>80426</th>\n", | |
| " <td>16726</td>\n", | |
| " <td>16725</td>\n", | |
| " <td>1</td>\n", | |
| " <td>ROBERTS, JL</td>\n", | |
| " <td>CLAUSSEN, NR</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>80422</th>\n", | |
| " <td>16726</td>\n", | |
| " <td>16724</td>\n", | |
| " <td>1</td>\n", | |
| " <td>ROBERTS, JL</td>\n", | |
| " <td>CORNISH, SL</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>41178</th>\n", | |
| " <td>16726</td>\n", | |
| " <td>5350</td>\n", | |
| " <td>1</td>\n", | |
| " <td>ROBERTS, JL</td>\n", | |
| " <td>CORNELL, EA</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "<p>95188 rows × 5 columns</p>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " AUTHOR_ID CO-AUTHOR_ID NO_OF_BOOKS AUTHOR CO-AUTHOR\n", | |
| "0 1 190 7 RADZIHOVSKY, L TONER, J\n", | |
| "66 1 2281 1 RADZIHOVSKY, L FREY, E\n", | |
| "93 1 3896 1 RADZIHOVSKY, L GINZBURG, VV\n", | |
| "101 1 3897 2 RADZIHOVSKY, L CLARK, NA\n", | |
| "110 1 12347 1 RADZIHOVSKY, L JACOBSEN, B\n", | |
| "113 1 12348 1 RADZIHOVSKY, L SAUNDERS, K\n", | |
| "116 1 12700 1 RADZIHOVSKY, L LINK, DR\n", | |
| "123 1 12701 1 RADZIHOVSKY, L NATALE, G\n", | |
| "130 1 12702 1 RADZIHOVSKY, L MACLENNAN, JE\n", | |
| "137 1 12703 1 RADZIHOVSKY, L WALSH, M\n", | |
| "144 1 12704 1 RADZIHOVSKY, L KEAST, SS\n", | |
| "151 1 12705 1 RADZIHOVSKY, L NEUBERT, ME\n", | |
| "55 1 1075 3 RADZIHOVSKY, L MARCHETTI, MC\n", | |
| "35 1 562 2 RADZIHOVSKY, L NELSON, DR\n", | |
| "11 1 201 3 RADZIHOVSKY, L BALENTS, L\n", | |
| "159 2 1237 2 FRISCHAT, SD DORON, E\n", | |
| "158 2 3 1 FRISCHAT, SD KUHN, R\n", | |
| "160 3 2 1 KUHN, R FRISCHAT, SD\n", | |
| "311 4 10757 9 BEENAKKER, CWJ PATRA, M\n", | |
| "215 4 891 2 BEENAKKER, CWJ LEYRONAS, X\n", | |
| "219 4 1785 1 BEENAKKER, CWJ BUTTIKER, M\n", | |
| "243 4 2212 5 BEENAKKER, CWJ LANGEN, SAV\n", | |
| "200 4 722 1 BEENAKKER, CWJ FRAHM, K\n", | |
| "255 4 2264 1 BEENAKKER, CWJ BLANTER, YM\n", | |
| "196 4 7 4 BEENAKKER, CWJ JONG, MJMD\n", | |
| "268 4 2876 9 BEENAKKER, CWJ SCHOMERUS, H\n", | |
| "290 4 3232 5 BEENAKKER, CWJ MISIRPASHAEV, TS\n", | |
| "306 4 8994 2 BEENAKKER, CWJ TWORZYDLO, J\n", | |
| "293 4 3478 7 BEENAKKER, CWJ FRAHM, KM\n", | |
| "162 4 5 5 BEENAKKER, CWJ MELSEN, JA\n", | |
| "... ... ... ... ... ...\n", | |
| "67404 16718 8621 1 RENZ, F JAKOB, G\n", | |
| "87838 16718 16716 1 RENZ, F TREMEL, W\n", | |
| "87566 16719 13396 1 GUETLICH, P RITTER, C\n", | |
| "83218 16719 13170 1 GUETLICH, P WESTERBURG, W\n", | |
| "87855 16719 16718 1 GUETLICH, P RENZ, F\n", | |
| "87847 16719 16717 1 GUETLICH, P WALDECK, M\n", | |
| "67405 16719 8621 1 GUETLICH, P JAKOB, G\n", | |
| "87839 16719 16716 1 GUETLICH, P TREMEL, W\n", | |
| "87831 16719 16715 1 GUETLICH, P FELSER, C\n", | |
| "87823 16719 16714 1 GUETLICH, P LANG, O\n", | |
| "80930 16720 11553 1 HAEUSSLER, R LOEHNEYSEN, HV\n", | |
| "51427 16720 15834 1 HAEUSSLER, R SCHEER, E\n", | |
| "91849 16720 16721 1 HAEUSSLER, R WEBER, HB\n", | |
| "51428 16721 15834 1 WEBER, HB SCHEER, E\n", | |
| "80931 16721 11553 1 WEBER, HB LOEHNEYSEN, HV\n", | |
| "91846 16721 16720 1 WEBER, HB HAEUSSLER, R\n", | |
| "74577 16723 14983 1 LEUNG, MA CARR, LD\n", | |
| "68050 16723 4256 1 LEUNG, MA REINHARDT, WP\n", | |
| "41242 16724 8897 1 CORNISH, SL WIEMAN, CE\n", | |
| "80425 16724 16725 1 CORNISH, SL CLAUSSEN, NR\n", | |
| "80429 16724 16726 1 CORNISH, SL ROBERTS, JL\n", | |
| "41176 16724 5350 1 CORNISH, SL CORNELL, EA\n", | |
| "41177 16725 5350 1 CLAUSSEN, NR CORNELL, EA\n", | |
| "80430 16725 16726 1 CLAUSSEN, NR ROBERTS, JL\n", | |
| "41243 16725 8897 1 CLAUSSEN, NR WIEMAN, CE\n", | |
| "80421 16725 16724 1 CLAUSSEN, NR CORNISH, SL\n", | |
| "41244 16726 8897 1 ROBERTS, JL WIEMAN, CE\n", | |
| "80426 16726 16725 1 ROBERTS, JL CLAUSSEN, NR\n", | |
| "80422 16726 16724 1 ROBERTS, JL CORNISH, SL\n", | |
| "41178 16726 5350 1 ROBERTS, JL CORNELL, EA\n", | |
| "\n", | |
| "[95188 rows x 5 columns]" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "a = authors.assign(CO_AUTHOR_ID=authors['AUTHOR_ID']).assign(CO_AUTHOR_NAME=authors['AUTHOR_NAME'])\n", | |
| "b = occurrence.merge(authors, how='inner', on='AUTHOR_ID')\n", | |
| "#.merge(a, how='inner', on='AUTHOR_ID')\n", | |
| "df = b.merge(a, how='inner', on='CO_AUTHOR_ID') \\\n", | |
| " .sort_values(by='AUTHOR_ID_x') \\\n", | |
| " .drop('AUTHOR_ID_y', axis=1) \\\n", | |
| " .drop('AUTHOR_NAME_y', axis=1)\n", | |
| "df.columns = ['AUTHOR_ID', 'CO-AUTHOR_ID', 'NO_OF_BOOKS', 'AUTHOR', 'CO-AUTHOR']\n", | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": { | |
| "collapsed": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "95188\n", | |
| "Printing nodes over\n", | |
| "95188\n", | |
| "Printing nodes and edges over\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "f = open(FILE_NAME, \"w\")\n", | |
| "#helpers\n", | |
| "s = \" \"\n", | |
| "ss = s+s\n", | |
| "sss = s+s+s\n", | |
| "ssss = s+s+s+s\n", | |
| "nl = \"\\n\"\n", | |
| "\n", | |
| "#loop helpers\n", | |
| "added = []\n", | |
| "ind = 0\n", | |
| "\n", | |
| "#Root node\n", | |
| "f.write(\"graph\"+nl)\n", | |
| "f.write(\"[\"+nl)\n", | |
| "\n", | |
| "#Write an edge\n", | |
| "def write_edge(r):\n", | |
| " f.write( ss + \"edge\" + nl)\n", | |
| " f.write( ss + \"[\" + nl)\n", | |
| " f.write( ssss + \"source\" + s + '\"' + str(r['AUTHOR_ID']) + '\"' + nl)\n", | |
| " f.write( ssss + \"target\" + s + '\"' + str(r['CO-AUTHOR_ID']) + '\"' + nl)\n", | |
| " f.write( ssss + \"value\" + s + str(r['NO_OF_BOOKS']) + nl)\n", | |
| " f.write( ss + \"]\"+ nl)\n", | |
| "\n", | |
| "#Write a node\n", | |
| "def write_node(r):\n", | |
| " f.write( ss + \"node\" + nl)\n", | |
| " f.write( ss + \"[\" + nl)\n", | |
| " f.write( ssss + \"id\" + s + '\"' + str(r['AUTHOR_ID']) + '\"' + nl)\n", | |
| " f.write( ssss + \"label\" + s + '\"' + str(r['AUTHOR']) + '\"' + nl)\n", | |
| " f.write( ss + \"]\"+ nl)\n", | |
| "\n", | |
| "#Generate nodes\n", | |
| "for i, r in df.iterrows():\n", | |
| " #increment, as index not reliable\n", | |
| " ind += 1\n", | |
| " #Check for duplicates\n", | |
| " if (r['AUTHOR_ID'] not in added):\n", | |
| " #Add to list\n", | |
| " added.append(r['AUTHOR_ID'])\n", | |
| " write_node(r)\n", | |
| " #print the progress \n", | |
| " progress(ind)\n", | |
| "\n", | |
| "print(nl+\"Printing nodes over\")\n", | |
| "\n", | |
| "#flush index\n", | |
| "ind = 0 \n", | |
| "#Generate edges \n", | |
| "for i, r in df.iterrows():\n", | |
| " #increment, as index not reliable\n", | |
| " ind += 1\n", | |
| " if(r['AUTHOR_ID'] < r['CO-AUTHOR_ID']):\n", | |
| " write_edge(r)\n", | |
| " #print the progress \n", | |
| " progress(ind)\n", | |
| "\n", | |
| "print(nl+\"Printing nodes and edges over\")\n", | |
| "\n", | |
| "#closing node\n", | |
| "f.write(\"]\"+nl)\n", | |
| "f.close()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": { | |
| "collapsed": true | |
| }, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.5.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 0 | |
| } |
Can I have the CSV dataset? I would like to see the structure of the CSV files.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
xml2csv
import sys, time
import pandas as pd
import datetime as dt
from IPython.display import display
import csv
import xml.etree.ElementTree as ET
def xml_to_csv(file_path,csv_name) -> None:
tree = ET.parse(file_path)
root = tree.getroot()
if name = 'main':