Created
February 22, 2020 18:58
-
-
Save srinivas946/fa24dfafa08281604ff4515081a59515 to your computer and use it in GitHub Desktop.
Read the Information from a CSV file and perform data visualization
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import csv # load csv module to handle csv data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<b>create logic for pivot table</b>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<p>Download sample Information file - <a href=\"https://drive.google.com/open?id=1EinQJ7Z3WAKWqZvUsEZJXg_HlJ_On0J4\">link</a></p>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "{'Source IP': '129.144.60.201', 'Source Port': '45121', 'Destination IP': '10.0.0.1', 'Destination Port': '80', 'Count': '1'}\n", | |
| "{'Source IP': '60.248.89.69', 'Source Port': '25452', 'Destination IP': '10.0.2.1', 'Destination Port': '443', 'Count': '2'}\n", | |
| "{'Source IP': '122.170.12.200', 'Source Port': '245', 'Destination IP': '192.168.3.2', 'Destination Port': '443', 'Count': '1'}\n", | |
| "{'Source IP': '179.181.108.72', 'Source Port': '25455', 'Destination IP': '172.15.54.35', 'Destination Port': '443', 'Count': '1'}\n", | |
| "{'Source IP': '87.103.245.190', 'Source Port': '14785', 'Destination IP': '10.0.0.5', 'Destination Port': '80', 'Count': '1'}\n", | |
| "{'Source IP': '196.52.43.95', 'Source Port': '2543', 'Destination IP': '10.0.0.1', 'Destination Port': '80', 'Count': '2'}\n", | |
| "{'Source IP': '60.248.89.69', 'Source Port': '1159', 'Destination IP': '10.0.2.1', 'Destination Port': '80', 'Count': '2'}\n", | |
| "{'Source IP': '129.144.60.201', 'Source Port': '3645', 'Destination IP': '192.168.2.4', 'Destination Port': '80', 'Count': '1'}\n", | |
| "{'Source IP': '179.181.108.72', 'Source Port': '25', 'Destination IP': '172.18.25.36', 'Destination Port': '80', 'Count': '2'}\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Print every record; DictReader keys each row by the CSV header line.\n", | |
| "# newline='' leaves line-ending handling to the csv module.\n", | |
| "with open('pivot_table.csv', 'r', newline='') as csvfile:\n", | |
| "    for row in csv.DictReader(csvfile):\n", | |
| "        print(row)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "{'179.181.108.72': 2,\n", | |
| " '60.248.89.69': 2,\n", | |
| " '87.103.245.190': 1,\n", | |
| " '122.170.12.200': 1,\n", | |
| " '196.52.43.95': 1,\n", | |
| " '129.144.60.201': 2}" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Tally how many rows each source IP appears in, in a single pass over the file.\n", | |
| "# Keys are inserted in first-seen order, so the displayed dict is the same on every run.\n", | |
| "pivot_data = {}\n", | |
| "with open('pivot_table.csv', 'r', newline='') as csvfile:\n", | |
| "    for row in csv.DictReader(csvfile):\n", | |
| "        source_ip = row['Source IP']\n", | |
| "        pivot_data[source_ip] = pivot_data.get(source_ip, 0) + 1\n", | |
| "pivot_data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<b>visualize the obtained data</b>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import matplotlib.pyplot as plt # load matplotlib module to handle charts" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<li>Matplotlib is a Python Package used for data visualization</li>\n", | |
| "<li>Official documentation of matplotlib - <a href=\"https://matplotlib.org/\">link</a></li>\n", | |
| "<li>An easy way to learn data visualization: see my tutorials - <a href=\"https://github.com/srinivas946/Matplotlib_Tuts\">link</a></li>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "x = list(pivot_data.keys())\n", | |
| "y = list(pivot_data.values())\n", | |
| "plt.bar(x, y, color=['skyblue', 'orange', 'green', 'yellow', 'pink'], alpha=0.5, width=0.5)\n", | |
| "plt.xlabel('IPAddress')\n", | |
| "plt.xticks(rotation=90)\n", | |
| "plt.ylabel('Count')\n", | |
| "plt.title('IP Address Statistics', pad=25)\n", | |
| "ax = plt.gca()\n", | |
| "for side in ('right', 'top'):  # hide the right and top spines\n", | |
| "    ax.spines[side].set_visible(False)\n", | |
| "# write each count just above its bar; bar i sits at x position i\n", | |
| "for position, count in enumerate(y):\n", | |
| "    ax.text(position, count + 0.1, str(count), ha='center')\n", | |
| "plt.show()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<b>Real Time Process Script</b>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "import csv\n", | |
| "import matplotlib.pyplot as plt\n", | |
| "\n", | |
| "# =========================================\n", | |
| "# CREATE PIVOT TABLE LOGIC AND VISUALIZE\n", | |
| "# =========================================\n", | |
| "class Pivot_Visualize:\n", | |
| "    \"\"\"Count how often each value of a CSV column occurs and plot the counts as a bar chart.\"\"\"\n", | |
| "\n", | |
| "    # ===================================\n", | |
| "    # READ FILE PATH TO LOAD CSV DATA\n", | |
| "    # ===================================\n", | |
| "    def __init__(self, read_file_path):\n", | |
| "        \"\"\"Remember the path of the CSV file that read_csv_file() opens.\"\"\"\n", | |
| "        self._read_file_path = read_file_path\n", | |
| "\n", | |
| "    # =================================\n", | |
| "    # READ CSV FILE TO EXTRACT DATA\n", | |
| "    # =================================\n", | |
| "    def read_csv_file(self, column='Source IP'):\n", | |
| "        \"\"\"Return a dict mapping each distinct value of `column` to its number of rows.\n", | |
| "\n", | |
| "        The file given to the constructor is read once; keys appear in first-seen order.\n", | |
| "        A file with no data rows yields an empty dict.\n", | |
| "\n", | |
| "        Raises:\n", | |
| "            OSError: if the file cannot be opened.\n", | |
| "            KeyError: if `column` is not one of the CSV header fields.\n", | |
| "        \"\"\"\n", | |
| "        pivot_data = {}\n", | |
| "        # newline='' leaves line-ending handling to the csv module\n", | |
| "        with open(self._read_file_path, 'r', newline='') as csvfile:\n", | |
| "            for row in csv.DictReader(csvfile):\n", | |
| "                value = row[column]\n", | |
| "                pivot_data[value] = pivot_data.get(value, 0) + 1\n", | |
| "        return pivot_data\n", | |
| "\n", | |
| "    # ======================================\n", | |
| "    # CREATE BAR CHART FROM THE PIVOT DATA\n", | |
| "    # ======================================\n", | |
| "    def create_visualization(self, pivot_data):\n", | |
| "        \"\"\"Draw and show a bar chart with one bar per key of `pivot_data`, labelled with its count.\"\"\"\n", | |
| "        x = list(pivot_data.keys())\n", | |
| "        y = list(pivot_data.values())\n", | |
| "        plt.bar(x, y, color=['skyblue', 'orange', 'green', 'yellow', 'pink'], alpha=0.5, width=0.5)\n", | |
| "        plt.xlabel('IPAddress')\n", | |
| "        plt.xticks(rotation=90)\n", | |
| "        plt.ylabel('Count')\n", | |
| "        plt.title('IP Address Statistics', pad=25)\n", | |
| "        ax = plt.gca()\n", | |
| "        for side in ('right', 'top'):  # hide the right and top spines\n", | |
| "            ax.spines[side].set_visible(False)\n", | |
| "        # write each count just above its bar; bar i sits at x position i\n", | |
| "        for position, count in enumerate(y):\n", | |
| "            ax.text(position, count + 0.1, str(count), ha='center')\n", | |
| "        plt.show()\n", | |
| " \n", | |
| "# =====================================\n", | |
| "# ENTRY POINT: LOAD, COUNT, THEN PLOT\n", | |
| "# =====================================\n", | |
| "pv = Pivot_Visualize('pivot_table.csv')\n", | |
| "pivot_data = pv.read_csv_file()\n", | |
| "pv.create_visualization(pivot_data=pivot_data)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<b>Learn more real-time scenarios for working with CSV files - see <a href=\"https://cybersecpy.in/handle-csv-files-using-python/\">cybersecpy.in</a></b>" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.8.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment