Created
February 23, 2020 19:53
-
-
Save srinivas946/be50df07fb9960661f975cae8f0c9653 to your computer and use it in GitHub Desktop.
Read a CSV file which contains Logs Information, using this data make a pivot table and data visualization, the output has to be stored in Power Point as a table and Image respectively.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import csv # load csv module to handle csv data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<b>create logic for pivot table</b>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<p>Download sample Information file - <a href=\"https://drive.google.com/open?id=1EinQJ7Z3WAKWqZvUsEZJXg_HlJ_On0J4\">link</a></p>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "with open('pivot_table.csv', 'r') as csvfile:\n", | |
| " csv_reader = csv.DictReader(csvfile)\n", | |
| " for row in csv_reader:\n", | |
| " print(row)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "source_set, data_list = set(), []\n", | |
| "with open('pivot_table.csv', 'r') as csvfile:\n", | |
| " csv_reader = csv.DictReader(csvfile)\n", | |
| " for row in csv_reader:\n", | |
| " source_set.add(row['Source IP'])\n", | |
| " data_list.append(row['Source IP'])\n", | |
| "pivot_data = {}\n", | |
| "for ip in source_set:\n", | |
| " repeat_count = 0\n", | |
| " for data in data_list:\n", | |
| " if ip == data: repeat_count += 1\n", | |
| " pivot_data[ip] = repeat_count\n", | |
| "pivot_data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<b>visualize the obtained data</b>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import matplotlib.pyplot as plt # load matplotlib module to handle charts" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<li>Matplotlib is a Python Package used for data visualization</li>\n", | |
| "<li>Official documentation of matplotlib - <a href=\"https://matplotlib.org/\">link</a></li>\n", | |
| "<li>Easy way to learn data visualization - Refer my Tutorials <a href=\"https://github.com/srinivas946/Matplotlib_Tuts\">link</a></li>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "x = list(pivot_data.keys())\n", | |
| "y = list(pivot_data.values())\n", | |
| "plt.bar(x, y, color=['skyblue', 'orange', 'green', 'yellow', 'pink'], alpha=0.5, width=0.5)\n", | |
| "plt.xlabel('IPAddress')\n", | |
| "plt.xticks(rotation=90)\n", | |
| "plt.ylabel('Count')\n", | |
| "plt.title('IP Address Statistics', pad=25)\n", | |
| "plt.gca().spines['right'].set_visible(False) # remove right spine\n", | |
| "plt.gca().spines['top'].set_visible(False) # remove left spine\n", | |
| "for i, j in enumerate(y):\n", | |
| " plt.text(x=i, y=j+0.1, s=str(j))\n", | |
| "plt.savefig('ipaddress_stats.png') # save figure as png or jpg" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<b>Interact with Powerpoint</b>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<li>You can interact with Microsoft Powerpoint with python module - <a href=\"https://python-pptx.readthedocs.io/en/latest/user/quickstart.html\">python-pptx</a></li>\n", | |
| "<li>Install this package using the command - pip install python-pptx</li>\n", | |
| "<li>For this tutorial, we only use headings, table and inserting images</li>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from pptx import Presentation\n", | |
| "from pptx.util import Inches, Pt\n", | |
| "\n", | |
| "prs = Presentation()\n", | |
| "blank_slide_layout = prs.slide_layouts[6]\n", | |
| "slide = prs.slides.add_slide(blank_slide_layout)\n", | |
| "shapes = slide.shapes\n", | |
| "\n", | |
| "# add text box\n", | |
| "left = top = width = height = Inches(1)\n", | |
| "txBox = slide.shapes.add_textbox(left, top, width, height)\n", | |
| "tf = txBox.text_frame\n", | |
| "\n", | |
| "p = tf.add_paragraph()\n", | |
| "p.text = \"Pivot Table Information\"\n", | |
| "p.font.size = Pt(25)\n", | |
| "\n", | |
| "# add table\n", | |
| "rows, cols = len(pivot_data), 2\n", | |
| "left = top = Inches(2.0)\n", | |
| "width = Inches(6.0)\n", | |
| "height = Inches(0.8)\n", | |
| "\n", | |
| "table = shapes.add_table(rows+1, cols, left, top, width, height).table\n", | |
| "\n", | |
| "# set column widths\n", | |
| "table.columns[0].width = Inches(2.0)\n", | |
| "table.columns[1].width = Inches(4.0)\n", | |
| "\n", | |
| "# write column headings\n", | |
| "table.cell(0, 0).text = 'IP Address' # first row first cell\n", | |
| "table.cell(0, 1).text = 'Count' # first row second cell\n", | |
| "\n", | |
| "row_count = 1\n", | |
| "for key, val in pivot_data.items():\n", | |
| " if row_count <= rows:\n", | |
| " table.cell(row_count, 0).text = key\n", | |
| " table.cell(row_count, 1).text = str(val)\n", | |
| " row_count += 1\n", | |
| " \n", | |
| "# add image in a new slide\n", | |
| "slide = prs.slides.add_slide(blank_slide_layout)\n", | |
| "shapes = slide.shapes\n", | |
| "left = top = Inches(1)\n", | |
| "pic = slide.shapes.add_picture('ipaddress_stats.png', left, top)\n", | |
| "\n", | |
| "prs.save('output_powerpoint.pptx')\n", | |
| "print('File Created')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<b>Real Time Scenario Script</b>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import csv # load csv module to handle csv files\n", | |
| "import matplotlib.pyplot as plt # load matplotlib module to create charts\n", | |
| "from pptx import Presentation # load pptx module to handle powerpoint presentations\n", | |
| "from pptx.util import Inches, Pt\n", | |
| "\n", | |
| "# =========================================\n", | |
| "# CREATE PIVOT TABLE LOGIC AND VISUALIZE\n", | |
| "# =========================================\n", | |
| "class Pivot_Visualize:\n", | |
| " \n", | |
| " # ===================================\n", | |
| " # READ FILE PATH TO LOAD CSV DATA\n", | |
| " # ===================================\n", | |
| " def __init__(self, read_file_path, image_path, ppt_path):\n", | |
| " self._read_file_path = read_file_path\n", | |
| " self._image_path = image_path\n", | |
| " self._ppt_path = ppt_path\n", | |
| " \n", | |
| " # =================================\n", | |
| " # READ CSV FILE TO EXTRACT DATA\n", | |
| " # =================================\n", | |
| " def read_csv_file(self):\n", | |
| " source_set, data_list = set(), []\n", | |
| " with open(self._read_file_path, 'r') as csvfile:\n", | |
| " csv_reader = csv.DictReader(csvfile)\n", | |
| " for row in csv_reader:\n", | |
| " source_set.add(row['Source IP'])\n", | |
| " data_list.append(row['Source IP'])\n", | |
| " pivot_data = {}\n", | |
| " for ip in source_set:\n", | |
| " repeat_count = 0\n", | |
| " for data in data_list:\n", | |
| " if ip == data: repeat_count += 1\n", | |
| " pivot_data[ip] = repeat_count\n", | |
| " return pivot_data\n", | |
| " \n", | |
| " # ======================================\n", | |
| " # CREATE BAR CHART BY READING CSV FILE\n", | |
| " # ======================================\n", | |
| " def create_visualization(self, pivot_data):\n", | |
| " x = list(pivot_data.keys())\n", | |
| " y = list(pivot_data.values())\n", | |
| " plt.bar(x, y, color=['skyblue', 'orange', 'green', 'yellow', 'pink'], alpha=0.5, width=0.5)\n", | |
| " plt.xlabel('IPAddress')\n", | |
| " plt.xticks(rotation=90)\n", | |
| " plt.ylabel('Count')\n", | |
| " plt.title('IP Address Statistics', pad=25)\n", | |
| " plt.gca().spines['right'].set_visible(False) # remove right spine\n", | |
| " plt.gca().spines['top'].set_visible(False) # remove left spine\n", | |
| " for i, j in enumerate(y):\n", | |
| " plt.text(x=i, y=j+0.1, s=str(j))\n", | |
| " plt.savefig(self._image_path, bbox_inches='tight', dip=200)\n", | |
| " \n", | |
| " # ==========================================================\n", | |
| " # CREATE POWERPOINT PRESENTATION USING PYTHON PPTX MODULE\n", | |
| " # ==========================================================\n", | |
| " def create_presentation(self):\n", | |
| " prs = Presentation()\n", | |
| " blank_slide_layout = prs.slide_layouts[6]\n", | |
| " slide = prs.slides.add_slide(blank_slide_layout)\n", | |
| " shapes = slide.shapes\n", | |
| "\n", | |
| " # add text box\n", | |
| " left = top = width = height = Inches(1)\n", | |
| " txBox = slide.shapes.add_textbox(left, top, width, height)\n", | |
| " tf = txBox.text_frame\n", | |
| "\n", | |
| " p = tf.add_paragraph()\n", | |
| " p.text = \"Pivot Table Information\"\n", | |
| " p.font.size = Pt(25)\n", | |
| "\n", | |
| " # add table\n", | |
| " rows, cols = len(pivot_data), 2\n", | |
| " left = top = Inches(2.0)\n", | |
| " width = Inches(6.0)\n", | |
| " height = Inches(0.8)\n", | |
| "\n", | |
| " table = shapes.add_table(rows+1, cols, left, top, width, height).table\n", | |
| "\n", | |
| " # set column widths\n", | |
| " table.columns[0].width = Inches(2.0)\n", | |
| " table.columns[1].width = Inches(4.0)\n", | |
| "\n", | |
| " # write column headings\n", | |
| " table.cell(0, 0).text = 'IP Address'\n", | |
| " table.cell(0, 1).text = 'Count'\n", | |
| "\n", | |
| " row_count = 1\n", | |
| " for key, val in pivot_data.items():\n", | |
| " if row_count <= rows:\n", | |
| " table.cell(row_count, 0).text = key\n", | |
| " table.cell(row_count, 1).text = str(val)\n", | |
| " row_count += 1\n", | |
| "\n", | |
| " # add image in a new slide\n", | |
| " slide = prs.slides.add_slide(blank_slide_layout)\n", | |
| " shapes = slide.shapes\n", | |
| " left = top = Inches(1)\n", | |
| " pic = slide.shapes.add_picture(self._image_path, left, top)\n", | |
| "\n", | |
| " prs.save(self._ppt_path)\n", | |
| " return True\n", | |
| " \n", | |
| "# =====================================\n", | |
| "# PROGRAM EXECUTION STARTS HERE\n", | |
| "# =====================================\n", | |
| "\n", | |
| "# CREATE OBJECT FOR PIVOT VISUALIZE CLASS\n", | |
| "pv = Pivot_Visualize(read_file_path='pivot_table.csv', image_path='ipaddress_stats.png', ppt_path='output_presentation.pptx')\n", | |
| "\n", | |
| "# READ CSV FILE INFORMATION\n", | |
| "pivot_data = pv.read_csv_file()\n", | |
| "\n", | |
| "# VISUALIZE CSV FILE DATA BY MAKING IT TO PIVOT\n", | |
| "pv.create_visualization(pivot_data)\n", | |
| "\n", | |
| "# CREATE POWER POINT PRESENTATION\n", | |
| "confirm = pv.create_presentation()\n", | |
| "if confirm is True: print('Power Point Presentation Created Successfully')\n", | |
| "else: print('Not Able to Create Power Point Presentation')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<b>Learn more Real Time Scenarios related to CSV - <a href=\"https://cybersecpy.in/handle-csv-files-using-python/\">cybersecpy</a></b>" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.8.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment