Skip to content

Instantly share code, notes, and snippets.

@srinivas946
Created February 23, 2020 19:55
Show Gist options
  • Select an option

  • Save srinivas946/bddf585114728d20861dbdf98a044602 to your computer and use it in GitHub Desktop.

Select an option

Save srinivas946/bddf585114728d20861dbdf98a044602 to your computer and use it in GitHub Desktop.
Read a CSV file which contains Logs Information, using this data make a pivot table and data visualization, the output has to be stored in Word Document as a table and Image respectively.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import csv # load csv module to handle csv data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<b>create logic for pivot table</b>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<p>Download sample Information file - <a href=\"https://drive.google.com/open?id=1EinQJ7Z3WAKWqZvUsEZJXg_HlJ_On0J4\">link</a></p>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('pivot_table.csv', 'r') as csvfile:\n",
" csv_reader = csv.DictReader(csvfile)\n",
" for row in csv_reader:\n",
" print(row)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"source_set, data_list = set(), []\n",
"with open('pivot_table.csv', 'r') as csvfile:\n",
" csv_reader = csv.DictReader(csvfile)\n",
" for row in csv_reader:\n",
" source_set.add(row['Source IP'])\n",
" data_list.append(row['Source IP'])\n",
"pivot_data = {}\n",
"for ip in source_set:\n",
" repeat_count = 0\n",
" for data in data_list:\n",
" if ip == data: repeat_count += 1\n",
" pivot_data[ip] = repeat_count\n",
"pivot_data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<b>visualize the obtained data</b>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt # load matplotlib module to handle charts"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<li>Matplotlib is a Python Package used for data visualization</li>\n",
"<li>Official documentation of matplotlib - <a href=\"https://matplotlib.org/\">link</a></li>\n",
"<li>Easy way to learn data visualization - Refer my Tutorials <a href=\"https://github.com/srinivas946/Matplotlib_Tuts\">link</a></li>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"x = list(pivot_data.keys())\n",
"y = list(pivot_data.values())\n",
"plt.bar(x, y, color=['skyblue', 'orange', 'green', 'yellow', 'pink'], alpha=0.5, width=0.5)\n",
"plt.xlabel('IPAddress')\n",
"plt.xticks(rotation=90)\n",
"plt.ylabel('Count')\n",
"plt.title('IP Address Statistics', pad=25)\n",
"plt.gca().spines['right'].set_visible(False) # remove right spine\n",
"plt.gca().spines['top'].set_visible(False) # remove left spine\n",
"for i, j in enumerate(y):\n",
" plt.text(x=i, y=j+0.1, s=str(j))\n",
"plt.savefig('ipaddress_stats.png', bbox_inches='tight', dpi=200) # save figure with display-pixel-inch(dpi)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<b>Interaction with Word Document</b>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<li>Interact with Microsoft Office Word with Python module - <a href=\"https://python-docx.readthedocs.io/en/latest/\">python-docx</a></li>\n",
"<li>Install this package using command => pip install python-docx</li>\n",
"<li>In this tutorial, we can only deal with headings table, inserting images</li>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from docx import Document # load docx module to handle word document files\n",
"from docx.shared import Inches\n",
"\n",
"document = Document() # create object for document class\n",
"document.add_heading('Pivot Table Information', level=2) # add heading\n",
"total_rows = len(pivot_data) # logic to find total rows\n",
"table = document.add_table(rows=total_rows+1, cols=2) # create table with rows and cols\n",
"table.style = 'Table Grid' # style for the table\n",
"\n",
"# create first row as a header\n",
"row = table.rows[0] # taking first row\n",
"cell = row.cells[0] # taking first cell\n",
"cell.text = 'IP Address' # writting text to first cell\n",
"cell = row.cells[1] # taking second cell of first row\n",
"cell.text = 'Count' # writting text to second cell of first row\n",
"\n",
"# rest of the rows are for data\n",
"row_count = 1\n",
"for key, val in pivot_data.items():\n",
" if row_count <= total_rows:\n",
" row = table.rows[row_count]\n",
" cell = row.cells[0]\n",
" cell.text = key\n",
" cell = row.cells[1]\n",
" cell.text = str(val)\n",
" row_count += 1\n",
"document.add_heading('Chart for Pivot Informaiton', level=2)\n",
"document.add_picture('ipaddress_stats.png', width=Inches(4.25)) # add image to the word document\n",
"document.save('output_word.docx') # save the document by provide output file path\n",
"print('Docx File Created')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<b>Real Time Scenario Script</b>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import csv # load csv module to handle csv files\n",
"import matplotlib.pyplot as plt # load matplotlib module to handle data visualization\n",
"from docx import Document # load docx module to hanle word document files\n",
"from docx.shared import Inches\n",
"\n",
"class Pivot_Visualize:\n",
" \n",
" # ===================================\n",
" # READ FILE PATH TO LOAD CSV DATA\n",
" # ===================================\n",
" def __init__(self, read_file_path, image_path, document_file_path):\n",
" self._read_file_path = read_file_path\n",
" self._image_path = image_path\n",
" self._document_file_path = document_file_path\n",
" \n",
" # =================================\n",
" # READ CSV FILE TO EXTRACT DATA\n",
" # =================================\n",
" def read_csv_file(self):\n",
" source_set, data_list = set(), []\n",
" with open(self._read_file_path, 'r') as csvfile:\n",
" csv_reader = csv.DictReader(csvfile)\n",
" for row in csv_reader:\n",
" source_set.add(row['Source IP'])\n",
" data_list.append(row['Source IP'])\n",
" pivot_data = {}\n",
" for ip in source_set:\n",
" repeat_count = 0\n",
" for data in data_list:\n",
" if ip == data: repeat_count += 1\n",
" pivot_data[ip] = repeat_count\n",
" return pivot_data\n",
" \n",
" # ======================================\n",
" # CREATE BAR CHART BY READING CSV FILE\n",
" # ======================================\n",
" def create_visualization(self, pivot_data):\n",
" x = list(pivot_data.keys())\n",
" y = list(pivot_data.values())\n",
" plt.bar(x, y, color=['skyblue', 'orange', 'green', 'yellow', 'pink'], alpha=0.5, width=0.5)\n",
" plt.xlabel('IPAddress')\n",
" plt.xticks(rotation=90)\n",
" plt.ylabel('Count')\n",
" plt.title('IP Address Statistics', pad=25)\n",
" plt.gca().spines['right'].set_visible(False) # remove right spine\n",
" plt.gca().spines['top'].set_visible(False) # remove left spine\n",
" for i, j in enumerate(y):\n",
" plt.text(x=i, y=j+0.1, s=str(j))\n",
" plt.savefig(self._image_path, bbox_inches='tight', dpi=200)\n",
" \n",
" def create_word(self):\n",
" document = Document()\n",
" document.add_heading('Pivot Table Information', level=2)\n",
" total_rows = len(pivot_data)\n",
" table = document.add_table(rows=total_rows+1, cols=2)\n",
" table.style = 'Table Grid'\n",
"\n",
" # create first row as a header\n",
" row = table.rows[0] \n",
" cell = row.cells[0] \n",
" cell.text = 'IP Address'\n",
" cell = row.cells[1] \n",
" cell.text = 'Count' \n",
"\n",
" # rest of the rows are for data\n",
" row_count = 1\n",
" for key, val in pivot_data.items():\n",
" if row_count <= total_rows:\n",
" row = table.rows[row_count]\n",
" cell = row.cells[0]\n",
" cell.text = key\n",
" cell = row.cells[1]\n",
" cell.text = str(val)\n",
" row_count += 1\n",
" document.add_heading('Chart for Pivot Informaiton', level=2)\n",
" document.add_picture(self._image_path, width=Inches(4.25))\n",
" document.save(self._document_file_path)\n",
" return True\n",
" \n",
"# =====================================\n",
"# PROGRAM EXECUTION STARTS HERE\n",
"# =====================================\n",
"\n",
"# CREATE OBJECT FOR PIVOT VISUALIZE CLASS\n",
"pv = Pivot_Visualize(read_file_path='pivot_table.csv', \n",
" image_path='ipaddress_stats.png', \n",
" document_file_path='output_document.docx')\n",
"\n",
"# INVOKE READ CSV FILE TO READ INFORMATION REALTED TO LOGS\n",
"pivot_data = pv.read_csv_file()\n",
"\n",
"# CREATE CHARTS BY INVOKING CREATE_VISUALIZATON METHOD\n",
"pv.create_visualization(pivot_data)\n",
"\n",
"# INVOKE CREATE WORD METHOD TO CREATE WORD DOCUMENT\n",
"confirm = pv.create_word()\n",
"if confirm is True: print('Word Document Created Successfully')\n",
"else: print('Not Able to Create Word Document')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<b>Learn More Real Time Scenarios related to CSV - <a href=\"https://cybersecpy.in/handle-csv-files-using-python/\">cybersecpy</a></b>"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment