srinivas946 · February 23, 2020 19:55
diff --git a/Interact_CSV_with_Word.ipynb b/Interact_CSV_with_Word.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv # load csv module to handle csv data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<b>create logic for pivot table</b>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<p>Download sample Information file - <a href=\"https://drive.google.com/open?id=1EinQJ7Z3WAKWqZvUsEZJXg_HlJ_On0J4\">link</a></p>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('pivot_table.csv', 'r') as csvfile:\n",
    "    csv_reader = csv.DictReader(csvfile)\n",
    "    for row in csv_reader:\n",
    "        print(row)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "source_set, data_list = set(), []\n",
    "with open('pivot_table.csv', 'r') as csvfile:\n",
    "    csv_reader = csv.DictReader(csvfile)\n",
    "    for row in csv_reader:\n",
    "        source_set.add(row['Source IP'])\n",
    "        data_list.append(row['Source IP'])\n",
    "pivot_data = {}\n",
    "for ip in source_set:\n",
    "    repeat_count = 0\n",
    "    for data in data_list:\n",
    "        if ip == data: repeat_count += 1\n",
    "    pivot_data[ip] = repeat_count\n",
    "pivot_data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<b>visualize the obtained data</b>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt # load matplotlib module to handle charts"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<li>Matplotlib is a Python Package used for data visualization</li>\n",
    "<li>Official documentation of matplotlib - <a href=\"https://matplotlib.org/\">link</a></li>\n",
    "<li>Easy way to learn data visualization - Refer my Tutorials <a href=\"https://github.com/srinivas946/Matplotlib_Tuts\">link</a></li>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = list(pivot_data.keys())\n",
    "y = list(pivot_data.values())\n",
    "plt.bar(x, y, color=['skyblue', 'orange', 'green', 'yellow', 'pink'], alpha=0.5, width=0.5)\n",
    "plt.xlabel('IPAddress')\n",
    "plt.xticks(rotation=90)\n",
    "plt.ylabel('Count')\n",
    "plt.title('IP Address Statistics', pad=25)\n",
    "plt.gca().spines['right'].set_visible(False) # remove right spine\n",
    "plt.gca().spines['top'].set_visible(False) # remove left spine\n",
    "for i, j in enumerate(y):\n",
    "    plt.text(x=i, y=j+0.1, s=str(j))\n",
    "plt.savefig('ipaddress_stats.png', bbox_inches='tight', dpi=200) # save figure with display-pixel-inch(dpi)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<b>Interaction with Word Document</b>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<li>Interact with Microsoft Office Word with Python module - <a href=\"https://python-docx.readthedocs.io/en/latest/\">python-docx</a></li>\n",
    "<li>Install this package using command => pip install python-docx</li>\n",
    "<li>In this tutorial, we can only deal with headings table, inserting images</li>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from docx import Document # load docx module to handle word document files\n",
    "from docx.shared import Inches\n",
    "\n",
    "document = Document() # create object for document class\n",
    "document.add_heading('Pivot Table Information', level=2) # add heading\n",
    "total_rows = len(pivot_data) # logic to find total rows\n",
    "table = document.add_table(rows=total_rows+1, cols=2) # create table with rows and cols\n",
    "table.style = 'Table Grid' # style for the table\n",
    "\n",
    "# create first row as a header\n",
    "row = table.rows[0] # taking first row\n",
    "cell = row.cells[0] # taking first cell\n",
    "cell.text = 'IP Address' # writting text to first cell\n",
    "cell = row.cells[1] # taking second cell of first row\n",
    "cell.text = 'Count' # writting text to second cell of first row\n",
    "\n",
    "# rest of the rows are for data\n",
    "row_count = 1\n",
    "for key, val in pivot_data.items():\n",
    "    if row_count <= total_rows:\n",
    "        row = table.rows[row_count]\n",
    "        cell = row.cells[0]\n",
    "        cell.text = key\n",
    "        cell = row.cells[1]\n",
    "        cell.text = str(val)\n",
    "        row_count += 1\n",
    "document.add_heading('Chart for Pivot Informaiton', level=2)\n",
    "document.add_picture('ipaddress_stats.png', width=Inches(4.25)) # add image to the word document\n",
    "document.save('output_word.docx') # save the document by provide output file path\n",
    "print('Docx File Created')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<b>Real Time Scenario Script</b>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv  # load csv module to handle csv files\n",
    "import matplotlib.pyplot as plt # load matplotlib module to handle data visualization\n",
    "from docx import Document # load docx module to hanle word document files\n",
    "from docx.shared import Inches\n",
    "\n",
    "class Pivot_Visualize:\n",
    "    \n",
    "    # ===================================\n",
    "    #  READ FILE PATH TO LOAD CSV DATA\n",
    "    # ===================================\n",
    "    def __init__(self, read_file_path, image_path, document_file_path):\n",
    "        self._read_file_path = read_file_path\n",
    "        self._image_path = image_path\n",
    "        self._document_file_path = document_file_path\n",
    "    \n",
    "    # =================================\n",
    "    #  READ CSV FILE TO EXTRACT DATA\n",
    "    # =================================\n",
    "    def read_csv_file(self):\n",
    "        source_set, data_list = set(), []\n",
    "        with open(self._read_file_path, 'r') as csvfile:\n",
    "            csv_reader = csv.DictReader(csvfile)\n",
    "            for row in csv_reader:\n",
    "                source_set.add(row['Source IP'])\n",
    "                data_list.append(row['Source IP'])\n",
    "        pivot_data = {}\n",
    "        for ip in source_set:\n",
    "            repeat_count = 0\n",
    "            for data in data_list:\n",
    "                if ip == data: repeat_count += 1\n",
    "            pivot_data[ip] = repeat_count\n",
    "        return pivot_data\n",
    "    \n",
    "    # ======================================\n",
    "    #  CREATE BAR CHART BY READING CSV FILE\n",
    "    # ======================================\n",
    "    def create_visualization(self, pivot_data):\n",
    "        x = list(pivot_data.keys())\n",
    "        y = list(pivot_data.values())\n",
    "        plt.bar(x, y, color=['skyblue', 'orange', 'green', 'yellow', 'pink'], alpha=0.5, width=0.5)\n",
    "        plt.xlabel('IPAddress')\n",
    "        plt.xticks(rotation=90)\n",
    "        plt.ylabel('Count')\n",
    "        plt.title('IP Address Statistics', pad=25)\n",
    "        plt.gca().spines['right'].set_visible(False) # remove right spine\n",
    "        plt.gca().spines['top'].set_visible(False) # remove left spine\n",
    "        for i, j in enumerate(y):\n",
    "            plt.text(x=i, y=j+0.1, s=str(j))\n",
    "        plt.savefig(self._image_path, bbox_inches='tight', dpi=200)\n",
    "    \n",
    "    def create_word(self):\n",
    "        document = Document()\n",
    "        document.add_heading('Pivot Table Information', level=2)\n",
    "        total_rows = len(pivot_data)\n",
    "        table = document.add_table(rows=total_rows+1, cols=2)\n",
    "        table.style = 'Table Grid'\n",
    "\n",
    "        # create first row as a header\n",
    "        row = table.rows[0] \n",
    "        cell = row.cells[0] \n",
    "        cell.text = 'IP Address'\n",
    "        cell = row.cells[1] \n",
    "        cell.text = 'Count' \n",
    "\n",
    "        # rest of the rows are for data\n",
    "        row_count = 1\n",
    "        for key, val in pivot_data.items():\n",
    "            if row_count <= total_rows:\n",
    "                row = table.rows[row_count]\n",
    "                cell = row.cells[0]\n",
    "                cell.text = key\n",
    "                cell = row.cells[1]\n",
    "                cell.text = str(val)\n",
    "                row_count += 1\n",
    "        document.add_heading('Chart for Pivot Informaiton', level=2)\n",
    "        document.add_picture(self._image_path, width=Inches(4.25))\n",
    "        document.save(self._document_file_path)\n",
    "        return True\n",
    "        \n",
    "# =====================================\n",
    "#   PROGRAM EXECUTION STARTS HERE\n",
    "# =====================================\n",
    "\n",
    "# CREATE OBJECT FOR PIVOT VISUALIZE CLASS\n",
    "pv = Pivot_Visualize(read_file_path='pivot_table.csv', \n",
    "                     image_path='ipaddress_stats.png', \n",
    "                     document_file_path='output_document.docx')\n",
    "\n",
    "# INVOKE READ CSV FILE TO READ INFORMATION REALTED TO LOGS\n",
    "pivot_data = pv.read_csv_file()\n",
    "\n",
    "# CREATE CHARTS BY INVOKING CREATE_VISUALIZATON METHOD\n",
    "pv.create_visualization(pivot_data)\n",
    "\n",
    "# INVOKE CREATE WORD METHOD TO CREATE WORD DOCUMENT\n",
    "confirm = pv.create_word()\n",
    "if confirm is True: print('Word Document Created Successfully')\n",
    "else: print('Not Able to Create Word Document')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<b>Learn More Real Time Scenarios related to CSV - <a href=\"https://cybersecpy.in/handle-csv-files-using-python/\">cybersecpy</a></b>"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import csv # load csv module to handle csv data"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"<b>create logic for pivot table</b>"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"<p>Download sample Information file - <a href=\"https://drive.google.com/open?id=1EinQJ7Z3WAKWqZvUsEZJXg_HlJ_On0J4\">link</a></p>"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"with open('pivot_table.csv', 'r') as csvfile:\n",
	" csv_reader = csv.DictReader(csvfile)\n",
	" for row in csv_reader:\n",
	" print(row)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"source_set, data_list = set(), []\n",
	"with open('pivot_table.csv', 'r') as csvfile:\n",
	" csv_reader = csv.DictReader(csvfile)\n",
	" for row in csv_reader:\n",
	" source_set.add(row['Source IP'])\n",
	" data_list.append(row['Source IP'])\n",
	"pivot_data = {}\n",
	"for ip in source_set:\n",
	" repeat_count = 0\n",
	" for data in data_list:\n",
	" if ip == data: repeat_count += 1\n",
	" pivot_data[ip] = repeat_count\n",
	"pivot_data"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"<b>visualize the obtained data</b>"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import matplotlib.pyplot as plt # load matplotlib module to handle charts"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"<li>Matplotlib is a Python Package used for data visualization</li>\n",
	"<li>Official documentation of matplotlib - <a href=\"https://matplotlib.org/\">link</a></li>\n",
	"<li>Easy way to learn data visualization - Refer my Tutorials <a href=\"https://github.com/srinivas946/Matplotlib_Tuts\">link</a></li>"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"x = list(pivot_data.keys())\n",
	"y = list(pivot_data.values())\n",
	"plt.bar(x, y, color=['skyblue', 'orange', 'green', 'yellow', 'pink'], alpha=0.5, width=0.5)\n",
	"plt.xlabel('IPAddress')\n",
	"plt.xticks(rotation=90)\n",
	"plt.ylabel('Count')\n",
	"plt.title('IP Address Statistics', pad=25)\n",
	"plt.gca().spines['right'].set_visible(False) # remove right spine\n",
	"plt.gca().spines['top'].set_visible(False) # remove left spine\n",
	"for i, j in enumerate(y):\n",
	" plt.text(x=i, y=j+0.1, s=str(j))\n",
	"plt.savefig('ipaddress_stats.png', bbox_inches='tight', dpi=200) # save figure with display-pixel-inch(dpi)"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"<b>Interaction with Word Document</b>"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"<li>Interact with Microsoft Office Word with Python module - <a href=\"https://python-docx.readthedocs.io/en/latest/\">python-docx</a></li>\n",
	"<li>Install this package using command => pip install python-docx</li>\n",
	"<li>In this tutorial, we can only deal with headings table, inserting images</li>"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"from docx import Document # load docx module to handle word document files\n",
	"from docx.shared import Inches\n",
	"\n",
	"document = Document() # create object for document class\n",
	"document.add_heading('Pivot Table Information', level=2) # add heading\n",
	"total_rows = len(pivot_data) # logic to find total rows\n",
	"table = document.add_table(rows=total_rows+1, cols=2) # create table with rows and cols\n",
	"table.style = 'Table Grid' # style for the table\n",
	"\n",
	"# create first row as a header\n",
	"row = table.rows[0] # taking first row\n",
	"cell = row.cells[0] # taking first cell\n",
	"cell.text = 'IP Address' # writting text to first cell\n",
	"cell = row.cells[1] # taking second cell of first row\n",
	"cell.text = 'Count' # writting text to second cell of first row\n",
	"\n",
	"# rest of the rows are for data\n",
	"row_count = 1\n",
	"for key, val in pivot_data.items():\n",
	" if row_count <= total_rows:\n",
	" row = table.rows[row_count]\n",
	" cell = row.cells[0]\n",
	" cell.text = key\n",
	" cell = row.cells[1]\n",
	" cell.text = str(val)\n",
	" row_count += 1\n",
	"document.add_heading('Chart for Pivot Informaiton', level=2)\n",
	"document.add_picture('ipaddress_stats.png', width=Inches(4.25)) # add image to the word document\n",
	"document.save('output_word.docx') # save the document by provide output file path\n",
	"print('Docx File Created')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"<b>Real Time Scenario Script</b>"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"import csv # load csv module to handle csv files\n",
	"import matplotlib.pyplot as plt # load matplotlib module to handle data visualization\n",
	"from docx import Document # load docx module to hanle word document files\n",
	"from docx.shared import Inches\n",
	"\n",
	"class Pivot_Visualize:\n",
	" \n",
	" # ===================================\n",
	" # READ FILE PATH TO LOAD CSV DATA\n",
	" # ===================================\n",
	" def __init__(self, read_file_path, image_path, document_file_path):\n",
	" self._read_file_path = read_file_path\n",
	" self._image_path = image_path\n",
	" self._document_file_path = document_file_path\n",
	" \n",
	" # =================================\n",
	" # READ CSV FILE TO EXTRACT DATA\n",
	" # =================================\n",
	" def read_csv_file(self):\n",
	" source_set, data_list = set(), []\n",
	" with open(self._read_file_path, 'r') as csvfile:\n",
	" csv_reader = csv.DictReader(csvfile)\n",
	" for row in csv_reader:\n",
	" source_set.add(row['Source IP'])\n",
	" data_list.append(row['Source IP'])\n",
	" pivot_data = {}\n",
	" for ip in source_set:\n",
	" repeat_count = 0\n",
	" for data in data_list:\n",
	" if ip == data: repeat_count += 1\n",
	" pivot_data[ip] = repeat_count\n",
	" return pivot_data\n",
	" \n",
	" # ======================================\n",
	" # CREATE BAR CHART BY READING CSV FILE\n",
	" # ======================================\n",
	" def create_visualization(self, pivot_data):\n",
	" x = list(pivot_data.keys())\n",
	" y = list(pivot_data.values())\n",
	" plt.bar(x, y, color=['skyblue', 'orange', 'green', 'yellow', 'pink'], alpha=0.5, width=0.5)\n",
	" plt.xlabel('IPAddress')\n",
	" plt.xticks(rotation=90)\n",
	" plt.ylabel('Count')\n",
	" plt.title('IP Address Statistics', pad=25)\n",
	" plt.gca().spines['right'].set_visible(False) # remove right spine\n",
	" plt.gca().spines['top'].set_visible(False) # remove left spine\n",
	" for i, j in enumerate(y):\n",
	" plt.text(x=i, y=j+0.1, s=str(j))\n",
	" plt.savefig(self._image_path, bbox_inches='tight', dpi=200)\n",
	" \n",
	" def create_word(self):\n",
	" document = Document()\n",
	" document.add_heading('Pivot Table Information', level=2)\n",
	" total_rows = len(pivot_data)\n",
	" table = document.add_table(rows=total_rows+1, cols=2)\n",
	" table.style = 'Table Grid'\n",
	"\n",
	" # create first row as a header\n",
	" row = table.rows[0] \n",
	" cell = row.cells[0] \n",
	" cell.text = 'IP Address'\n",
	" cell = row.cells[1] \n",
	" cell.text = 'Count' \n",
	"\n",
	" # rest of the rows are for data\n",
	" row_count = 1\n",
	" for key, val in pivot_data.items():\n",
	" if row_count <= total_rows:\n",
	" row = table.rows[row_count]\n",
	" cell = row.cells[0]\n",
	" cell.text = key\n",
	" cell = row.cells[1]\n",
	" cell.text = str(val)\n",
	" row_count += 1\n",
	" document.add_heading('Chart for Pivot Informaiton', level=2)\n",
	" document.add_picture(self._image_path, width=Inches(4.25))\n",
	" document.save(self._document_file_path)\n",
	" return True\n",
	" \n",
	"# =====================================\n",
	"# PROGRAM EXECUTION STARTS HERE\n",
	"# =====================================\n",
	"\n",
	"# CREATE OBJECT FOR PIVOT VISUALIZE CLASS\n",
	"pv = Pivot_Visualize(read_file_path='pivot_table.csv', \n",
	" image_path='ipaddress_stats.png', \n",
	" document_file_path='output_document.docx')\n",
	"\n",
	"# INVOKE READ CSV FILE TO READ INFORMATION REALTED TO LOGS\n",
	"pivot_data = pv.read_csv_file()\n",
	"\n",
	"# CREATE CHARTS BY INVOKING CREATE_VISUALIZATON METHOD\n",
	"pv.create_visualization(pivot_data)\n",
	"\n",
	"# INVOKE CREATE WORD METHOD TO CREATE WORD DOCUMENT\n",
	"confirm = pv.create_word()\n",
	"if confirm is True: print('Word Document Created Successfully')\n",
	"else: print('Not Able to Create Word Document')"
	]
	},
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"<b>Learn More Real Time Scenarios related to CSV - <a href=\"https://cybersecpy.in/handle-csv-files-using-python/\">cybersecpy</a></b>"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.8.1"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 4
	}
No results found