Created
February 24, 2020 09:58
-
-
Save srinivas946/278e91671e5c4d36f766e76b841fdf67 to your computer and use it in GitHub Desktop.
Create Own Tool for File Identification
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<h3>Identify the File Type using Python</h3>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "Identifying the File Extension gives you a brief overview of the target machine's operating system (OS) and its architecture (32 or 64 bit)<br/>\n", | |
| "<b>Required Packages</b>\n", | |
| "<li><b><i>requests</i></b> - To Interact with Web Services</li>\n", | |
| "<li><b><i>os</i></b> - To Interact with Folders and Files in an operating system</li>\n", | |
| "<li><b><i>BeautifulSoup</i></b> - To Parse HTML Data</li>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "<b>1. Independent of External Python Package</b>" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import requests # for HTTP and HTTPS Interaction\n", | |
| "import os # Interact with Folders and Files\n", | |
| "from bs4 import BeautifulSoup # Parse HTML data\n", | |
| "\n", | |
| "# ==================================\n", | |
| "# CLASS TO IDENTIFY THE FILE TYPE\n", | |
| "# ==================================\n", | |
| "class Identify_File:\n", | |
| " \n", | |
| " # ------------------------------------------\n", | |
| " # CONSTRUCTOR TO LOAD REQUIRED DETAILS\n", | |
| " # ------------------------------------------\n", | |
| " def __init__(self, folder_path, url):\n", | |
| " \"\"\"Constructor Accepts folder path where exe file is located and url to check file signatues from the website\"\"\"\n", | |
| " self._folder_path = folder_path\n", | |
| " self._url = url\n", | |
| " \n", | |
| " # ------------------------------------------\n", | |
| " # CHOOSE THE FILE FROM PROVIDED DIRECTORY\n", | |
| " # ------------------------------------------\n", | |
| " def selecting_file(self):\n", | |
| " \"\"\"select the file from the given folder to verify the executable files\"\"\"\n", | |
| " file_dict, count = {}, 1\n", | |
| " for file in os.listdir(self._folder_path):\n", | |
| " file_dict[count] = file\n", | |
| " count += 1\n", | |
| " for key, val in file_dict.items():\n", | |
| " print(f'[{key}] {val}')\n", | |
| " while True:\n", | |
| " choice = int(input('Choose the file : '))\n", | |
| " if choice not in file_dict.keys(): print('[-] Invalid Selection. Please select again.')\n", | |
| " else: break\n", | |
| " print(f'You selected the file : {file_dict[choice]}')\n", | |
| " return f'{self._folder_path}/{file_dict[choice]}'\n", | |
| " \n", | |
| " # --------------------------------------\n", | |
| " # GET SIGNATURE DETAILS FROM WEBSITE\n", | |
| " # --------------------------------------\n", | |
| " def get_signatures(self, extension):\n", | |
| " \"\"\"get_signatures() method accepts file extension and check for the signatures related to it\"\"\"\n", | |
| " res = requests.get(self._url, params={'search':extension, 'mode':'EXT'})\n", | |
| " if res.status_code == 200:\n", | |
| " content = res.text\n", | |
| " soup = BeautifulSoup(content, 'html.parser')\n", | |
| " table = soup.find('table')\n", | |
| " tr = table.findAll('tr')\n", | |
| " result_list = []\n", | |
| " td = tr[3].findAll('td')\n", | |
| " result_list.append({'Extension': td[1].text, 'hex': td[2].text, 'hex_string': bytes.fromhex(td[2].text).decode('ascii'), 'os': td[3].text})\n", | |
| " return result_list\n", | |
| " else: \n", | |
| " print(f'Not able to Connect to {self._url}')\n", | |
| " return []\n", | |
| " \n", | |
| " # -------------------------------------------------\n", | |
| " # CHECK THE PROVIDED FILE IS EXECUTABLE OR NOT\n", | |
| " # -------------------------------------------------\n", | |
| " def verify_file(self, filepath):\n", | |
| " \"\"\"Using file path check for the executables by reading the hexadecimal headers from binary file\"\"\"\n", | |
| " if filepath.__contains__('.'):\n", | |
| " extension = filepath.split('.')[-1]\n", | |
| " try:\n", | |
| " result_info = self.get_signatures(extension=extension)\n", | |
| " if len(result_info) != 0:\n", | |
| " with open(filepath, 'rb', buffering=0) as file:\n", | |
| " for f in file:\n", | |
| " hexa_val = f.hex()\n", | |
| " hexa_string = str(bytes.fromhex(hexa_val)).replace(\"b\", '').replace(\"'\", \"\")\n", | |
| " for info in result_info:\n", | |
| " if info['hex'].replace(\" \", '').lower() in hexa_val and info['hex_string'] in hexa_string:\n", | |
| " print(f\"Provided File is {info['os']}\")\n", | |
| " return\n", | |
| " else: \n", | |
| " print('Provided File is not an Executable File')\n", | |
| " return\n", | |
| " else: print('Provided File is not an Executable File')\n", | |
| " except AttributeError: print('Provied File is not an Executable File')\n", | |
| " else: print('Provied File is not a File. Please Provide Valid Path')\n", | |
| " \n", | |
| "# ================================\n", | |
| "# PROGRAM EXECUTION STARTS HERE\n", | |
| "# ================================\n", | |
| "Idf = Identify_File(folder_path='provide_folder_path', url='https://filesignatures.net/index.php')\n", | |
| "filepath = Idf.selecting_file()\n", | |
| "Idf.verify_file(filepath)" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.8.1" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment