Skip to content

Instantly share code, notes, and snippets.

@srinivas946
Created February 24, 2020 09:58
Show Gist options
  • Select an option

  • Save srinivas946/278e91671e5c4d36f766e76b841fdf67 to your computer and use it in GitHub Desktop.

Select an option

Save srinivas946/278e91671e5c4d36f766e76b841fdf67 to your computer and use it in GitHub Desktop.
Create Own Tool for File Identification
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<h3>Identify the File Type using Python</h3>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Identifing the File Extension gives you an brief overview of target machine operating system (OS) and its architechture(32 or 64 bit)<br/>\n",
"<b>Required Packages</b>\n",
"<li><b><i>requrests</i></b> - To Interact with Web Services</li>\n",
"<li><b><i>os</i></b> - To Interact with Folders and Files in an operating system</li>\n",
"<li><b><i>BeautifulSoup</i></b> - To Prase HTML Data</li>"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<b>1. Independent on External Python Package</b>"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import requests # for HTTP and HTTPS Interaction\n",
"import os # Interact with Folders and Files\n",
"from bs4 import BeautifulSoup # Parse HTML data\n",
"\n",
"# ==================================\n",
"# CLASS TO IDENTIFY THE FILE TYPE\n",
"# ==================================\n",
"class Identify_File:\n",
" \n",
" # ------------------------------------------\n",
" # CONSTRUCTOR TO LOAD REQUIRED DETAILS\n",
" # ------------------------------------------\n",
" def __init__(self, folder_path, url):\n",
" \"\"\"Constructor Accepts folder path where exe file is located and url to check file signatues from the website\"\"\"\n",
" self._folder_path = folder_path\n",
" self._url = url\n",
" \n",
" # ------------------------------------------\n",
" # CHOOSE THE FILE FROM PROVIDED DIRECTORY\n",
" # ------------------------------------------\n",
" def selecting_file(self):\n",
" \"\"\"select the file from the given folder to verify the executable files\"\"\"\n",
" file_dict, count = {}, 1\n",
" for file in os.listdir(self._folder_path):\n",
" file_dict[count] = file\n",
" count += 1\n",
" for key, val in file_dict.items():\n",
" print(f'[{key}] {val}')\n",
" while True:\n",
" choice = int(input('Choose the file : '))\n",
" if choice not in file_dict.keys(): print('[-] Invalid Selection. Please select again.')\n",
" else: break\n",
" print(f'You selected the file : {file_dict[choice]}')\n",
" return f'{self._folder_path}/{file_dict[choice]}'\n",
" \n",
" # --------------------------------------\n",
" # GET SIGNATURE DETAILS FROM WEBSITE\n",
" # --------------------------------------\n",
" def get_signatures(self, extension):\n",
" \"\"\"get_signatures() method accepts file extension and check for the signatures related to it\"\"\"\n",
" res = requests.get(self._url, params={'search':extension, 'mode':'EXT'})\n",
" if res.status_code == 200:\n",
" content = res.text\n",
" soup = BeautifulSoup(content, 'html.parser')\n",
" table = soup.find('table')\n",
" tr = table.findAll('tr')\n",
" result_list = []\n",
" td = tr[3].findAll('td')\n",
" result_list.append({'Extension': td[1].text, 'hex': td[2].text, 'hex_string': bytes.fromhex(td[2].text).decode('ascii'), 'os': td[3].text})\n",
" return result_list\n",
" else: \n",
" print(f'Not able to Connect to {self._url}')\n",
" return []\n",
" \n",
" # -------------------------------------------------\n",
" # CHECK THE PROVIDED FILE IS EXECUTABLE FOR NOT\n",
" # -------------------------------------------------\n",
" def verify_file(self, filepath):\n",
" \"\"\"Using file path check for the executables by reading the hexadecimal headers from binary file\"\"\"\n",
" if filepath.__contains__('.'):\n",
" extension = filepath.split('.')[-1]\n",
" try:\n",
" result_info = self.get_signatures(extension=extension)\n",
" if len(result_info) != 0:\n",
" with open(filepath, 'rb', buffering=0) as file:\n",
" for f in file:\n",
" hexa_val = f.hex()\n",
" hexa_string = str(bytes.fromhex(hexa_val)).replace(\"b\", '').replace(\"'\", \"\")\n",
" for info in result_info:\n",
" if info['hex'].replace(\" \", '').lower() in hexa_val and info['hex_string'] in hexa_string:\n",
" print(f\"Provided File is {info['os']}\")\n",
" return\n",
" else: \n",
" print('Provided File is not an Executable File')\n",
" return\n",
" else: print('Provided File is not an Executable File')\n",
" except AttributeError: print('Provied File is not an Executable File')\n",
" else: print('Provied File is not a File. Please Provide Valid Path')\n",
" \n",
"# ================================\n",
"# PROGRAM EXECUTION STARTS HERE\n",
"# ================================\n",
"Idf = Identify_File(folder_path='provide_folder_path', url='https://filesignatures.net/index.php')\n",
"filepath = Idf.selecting_file()\n",
"Idf.verify_file(filepath)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.1"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment