Skip to content

Instantly share code, notes, and snippets.

@dobrosketchkun
Last active September 1, 2022 12:57
Show Gist options

  • Save dobrosketchkun/f490a7555712ac432be4cd5cb0fa0bd1 to your computer and use it in GitHub Desktop.
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from time import sleep
import urllib.request
from os.path import exists
import hashlib
import requests
import re
# Output file: one tab-separated record per camera; the LAST field of each
# line is a SHA-224 hash of the camera's "ip:port", which we use to skip
# cameras that were already recorded on a previous run.
filename = "JP_all_hased.txt"

# Matches an IPv4 address followed by a port, e.g. "203.0.113.7:8080".
regex = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})")

# Browser-like request headers — the site serves bot-looking clients poorly,
# so we present ourselves as a desktop Chrome.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
'AppleWebKit/537.11 (KHTML, like Gecko) '
'Chrome/23.0.1271.64 Safari/537.11',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'Accept-Encoding': 'none',
'Accept-Language': 'en-US,en;q=0.8',
'Connection': 'keep-alive'}

# Load the hashes of already-recorded cameras so re-runs only append new ones.
# The hash is the last tab-separated field of each line.
try:
    with open(filename, 'r') as f:
        hashes_set = {line.split('\t')[-1].strip() for line in f}
except OSError as e:
    # First run (file does not exist yet) or the file is unreadable:
    # start with an empty set and record everything we find.
    print(e)
    hashes_set = set()
##########
# Fetch the first listing page for country JP and read the total page count
# out of the pagenavigator("?page=", N) JavaScript call embedded in it.
first_listing = requests.get(url='http://insecam.org/en/bycountry/JP/?page=1', headers=headers)
page_counts = re.findall(r'pagenavigator\("\?page=", (\d+)', first_listing.text)
last_page = int(page_counts[0])
##########
pages = list(range(1, last_page + 1))
# Decide on the header BEFORE opening in append mode: open(..., 'a') creates
# the file, so checking exists() after the open can never be False (the
# original code had this backwards and the header was never written).
write_header = not exists(filename)

with open(filename, 'a') as file:
    if write_header:
        # Column names, tab-separated to match the data rows below
        # (the original header had a literal "\h" where "\t" was intended).
        file.write("ip:port\tcountry\tcity\timage feed link\thash\n")
    for page in pages:
        reg_url = f'http://insecam.org/en/bycountry/JP/?page={page}'
        try:
            print(reg_url+'\n')
            # Fetch the listing page for this page number.
            req = Request(url=reg_url, headers=headers)
            with urllib.request.urlopen(req) as response:
                html = response.read().decode('utf-8')
            soup = BeautifulSoup(html, 'html.parser')
            for _item in soup.find_all('div', class_='thumbnail-item'):
                for _div in _item.find_all('div', class_="thumbnail-item__preview"):
                    img = _div.find_all('img')
                    # The camera's ip:port appears in the <img> markup.
                    regexed = re.findall(regex, str(img))
                    ip = regexed[0][0] + ':' + regexed[0][1]
                    source = img[0].attrs['src']
                    # Title looks like "Live camera <country...>, <city>".
                    # Country names may themselves contain commas (e.g.
                    # "iran, islamic republic"), so the city is the LAST
                    # comma-separated field and the country is everything else.
                    place = img[0].attrs['title'].split('Live camera')[1]
                    town = place.split(',')[-1]
                    country = ','.join(place.split(',')[0:-1])
                    hashed = hashlib.sha224(ip.encode('utf-8')).hexdigest()
                    if hashed not in hashes_set:
                        file.write(ip + '\t' + country + '\t' + town + '\t' + source + '\t' + hashed + '\n' )
                        print(ip + '\t' + country + '\t' + town + '\t' + source + '\t' + hashed + '\n' )
                        # Remember this camera within the current run too, so
                        # the same ip:port seen on a later page is not written
                        # twice (this is the duplicate check the TODO asks for).
                        hashes_set.add(hashed)
                        # Be polite to the server between writes.
                        sleep(1)
            sleep(1)
        except Exception as e:
            # Network hiccups or page-layout surprises: log and move on to
            # the next page rather than aborting the whole crawl.
            print(e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment