Skip to content

Instantly share code, notes, and snippets.

@dobrosketchkun
Last active September 1, 2022 12:57
Show Gist options

  • Save dobrosketchkun/f490a7555712ac432be4cd5cb0fa0bd1 to your computer and use it in GitHub Desktop.
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup
from time import sleep
import urllib.request
from os.path import exists
import hashlib
import requests
import re
# Output file: one tab-separated record per camera; the LAST field of each
# line is a SHA-224 hash of the camera's "ip:port", which we use to skip
# cameras that were already recorded on a previous run.
filename = "JP_all_hased.txt"

# Matches an IPv4 address followed by a port, e.g. "203.0.113.7:8080".
regex = re.compile(r"(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}):(\d{1,5})")

# Browser-like request headers — the site serves bot-looking clients poorly,
# so we present ourselves as a desktop Chrome.
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) '
'AppleWebKit/537.11 (KHTML, like Gecko) '
'Chrome/23.0.1271.64 Safari/537.11',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
'Accept-Encoding': 'none',
'Accept-Language': 'en-US,en;q=0.8',
'Connection': 'keep-alive'}

# Load the hashes of already-recorded cameras so re-runs only append new ones.
# The hash is the last tab-separated field of each line.
try:
    with open(filename, 'r') as f:
        hashes_set = {line.split('\t')[-1].strip() for line in f}
except OSError as e:
    # First run (file does not exist yet) or the file is unreadable:
    # start with an empty set and record everything we find.
    print(e)
    hashes_set = set()
##########
# Fetch the first listing page for country JP and read the total page count
# out of the pagenavigator("?page=", N) JavaScript call embedded in it.
first_listing = requests.get(url='http://insecam.org/en/bycountry/JP/?page=1', headers=headers)
page_counts = re.findall(r'pagenavigator\("\?page=", (\d+)', first_listing.text)
last_page = int(page_counts[0])
##########
pages = list(range(1, last_page + 1))
# Decide on the header BEFORE opening in append mode: open(..., 'a') creates
# the file, so checking exists() after the open can never be False (the
# original code had this backwards and the header was never written).
write_header = not exists(filename)

with open(filename, 'a') as file:
    if write_header:
        # Column names, tab-separated to match the data rows below
        # (the original header had a literal "\h" where "\t" was intended).
        file.write("ip:port\tcountry\tcity\timage feed link\thash\n")
    for page in pages:
        reg_url = f'http://insecam.org/en/bycountry/JP/?page={page}'
        try:
            print(reg_url+'\n')
            # Fetch the listing page for this page number.
            req = Request(url=reg_url, headers=headers)
            with urllib.request.urlopen(req) as response:
                html = response.read().decode('utf-8')
            soup = BeautifulSoup(html, 'html.parser')
            for _item in soup.find_all('div', class_='thumbnail-item'):
                for _div in _item.find_all('div', class_="thumbnail-item__preview"):
                    img = _div.find_all('img')
                    # The camera's ip:port appears in the <img> markup.
                    regexed = re.findall(regex, str(img))
                    ip = regexed[0][0] + ':' + regexed[0][1]
                    source = img[0].attrs['src']
                    # Title looks like "Live camera <country...>, <city>".
                    # Country names may themselves contain commas (e.g.
                    # "iran, islamic republic"), so the city is the LAST
                    # comma-separated field and the country is everything else.
                    place = img[0].attrs['title'].split('Live camera')[1]
                    town = place.split(',')[-1]
                    country = ','.join(place.split(',')[0:-1])
                    hashed = hashlib.sha224(ip.encode('utf-8')).hexdigest()
                    if hashed not in hashes_set:
                        file.write(ip + '\t' + country + '\t' + town + '\t' + source + '\t' + hashed + '\n' )
                        print(ip + '\t' + country + '\t' + town + '\t' + source + '\t' + hashed + '\n' )
                        # Remember this camera within the current run too, so
                        # the same ip:port seen on a later page is not written
                        # twice (this is the duplicate check the TODO asks for).
                        hashes_set.add(hashed)
                        # Be polite to the server between writes.
                        sleep(1)
            sleep(1)
        except Exception as e:
            # Network hiccups or page-layout surprises: log and move on to
            # the next page rather than aborting the whole crawl.
            print(e)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment