wuyongrui · March 4, 2021 08:27
diff --git a/similarity_images.py b/similarity_images.py
 import pytesseract  
 from pytesseract import *  
 from PIL import Image,ImageEnhance,ImageFilter  
 import os  
 import fnmatch  
 import re,time  
 import urllib, random 
 import time

 # or use imagehash lib

 def current_second():
     """Return the current Unix time rounded to the nearest whole second."""
     now = time.time()
     return round(now)

 def is_image(file_path):
     """Return True when file_path ends with a ``.jpg`` or ``.png`` extension.

     The comparison is case-insensitive. The dot is part of the match, so a
     name that merely ends in the letters "jpg" or "png" (for example
     ``notapng``) is not reported as an image.
     """
     return file_path.lower().endswith(('.jpg', '.png'))

 def find_images(root_path):
     """Recursively collect the paths of image files below root_path.

     root_path is made absolute first, so every returned path is absolute.
     A file is included when is_image() accepts its name.
     """
     base = os.path.abspath(root_path)
     return [
         os.path.join(folder, name)
         for folder, _subdirs, names in os.walk(base)
         for name in names
         if is_image(name)
     ]

 def get_gray(image_file):
     """Return all pixel values of image_file as one flat list.

     Pixels are read with getpixel() row by row (top to bottom), and left to
     right within each row; size[0] is used as the width and size[1] as the
     height.
     """
     columns = image_file.size[0]
     rows = image_file.size[1]
     return [
         image_file.getpixel((x, y))
         for y in range(rows)
         for x in range(columns)
     ]
   
 def get_avg(ls):
     """Return the arithmetic mean of the gray values in ls.

     Raises ZeroDivisionError when ls is empty.
     """
     total = sum(ls)
     count = len(ls)
     return total / count
   
 def getMH(a, b):
     """Count the positions at which a and b hold equal items.

     The result is a similarity score (number of matches), not a distance:
     a higher value means the two sequences agree in more places. Every index
     of a is also read from b, so a shorter b raises IndexError.
     """
     return sum(1 for i in range(len(a)) if a[i] == b[i])
   
 def get_image_info(path):
     """Build an average-hash bit string for the image stored at path.

     The image is shrunk to 12x12 pixels and converted to mode "L"
     (grayscale). The mean gray value is taken over all 144 pixels; then each
     pixel of the inner 10x10 area (the 1-pixel border is skipped) contributes
     '1' when its value is >= that mean and '0' otherwise, giving a
     100-character string.

     Returns a tuple (bits, width, height), where width and height are the
     dimensions of the image before resizing.
     """
     # Use a context manager so the file opened by Pillow is released as soon
     # as the thumbnail exists instead of staying open until garbage collection.
     with Image.open(path) as source:
         width, height = source.size
         thumb = source.resize((12, 12)).convert("L")
     avg = get_avg(get_gray(thumb))
     columns, rows = thumb.size
     # Collect the bits in a list and join once rather than growing a string.
     bits = [
         '1' if thumb.getpixel((w, h)) >= avg else '0'
         for h in range(1, rows - 1)
         for w in range(1, columns - 1)
     ]
     return (''.join(bits), width, height)

 def check_images(path1, path2):
     """Return True when the two image files are considered duplicates.

     Both files are hashed with get_image_info(). They count as duplicates
     when more than 99 hash positions match and the original width and height
     of the two images are equal.
     """
     hash1, width1, height1 = get_image_info(path1)
     hash2, width2, height2 = get_image_info(path2)
     matching = getMH(hash1, hash2)
     return matching > 99 and width1 == width2 and height1 == height2

 def __main__():
     """Scan './' recursively for duplicate images and print a summary.

     Prints 'BEGIN', compares every unordered pair of image paths returned by
     find_images() using check_images(), then prints the elapsed whole seconds
     and the number of duplicate pairs found.
     """
     root = './'
     paths = find_images(root)
     result = []
     begin_seconds = current_second()
     print('BEGIN')
     for i, target_path in enumerate(paths):
         # Only look at later entries so each pair is checked once.
         for dest_path in paths[i + 1:]:
             if check_images(target_path, dest_path):
                 # Store just the pair: the match count is local to
                 # check_images() and is not available in this scope.
                 result.append((target_path, dest_path))
     print('DONE!,seconds:' + str(current_second() - begin_seconds))
     print(len(result))
 	
 # Run the duplicate scan. The call is unconditional, so it also runs when
 # this module is imported.
 __main__()
	import pytesseract
	from pytesseract import *
	from PIL import Image,ImageEnhance,ImageFilter
	import os
	import fnmatch
	import re,time
	import urllib, random
	import time

	# or use imagehash lib

	def current_second():
	    """Return the current Unix time rounded to the nearest whole second."""
	    now = time.time()
	    return round(now)

	def is_image(file_path):
	    """Return True when file_path ends with a ``.jpg`` or ``.png`` extension.

	    The comparison is case-insensitive. The dot is part of the match, so a
	    name that merely ends in the letters "jpg" or "png" (for example
	    ``notapng``) is not reported as an image.
	    """
	    return file_path.lower().endswith(('.jpg', '.png'))

	def find_images(root_path):
	    """Recursively collect the paths of image files below root_path.

	    root_path is made absolute first, so every returned path is absolute.
	    A file is included when is_image() accepts its name.
	    """
	    base = os.path.abspath(root_path)
	    return [
	        os.path.join(folder, name)
	        for folder, _subdirs, names in os.walk(base)
	        for name in names
	        if is_image(name)
	    ]

	def get_gray(image_file):
	    """Return all pixel values of image_file as one flat list.

	    Pixels are read with getpixel() row by row (top to bottom), and left to
	    right within each row; size[0] is used as the width and size[1] as the
	    height.
	    """
	    columns = image_file.size[0]
	    rows = image_file.size[1]
	    return [
	        image_file.getpixel((x, y))
	        for y in range(rows)
	        for x in range(columns)
	    ]

	def get_avg(ls):
	    """Return the arithmetic mean of the gray values in ls.

	    Raises ZeroDivisionError when ls is empty.
	    """
	    total = sum(ls)
	    count = len(ls)
	    return total / count

	def getMH(a, b):
	    """Count the positions at which a and b hold equal items.

	    The result is a similarity score (number of matches), not a distance:
	    a higher value means the two sequences agree in more places. Every index
	    of a is also read from b, so a shorter b raises IndexError.
	    """
	    return sum(1 for i in range(len(a)) if a[i] == b[i])

	def get_image_info(path):
	    """Build an average-hash bit string for the image stored at path.

	    The image is shrunk to 12x12 pixels and converted to mode "L"
	    (grayscale). The mean gray value is taken over all 144 pixels; then each
	    pixel of the inner 10x10 area (the 1-pixel border is skipped) contributes
	    '1' when its value is >= that mean and '0' otherwise, giving a
	    100-character string.

	    Returns a tuple (bits, width, height), where width and height are the
	    dimensions of the image before resizing.
	    """
	    # Use a context manager so the file opened by Pillow is released as soon
	    # as the thumbnail exists instead of staying open until garbage collection.
	    with Image.open(path) as source:
	        width, height = source.size
	        thumb = source.resize((12, 12)).convert("L")
	    avg = get_avg(get_gray(thumb))
	    columns, rows = thumb.size
	    # Collect the bits in a list and join once rather than growing a string.
	    bits = [
	        '1' if thumb.getpixel((w, h)) >= avg else '0'
	        for h in range(1, rows - 1)
	        for w in range(1, columns - 1)
	    ]
	    return (''.join(bits), width, height)

	def check_images(path1, path2):
	    """Return True when the two image files are considered duplicates.

	    Both files are hashed with get_image_info(). They count as duplicates
	    when more than 99 hash positions match and the original width and height
	    of the two images are equal.
	    """
	    hash1, width1, height1 = get_image_info(path1)
	    hash2, width2, height2 = get_image_info(path2)
	    matching = getMH(hash1, hash2)
	    return matching > 99 and width1 == width2 and height1 == height2

	def __main__():
	    """Scan './' recursively for duplicate images and print a summary.

	    Prints 'BEGIN', compares every unordered pair of image paths returned by
	    find_images() using check_images(), then prints the elapsed whole seconds
	    and the number of duplicate pairs found.
	    """
	    root = './'
	    paths = find_images(root)
	    result = []
	    begin_seconds = current_second()
	    print('BEGIN')
	    for i, target_path in enumerate(paths):
	        # Only look at later entries so each pair is checked once.
	        for dest_path in paths[i + 1:]:
	            if check_images(target_path, dest_path):
	                # Store just the pair: the match count is local to
	                # check_images() and is not available in this scope.
	                result.append((target_path, dest_path))
	    print('DONE!,seconds:' + str(current_second() - begin_seconds))
	    print(len(result))

	# Run the duplicate scan. The call is unconditional, so it also runs when
	# this module is imported.
	__main__()
No results found