Skip to content

Instantly share code, notes, and snippets.

@wuyongrui
Last active March 4, 2021 08:27
Show Gist options
  • Select an option

  • Save wuyongrui/4a5cd580f420a33fabba0cc48e2090ce to your computer and use it in GitHub Desktop.

Select an option

Save wuyongrui/4a5cd580f420a33fabba0cc48e2090ce to your computer and use it in GitHub Desktop.
Find similar (duplicate) images in a directory
import pytesseract
from pytesseract import *
from PIL import Image,ImageEnhance,ImageFilter
import os
import fnmatch
import re,time
import urllib, random
import time
# Alternatively, the third-party `imagehash` library could be used instead of the hand-rolled hash below.
def current_second():
    """Return the current Unix timestamp rounded to the nearest whole second."""
    now = time.time()
    return round(now)
def is_image(file_path):
    """Return True if *file_path* has a JPEG or PNG file extension.

    The check is case-insensitive and looks only at the name, never at the
    file contents. The leading dot is required, so a name that merely ends
    in the letters "jpg" or "png" (e.g. ``"notajpg"``) is not treated as an
    image.
    """
    # str.endswith accepts a tuple, so one call covers every extension.
    return file_path.lower().endswith(('.jpg', '.jpeg', '.png'))
def find_images(root_path):
    """Recursively collect the paths of all image files under *root_path*.

    Walks the tree rooted at the absolute form of *root_path* and returns a
    list holding the joined directory/file path of every file whose name is
    accepted by ``is_image``.
    """
    top = os.path.abspath(root_path)
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(top)
        for name in names
        if is_image(name)
    ]
def get_gray(image_file):
    """Return every pixel value of *image_file* as a flat, row-major list.

    *image_file* must expose ``size`` as ``(width, height)`` and a
    ``getpixel((x, y))`` method; rows are visited top to bottom and, within
    a row, columns left to right.
    """
    return [
        image_file.getpixel((x, y))
        for y in range(image_file.size[1])
        for x in range(image_file.size[0])
    ]
def get_avg(ls):
    """Return the arithmetic mean of the values in *ls* (the average gray level).

    Raises ZeroDivisionError when *ls* is empty.
    """
    total = sum(ls)
    count = len(ls)
    return total / count
def getMH(a, b):
    """Count the positions at which sequences *a* and *b* hold equal items.

    Despite the name, this is a *similarity* score (number of matching
    positions), not a distance: identical 100-character fingerprints
    score 100.

    Only positions present in both inputs are compared, so the result is
    the same regardless of argument order and a shorter second argument no
    longer raises IndexError.
    """
    # zip() pairs items position by position and stops at the shorter input.
    return sum(1 for left, right in zip(a, b) if left == right)
def get_image_info(path):
    """Compute a coarse brightness fingerprint for the image file at *path*.

    The image is resized to 12x12 and converted to mode "L" (grayscale).
    Each of the 100 interior pixels (the 1px border is skipped) is encoded
    as '1' when its value is at least the mean of all 144 pixel values,
    otherwise '0'.

    Returns:
        A tuple ``(bits, width, height)``: the 100-character bit string and
        the dimensions of the original, un-resized image.
    """
    # Use the image as a context manager so the underlying file handle is
    # released as soon as the thumbnail has been produced; resize() and
    # convert() return new images, so `thumb` is used after the block exits.
    with Image.open(path) as source:
        width, height = source.size
        thumb = source.resize((12, 12)).convert("L")

    # The threshold is the mean over the whole thumbnail, border included.
    avg = get_avg(get_gray(thumb))

    cols, rows = thumb.size
    # Walk the interior row by row, skipping the outermost 1px frame.
    bits = ''.join(
        '1' if thumb.getpixel((w, h)) >= avg else '0'
        for h in range(1, rows - 1)
        for w in range(1, cols - 1)
    )
    return (bits, width, height)
def check_images(path1, path2):
    """Report whether the images at *path1* and *path2* look like duplicates.

    Two images match when more than 99 fingerprint positions agree and
    their original widths and heights are both equal.
    """
    hash_a, width_a, height_a = get_image_info(path1)
    hash_b, width_b, height_b = get_image_info(path2)
    matching = getMH(hash_a, hash_b)
    same_size = width_a == width_b and height_a == height_b
    return matching > 99 and same_size
def __main__():
    """Scan the current directory tree and count pairs of duplicate images.

    Finds every image under './', prints 'BEGIN', fingerprints each image,
    compares every unordered pair, then prints the elapsed seconds followed
    by the number of matching pairs.
    """
    from itertools import combinations

    paths = find_images('./')
    result = []
    begin_seconds = current_second()
    print('BEGIN')

    # Fingerprint every image exactly once up front instead of re-opening
    # and re-hashing both files for each of the O(n^2) pair comparisons.
    infos = [(path, get_image_info(path)) for path in paths]

    # combinations() yields pairs in the same (i, j > i) order as a nested
    # index loop over `paths`.
    for (target_path, target_info), (dest_path, dest_info) in combinations(infos, 2):
        target_hash, target_width, target_height = target_info
        dest_hash, dest_width, dest_height = dest_info
        # Compute the score here: the previous code appended an undefined
        # `compare` name and raised NameError on the first duplicate found.
        compare = getMH(target_hash, dest_hash)
        # Same match criterion as check_images().
        if compare > 99 and target_width == dest_width and target_height == dest_height:
            result.append((compare, target_path, dest_path))

    print('DONE!,seconds:' + str(current_second() - begin_seconds))
    print(len(result))
# Script entry point: the scan starts as soon as this line is executed.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module also triggers the scan -- confirm whether that is intended.
__main__()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment