Last active
March 4, 2021 08:27
-
-
Save wuyongrui/4a5cd580f420a33fabba0cc48e2090ce to your computer and use it in GitHub Desktop.
Find similar images in a directory
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pytesseract | |
| from pytesseract import * | |
| from PIL import Image,ImageEnhance,ImageFilter | |
| import os | |
| import fnmatch | |
| import re,time | |
| import urllib, random | |
| import time | |
| # or use imagehash lib | |
def current_second():
    """Return the current Unix timestamp rounded to a whole number of seconds."""
    now = time.time()
    return round(now)
def is_image(file_path):
    """Return True when *file_path* has a JPEG or PNG file extension.

    The comparison is case-insensitive. The suffixes include the leading
    dot so that names which merely end in the letters "jpg" or "png"
    (for example ``myjpg``) are not mistaken for image files.

    Args:
        file_path: File name or path to test.

    Returns:
        bool: True for ``.jpg``, ``.jpeg`` and ``.png`` names, else False.
    """
    # str.endswith accepts a tuple, so one call covers every suffix.
    return file_path.lower().endswith(('.jpg', '.jpeg', '.png'))
def find_images(root_path):
    """Recursively collect the paths of all image files under *root_path*.

    The directory tree is walked from the absolute form of *root_path*;
    every file name accepted by ``is_image`` is joined with its containing
    directory and returned, in the order ``os.walk`` yields them.
    """
    top = os.path.abspath(root_path)
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(top)
        for name in names
        if is_image(name)
    ]
def get_gray(image_file):
    """Return all pixel values of *image_file* as one flat list.

    Pixels are read with ``getpixel`` row by row: the outer iteration runs
    over ``size[1]`` (y), the inner one over ``size[0]`` (x).
    """
    columns = image_file.size[0]
    rows = image_file.size[1]
    return [
        image_file.getpixel((x, y))
        for y in range(rows)
        for x in range(columns)
    ]
def get_avg(ls):
    """Return the arithmetic mean of *ls* (used as the average gray level)."""
    total = sum(ls)
    count = len(ls)
    return total / count
def getMH(a, b):
    """Count the positions at which sequences *a* and *b* hold equal items.

    Every index of *a* is compared with the same index of *b*, so *b* must
    be at least as long as *a*. For two 100-character hash strings the
    result is the number of matching characters (0-100).
    """
    return sum(1 for index, item in enumerate(a) if item == b[index])
def get_image_info(path):
    """Compute an average-hash bit string and the original size of an image.

    The image is shrunk to 12x12 pixels and converted to 256-level
    grayscale. The mean gray value of all 144 pixels is the threshold;
    each pixel of the inner 10x10 area (the 1-pixel border is skipped)
    contributes '1' when it is >= the mean and '0' otherwise.

    Args:
        path: Path of the image file to open.

    Returns:
        tuple: ``(bits, width, height)`` where ``bits`` is the 100-character
        string of '0'/'1' and ``width``/``height`` are the dimensions of the
        image before resizing.
    """
    # Use a context manager so the underlying file handle is closed as soon
    # as the thumbnail exists; callers open many files in a row.
    with Image.open(path) as source:
        width, height = source.size
        # Resize to 12px x 12px, then convert to a 256-level grayscale image.
        thumb = source.resize((12, 12)).convert("L")

    # Average gray value over every pixel of the thumbnail.
    avg = get_avg(get_gray(thumb))

    columns, rows = thumb.size
    # Walk the pixels, leaving out the 1px border on each side.
    bitls = ''.join(
        '1' if thumb.getpixel((w, h)) >= avg else '0'
        for h in range(1, rows - 1)
        for w in range(1, columns - 1)
    )
    return (bitls, width, height)
def check_images(path1, path2):
    """Return True when the two image files are considered duplicates.

    Both images must have the same original width and height, and more
    than 99 positions of their hash strings must agree.
    """
    first = get_image_info(path1)
    second = get_image_info(path2)
    matching_bits = getMH(first[0], second[0])
    same_size = first[1:] == second[1:]
    return matching_bits > 99 and same_size
def __main__():
    """Scan the current directory tree for duplicate images and report.

    Every image found under ``./`` is hashed once with ``get_image_info``;
    each unordered pair is then compared. A pair counts as a match when
    more than 99 hash positions agree and both original dimensions are
    equal (the same rule ``check_images`` applies). Prints 'BEGIN', then
    the elapsed seconds and the number of matching pairs.
    """
    paths = find_images('./')
    count = len(paths)
    result = []
    begin_seconds = current_second()
    print('BEGIN')

    # Hash each file a single time instead of re-opening it for every pair.
    infos = [get_image_info(path) for path in paths]

    for i, (target_hash, target_width, target_height) in enumerate(infos):
        for j in range(i + 1, count):
            dest_hash, dest_width, dest_height = infos[j]
            # Compute the score here: the original appended an undefined
            # name `compare`, raising NameError on the first match.
            compare = getMH(target_hash, dest_hash)
            if (compare > 99
                    and target_width == dest_width
                    and target_height == dest_height):
                result.append((compare, paths[i], paths[j]))

    print('DONE!,seconds:' + str(current_second() - begin_seconds))
    print(len(result))


# Run the scan only when executed as a script, not when imported.
if __name__ == '__main__':
    __main__()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment