1

因此,我有一个包含图像的文件夹,我试图在 python 字典中以 Imagehash 作为键,将相似图像列表作为它们的值,创建相似图像的集群,如果是,我如何防止图像生成新键已经出现在任何其他字典键列表中的是我到目前为止所做的代码:


from PIL import Image
import imagehash
import cv2
import numpy as np
import dhash
import distance

norm_cache: dict = dict()
def _get_image(image_path: str) -> Image:
   try:
       img_arr = cv2.imread(image_path)
       img_arr = cv2.resize(img_arr, (512, 512), interpolation=cv2.INTER_AREA)

       # Convert image into 3 channels if it contains 4
       if len(img_arr.shape) > 2 and img_arr.shape[2] == 4:
           img_arr = cv2.cvtColor(img_arr, cv2.COLOR_BGRA2BGR)

       # using W3C luminance calc to convert into gray-scale
       data = np.inner(img_arr, [299, 587, 114]) / 1000.0

       return Image.fromarray(np.uint8(data),"L")
   except SyntaxError:
       pass


def find_similar_images(userpath):
   import os
   global norm_cache
   def is_image(filename):
       f = filename.lower()
       return f.endswith(".png") or f.endswith(".jpg") or \
           f.endswith(".jpeg") or f.endswith(".bmp") or f.endswith(".gif")
   
   image_filenames = [os.path.join(userpath, path) for path in os.listdir(userpath) if is_image(path)]
   images = {}
   buffer = []
   for img in image_filenames:
       if (len(buffer) == 0):
           print("Original list is empty, Appending first image to buffer.")
           buffer.append(img)
           continue
       gray1 = _get_image(img)
       h1r,h1c =  dhash.dhash_row_col(gray1)
       hash1 = dhash.format_hex(h1r,h1c)
       images[hash1] = images.get(hash1, []) + [img]
       for each in buffer:
           if each in norm_cache:
               print(f"cached val found for {each}")
               gray2 = norm_cache[each]
               h2r,h2c = dhash.dhash_row_col(gray2)
               hash2 = dhash.format_hex(h2r,h2c)
           else:
               print("No cached_val found, Computing and storing in norm_cache")
               gray2 = _get_image(each)
               h2r,h2c = dhash.dhash_row_col(gray2)
               hash2 = dhash.format_hex(h2r,h2c)
               norm_cache[each] = gray2  # Update cache...
           print(f"Comparing ---> {img}:{hash1} with {each}:{hash2}")
           if(distance.hamming(hash1,hash2) <= 22):
             //what should i put in here 

   
   unique = 0
   for k, img_list in images.items():
       if(len(img_list) >= 1):
           print(''.join(img_list))
           unique = unique + 1
   print(unique)
   


if __name__ == '__main__':
   import sys, os
   userpath = <Image folder/>
   find_similar_images(userpath=userpath)
   
4

0 回答 0