"""Pairwise image comparison using average-hash fingerprints.

Each image is converted to grayscale, shrunk to a small square, and turned
into a bit string (1 where a pixel is brighter than the mean). Two images
are compared by the Hamming distance between their bit strings.
"""

import os
from itertools import combinations

import numpy as np
import pandas as pd
from PIL import Image

# File extensions (lower-case) treated as images when scanning a folder.
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg')


def image_to_grayscale(image_path):
    """Open the image at *image_path* and return it as an 8-bit grayscale image.

    The source file is closed before returning; ``convert`` produces an
    independent in-memory copy.
    """
    with Image.open(image_path) as img:
        return img.convert('L')


def reduce_image_size(img, size=8):
    """Return *img* resized to ``size`` x ``size`` pixels.

    Uses the Lanczos filter. ``Image.ANTIALIAS`` was an alias for this filter
    and was removed in Pillow 10, so ``Image.LANCZOS`` is used instead.
    """
    return img.resize((size, size), Image.LANCZOS)


def compute_average_hash(img):
    """Return the average hash of *img* as a string of '0'/'1' characters.

    A pixel contributes '1' when it is strictly brighter than the mean of all
    pixels, otherwise '0'. Pixels are read in row-major order.
    """
    pixels = np.array(img, dtype=np.float32)
    above_mean = pixels > pixels.mean()
    return ''.join(map(str, above_mean.flatten().astype(int)))


def phash(image_path, hash_size=8):
    """Return the hash string for the image file at *image_path*.

    NOTE: despite the name, this is an average hash (aHash): grayscale,
    resize to ``hash_size`` x ``hash_size``, threshold against the mean.
    No DCT step is involved. The result has ``hash_size ** 2`` characters.
    """
    img = image_to_grayscale(image_path)
    img = reduce_image_size(img, hash_size)
    return compute_average_hash(img)


def _hamming_distance(hash1, hash2):
    """Count the positions at which two hash strings differ."""
    return sum(c1 != c2 for c1, c2 in zip(hash1, hash2))


def calculate_image_similarities(folder_path):
    """Compute the pairwise Hamming distance between all images in a folder.

    Only files directly inside *folder_path* whose names end in .png, .jpg or
    .jpeg (case-insensitive) are considered. Paths are sorted so the row and
    column order is reproducible regardless of directory listing order.

    Returns:
        A tuple ``(similarity_matrix, images)``. ``images`` is the sorted list
        of image paths; ``similarity_matrix`` is a symmetric integer array in
        which entry ``[i, j]`` is the Hamming distance between the hashes of
        ``images[i]`` and ``images[j]``. Lower values mean more similar
        images; the diagonal is 0.
    """
    images = sorted(
        os.path.join(folder_path, name)
        for name in os.listdir(folder_path)
        if name.lower().endswith(IMAGE_EXTENSIONS)
    )

    # Hash every image exactly once instead of re-reading it for each pair.
    hashes = [phash(path) for path in images]

    n_images = len(images)
    similarity_matrix = np.zeros((n_images, n_images), dtype=int)
    # Visit each unordered pair once and mirror it to keep the matrix symmetric.
    for (i, hash_i), (j, hash_j) in combinations(enumerate(hashes), 2):
        distance = _hamming_distance(hash_i, hash_j)
        similarity_matrix[i, j] = distance
        similarity_matrix[j, i] = distance
    return similarity_matrix, images


def generate_similarity_table(similarity_matrix, images):
    """Wrap the distance matrix in a DataFrame labelled by image path."""
    return pd.DataFrame(similarity_matrix, index=images, columns=images)


def main():
    """Example run: compare every image in a folder and save the table as CSV."""
    folder_path = 'C:\\Users\\Desktop\\全部图片数据'
    similarity_matrix, images = calculate_image_similarities(folder_path)
    similarity_table = generate_similarity_table(similarity_matrix, images)

    # Show only the first rows; the full table can be very large.
    print(similarity_table.head())

    # Save the full table to a CSV file.
    similarity_table.to_csv('image_similarities.csv')


if __name__ == "__main__":
    main()
感知哈希算法计算图片相似度
于 2024-07-31 08:35:36 首次发布