import os
import numpy as np
from scipy.spatial.distance import cosine
import csv
# Build a mapping from file name to absolute path for every file under a folder
def get_file_mapping(folder_path):
    """Map every file name found under *folder_path* to its absolute path.

    The folder is walked recursively.  Keys are bare file names (no directory
    part), so when the same name occurs in several sub-directories only one
    entry survives: the one visited last.  Directories and files are visited
    in sorted order so that this choice, and the insertion order of the
    returned dict, do not depend on the order in which the operating system
    happens to list directory entries.

    Args:
        folder_path: Root directory to scan.  A path that does not exist
            yields an empty mapping, because ``os.walk`` ignores listing
            errors by default.

    Returns:
        A dict mapping file name -> absolute path of that file.
    """
    file_map = {}
    for root, dirs, files in os.walk(folder_path):
        # Sorting ``dirs`` in place steers the order in which os.walk descends.
        dirs.sort()
        for file_name in sorted(files):
            file_map[file_name] = os.path.abspath(os.path.join(root, file_name))
    return file_map
# Length of the longest common prefix shared by two file names
def find_max_common_prefix_length(file1, file2):
    """Return how many leading characters *file1* and *file2* have in common.

    Comparison is case-sensitive and stops at the first position where the
    two names differ, or at the end of the shorter name.

    Args:
        file1: First file name.
        file2: Second file name.

    Returns:
        The length of the longest common prefix (0 when the first characters
        differ or either name is empty).
    """
    length = 0
    # zip() stops at the shorter name, so no explicit bounds checks are needed.
    for char1, char2 in zip(file1, file2):
        if char1 != char2:
            break
        length += 1
    return length
# Find the matching files between folder a and folder b
def find_matching_files(folder_a, folder_b):
    """Pair each file under *folder_a* with its best-matching file under *folder_b*.

    Matching is done on bare file names in two passes:

    1. Names that are identical in both folders are paired first, so an exact
       partner can never be taken by another file that merely shares a prefix
       with it.
    2. Each remaining file from *folder_a* is paired with the still-unmatched
       file from *folder_b* that shares the longest common prefix with it.
       At least one leading character must match; ties go to the
       lexicographically smallest candidate.

    Every file from *folder_b* is used at most once.  Names are processed in
    sorted order so the result does not depend on directory listing order.

    Args:
        folder_a: Root of the first folder tree.
        folder_b: Root of the second folder tree.

    Returns:
        A list of ``(path_in_a, path_in_b)`` tuples of absolute paths, sorted
        by the base name of ``path_in_a``.  Files without a partner are omitted.
    """
    a_map = get_file_mapping(folder_a)
    b_map = get_file_mapping(folder_b)
    matching_files = []

    # Pass 1: identical names always belong together.
    leftovers_a = []
    for name_a in sorted(a_map):
        if name_a in b_map:
            # pop() removes the matched file so it cannot be paired twice.
            matching_files.append((a_map[name_a], b_map.pop(name_a)))
        else:
            leftovers_a.append(name_a)

    # Pass 2: greedy longest-common-prefix matching for everything else.
    candidates = sorted(b_map)
    for name_a in leftovers_a:
        best_name = None
        best_length = 0
        for name_b in candidates:
            length = find_max_common_prefix_length(name_a, name_b)
            # Strict '>' keeps the first (smallest) name among equal lengths.
            if length > best_length:
                best_length = length
                best_name = name_b
        if best_name is not None:
            matching_files.append((a_map[name_a], b_map[best_name]))
            # Remove the matched file so it cannot be paired twice.
            candidates.remove(best_name)

    # Order the pairs by the file name from folder_a.
    matching_files.sort(key=lambda pair: os.path.basename(pair[0]))
    return matching_files
# Compare the contents of two .npy files
def _widen_for_subtraction(values):
    """Promote boolean/integer data to float64 so subtraction cannot wrap or fail."""
    return values.astype(np.float64) if values.dtype.kind in "bui" else values


def _reduce_or_nan(reducer, array):
    """Apply *reducer* to *array*; an empty array has no max/min/mean, so give NaN."""
    return reducer(array) if array.size else np.float64("nan")


def compare_npy_files(file_a, file_b):
    """Build a text report comparing the arrays stored in two ``.npy`` files.

    The report is a newline-terminated sequence of lines, always in this order:

    * ``shape A B`` (with `` a.shape==b.shape:False`` appended on a mismatch)
    * ``max_value:``, ``min_value:``, ``avg_value:`` - the statistic of each
      array followed by the absolute difference between the two
    * ``Max Value Difference:``, ``Min Value Difference:``,
      ``Average Error Difference:`` - statistics of ``|a - b|`` element-wise
    * ``similarity:`` - ``1 - cosine distance`` of the flattened arrays

    Element-wise metrics need a one-to-one correspondence between elements.
    When the shapes differ but the element counts agree (e.g. ``(1, 3)`` vs
    ``(3,)``) the arrays are compared in flattened order.  When the element
    counts differ, or the arrays are empty, those metrics are reported as
    ``nan`` instead of raising.

    Boolean and integer data are promoted to float64 before any subtraction,
    so unsigned or narrow integer types cannot wrap around; differences for
    such arrays are therefore printed as floats, and integers beyond 2**53
    lose precision.

    Args:
        file_a: Path of the first ``.npy`` file.
        file_b: Path of the second ``.npy`` file.

    Returns:
        The report as a single string.

    Raises:
        Whatever ``np.load`` raises for unreadable or non-``.npy`` input.
    """
    array_a = np.load(file_a)
    array_b = np.load(file_b)
    same_shape = array_a.shape == array_b.shape

    if same_shape:
        report_lines = [f"shape {array_a.shape} {array_b.shape}\n"]
    else:
        report_lines = [
            f"shape {array_a.shape} {array_b.shape} a.shape==b.shape:{same_shape}\n"
        ]

    # Per-array statistics are well defined whatever the other array looks like.
    for label, reducer in (
        ("max_value", np.max),
        ("min_value", np.min),
        ("avg_value", np.average),
    ):
        stat_a = _reduce_or_nan(reducer, array_a)
        stat_b = _reduce_or_nan(reducer, array_b)
        gap = np.abs(_widen_for_subtraction(stat_a) - _widen_for_subtraction(stat_b))
        report_lines.append(f"{label}:{stat_a} {stat_b} {gap}\n")

    max_difference = min_difference = average_difference = similarity = float("nan")
    if array_a.size == array_b.size and array_a.size:
        wide_a = _widen_for_subtraction(array_a)
        wide_b = _widen_for_subtraction(array_b)
        if not same_shape:
            # Same element count, different layout: pair elements in flat
            # order rather than letting NumPy broadcast them against each other.
            wide_a = wide_a.ravel()
            wide_b = wide_b.ravel()
        abs_difference = np.abs(wide_a - wide_b)
        max_difference = np.max(abs_difference)
        min_difference = np.min(abs_difference)
        average_difference = np.mean(abs_difference)
        # Cosine similarity of the flattened arrays.
        similarity = 1 - cosine(wide_a.ravel(), wide_b.ravel())

    report_lines.append(f"Max Value Difference:{max_difference}\n")
    report_lines.append(f"Min Value Difference:{min_difference}\n")
    report_lines.append(f"Average Error Difference:{average_difference}\n")
    report_lines.append(f"similarity: {similarity}\n")
    return "".join(report_lines)
def main(folder_a="a", folder_b="b", output_csv="comparison_results.csv"):
    """Compare matching files of two folder trees and log the results.

    For every pair returned by ``find_matching_files`` the comparison report
    is printed and one row is appended to *output_csv*: the two file names
    followed by the report lines, minus the leading shape line and the
    trailing empty string produced by the final newline.

    Args:
        folder_a: Root of the first folder tree (defaults to ``"a"``).
        folder_b: Root of the second folder tree (defaults to ``"b"``).
        output_csv: CSV file the rows are appended to.  It is opened in append
            mode, so rows from earlier runs are kept.
    """
    matching_files = find_matching_files(folder_a, folder_b)
    if not matching_files:
        # Nothing to report; do not create an empty CSV file.
        return

    # Open the CSV once for the whole run instead of once per pair.
    with open(output_csv, mode='a', newline='') as file:
        writer = csv.writer(file)
        for file_a, file_b in matching_files:
            name_a = os.path.basename(file_a)
            name_b = os.path.basename(file_b)
            print(f"匹配的文件:{name_a} 和 {name_b}")
            try:
                comparison_result = compare_npy_files(file_a, file_b)
            except (OSError, ValueError, EOFError) as error:
                # One unreadable or incompatible file must not abort the
                # comparison of all remaining pairs.
                print(f"Skipped: could not compare these files ({error})")
                print("")
                continue
            print(f"对比结果:\n{comparison_result}")
            print("")
            comparison_result_lines = comparison_result.split('\n')
            # Skip the first (shape) line and the last (empty) element.
            writer.writerow([f"{name_a} and {name_b}"] + comparison_result_lines[1:-1])
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
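# Source article: "How to recursively compare the contents of .npy files in two folders"
# (original title: 如何递归对比两个文件夹当中npy文件的内容)
# Posted by C__Try, 2023-11-09 18:00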