import numpy as np
def load_data(file_path):
    """Read a comma-separated transaction file into a list of item lists.

    Each non-empty line of the file is one transaction; items are separated
    by commas. Whitespace around each item is removed, and empty fields
    (e.g. from a trailing comma) as well as blank lines are dropped so that
    the empty string never shows up as an "item".

    Args:
        file_path: Path of the text file to read. It is opened in text mode
            with the platform's default encoding.

    Returns:
        A list of transactions, each a list of item strings in file order.
    """
    data = []
    with open(file_path, 'r') as f:
        # Iterate the file lazily instead of materializing every line first.
        for line in f:
            items = [field.strip() for field in line.split(',')]
            items = [item for item in items if item]
            if items:
                data.append(items)
    return data
def create_C1(data):
    """Build the candidate 1-itemsets from a collection of transactions.

    Args:
        data: Iterable of transactions, each an iterable of hashable items.

    Returns:
        A set containing one single-element ``frozenset`` per distinct item
        that occurs in any transaction.
    """
    # frozenset is used so the itemsets are hashable and can live in a set.
    return {frozenset([item]) for transaction in data for item in transaction}
def is_apriori(Ck_item, Lksub1):
    """Check the Apriori (downward-closure) property for one candidate.

    Args:
        Ck_item: Candidate itemset, a ``frozenset`` of items.
        Lksub1: Container of frequent itemsets (``frozenset`` objects) that
            the candidate's subsets are looked up in.

    Returns:
        True if every subset obtained by removing exactly one item from
        ``Ck_item`` is present in ``Lksub1``; False otherwise. An empty
        candidate has no such subsets and therefore yields True.
    """
    return all(Ck_item - frozenset([item]) in Lksub1 for item in Ck_item)
def create_Ck(Lksub1, k):
    """Generate candidate k-itemsets by joining frequent (k-1)-itemsets.

    Two itemsets from ``Lksub1`` are joined when their first ``k - 2`` items
    (in sorted order) are equal. The union is kept only if ``is_apriori``
    confirms that all of its one-item-removed subsets are in ``Lksub1``.

    Args:
        Lksub1: Set of frequent itemsets (``frozenset`` objects) from the
            previous level.
        k: Size of the candidates to generate.

    Returns:
        A set of ``frozenset`` candidates.
    """
    prefix_len = k - 2
    # Sort every itemset once up front rather than on each pair comparison.
    entries = [(itemset, sorted(itemset)) for itemset in Lksub1]

    Ck = set()
    for idx, (set_a, items_a) in enumerate(entries):
        # Start after idx: each unordered pair is examined exactly once and
        # an itemset is never joined with itself.
        for set_b, items_b in entries[idx + 1:]:
            if items_a[:prefix_len] == items_b[:prefix_len]:
                Ck_item = set_a | set_b
                if is_apriori(Ck_item, Lksub1):
                    Ck.add(Ck_item)
    return Ck
def generate_Lk_by_Ck(data, Ck, min_support):
    """Keep the candidates from ``Ck`` whose support reaches ``min_support``.

    Support of a candidate is the fraction of transactions in ``data`` that
    contain it.

    Args:
        data: Sized collection of transactions; each transaction is an
            iterable accepted by ``frozenset.issubset``.
        Ck: Iterable of candidate itemsets (``frozenset`` objects).
        min_support: Minimum support (a fraction) a candidate must reach.

    Returns:
        A tuple ``(Lk, support_data)`` where ``Lk`` is the set of frequent
        itemsets and ``support_data`` maps each of them to its support.
    """
    len_data = len(data)
    item_count = {}
    for transaction in data:
        for item in Ck:
            if item.issubset(transaction):
                item_count[item] = item_count.get(item, 0) + 1

    # item_count is non-empty only if data is non-empty, so len_data > 0
    # whenever the division below is evaluated.
    support_data = {}
    for item, count in item_count.items():
        support = count / len_data
        if support >= min_support:
            support_data[item] = support
    return set(support_data), support_data


def apriori(data, min_support=0.5):
    """Mine all frequent itemsets from ``data`` level by level.

    Args:
        data: Iterable of transactions, each an iterable of hashable items.
        min_support: Minimum support (a fraction) an itemset must reach.
            Defaults to 0.5.

    Returns:
        A tuple ``(L, support_data)``. ``L`` is a list whose element at
        index ``i`` is the set of frequent ``(i + 1)``-itemsets; the first
        element is always present, even when it is empty. ``support_data``
        maps every frequent itemset of every level to its support.
    """
    # Materialize once so that one-shot iterables are not exhausted before
    # the counting passes run.
    D = [set(transaction) for transaction in data]
    C1 = create_C1(D)
    L1, support_data = generate_Lk_by_Ck(D, C1, min_support)

    L = [L1]
    Lksub1 = L1
    k = 2
    while True:
        Ck = create_Ck(Lksub1, k)
        Lk, supK = generate_Lk_by_Ck(D, Ck, min_support)
        if not Lk:
            break
        # Keep the supports of this level too, not only those of level 1.
        support_data.update(supK)
        L.append(Lk)
        Lksub1 = Lk
        k += 1
    return L, support_data
# Script entry point: load the transactions, mine them with the default
# minimum support, and print the frequent itemsets and their supports.
if __name__ == '__main__':
    file_path = 'your_file_path.csv'  # Replace with the path to your data file.
    data = load_data(file_path)
    L, support_data = apriori(data)
    print("频繁项集:", L)
    print("支持度数据:", support_data)