apriori算法python实现

import numpy as np

def load_data(file_path):
    """Read comma-separated transactions from a text file.

    Every non-blank line becomes one transaction: the line is stripped of
    surrounding whitespace and then split on commas. Blank (or
    whitespace-only) lines are skipped; otherwise each would turn into a
    bogus ``['']`` transaction that adds an empty-string item and inflates
    the transaction count that support values are divided by.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list of transactions, each a list of item strings.
    """
    data = []
    with open(file_path, 'r') as f:
        # Iterate the file object directly rather than materialising
        # every line up front with readlines().
        for line in f:
            line = line.strip()
            if not line:
                continue
            data.append(line.split(','))
    return data

def create_C1(data):
    """Build the candidate 1-itemsets.

    Args:
        data: Iterable of transactions, each an iterable of hashable items.

    Returns:
        A set containing one single-element ``frozenset`` for every
        distinct item that occurs in any transaction.
    """
    return {frozenset([entry]) for row in data for entry in row}

def is_apriori(Ck_item, Lksub1):
    """Check the downward-closure condition for one candidate itemset.

    Args:
        Ck_item: Candidate itemset (a ``frozenset``).
        Lksub1: Collection of itemsets the candidate's subsets are
            looked up in.

    Returns:
        ``True`` when every subset obtained by removing exactly one
        element from ``Ck_item`` is a member of ``Lksub1``; ``False`` as
        soon as one such subset is missing.
    """
    return all(Ck_item - frozenset([member]) in Lksub1 for member in Ck_item)

def create_Ck(Lksub1, k):
    """Generate candidate k-itemsets by joining frequent (k-1)-itemsets.

    Two itemsets are joined when the first ``k - 2`` elements of their
    sorted forms are equal. The union is kept only if ``is_apriori``
    accepts it, i.e. all of its one-element-removed subsets are in
    ``Lksub1``.

    Each unordered pair of distinct itemsets is visited exactly once. The
    previous nested index loops started the inner index at 1, so they
    joined every pair twice, joined each itemset with itself, and
    re-sorted both operands on every iteration.

    Args:
        Lksub1: Set of frequent (k-1)-itemsets (``frozenset`` objects
            whose elements are mutually orderable).
        k: Size of the candidate itemsets to generate.

    Returns:
        A set of candidate k-itemsets (``frozenset`` objects).
    """
    from itertools import combinations

    # Sort each itemset once up front; the sorted list is only used for
    # the prefix comparison, the frozenset for the union and the lookup.
    prepared = [(itemset, sorted(itemset)) for itemset in Lksub1]
    prefix_len = k - 2

    Ck = set()
    for (set_a, sorted_a), (set_b, sorted_b) in combinations(prepared, 2):
        if sorted_a[0:prefix_len] == sorted_b[0:prefix_len]:
            Ck_item = set_a | set_b
            if is_apriori(Ck_item, Lksub1):
                Ck.add(Ck_item)
    return Ck

def generate_Lk_by_Ck(data, Ck, min_support):
    """Filter candidate itemsets by minimum support.

    Counts, for every candidate in ``Ck``, the number of transactions
    that contain it, and keeps the candidates whose support
    (count / number of transactions) is at least ``min_support``.

    The function returns both the frequent itemsets and their support
    values, because ``apriori`` unpacks the result into two names;
    returning only the set made that unpacking fail.

    Args:
        data: Sized collection of transactions; each transaction is an
            iterable that ``frozenset.issubset`` can test against.
        Ck: Iterable of candidate itemsets (``frozenset`` objects).
        min_support: Minimum support threshold as a fraction.

    Returns:
        A tuple ``(Lk, support_data)`` where ``Lk`` is the set of frequent
        itemsets and ``support_data`` maps each of them to its support.
    """
    len_data = len(data)

    item_count = {}
    for transaction in data:
        for item in Ck:
            if item.issubset(transaction):
                item_count[item] = item_count.get(item, 0) + 1

    # item_count is empty whenever data is empty, so the division below
    # is never reached with len_data == 0.
    support_data = {}
    for item, count in item_count.items():
        support = count / len_data
        if support >= min_support:
            support_data[item] = support

    return set(support_data), support_data

def apriori(data, min_support=0.5):
    """Run the Apriori algorithm and collect all frequent itemsets.

    Starting from the candidate 1-itemsets, the function repeatedly
    generates candidates of the next size from the previous level's
    frequent itemsets and filters them by support. It stops at the first
    level that yields no frequent itemsets.

    The result of ``generate_Lk_by_Ck`` is unpacked as a pair of
    (frequent itemsets, support mapping). The supports of every level are
    merged into the returned mapping; previously the supports of levels
    k >= 2 were computed and then discarded.

    Args:
        data: Iterable of transactions, each an iterable of hashable items.
        min_support: Minimum support threshold as a fraction
            (default 0.5).

    Returns:
        A tuple ``(L, support_data)`` where ``L`` is a list whose element
        at index ``k - 1`` is the set of frequent k-itemsets, and
        ``support_data`` maps each frequent itemset to its support.
    """
    C1 = create_C1(data)
    # Transactions as sets for the subset tests in the support count.
    D = list(map(set, data))

    L1, support_data = generate_Lk_by_Ck(D, C1, min_support)
    Lksub1 = L1.copy()
    L = [Lksub1]

    i = 2
    while True:
        Ci = create_Ck(Lksub1, i)
        Li, supK = generate_Lk_by_Ck(D, Ci, min_support)
        if not Li:
            break
        # Keep this level's supports alongside the 1-itemset supports.
        support_data.update(supK)
        Lksub1 = Li.copy()
        L.append(Lksub1)
        i += 1

    return L, support_data

# Script entry point. The guard must compare the module's __name__ against
# '__main__'; the bare names `name` / 'main' raise NameError at import time.
if __name__ == '__main__':
    file_path = 'your_file_path.csv'  # placeholder: replace with the path to your data file

    # Load the transactions and mine them with the default min_support.
    data = load_data(file_path)
    L, support_data = apriori(data)

    print("频繁项集:", L)
    print("支持度数据:", support_data)

相关推荐
WJX_KOI3 小时前
Open Notebook 一个开源的结合AI的记笔记软件
python
喜欢吃燃面4 小时前
Linux:环境变量
linux·开发语言·学习
0思必得04 小时前
[Web自动化] 反爬虫
前端·爬虫·python·selenium·自动化
嘴贱欠吻!4 小时前
Flutter鸿蒙开发指南(七):轮播图搜索框和导航栏
算法·flutter·图搜索算法
徐徐同学4 小时前
cpolar为IT-Tools 解锁公网访问,远程开发再也不卡壳
java·开发语言·分布式
LawrenceLan4 小时前
Flutter 零基础入门(二十六):StatefulWidget 与状态更新 setState
开发语言·前端·flutter·dart
2301_822382764 小时前
Python上下文管理器(with语句)的原理与实践
jvm·数据库·python
m0_748229994 小时前
Laravel8.X核心功能全解析
开发语言·数据库·php
张祥6422889044 小时前
误差理论与测量平差基础笔记十
笔记·算法·机器学习
喵手4 小时前
Python爬虫实战:从零搭建字体库爬虫 - requests+lxml 实战采集字体网字体信息数据(附 CSV 导出)!
爬虫·python·爬虫实战·零基础python爬虫教学·csv导出·采集字体库数据·字体库字体信息采集