1、加载数据集,按照设定参数进行计算
"whole milk" 全脂牛奶
"other vegetables" 其他蔬菜
"rolls/buns" 面包卷
"yogurt" 酸奶
"soda" 汽水
"bottled water" 瓶装水
R
# Install (once) and load the arules package
# install.packages("arules")
library(arules)

# Apriori requires transaction data (class "transactions"). Two common sources:
#   - the built-in Groceries market-basket dataset (the usual example)
#   - ordinary data converted to the transactions format

# Load the built-in market-basket dataset
data(Groceries)

# List all item labels
itemLabels(Groceries)

# Show the item sets of the first 5 transactions
inspect(head(Groceries, 5))

# Relative frequency of every item, from highest to lowest
print(sort(itemFrequency(Groceries), decreasing = TRUE))

# Bar chart of the 10 most frequent items.
# itemFrequencyPlot() draws as a side effect and only returns the bar
# midpoints, so it is not wrapped in print().
itemFrequencyPlot(Groceries, topN = 10)

# Run the Apriori algorithm
rules <- apriori(
  data = Groceries,
  parameter = list(
    supp = 0.01,      # minimum support: 1%
    conf = 0.3,       # minimum confidence: 30%
    target = "rules"  # mine association rules
  )
)

# Inspect the first 10 rules
inspect(head(rules, 10))
2、查看结果
> inspect(head(rules, 10))
lhs rhs support confidence coverage lift count
1 {hard cheese} => {whole milk} 0.01006609 0.4107884 0.02450432 1.607682 99
2 {butter milk} => {other vegetables} 0.01037112 0.3709091 0.02796136 1.916916 102
3 {butter milk} => {whole milk} 0.01159126 0.4145455 0.02796136 1.622385 114
4 {ham} => {whole milk} 0.01148958 0.4414062 0.02602949 1.727509 113
5 {sliced cheese} => {whole milk} 0.01077783 0.4398340 0.02450432 1.721356 106
6 {oil} => {whole milk} 0.01128622 0.4021739 0.02806304 1.573968 111
7 {onions} => {other vegetables} 0.01423488 0.4590164 0.03101169 2.372268 140
8 {onions} => {whole milk} 0.01209964 0.3901639 0.03101169 1.526965 119
9 {berries} => {yogurt} 0.01057448 0.3180428 0.03324860 2.279848 104
10 {berries} => {other vegetables} 0.01026945 0.3088685 0.03324860 1.596280 101
R
# Rank the rules by lift, strongest associations first, and show the top 5
rules_sorted <- sort(
  rules,
  decreasing = TRUE,
  by = "lift"
)
inspect(head(rules_sorted, n = 5))
> inspect(head(rules_sorted, 5))
lhs rhs support confidence coverage lift
1 {citrus fruit, other vegetables} => {root vegetables} 0.01037112 0.3591549 0.02887646 3.295045
2 {tropical fruit, other vegetables} => {root vegetables} 0.01230300 0.3427762 0.03589222 3.144780
3 {beef} => {root vegetables} 0.01738688 0.3313953 0.05246568 3.040367
4 {citrus fruit, root vegetables} => {other vegetables} 0.01037112 0.5862069 0.01769192 3.029608
5 {tropical fruit, root vegetables} => {other vegetables} 0.01230300 0.5845411 0.02104728 3.020999
count
1 102
2 121
3 171
4 102
5 121
3、散点图
R
# install.packages("arulesViz")  # one-time install
library(arulesViz)

# Scatter plot of support vs. confidence, with lift mapped to colour
plot(
  rules,
  measure = c("support", "confidence"),
  shading = "lift"
)

4、网络图
R
# Graph (network) view of the relationships among the first 10 sorted rules
plot(
  head(rules_sorted, n = 10),
  method = "graph"
)

R
# Keep only the rules whose right-hand side contains "whole milk"
milk_rules <- subset(
  rules,
  subset = rhs %in% "whole milk"
)
inspect(milk_rules)
5、数据转事务
R
# ------------------- Convert raw data to transactions -------------------
# Example raw baskets: one character vector of items per transaction.
# A named list is used instead of data.frame() for two reasons:
#   1. the baskets have different lengths, so data.frame() would stop with
#      "arguments imply differing number of rows";
#   2. as(<data.frame>, "transactions") treats each ROW as a transaction,
#      so T1..T4 as columns would not be read as four baskets.
baskets <- list(
  T1 = c("牛奶", "面包", "鸡蛋"),
  T2 = c("面包", "可乐"),
  T3 = c("牛奶", "可乐", "薯片"),
  T4 = c("牛奶", "面包", "可乐")
)

# Coerce the list to the transactions format (list names become IDs)
trans <- as(baskets, "transactions")

# Run Apriori on the converted transactions
rules2 <- apriori(trans, parameter = list(supp = 0.2, conf = 0.5))
inspect(rules2)

# Show the top 5 rules by lift
inspect(head(sort(rules2, by = "lift", decreasing = TRUE), 5))