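# Association-rule mining on the bread_basket dataset with efficient_apriori.
# One-line dataset format conversion:
# transactions = list(data.groupby('Transaction').agg(lambda x: set(x.Item.values))['Item'])
# Full code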
from efficient_apriori import apriori
import sqlalchemy as sql
import pandas as pd
# Load the raw purchase records from the MySQL `bread_basket` table.
# NOTE(review): the connection URL embeds credentials in source; consider
# moving them to configuration.
engine = sql.create_engine('mysql+pymysql://root:passwd@localhost/wucai')
query = 'SELECT * FROM bread_basket'
data = pd.read_sql_query(query, engine)

# Normalise item names to lower case so differently-cased spellings of the
# same item are counted together.
data['Item'] = data['Item'].str.lower()

# Discard rows whose item is the literal placeholder string 'none'.
data = data[data['Item'] != 'none']

# One-dimensional Series with Transaction as the index and Item as the value.
# It is not referenced by the mining step below.
orders_series = data.set_index('Transaction')['Item']

# Reshape into the input passed to apriori(): a list holding one set of
# distinct items per Transaction. The 'Item' column is selected *before*
# aggregating so the aggregation function receives each group's Item values
# directly, instead of depending on how DataFrameGroupBy.agg dispatches a
# lambda (per column vs. per sub-frame).
transactions = list(data.groupby('Transaction')['Item'].agg(set))

# Mine frequent itemsets and association rules with the given minimum
# support (0.02) and minimum confidence (0.5) thresholds.
itemsets, rules = apriori(transactions, min_support=0.02, min_confidence=0.5)
print('频繁项集:', itemsets)
print('关联规则:', rules)
# ---------- Output ------------------ #
# 频繁项集: {1: {('alfajores',): 344, ('bread',): 3096, ('brownie',): 379, ('cake',): 983, ('coffee',): 4528, ('cookies',): 515, ('farm house',): 371, ('hot chocolate',): 552, ('juice',): 365, ('medialuna',): 585, ('muffin',): 364, ('pastry',): 815, ('sandwich',): 680, ('scandinavian',): 275, ('scone',): 327, ('soup',): 326, ('tea',): 1350, ('toast',): 318, ('truffles',): 192}, 2: {('bread', 'cake'): 221, ('bread', 'coffee'): 852, ('bread', 'pastry'): 276, ('bread', 'tea'): 266, ('cake', 'coffee'): 518, ('cake', 'tea'): 225, ('coffee', 'cookies'): 267, ('coffee', 'hot chocolate'): 280, ('coffee', 'juice'): 195, ('coffee', 'medialuna'): 333, ('coffee', 'pastry'): 450, ('coffee', 'sandwich'): 362, ('coffee', 'tea'): 472, ('coffee', 'toast'): 224}}
# 关联规则: [{cake} -> {coffee}, {cookies} -> {coffee}, {hot chocolate} -> {coffee}, {juice} -> {coffee}, {medialuna} -> {coffee}, {pastry} -> {coffee}, {sandwich} -> {coffee}, {toast} -> {coffee}]