针对下列数据集找出所有频繁项集,并使用提升度找出所有关联规则。
# Transaction data set for the exercise: each inner list is one basket of
# items that were purchased together.
item_list = [
    ['牛奶', '面包'],
    ['面包', '尿布', '啤酒', '土豆'],
    ['牛奶', '尿布', '啤酒', '可乐'],
    ['面包', '牛奶', '尿布', '啤酒'],
    ['面包', '牛奶', '尿布', '可乐'],
]
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder
# Frequent-itemset / association-rule mining over a small basket data set.
#
# Pipeline: raw transactions -> one-hot boolean matrix -> apriori frequent
# itemsets -> association rules filtered by lift.

# Each inner list is one transaction (the items bought together).
item_list = [
    ['牛奶', '面包'],
    ['面包', '尿布', '啤酒', '土豆'],
    ['牛奶', '尿布', '啤酒', '可乐'],
    ['面包', '牛奶', '尿布', '啤酒'],
    ['面包', '牛奶', '尿布', '可乐'],
]

# Thresholds used by the two mining steps below.
MIN_SUPPORT = 0.5  # an itemset must occur in at least half of the transactions
MIN_LIFT = 1       # keep rules whose lift is >= 1

# Raw tabular view of the ragged transactions. It is not consumed by the
# mining steps below; kept so the name stays available for inspection.
item_df = pd.DataFrame(item_list)

# One-hot encode the transactions: one column per distinct item, one row per
# transaction.
te = TransactionEncoder()
df_tf = te.fit_transform(item_list)
df = pd.DataFrame(df_tf, columns=te.columns_)

# Select the item columns straight from the encoder rather than from a
# hand-typed list, so the selection cannot drift out of sync with item_list
# (a hand-typed list raises KeyError for a removed item and silently drops a
# newly added one).
df_data = df[list(te.columns_)]

# Frequent itemsets, reported with item names instead of column indices.
frequent_items = apriori(df_data, min_support=MIN_SUPPORT, use_colnames=True)
# NOTE(review): `display` is not imported in the visible part of the file;
# presumably this runs in IPython/Jupyter where it is provided — confirm.
display(frequent_items)

# Association rules derived from the frequent itemsets, filtered on lift.
rules = association_rules(frequent_items, metric='lift', min_threshold=MIN_LIFT)
display(rules)