1.导包
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import pandas as pd
2.读取文件
# Load the basket spreadsheet from its fixed local path.
my_data = pd.read_excel(
    "D:/棒/数据挖掘/basket.xlsx",
)
# Take an independent copy of every column from position 7 onward, so later
# edits to df_data never touch the frame that was read in.
df_data = my_data.iloc[:, 7:].copy()
# Preview the first rows of the selected columns.
df_data.head()
3.数据显示
# Show summary statistics for the full table that was read in.
my_data.describe()
结果如图:
4.数据处理:
# Lookup table that turns the 'T'/'F' flags into booleans.
dict_data = {'F': False, 'T': True}

# Apply the same flag-to-boolean mapping to each item column in turn.
# Any value that is not a key of dict_data becomes NaN after .map().
for column in (
    'fruitveg',
    'freshmeat',
    'dairy',
    'cannedveg',
    'cannedmeat',
    'frozenmeal',
    'beer',
    'wine',
    'softdrink',
    'fish',
    'confectionery',
):
    df_data[column] = df_data[column].map(dict_data)
结果如下:
5.设置支持度求频繁项集
# Mine the frequent itemsets with a minimum support of 0.1, labelling the
# itemsets with column names instead of column indices.
frequent_itemsets = apriori(
    df_data,
    min_support=0.1,
    use_colnames=True,
)
frequent_itemsets
结果如下:
6.求关联规则并按提升度筛选
# Derive association rules, using a minimum confidence of 0.15.
rules = association_rules(
    frequent_itemsets,
    metric='confidence',
    min_threshold=0.15,
)
# Apply the minimum lift: discard every rule whose lift is below 1.0.
# The mask is negated (rather than written as ">= 1.0") so rows with a
# missing lift are kept, exactly as dropping the "< 1.0" rows would do.
rules = rules.loc[~(rules['lift'] < 1.0)]
# Shorten the column headers, keep only the columns of interest, and show
# the result.
rules = rules.rename(
    columns={
        'antecedents': 'from',
        'consequents': 'to',
        'support': 'sup',
        'confidence': 'conf',
    }
)
rules = rules[['from', 'to', 'sup', 'conf', 'lift']]
rules
结果如下: