# 实现购物篮分析
# Prefix every goods entry with ',' so the per-id concatenation below stays comma-delimited
# (assumes the 'Goods' column holds strings -- TODO confirm against where df1 is loaded).
df1['Goods'] = df1['Goods'].apply(lambda x:','+x)
# Group by 'id' and sum, which joins all goods strings bought under the same id into one string
df4=df1.groupby('id').sum()
# Drop the leading ',' and split on ',' so each row holds a list of goods
# NOTE(review): a goods name that itself contains ',' would be split into several items.
df4["Goods"]=df4["Goods"].apply(lambda x:x[1:].split(","))
# 上面的操作是为了得到下面的表：每个 id 对应一个商品列表。
# 下面是关联分析用到的函数。
# One goods list per row of df4; this is the basket list passed to the analysis functions
data=list(df4.Goods)
import copy
def get_all_list(datas):
    """Return every distinct item found in `datas`, in first-seen order.

    Args:
        datas: Iterable of baskets, each an iterable of hashable items.

    Returns:
        A list holding each item once, ordered by first appearance.
    """
    # dict.fromkeys keeps insertion order and deduplicates in O(1) per item,
    # replacing the quadratic "not in list" scan.
    return list(dict.fromkeys(item for basket in datas for item in basket))
def get_C1(all_list, datas, minSupport):
    """Return the single items whose support is strictly above `minSupport`.

    Support of an item is the fraction of baskets in `datas` that contain it,
    rounded to two decimals.

    Args:
        all_list: Candidate items (hashable).
        datas: Sequence of baskets; each basket supports the `in` operator.
        minSupport: Threshold; an item is kept only when support > minSupport.

    Returns:
        List of kept items, in the order they appear in `all_list`.
        An empty list when `datas` is empty.
    """
    total = len(datas)
    if total == 0:
        # No baskets means no item can have any support.
        return []

    # Divide by the number of baskets (not the number of distinct items),
    # so the value is a real frequency in [0, 1].
    dict_count = {
        item: round(sum(1 for basket in datas if item in basket) / total, 2)
        for item in all_list
    }
    return [item for item, value in dict_count.items() if value > minSupport]
def isin(one, two):
    """Return True when every element of `one` is contained in `two`.

    Args:
        one: Iterable of elements to look for.
        two: Container checked with the `in` operator.

    Returns:
        True if all elements of `one` are in `two` (also for an empty `one`),
        otherwise False.
    """
    # all() checks every element; it never answers after only the first one.
    return all(element in two for element in one)
def get_C2(re_list, datas, minSupport):
    """Return the support of every item pair that reaches `minSupport`.

    Pairs are built from `re_list` in positional order (each item with every
    later item). Support of a pair is the fraction of baskets in `datas`
    containing both items, rounded to two decimals.

    Args:
        re_list: Sequence of candidate items.
        datas: Sequence of baskets; each basket supports the `in` operator.
        minSupport: Threshold; pairs with support below it are dropped.

    Returns:
        Dict mapping `str([first, second])` to the pair's support, in pair
        generation order. The key is the string form of the two-item list
        because callers in this file parse it back into a list.
        An empty dict when `datas` is empty.
    """
    from itertools import combinations

    total = len(datas)
    if total == 0:
        # No baskets: nothing can be supported, and avoid dividing by zero.
        return {}

    dict_count = {}
    for first, second in combinations(re_list, 2):
        count = sum(
            1 for basket in datas if first in basket and second in basket
        )
        dict_count[str([first, second])] = round(count / total, 2)

    # Build the filtered dict directly instead of deleting from a deep copy.
    return {
        key: value for key, value in dict_count.items() if value >= minSupport
    }
def get_datas(datas, dict_count, contract):
    """Build one result row per ordered pair (both directions of each pair).

    Args:
        datas: Sequence of baskets, forwarded to `support`.
        dict_count: Dict whose keys are the string form of a two-item list
            (e.g. "['a', 'b']") and whose values are the pair's support.
        contract: Threshold compared against the value returned by
            `support(datas, rule)`; the flag is 0 when that value is below it.

    Returns:
        List of `[rule, pair_support, value, flag]` rows, where `rule` is the
        pair or its reverse, `value` is `support(datas, rule)` and `flag`
        is 1 or 0.

    Raises:
        ValueError, SyntaxError: If a key is not a valid Python literal.
    """
    import ast

    final_list = []
    for key, pair_support in dict_count.items():
        # literal_eval only parses literals, so a key can never execute code
        # the way eval() could; parse once and reuse for both directions.
        pair = ast.literal_eval(key)
        for rule in (pair, pair[::-1]):
            need = support(datas, rule)
            flag = 0 if need < contract else 1
            final_list.append([rule, pair_support, need, flag])
    return final_list
def support(datas, item):
    """Return the confidence of the rule `item[0] --> item[1]`.

    Confidence is the number of baskets containing both items divided by the
    number of baskets containing `item[0]`, rounded to two decimals.

    Args:
        datas: Iterable of baskets; each basket supports the `in` operator.
        item: Two-element sequence `[antecedent, consequent]`.

    Returns:
        The rounded confidence, or 0.0 when no basket contains `item[0]`.
    """
    antecedent, consequent = item[0], item[1]
    antecedent_count = 0
    joint_count = 0
    for basket in datas:
        if antecedent in basket:
            antecedent_count += 1
            if consequent in basket:
                joint_count += 1
    if antecedent_count == 0:
        # Rule never applies; report zero instead of dividing by zero.
        return 0.0
    return round(joint_count / antecedent_count, 2)
# The `data` passed in here is data=list(df4.Goods)
# Distinct goods across all baskets
all_list=get_all_list(data)
# Single goods kept by get_C1 with threshold 0.04
re_list=get_C1(all_list,data,0.04)
# Pairs of the kept goods mapped to their support, filtered by get_C2 with threshold 0.04
C2=get_C2(re_list,data,0.04)
# One row per ordered pair: [pair, support, value from support(), flag]; 0.35 is the flag threshold
need_data=get_datas(data,C2,0.35)
# Column names in English: relation, support, confidence, strong-association flag
table=pd.DataFrame(need_data,columns=["关系","支持度","置信度","是否强关联"])
# Render each two-item pair as the string "first-->second"
table["关系"]=table["关系"].apply(lambda x:x[0]+"-->"+x[1])