数据准备
源数据:
数据处理:将源数据转变成类似热力表的数据:
数据库处理:
-- One row per buyer: the buyer ID plus a comma-separated list (column "sp")
-- of every product code found for that buyer in table gl.
SELECT
    buyer.买家ID,
    REPLACE(
        -- STUFF(..., 1, 1, '') removes the leading comma left by the concatenation.
        STUFF(
            (
                -- FOR XML PATH('') concatenates the ',' + code values into one string.
                SELECT ',' + item.商品代号
                FROM gl AS item
                WHERE item.买家ID = buyer.买家ID
                FOR XML PATH('')
            ),
            1, 1, ''
        ),
        ' ', ''  -- remove every space from the joined list
    ) AS sp
FROM gl AS buyer
GROUP BY buyer.买家ID
剔除批发客户数据(选择的商品SKU数据较多),得到最终数据:
转化成模型数据
安装 mlxtend:conda install -c conda-forge mlxtend
import pandas as ps
from sqlalchemy import create_engine
from mlxtend.preprocessing import TransactionEncoder
# Load the table "rlb" from SQL Server, one-hot encode its item columns with
# mlxtend's TransactionEncoder, and write the resulting 0/1 matrix to glfx.xlsx.

# NOTE(review): the credentials are hard-coded in the connection URL; consider
# reading them from configuration or the environment instead.
engine = create_engine(
    'mssql+pymssql://sa:123456@127.0.0.1:1433/fenben',
    echo=True,  # log every SQL statement that is issued
)

# Open the connection in a ``with`` block so it is always released; the
# previous bare ``engine.connect()`` call opened a connection that was never
# used or closed.
with engine.connect() as connection:
    data = ps.read_sql('select * from rlb', connection)

# Keep the columns at positions 1..21, skipping the first column
# (presumably the buyer ID -- verify against the table layout).
fdata = data.iloc[:, 1:22]

# Convert the DataFrame to a plain array; axis=1 makes dropna work column-wise.
# NOTE(review): this drops every column that contains at least one missing
# value, so shorter transactions remove whole item columns for all rows --
# confirm that this is intended.
dataset = fdata.dropna(axis=1).values

# Learn the set of distinct items, then encode each row as a boolean vector.
te = TransactionEncoder()
te_ary = te.fit(dataset).transform(dataset)
print(te.columns_)
print(te_ary)

# Turn the boolean matrix into 0/1 integers, labelled with the item names.
sdata = ps.DataFrame(te_ary.astype("int"), columns=te.columns_)
print(sdata)

# ExcelWriter.save() was removed in pandas 2.0; the context manager saves and
# closes the workbook, including when to_excel raises.
with ps.ExcelWriter('glfx.xlsx') as writer:
    sdata.to_excel(writer)