Project 3 :挖掘数据集中的关联商品

下面用三种方式实现:efficient_apriori、mlxtend,以及 PySpark 的 FPGrowth。

import pandas as pd
import numpy as np
from efficient_apriori import apriori

# Load the basket data; the CSV has no header row.
data = pd.read_csv('Market_Basket_Optimisation.csv', header = None)
# Missing cells (NaN) become 0 so they can be filtered out below.
data = data.fillna(0)

# One set of items per receipt (CSV row); the 0 placeholders are skipped.
transaction = [
    {cell for cell in row if cell != 0}
    for row in data.itertuples(index=False, name=None)
]

print('总计有{}张小票'.format(data.shape[0]),'\n')

# Mine frequent itemsets and association rules with the given
# minimum support and minimum confidence thresholds.
itemsets, rules = apriori(transaction, min_support = 0.03, min_confidence = 0.3)

print('频繁项集为:\n', itemsets,'\n')
print('关联规则为:\n', rules)

在这里插入图片描述

import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from pandas import DataFrame

# Load the basket data; the CSV has no header row.
data = pd.read_csv('Market_Basket_Optimisation.csv', header = None)
# Missing cells (NaN) become 0 so they can be filtered out below.
data = data.fillna(0)

# Represent each receipt (CSV row) as one comma-separated string of its
# items, skipping the 0 placeholders, so str.get_dummies can one-hot it.
transaction = [
    ','.join(str(cell) for cell in row if cell != 0)
    for row in data.itertuples(index=False, name=None)
]

transactions = DataFrame({'Item':transaction})
# `axis` must be given by keyword (here via `columns=`): passing it
# positionally, as in drop('Item', 1), raises TypeError on pandas >= 2.0.
# The dummies are cast to bool, the dtype mlxtend's apriori expects.
one_hot = (
    transactions.drop(columns='Item')
    .join(transactions.Item.str.get_dummies(','))
    .astype(bool)
)

# Run the association analysis on the one-hot encoded receipts.
frequent = apriori(one_hot, min_support = 0.05, use_colnames = True)
rules = association_rules(frequent, metric = 'lift', min_threshold = 1)

print('总计有{}张小票'.format(one_hot.shape[0]),'\n')
print('频繁项集为:\n', frequent,'\n')
print('关联规则为:\n')
# Bare expression: rendered only when this is the last statement of a
# notebook cell; it prints nothing when run as a plain script.
rules

在这里插入图片描述
在这里插入图片描述

import pandas as pd
import numpy as np
from efficient_apriori import apriori

# Load the basket data; the CSV has no header row.
data = pd.read_csv('c:/Users/10109/Documents/Jupyter notebook/人工智能课程(BI方向)/商业智能和推荐系统/lesson2 挖掘数据中的关联规则/homework/Market_Basket_Optimisation.csv', header = None)
# Missing cells (NaN) become 0 so they can be filtered out below.
data = data.fillna(0)

# One set of items per receipt (CSV row); the 0 placeholders are skipped.
transaction = [
    {cell for cell in row if cell != 0}
    for row in data.itertuples(index=False, name=None)
]

# Turn every receipt into a single-field row `[items]`, i.e. a list that
# holds one list of items, so each row carries exactly one column value.
transactions = [[list(basket)] for basket in transaction]
    
from pyspark import SparkConf
from pyspark.sql import SparkSession
from pyspark.ml.fpm import FPGrowth
import datetime
if __name__ == "__main__":
    # Time the whole run, from session creation through shutdown.
    t1=datetime.datetime.now()
    appname = "FPgrowth"
    master ="local[4]"

    # Spark configuration
    conf = SparkConf().setAppName(appname).setMaster(master)
    spark = SparkSession.builder.config(conf = conf).getOrCreate()

    # try/finally guarantees the session is stopped even if a step fails;
    # previously an exception skipped spark.stop() and leaked the session.
    try:
        # Input: the single-field rows prepared above
        data = transactions
        # Convert to a Spark DataFrame with one column named "items"
        data = spark.createDataFrame(data, ["items"])

        # Build the model
        fp = FPGrowth(minSupport=0.03, minConfidence=0.2)
        # Fit the model
        fpm  = fp.fit(data)
        # Show the first 10 frequent itemsets on the console
        fpm.freqItemsets.show(10)
        # Association rules; show the first 5
        assRule=fpm.associationRules
        assRule.show(5)

        # Convert to a pandas DataFrame
        assRuleDf = assRule.toPandas()
        # Sort by lift, descending
        assRuleDf = assRuleDf.sort_values(by = "lift", ascending = False)
        print('强关联规则:\n',assRuleDf)

        # New antecedent data to predict from
        new_data = spark.createDataFrame([(["milk"], )], ["items"])
        # Predict the consequents for the new antecedent
        print('后项预测:\n',fpm.transform(new_data).first().prediction)
    finally:
        spark.stop()  # shut down Spark
    t2=datetime.datetime.now()
    print('spent ts:',t2-t1)
    # If you hit "'NoneType' object has no attribute 'setCallSite'",
    # restart the kernel.

    # The input data is expected in the following format
    # (each row wraps one list of items):
    #
    #     data_list = [[['r', 'z', 'h', 'k', 'p']],
    #                  [['z', 'y', 'x', 'w', 'v', 'u', 't', 's']],
    #                  [['s', 'x', 'o', 'n', 'r']],
    #                  [['x', 'z', 'y', 'm', 't', 's', 'q', 'e']],
    #                  [['z']],
    #                  [['x', 'z', 'y', 'r', 'q', 't', 'p']]]  # dataset

在这里插入图片描述
在这里插入图片描述

  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值