#coding=utf-8
import pandas as pd
# Load the ratings table from the local CSV file.
all_ratings = pd.read_csv(
    "C:/Users/Administrator/Desktop/ml-latest-small/ratings.csv"
)
# Convert the timestamp column (interpreted as seconds) to datetimes.
# The original author notes this step is not really necessary.
all_ratings["timestamp"] = pd.to_datetime(all_ratings["timestamp"], unit="s")
# A rating of 4 or above is flagged as a "like".
all_ratings["like"] = all_ratings["rating"] >= 4

train_num = 200
# Training data: the rows whose userId falls in range(train_num).
train_ratings = all_ratings[all_ratings["userId"].isin(range(train_num))]
# Of the training rows, keep only those flagged as liked.
like_ratings = train_ratings[train_ratings["like"]]

# Which movies each user liked: userId -> frozenset of movieId values.
like_by_user = {
    user_id: frozenset(movie_ids.values)
    for user_id, movie_ids in like_ratings.groupby("userId")["movieId"]
}

# How many likes each movie received (sum of the boolean flag per movieId).
num_like_of_movie = like_ratings.groupby("movieId")[["like"]].sum()

# frequent_itemsets is a dict: the key is the itemset size k, and each value
# is itself a dict.
frequent_itemsets = {}
# NOTE(review): presumably the minimum like count for an itemset to count as
# frequent; its use is outside the visible part of the file -- confirm.
min_support = 50
# Step 1: generate the initial frequent itemsets (one movie per itemset)
frequent_itemsets[1] = dict((frozenset((movie_id,)),
Apriori对消费者订单进行关联分析,分析消费者购物意向
于 2022-08-03 13:47:28 首次发布
本文通过Python的Apriori算法,对消费者订单数据进行深度挖掘,揭示购物篮中的关联规则,以理解消费者的购物习惯和潜在需求。
摘要由CSDN通过智能技术生成