Collaborative Filtering

生成示例数据

import numpy as np
import pandas as pd

# Toy ratings keyed by CID; each value lists that CID's (MID, SCORE) pairs.
scores_by_cid = {
    '1': [('101', 5.0), ('102', 3.0), ('103', 2.5)],
    '2': [('101', 2.0), ('102', 2.5), ('103', 5.0), ('104', 2.0)],
    '3': [('101', 2.0), ('104', 4.0), ('105', 4.5), ('107', 5.0)],
    '4': [('101', 5.0), ('103', 3.0), ('104', 4.5), ('106', 4.0)],
    '5': [('101', 4.0), ('102', 3.0), ('103', 2.0), ('104', 4.0),
          ('105', 3.5), ('106', 4.0)],
}

# Flatten into one [CID, MID, SCORE] row per rating, keeping insertion order.
raw = [
    [cid, mid, score]
    for cid, pairs in scores_by_cid.items()
    for mid, score in pairs
]

df = pd.DataFrame(raw, columns=['CID', 'MID', 'SCORE'])
df.head()
  CID  MID  SCORE
0   1  101    5.0
1   1  102    3.0
2   1  103    2.5
3   2  101    2.0
4   2  102    2.5

使用scikit-surprise

Install the package first: pip install scikit-surprise

from surprise import Dataset
from surprise import KNNBasic, SlopeOne, SVD, SVDpp
from surprise import Reader
from surprise.model_selection import train_test_split, cross_validate

# Declare the rating scale (1 to 5) that surprise should assume for the data.
reader = Reader(rating_scale=(1, 5))

# load_from_df takes the columns in user, item, rating order.
rating_columns = ['CID', 'MID', 'SCORE']
data = Dataset.load_from_df(df[rating_columns], reader)

# Hold out 20% of the ratings for evaluation. No random_state is given, so the
# split differs from run to run.
trainset, testset = train_test_split(data, test_size=0.2)

# User-based KNN with k=3.
knn = KNNBasic(
    k=3,
    sim_options={'user_based': True},
)

# fit() returns the fitted algorithm itself, so fitting and testing as two
# statements is the same as chaining them; test() yields the predictions.
knn.fit(trainset)
knn.test(testset)
Computing the msd similarity matrix...
Done computing similarity matrix.





[Prediction(uid='2', iid='103', r_ui=5.0, est=2.456521739130435, details={'was_impossible': False, 'actual_k': 3}),
 Prediction(uid='5', iid='105', r_ui=3.5, est=3.46875, details={'was_impossible': True, 'reason': 'User and/or item is unkown.'}),
 Prediction(uid='3', iid='105', r_ui=4.5, est=3.46875, details={'was_impossible': True, 'reason': 'User and/or item is unkown.'}),
 Prediction(uid='2', iid='101', r_ui=2.0, est=4.7101449275362315, details={'was_impossible': False, 'actual_k': 3}),
 Prediction(uid='3', iid='104', r_ui=4.0, est=4.166666666666666, details={'was_impossible': False, 'actual_k': 2})]
# Single prediction for user '1' on item '102'. The ids are passed as strings
# because that is how they are stored in df; r_ui is the known rating for
# this pair.
knn.predict('1', '102', r_ui=3)
Prediction(uid='1', iid='102', r_ui=3, est=2.888888888888889, details={'was_impossible': False, 'actual_k': 3})
# Cross-validate on the full toy dataset. The measures and the 5 folds are
# the library defaults, spelled out here for readability.
cross_validate(knn, data, measures=['rmse', 'mae'], cv=5)
{'fit_time': (0.0, 0.0, 0.0, 0.0, 0.0),
 'test_mae': array([0.78095238, 1.07046414, 0.73560167, 0.95088853, 1.35708899]),
 'test_rmse': array([0.93155808, 1.29854187, 0.82083497, 1.36901744, 1.49573969]),
 'test_time': (0.0, 0.0, 0.0, 0.0, 0.0)}

使用movielens——调参

from surprise.model_selection import GridSearchCV

# Tab-separated MovieLens 100k ratings. The path is relative to the working
# directory, so the ml-100k/ folder must be present there.
file_path = 'ml-100k/u.data'
reader = Reader(line_format='user item rating timestamp', sep='\t')
ml = Dataset.load_from_file(file_path, reader)

# Search only over parameters KNNBasic actually uses. 'reg_all' (previously
# listed here with [0.4, 0.6]) is a regularisation term of the
# matrix-factorisation models such as SVD; KNNBasic swallows it through
# **kwargs and ignores it, so it doubled the number of fits without being able
# to influence the model.
param_grid = {'k': [10, 20]}

# GridSearchCV takes the algorithm class (not an instance) and evaluates each
# parameter combination with 3-fold cross-validation.
gcv = GridSearchCV(KNNBasic, param_grid, measures=['rmse', 'mae'], cv=3)
gcv.fit(ml)

# best_params / best_score are dicts keyed by measure name.
print('best params:', gcv.best_params)
print('best score:', gcv.best_score)
best params: {'mae': {'reg_all': 0.4, 'k': 20}, 'rmse': {'reg_all': 0.4, 'k': 20}}
best score: {'mae': 0.7792187768992522, 'rmse': 0.9877429594511901}

svd

# NOTE(review): this instance is not passed to the grid search below, which
# receives the SVD class itself; its n_factors=10 is therefore not one of the
# values being searched.
svd = SVD(n_factors=10)

# Candidate values: 2 factor counts x 3 regularisation strengths.
param_grid = {
    'n_factors': [20, 50],
    'reg_all': [0.05, 0.1, 0.5],
}

# Evaluate each combination with 3-fold cross-validation on RMSE and MAE.
gcv = GridSearchCV(
    SVD,
    param_grid,
    measures=['rmse', 'mae'],
    cv=3,
)
gcv.fit(ml)

# Report the winning parameters and scores, each keyed by measure name.
for label, outcome in (
    ('best params:', gcv.best_params),
    ('best score:', gcv.best_score),
):
    print(label, outcome)
best params: {'mae': {'reg_all': 0.05, 'n_factors': 50}, 'rmse': {'reg_all': 0.05, 'n_factors': 50}}
best score: {'mae': 0.7448516219452261, 'rmse': 0.9416068148189461}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值