Recommender Systems for Beginners (7) -- Top-K Recommendation with the Surprise Library

Previous: Recommender Systems for Beginners (6) -- t-SNE
Next: Recommender Systems for Beginners (8) -- Top-K Recommendation with Graph Embedding (network embeddings)


Introduction:

Why has industry moved away from optimizing rating prediction and toward optimizing ranked lists?

  1. The main job of a recommender system is to proactively help users find the information they need most, and this is usually cast as an optimization problem. For Netflix-style movie recommendation, many users rate a movie after watching it to express how satisfied they are. The optimization then needs a loss function; here it is the root-mean-square error (RMSE) between the predicted and actual ratings, used to estimate how satisfied a user would be if we recommended a given movie. But there is a gap: what the user actually wants is a ranking from most to least satisfying (the N most satisfying items are the famous top-N). In many cases the RMSE drops while the top-N ranking does not improve, or even gets worse; the recommendation objective and the loss function are simply not aligned (the toy sketch after this list makes that concrete).
  2. Rating prediction cares mainly about the score itself, i.e. keeping the error between the predicted and true scores as small as possible, whereas top-N recommendation focuses on producing a list of N items that match the user's tastes. Amazon's scientists argue that top-N recommendation is closer to the real need. For example, predicting a movie you love (true rating 4.5) as merely okay (predicted 3.5) is worse for the user than predicting a movie you find merely okay (true rating 3.5) as rather poor (predicted 2.5). A recommender that predicts ratings well across all movies is not necessarily good at ranking the top N movies.
  3. A piece of news from March 2017, "Goodbye Stars, Hello Thumbs Up: Netflix Unveils New Rating System": Netflix announced it would drop the five-star rating system in favor of a thumbs-up/thumbs-down (like/dislike) binary, effective that April.
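
A minimal toy sketch of that mismatch (the numbers below are made up for a single user, purely for illustration): the model with the much lower RMSE can still put the wrong item at the top of the list.

import math

# Hypothetical true ratings and two models' predictions for one user.
true_ratings = {'A': 5.0, 'B': 4.0, 'C': 1.0}
model_1 = {'A': 3.0, 'B': 2.0, 'C': 1.0}  # large errors, but the order A > B > C is preserved
model_2 = {'A': 4.4, 'B': 4.5, 'C': 1.0}  # small errors, but A and B are swapped at the top

def rmse(pred, truth):
    return math.sqrt(sum((pred[i] - truth[i]) ** 2 for i in truth) / len(truth))

def ranking(scores):
    return [item for item, _ in sorted(scores.items(), key=lambda x: x[1], reverse=True)]

print('model_1: RMSE=%.3f ranking=%s' % (rmse(model_1, true_ratings), ranking(model_1)))  # RMSE=1.633, ['A', 'B', 'C']
print('model_2: RMSE=%.3f ranking=%s' % (rmse(model_2, true_ratings), ranking(model_2)))  # RMSE=0.451, ['B', 'A', 'C']
# model_2 wins on RMSE yet ranks the user's favorite item second: a lower RMSE does not imply a better top-N list.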

For more on non-negative matrix factorization, see the separate post: Non-negative Matrix Factorization (NMF).

Train and predict with algorithms from the Surprise library such as SVD, NMF, and ItemKNN:


def main():
    rec = 'SVD'
    threshold = 4
    topK = 10
    test_data_path = 'E:/Workspace/PyCharm/BiNE-master/data/1m/ratings_test.dat'
    # First train an SVD algorithm on the movielens dataset.
    print("load data...")
    # data = Dataset.load_builtin('ml-1m')
    # The pre-split test file holds 40% of the ratings; test_size is only used
    # by the commented-out train_test_split on the built-in dataset.
    test_size = 0.4
    # trainset, testset = train_test_split(data, test_size=test_size)

    # path to dataset file
    file_path = os.path.expanduser('E:/Workspace/PyCharm/BiNE-master/data/1m/ratings_train.dat')
    reader = Reader(line_format='user item rating', sep='\t')
    data = Dataset.load_from_file(file_path, reader=reader)
    trainset = data.build_full_trainset()

    test_user, test_item, test_rate = read_data(test_data_path)
    print("test size %.1f..." % test_size)
    print("training...")

    sim_options = {'name': 'cosine',
                   'user_based': False  # compute similarities between items
                   }
    if rec == 'NMF':
        algo = NMF()
    elif rec == 'SVD':
        algo = SVD()
    else:
        algo = KNNBaseline(sim_options=sim_options)

    train_start = time.time()
    algo.fit(trainset)
    train_end = time.time()
    print('train time:%.1f s' % (train_end - train_start))

    # Then predict ratings for all pairs (u, i) that are NOT in the training set.
    # testset = trainset.build_anti_testset()
    # predictions = algo.test(testset)
    test_end = time.time()
    print('test time:%.1f s' % (test_end - train_end))  # ~0 s while the test step above stays commented out

    # top_n_est, true_ratings = get_top_n(predictions, n=10, threshold = threshold)

    f1, map, mrr, mndcg = evaluate_model_new(algo, test_user, test_item, test_rate, topK)
    eval_end = time.time()
    print('evaluate time:%.1f s' % (eval_end - test_end))
    print("algorithm : %s" % rec)
    print('recommendation metrics: F1 : %0.4f, NDCG : %0.4f, MAP : %0.4f, MRR : %0.4f' % (f1, mndcg, map, mrr))

    '''
    # Print the recommended items for each user
    for uid, user_ratings in top_n_est.items():
        print(uid, [iid for (iid, _) in user_ratings])
    print("#" * 150)
    for uid, user_ratings in true_ratings.items():
        print(uid, [iid for (iid, _) in user_ratings])
    '''

Evaluate the four metrics on the obtained top-K lists:

def evaluate_model_new(model, test_user, test_item, test_rate, top_n):
    recommend_dict = {}
    for u in test_user:
        recommend_dict[u] = {}
        for i in test_item:
            # r_ui is only a placeholder true rating; we just keep the estimate.
            pred = model.predict(str(u), str(i), r_ui=4)
            recommend_dict[u][i] = round(pred.est, 4)

    precision_list = []
    recall_list = []
    ap_list = []
    ndcg_list = []
    rr_list = []

    for u in test_user:
        # Top-N predicted items vs. all test items, both sorted by rating (descending).
        tmp_r = sorted(recommend_dict[u].items(), key=lambda x: x[1], reverse=True)[:top_n]
        tmp_t = sorted(test_rate[u].items(), key=lambda x: x[1], reverse=True)
        tmp_r_list = []
        tmp_t_list = []
        for (item, rate) in tmp_r:
            tmp_r_list.append(item)

        for (item, rate) in tmp_t:
            tmp_t_list.append(item)
        print(tmp_r_list, "-->", tmp_t_list)

        pre, rec = precision_and_racall(tmp_r_list, tmp_t_list)
        ap = AP(tmp_r_list, tmp_t_list)
        rr = RR(tmp_r_list, tmp_t_list)
        ndcg = nDCG(tmp_r_list, tmp_t_list)
        precision_list.append(pre)
        recall_list.append(rec)
        ap_list.append(ap)
        rr_list.append(rr)
        ndcg_list.append(ndcg)
    precision = sum(precision_list) / len(precision_list)
    recall = sum(recall_list) / len(recall_list)
    # print(precision, recall)
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) != 0 else 0
    map = sum(ap_list) / len(ap_list)
    mrr = sum(rr_list) / len(rr_list)
    mndcg = sum(ndcg_list) / len(ndcg_list)
    return f1, map, mrr, mndcg

Metric functions: F1, NDCG, MAP, MRR


def nDCG(ranked_list, ground_truth):
    dcg = 0
    idcg = IDCG(len(ground_truth))
    for i in range(len(ranked_list)):
        id = ranked_list[i]
        if id not in ground_truth:
            continue
        rank = i + 1
        dcg += 1 / math.log(rank + 1, 2)
    return dcg / idcg if idcg != 0 else 0


def IDCG(n):
    idcg = 0
    for i in range(n):
        idcg += 1 / math.log(i + 2, 2)
    return idcg


def AP(ranked_list, ground_truth):
    hits, sum_precs = 0, 0.0
    for i in range(len(ranked_list)):
        id = ranked_list[i]
        if id in ground_truth:
            hits += 1
            sum_precs += hits / (i + 1.0)
    if hits > 0:
        return sum_precs / len(ground_truth)
    else:
        return 0.0


def RR(ranked_list, ground_list):
    for i in range(len(ranked_list)):
        id = ranked_list[i]
        if id in ground_list:
            return 1 / (i + 1.0)
    return 0


def precision_and_racall(ranked_list, ground_list):
    hits = 0
    for i in range(len(ranked_list)):
        id = ranked_list[i]
        if id in ground_list:
            hits += 1
    pre = hits / (1.0 * len(ranked_list)) if len(ranked_list) != 0 else 0
    rec = hits / (1.0 * len(ground_list)) if len(ground_list) != 0 else 0
    return pre, rec
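
As a quick sanity check of the metric functions above (assuming they are defined in the same module, with math imported as in the full code below), here is a tiny hand-checkable case; the expected values in the comments are worked out by hand:

# Relevant items for this toy user are 'a' and 'c'; the model recommends four items.
ranked = ['a', 'b', 'c', 'd']
truth = ['a', 'c']

print(precision_and_racall(ranked, truth))  # (0.5, 1.0): 2 of 4 recommendations hit, and both relevant items are found
print(RR(ranked, truth))                    # 1.0: the first relevant item sits at rank 1
print(AP(ranked, truth))                    # (1/1 + 2/3) / 2 = 0.8333...
print(nDCG(ranked, truth))                  # (1/log2(2) + 1/log2(4)) / (1/log2(2) + 1/log2(3)) = 0.9197...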

Full code

#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'LDD'

import sys
import numpy as np
import time
import random
import math
import os
from surprise.model_selection import KFold
from surprise import accuracy, KNNBasic, Reader
from collections import defaultdict
from surprise import SVD
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from surprise import NMF, KNNBaseline


def get_top_n(predictions, n=10, threshold = 3.5):
    '''Return the top-N recommendation for each user from a set of predictions.

    Args:
        predictions(list of Prediction objects): The list of predictions, as
            returned by the test method of an algorithm.
        n(int): The number of recommendations to output for each user. Default
            is 10.
        threshold(float): Only predictions with an estimated rating above this
            value are kept. Default is 3.5.

    Returns:
    A dict where keys are user (raw) ids and values are lists of tuples:
        [(raw item id, rating estimation), ...] of size n.
    '''

    # First map the predictions to each user.
    top_n_est = defaultdict(list)
    true_ratings = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n_est[uid].append((iid, est))
        true_ratings[uid].append((iid, true_r))

    # Then sort the predictions for each user and retrieve the k highest ones.
    for uid, user_ratings in top_n_est.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        user_ratings = [x for x in user_ratings if x[1] > threshold]
        top_n_est[uid] = user_ratings[:n]       # top n
        # add 0 if less than n
        est_len = len(top_n_est[uid])
        if est_len < n:
            for i in range(est_len, n):
                top_n_est[uid].append(('0', 0)) # append 0 if not enough
    # Then sort the true ratings for each user and retrieve the k highest ones.
    for uid, user_ratings in true_ratings.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        true_ratings[uid] = [x for x in user_ratings if x[1] > threshold]          # len

    return top_n_est, true_ratings


def nDCG(ranked_list, ground_truth):
    dcg = 0
    idcg = IDCG(len(ground_truth))
    for i in range(len(ranked_list)):
        id = ranked_list[i]
        if id not in ground_truth:
            continue
        rank = i + 1
        dcg += 1 / math.log(rank + 1, 2)
    return dcg / idcg if idcg != 0 else 0


def IDCG(n):
    idcg = 0
    for i in range(n):
        idcg += 1 / math.log(i + 2, 2)
    return idcg


def AP(ranked_list, ground_truth):
    hits, sum_precs = 0, 0.0
    for i in range(len(ranked_list)):
        id = ranked_list[i]
        if id in ground_truth:
            hits += 1
            sum_precs += hits / (i + 1.0)
    if hits > 0:
        return sum_precs / len(ground_truth)
    else:
        return 0.0


def RR(ranked_list, ground_list):
    for i in range(len(ranked_list)):
        id = ranked_list[i]
        if id in ground_list:
            return 1 / (i + 1.0)
    return 0


def precision_and_racall(ranked_list, ground_list):
    hits = 0
    for i in range(len(ranked_list)):
        id = ranked_list[i]
        if id in ground_list:
            hits += 1
    pre = hits / (1.0 * len(ranked_list)) if len(ranked_list) != 0 else 0
    rec = hits / (1.0 * len(ground_list)) if len(ground_list) != 0 else 0
    return pre, rec


def evaluate(top_n_est, true_ratings):
    precision_list = []
    recall_list = []
    ap_list = []
    ndcg_list = []
    rr_list = []

    for u, user_ratings in top_n_est.items():
        tmp_r = user_ratings  # [('302', 4.2889227920390836), ('258', 3.9492992642799027)]
        tmp_t = true_ratings.get(u)
        tmp_r_list = []
        tmp_t_list = []
        for (item, rate) in tmp_r:
            tmp_r_list.append(item)

        for (item, rate) in tmp_t:
            tmp_t_list.append(item)
        print(tmp_r_list, "-->", tmp_t_list)

        pre, rec = precision_and_racall(tmp_r_list, tmp_t_list)
        ap = AP(tmp_r_list, tmp_t_list)
        rr = RR(tmp_r_list, tmp_t_list)
        ndcg = nDCG(tmp_r_list, tmp_t_list)
        precision_list.append(pre)
        recall_list.append(rec)
        ap_list.append(ap)
        rr_list.append(rr)
        ndcg_list.append(ndcg)
    precision = sum(precision_list) / len(precision_list)
    recall = sum(recall_list) / len(recall_list)
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) != 0 else 0
    map = sum(ap_list) / len(ap_list)
    mrr = sum(rr_list) / len(rr_list)
    mndcg = sum(ndcg_list) / len(ndcg_list)
    return f1, map, mrr, mndcg


def evaluate_model_new(model, test_user, test_item, test_rate, top_n):
    recommend_dict = {}
    for u in test_user:
        recommend_dict[u] = {}
        for i in test_item:
            # r_ui is only a placeholder true rating; we just keep the estimate.
            pred = model.predict(str(u), str(i), r_ui=4)
            recommend_dict[u][i] = round(pred.est, 4)

    precision_list = []
    recall_list = []
    ap_list = []
    ndcg_list = []
    rr_list = []

    for u in test_user:
        # Top-N predicted items vs. all test items, both sorted by rating (descending).
        tmp_r = sorted(recommend_dict[u].items(), key=lambda x: x[1], reverse=True)[:top_n]
        tmp_t = sorted(test_rate[u].items(), key=lambda x: x[1], reverse=True)
        tmp_r_list = []
        tmp_t_list = []
        for (item, rate) in tmp_r:
            tmp_r_list.append(item)

        for (item, rate) in tmp_t:
            tmp_t_list.append(item)
        print(tmp_r_list, "-->", tmp_t_list)

        pre, rec = precision_and_racall(tmp_r_list, tmp_t_list)
        ap = AP(tmp_r_list, tmp_t_list)
        rr = RR(tmp_r_list, tmp_t_list)
        ndcg = nDCG(tmp_r_list, tmp_t_list)
        precision_list.append(pre)
        recall_list.append(rec)
        ap_list.append(ap)
        rr_list.append(rr)
        ndcg_list.append(ndcg)
    precision = sum(precision_list) / len(precision_list)
    recall = sum(recall_list) / len(recall_list)
    # print(precision, recall)
    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) != 0 else 0
    map = sum(ap_list) / len(ap_list)
    mrr = sum(rr_list) / len(rr_list)
    mndcg = sum(ndcg_list) / len(ndcg_list)
    return f1, map, mrr, mndcg


def read_data(filename):
    users, items, rates = set(), set(), {}
    with open(filename, "r") as fin:
        line = fin.readline()
        while line:
            user, item, rate = line.strip().split()
            if rates.get(user) is None:
                rates[user] = {}
            rates[user][item] = float(rate)
            users.add(user)
            items.add(item)
            line = fin.readline()
    return users, items, rates


def main():
    rec = 'SVD'
    threshold = 4
    topK = 10
    test_data_path = 'E:/Workspace/PyCharm/BiNE-master/data/1m/ratings_test.dat'
    # First train an SVD algorithm on the movielens dataset.
    print("load data...")
    # data = Dataset.load_builtin('ml-1m')
    # The pre-split test file holds 40% of the ratings; test_size is only used
    # by the commented-out train_test_split on the built-in dataset.
    test_size = 0.4
    # trainset, testset = train_test_split(data, test_size=test_size)

    # path to dataset file
    file_path = os.path.expanduser('E:/Workspace/PyCharm/BiNE-master/data/1m/ratings_train.dat')
    reader = Reader(line_format='user item rating', sep='\t')
    data = Dataset.load_from_file(file_path, reader=reader)
    trainset = data.build_full_trainset()

    test_user, test_item, test_rate = read_data(test_data_path)
    print("test size %.1f..." % test_size)
    print("training...")

    sim_options = {'name': 'cosine',
                   'user_based': False  # compute similarities between items
                   }
    if rec == 'NMF':
        algo = NMF()
    elif rec == 'SVD':
        algo = SVD()
    else:
        algo = KNNBaseline(sim_options=sim_options)

    train_start = time.time()
    algo.fit(trainset)
    train_end = time.time()
    print('train time:%.1f s' % (train_end - train_start))

    # Then predict ratings for all pairs (u, i) that are NOT in the training set.
    # testset = trainset.build_anti_testset()
    # predictions = algo.test(testset)
    test_end = time.time()
    print('test time:%.1f s' % (test_end - train_end))  # ~0 s while the test step above stays commented out

    # top_n_est, true_ratings = get_top_n(predictions, n=10, threshold = threshold)

    f1, map, mrr, mndcg = evaluate_model_new(algo, test_user, test_item, test_rate, topK)
    eval_end = time.time()
    print('evaluate time:%.1f s' % (eval_end - test_end))
    print("algorithm : %s" % rec)
    print('recommendation metrics: F1 : %0.4f, NDCG : %0.4f, MAP : %0.4f, MRR : %0.4f' % (f1, mndcg, map, mrr))

    '''
    # Print the recommended items for each user
    for uid, user_ratings in top_n_est.items():
        print(uid, [iid for (iid, _) in user_ratings])
    print("#" * 150)
    for uid, user_ratings in true_ratings.items():
        print(uid, [iid for (iid, _) in user_ratings])
    '''


if __name__ == "__main__":
    sys.exit(main())
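
The file paths above are specific to my machine. As a minimal alternative sketch (assuming Surprise is installed and allowed to download its built-in data), the same kind of pipeline can be exercised on the built-in ml-100k dataset with Surprise's own train/test split; this only reports rating accuracy, but the resulting predictions can also be fed to the get_top_n helper above:

from surprise import SVD, Dataset, accuracy
from surprise.model_selection import train_test_split

# Download (on first use) and load the built-in MovieLens-100k dataset.
data = Dataset.load_builtin('ml-100k')
trainset, testset = train_test_split(data, test_size=0.4)

algo = SVD()
algo.fit(trainset)
predictions = algo.test(testset)  # a list of Prediction objects
accuracy.rmse(predictions)        # prints RMSE on the held-out 40%

# Raw user and item ids are strings in the built-in dataset.
print(algo.predict('196', '302').est)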
