推荐系统 python 简单化

看了不少的推荐系统,感觉都比较复杂,最后整理了一下,变成简单形式的。

#-*- coding: utf-8 -*-
'''
Created on 2020-12-20
'''
import sys
import math
from texttable import Texttable
import os


# 计算余弦距离
def getCosDist(user1, user2):
    sum_x = 0.0
    sum_y = 0.0
    sum_xy = 0.0
    for key1 in user1:
        for key2 in user2:
            if key1[0] == key2[0]:
                sum_x += key1[1] * key1[1]
                sum_y += key2[1] * key2[1]
                sum_xy += key1[1] * key2[1]
    if sum_xy == 0.0:
        return 0
    demo = math.sqrt(sum_x * sum_y)
    return sum_xy / demo


# 读取文件,读取以行为单位,每一行是列表里的一个元素
def readFile(filename):
    contents = []
    path = os.path.join('Data', filename)
    f = open(path, "r")
    contents = f.readlines()
    f.close()
    return contents


# 数据格式化为二维数组
def getRatingInfo(ratings):
    rates = []
    for line in ratings:
        rate = line.split("::")
        rates.append([int(rate[0]), int(rate[1]), int(rate[2])])
    return rates


# 生成用户评分数据结构
def getUserScoreDataStructure(rates):
    # userDict[2]=[(1,5),(4,2)].... 表示用户2对电影1的评分是5,对电影4的评分是2
    userDict = {}
    itemUser = {}
    for k in rates:
        user_rank = (k[1], k[2])
        if k[0] in userDict:
            userDict[k[0]].append(user_rank)
        else:
            userDict[k[0]] = [user_rank]

        if k[1] in itemUser:
            itemUser[k[1]].append(k[0])
        else:
            itemUser[k[1]] = [k[0]]
    return userDict, itemUser


# 计算与指定用户最相近的邻居
def getNearestNeighbor(userId, userDict, itemUser):
    neighbors = []
    for item in userDict[userId]:
        for neighbor in itemUser[item[0]]:
            if neighbor != userId and neighbor not in neighbors:
                neighbors.append(neighbor)
    neighbors_dist = []
    for neighbor in neighbors:
        dist = getCosDist(userDict[userId], userDict[neighbor])
        neighbors_dist.append([dist, neighbor])
    neighbors_dist.sort(reverse=True)
    return neighbors_dist


# 使用UserFC进行推荐,输入:文件名,用户ID,邻居数量
def recommendByUserFC(filename, userId, k=5):
    # 读取文件
    contents = readFile(filename)

    # 文件格式数据转化为二维数组
    rates = getRatingInfo(contents)

    # 格式化成字典数据
    userDict, itemUser = getUserScoreDataStructure(rates)

    # 找邻居
    neighbors = getNearestNeighbor(userId, userDict, itemUser)[:5]

    # 建立推荐字典
    recommand_dict = {}
    for neighbor in neighbors:
        neighbor_user_id = neighbor[1]
        movies = userDict[neighbor_user_id]
        for movie in movies:
            if movie[0] not in recommand_dict:
                recommand_dict[movie[0]] = neighbor[0]
            else:
                recommand_dict[movie[0]] += neighbor[0]

                # 建立推荐列表
    recommand_list = []
    for key in recommand_dict:
        recommand_list.append([recommand_dict[key], key])
    recommand_list.sort(reverse=True)
    user_movies = [k[0] for k in userDict[userId]]
    return [k[1] for k in recommand_list], user_movies, itemUser, neighbors


# 获取电影的列表
def getMovieList(filename):
    contents = readFile(filename)
    movies_info = {}
    for movie in contents:
        single_info = movie.split("::")
        movies_info[int(single_info[0])] = single_info[1:]
    return movies_info


# 从这里开始运行
if __name__ == '__main__':


    # 获取所有电影的列表
    movies = getMovieList("movies.dat")
    print("输入对应的需要推荐的用户ID:")
    userID = input()
    recommend_list, user_movie, items_movie, neighbors = recommendByUserFC("ratings.dat", int(userID), 80)
    neighbors_id = [i[1] for i in neighbors]
    table = Texttable()
    table.set_deco(Texttable.HEADER)
    table.set_cols_dtype(['t', 't', 't'])
    table.set_cols_align(["l", "l", "l"])
    rows = []
    rows.append([u"movie name", u"release", u"category"])
    for movie_id in recommend_list[:20]:
        from_user = []
        for user_id in items_movie[movie_id]:
            if user_id in neighbors_id:
                from_user.append(user_id)
        rows.append([movies[movie_id][0], movies[movie_id][1], ""])
    table.add_rows(rows)
    print(table.draw())

 

完整的代码下载如下

链接:https://pan.baidu.com/s/1qG8RYec9ZZEmQwZNekjznw
提取码:22cm
复制这段内容后打开百度网盘手机App,操作更方便哦

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值