1. 技术路线
(1)爬取B站用户关注Up主列表
使用B站的API获取用户的关注列表数据:
https://api.bilibili.com/x/relation/followings?vmid=<用户UID>
(2)采用简单的ItemCF模型
参考之前的文章《[零基础入门推荐系统(1)]基于用户和基于物品的协同过滤方法(Python代码实现)》,实现如下:
class ItemCF(object):
    """Item-based collaborative filtering.

    Recommends items that are similar to the items a user has already
    interacted with. Similarity is derived from how often two items
    appear together in the same user's item list.

    Attributes set by :meth:`train`:
        user_items: the ``{user: [item, ...]}`` mapping passed to ``train``.
        item_sim_matrix: ``{item_i: {item_j: similarity}}``.
    """

    def train(self, user_items, alpha=0.5, normalization=False):
        """Fit the model and store the item-item similarity matrix.

        :param user_items: {user1: [item1, item2], user2: [item1]}
        :param alpha: popularity exponent forwarded to
            :meth:`improved_item_similarity2`.
        :param normalization: if True, scale every row of the similarity
            matrix so that its largest value is 1.
        :return: the similarity matrix, {item_i: {item_j: sim_ij}}
        """
        self.user_items = user_items
        # improved_item_similarity2 is the variant in use; item_similarity
        # and improved_item_similarity are alternative similarity measures.
        self.item_sim_matrix = self.improved_item_similarity2(
            user_items, alpha=alpha, normalization=normalization
        )
        return self.item_sim_matrix

    @staticmethod
    def _co_occurrence(user_items, penalize_active_users):
        """Count item co-occurrences and per-item user counts.

        :param user_items: {user1: [item1, item2], user2: [item1]}
        :param penalize_active_users: if True, each co-occurrence contributed
            by a user is weighted by ``1 / log(1 + len(items))`` so that users
            with long item lists contribute less; otherwise the weight is 1.
        :return: ``(C, N)`` where ``C[i][j]`` is the (weighted) number of
            users having both i and j, and ``N[i]`` is the number of item
            lists containing i. Every seen item has a (possibly empty) row
            in ``C``.
        """
        C = dict()
        N = dict()
        for items in user_items.values():
            if penalize_active_users:
                if len(items) == 0:
                    # Nothing to count, and log(1) == 0 would divide by zero.
                    continue
                weight = 1 / math.log(1 + len(items))
            else:
                weight = 1
            for i in items:
                N[i] = N.get(i, 0) + 1
                row = C.setdefault(i, dict())
                for j in items:
                    if i == j:
                        continue
                    row[j] = row.get(j, 0) + weight
        return C, N

    @staticmethod
    def _normalize_rows(W):
        """Scale each row of ``W`` in place so its maximum similarity is 1.

        Rows without any neighbour are left empty (``max`` of an empty
        sequence would raise ``ValueError``).

        :param W: {item_i: {item_j: sim_ij}}
        :return: the same ``W`` object, for convenience.
        """
        for sims in W.values():
            if not sims:
                continue
            max_val = max(sims.values())
            # Only values are reassigned, so the dict size does not change
            # while iterating over its keys.
            for j in sims:
                sims[j] = sims[j] / max_val
        return W

    def improved_item_similarity(self, user_items, normalization=False):
        """Cosine-style item similarity with an active-user penalty.

        ``W[i][j] = C[i][j] / sqrt(N[i] * N[j])`` where each co-occurrence is
        weighted by ``1 / log(1 + number of items of that user)``.

        :param user_items: {user1: [movie1, movie2], user2: [movie1]}
        :param normalization: if True, divide every row by its maximum.
        :return: W: {item1: {item2: sim12, item3: sim13}}
        """
        C, N = self._co_occurrence(user_items, True)
        W = dict()
        for i, related_items in C.items():
            W[i] = {
                j: cij / math.sqrt(N[i] * N[j])
                for j, cij in related_items.items()
            }
        if normalization:
            self._normalize_rows(W)
        return W

    def improved_item_similarity2(self, user_items, alpha=0.5, normalization=False):
        """Item similarity with an asymmetric popularity exponent.

        Solution for the "Harry Potter problem" (very popular items looking
        similar to everything):
        ``W[i][j] = C[i][j] / (N[i] ** (1 - alpha) * N[j] ** alpha)``.
        Co-occurrences carry the same active-user penalty as
        :meth:`improved_item_similarity`.

        :param user_items: {user1: [movie1, movie2], user2: [movie1]}
        :param alpha: exponent applied to the popularity of the candidate
            item j; ``1 - alpha`` is applied to item i.
        :param normalization: if True, divide every row by its maximum.
        :return: W: {item1: {item2: sim12, item3: sim13}}
        """
        C, N = self._co_occurrence(user_items, True)
        W = dict()
        for i, related_items in C.items():
            W[i] = {
                j: cij / (N[i] ** (1 - alpha) * N[j] ** alpha)
                for j, cij in related_items.items()
            }
        if normalization:
            self._normalize_rows(W)
        return W

    def item_similarity(self, user_items, normalization=False):
        """Plain cosine-style item similarity from raw co-occurrence counts.

        ``W[i][j] = C[i][j] / sqrt(N[i] * N[j])`` with every co-occurrence
        counted as 1.

        :param user_items: {user1: [movie1, movie2], user2: [movie1]}
        :param normalization: if True, divide every row by its maximum.
        :return: W: {item1: {item2: sim12, item3: sim13}}
        """
        C, N = self._co_occurrence(user_items, False)
        W = dict()
        for i, related_items in C.items():
            W[i] = {
                j: cij / math.sqrt(N[i] * N[j])
                for j, cij in related_items.items()
            }
        if normalization:
            self._normalize_rows(W)
        return W

    def recommend(self, user, N, K):
        """Recommend items for one user from the user's item history.

        Requires :meth:`train` to have been called first.

        :param user: user key into ``self.user_items``; an unknown user
            yields an empty result.
        :param N: the maximum number of items to recommend
        :param K: the number of most similar items considered for each item
            in the user's history
        :return: list of ``(item, score)`` tuples sorted by descending score,
            where score is the sum of similarities to the history items.
        """
        already_items = set(self.user_items.get(user, set()))
        recommend_items = dict()
        for i in already_items:
            neighbours = sorted(
                self.item_sim_matrix.get(i, dict()).items(),
                key=lambda x: -x[1],
            )[:K]
            for j, sim in neighbours:
                if j in already_items:
                    continue
                recommend_items[j] = recommend_items.get(j, 0) + sim
        return sorted(recommend_items.items(), key=lambda x: -x[1])[:N]

    def recommend_users(self, users, N, K):
        """Run :meth:`recommend` for several users.

        :param users: iterable of user keys
        :param N: the maximum number of items to recommend per user
        :param K: the number of most similar items considered per history item
        :return: dict, {user: [(item, score), ...]}
        """
        return {user: self.recommend(user, N, K) for user in users}
————————————————
版权声明:本文为CSDN博主「rosefunR」的原创文章,遵循CC 4.0 BY-SA版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/rosefun96/article/details/109107812
2. 效果展现
(1)根据喜欢的Up主查询可能喜欢的其他Up主
比如喜欢象棋,输入
推荐结果:
(2)输入个人的B站 UID(B站身份ID)
比如,输入 碧诗的UID 2,推荐结果: