1.内容简介
所用技术:Scrapy、Django、协同过滤
2.过程
2.1爬虫
使用 Scrapy 爬取豆果美食网站(https://www.douguo.com)的食谱数据
主要代码:
# 在 parse 方法中获取下一页链接并发送请求
def parse(self, response):
    """Handle one recipe listing page.

    Yields one request per recipe detail link found on the page (handled by
    ``parse_detail``, with the listing's tag id and tag name passed through
    ``meta``), followed by a request for the next listing page when the page
    has an ``a.anext`` link.

    Raises:
        KeyError: if the page has detail links but its URL (with everything
            from the first ``/0`` onward removed, then URL-unquoted) is not
            a key of ``self.url_tag``.
    """
    detail_links = response.xpath('//ul[@class="cook-list"]//a[@class="cook-img"]/@href').getall()
    if detail_links:
        # The tag depends only on the page URL, so look it up once per page
        # rather than once per link. It is still resolved only when links
        # exist, so a page without links never triggers the lookup.
        listing_key = unquote(re.sub("/0.*", "", response.url))
        tag_entry = self.url_tag[listing_key]
        tag_id, tag_name = tag_entry[0], tag_entry[1]
        for link in detail_links:
            yield scrapy.Request(
                url=self.url_root + link,
                callback=self.parse_detail,
                meta={"tag_id": tag_id, "tag_name": tag_name},
            )

    # Follow pagination.
    next_href = response.xpath('//a[@class="anext"]/@href').get()
    if next_href:
        # Upgrade only a leading "http://" scheme. A blanket
        # str.replace("http", "https") would turn an already-HTTPS link into
        # "httpss://..." and rewrite any later "http" substring in the URL.
        if next_href.startswith("http://"):
            next_href = "https://" + next_href[len("http://"):]
        yield scrapy.Request(url=next_href, callback=self.parse)
def md5_encrypt(self, s):
    """Return the hexadecimal MD5 digest of ``s`` encoded as UTF-8."""
    return hashlib.md5(s.encode("utf-8")).hexdigest()
def parse_detail(self, response, **kwargs):
    """Build a ``FoodInfoItem`` from a recipe detail page and yield it.

    The tag id and tag name are read from ``response.meta``. The item's
    ``food_id`` is the MD5 hex digest of the page URL. ``step_list`` and
    ``mix_list`` are stored as the ``str()`` of nested Python lists.
    """
    meta = response.meta
    tag_id, tag_name = meta["tag_id"], meta["tag_name"]
    title = response.xpath("//h1/text()").get()

    # Steps: [image src, text of the step's <p>, remaining step text] for
    # each child <div> of the "step" container.
    step_list = [
        [
            step.xpath("a/img//@src").get(),
            step.xpath('div[@class="stepinfo"]/p/text()').get(),
            "\n".join(step.xpath('div[@class="stepinfo"]/text()').getall()).strip(),
        ]
        for step in response.xpath('//div[@class="step"]/div')
    ]

    # Ingredients: [name, amount] for each table cell.
    mix_list = [
        [
            cell.xpath('span[@class="scname"]//text()').get(),
            cell.xpath('span[@class="right scnum"]//text()').get(),
        ]
        for cell in response.xpath("//table/tr/td")
    ]

    page_url = response.url
    mix_text = str(mix_list)
    intro_lines = response.xpath('//p[@class="intro"]/text()').getall()

    yield FoodInfoItem(
        tag_id=tag_id,
        tag_name=tag_name,
        food_id=self.md5_encrypt(page_url),
        food_url=page_url,
        title=title,
        step_list=str(step_list),
        img=response.xpath('//*[@id="banner"]/a/img/@src').get(),
        desc1="\n".join(intro_lines).strip(),
        mix_list=mix_text,
        # Concatenation of tag name, title and ingredient text.
        all_key=tag_name + title + mix_text,
    )
2.2django展示
2.2.1主页展示(协同过滤推荐)
2.2.2搜索功能
2.2.3 饮食营养构成
2.3协同过滤推荐
基于用户的协同过滤推荐算法
def recommend_by_user_cf(user_id, similarity_matrix, N=10, recent_limit=10):
    """Recommend foods to a user with user-based collaborative filtering.

    Every other user whose similarity to the target user is positive
    contributes that similarity to the score of each food in their most
    recent eating records, skipping foods the target user has already eaten.
    Foods are then ranked by accumulated score.

    Args:
        user_id: ID of the target user. Row ``user_id - 1`` of
            ``similarity_matrix`` is used for this user, and column ``i``
            is treated as the user with ID ``i + 1``.
        similarity_matrix: User-to-user similarity matrix exposing
            ``.shape`` and ``[row, col]`` indexing.
        N: Maximum number of foods to return.
        recent_limit: How many of each similar user's records, newest
            ``eat_date`` first, are considered.

    Returns:
        A list of ``(food_id, score)`` tuples sorted by score, highest
        first, containing at most ``N`` entries. The list is empty when
        ``user_id`` has no row in ``similarity_matrix``.
    """
    target_index = user_id - 1
    user_count = similarity_matrix.shape[0]
    if not 0 <= target_index < user_count:
        # No similarity data for this user. Without this guard an index of
        # -1 (user_id == 0) would silently read the last user's row, and an
        # index past the end would raise IndexError.
        return []

    # Foods the target user has already eaten; these are never recommended.
    eaten_food_ids = list(
        EatingRecord.objects.filter(user_id=user_id).values_list("food_id", flat=True)
    )
    eaten_lookup = set(eaten_food_ids)

    scores = {}
    for other_index in range(user_count):
        if other_index == target_index:
            continue
        similarity = similarity_matrix[target_index, other_index]
        if not similarity > 0:
            continue
        # Most recent foods of this similar user that the target has not eaten.
        recent_food_ids = (
            EatingRecord.objects.filter(user_id=other_index + 1)
            .exclude(food_id__in=eaten_food_ids)
            .values_list("food_id", flat=True)
            .order_by("-eat_date")[:recent_limit]
        )
        for food_id in recent_food_ids:
            # The query already excludes eaten foods; this is a cheap
            # second check.
            if food_id not in eaten_lookup:
                scores[food_id] = scores.get(food_id, 0) + similarity

    ranked = sorted(scores.items(), key=lambda item: item[1], reverse=True)
    return ranked[:N]
python 毕设帮助,指导,源码分享,调试部署:worthy_life_