本项目用于从存储了大量菜谱的JSON文件中,依据关键词快速找到相应的菜谱。
使用时,直接调用search函数即可查找相关菜谱;支持多个关键词搜索,支持高评分条目优先显示。搜索结果为前十个相关菜谱,显示时有三种排序方式:
ordering='normal':默认排序,按相关性排列。
ordering='health':依据卡路里、蛋白质、脂肪的数值计算菜谱是否健康,将更健康的菜谱排在前面。
ordering='simple':依据原材料数量和烧菜步骤数量,将更简单的菜谱排在前面。
import json
import re
import sys
#用到了sys.maxsize(平台Py_ssize_t类型所能表示的最大值,并非Python int的上限),方便排序
#parse and tokenize (split into words) all recipes
def process_recipes(filename):
#定义新的dictiorary数据库存储菜谱数据
title_to_terms = {
}
categories_to_terms = {
}
ingredients_to_terms = {
}
directions_to_terms = {
}
pattern = re.compile('[\W_]+')
#打开json文件
with open(filename) as f:
recipes = json.load(f)
print(len(recipes))
for recipe_number in range(20):
#将每个菜谱句子分解成单词,存入词袋库(dictionary实现)
recipe = recipes[recipe_number]
recipe_to_terms[recipe_number] = {
}
#如果此recipe有title,将此title句子分割为一组单词并存入数据库
if 'title' in recipe.keys():
title = recipe['title']
title_to_terms['title'] = pattern.sub(' ',title)
re.sub(r'[\W_]+','', title_to_terms['title'])
title_to_terms['title'] = title_to_terms['title'].split()
recipe_to_terms[recipe_number].update(title_to_terms)
#如果此recipe有categories,将此categories句子分割为一组单词并存入数据库
if 'categories' in recipe.keys():
categories = str(recipe['categories'])
categories_to_terms['categories'] = pattern.sub(' ',categories)
re.sub(r'[\W_]+','', categories_to_terms['categories'])
categories_to_terms['categories'] = categories_to_terms['categories'].split()
recipe_to_terms[recipe_number].update(categories_to_terms)
#如果此recipe有ingredients,统计原料数量存入数据库,将此ingredients句子分割为一组单词并存入数据库
if 'ingredients' in recipe.keys():
recipe_to_terms[recipe_number].update({
'number':len(recipe['ingredients'])})
ingredients = str(recipe['ingredients'])
ingredients_to_terms['ingredients'] = pattern.sub(' ',ingredients)
re.sub(r'[\W_]+','', ingredients_to_terms['ingredients'])
ingredients_to_terms['ingredients'] = ingredients_to_terms['ingredients'].split()
recipe_to_terms[recipe_number].update(ingredients_to_terms)
#如果此recipe有directions,统计步骤数量存入数据库,将此directions句子分割为一组单词并存入数据库
if