使用Redis进行搜索

第1关:构建反向索引

#!/usr/bin/env python  
#-*- coding:utf-8 -*-
import re  
import redis
# Module-level Redis client shared by the functions in this script; it is
# created with no arguments, so the client library's default settings are used.
conn = redis.Redis()
# Text tokenization
def tokenize(content):
    """Return the set of distinct words found in *content*.

    The text is lower-cased first; a word is any run of two or more
    ASCII letters (a-z). Digits, punctuation and single letters never
    appear in the result.
    """
    # The {2,} quantifier already enforces the minimum word length, so the
    # matches can be collected into a set directly.
    return set(re.findall("[a-z]{2,}", content.lower()))
# Build the inverted index for one document
def index_document(content):
    """Store *content* and add it to the keyword index.

    A new document id is obtained by incrementing the "content:id"
    counter, the raw text is saved in the "contents" hash under that id,
    and the id is added to the "keyword:<word>" set of every word that
    tokenize() extracts from the text.
    """
    doc_id = conn.incr("content:id")
    conn.hset("contents", doc_id, content)
    tokens = tokenize(content)
    # Queue every SADD on one pipeline (created with transaction=True) and
    # send them together with a single execute().
    batch = conn.pipeline(True)
    for token in tokens:
        batch.sadd("keyword:" + token, doc_id)
    batch.execute()

第2关:基本搜索操作

#!/usr/bin/env python  
#-*- coding:utf-8 -*-
import re  
import uuid  
import redis
# Module-level Redis client shared by the functions in this script; it is
# created with no arguments, so the client library's default settings are used.
conn = redis.Redis()
# Parse a search query
def parse(query):
    """Split *query* into required synonym groups and excluded words.

    The query is lower-cased and scanned for words of two or more ASCII
    letters, each optionally preceded by "+" or "-":

    * "-word" marks the word as excluded.
    * "+word" adds the word to the current synonym group.
    * a bare word closes the current group (if any) and starts a new one.

    Returns a tuple (wanted, unwanted): wanted is a list of synonym
    groups, each a list of words; unwanted is a list of excluded words.
    Groups and the excluded list are built from sets, so the order of
    words inside them is not defined.
    """
    excluded = set()
    groups = []
    current = set()
    for token in re.findall("[+-]?[a-z]{2,}", query.lower()):
        # Every match has at least two characters, so token[0] always exists.
        sign = token[0] if token[0] in "+-" else None
        word = token[1:] if sign else token
        if sign == "-":
            excluded.add(word)
        else:
            # An unsigned word terminates the synonym group collected so far.
            if current and sign is None:
                groups.append(list(current))
                current = set()
            current.add(word)
    if current:
        groups.append(list(current))
    return groups, list(excluded)
# Run a set intersection/union/difference and keep the result in a temporary set
def set_common(method, names):
    """Combine keyword sets with *method* and return the temporary result id.

    method: name of a method on the Redis client that takes a destination
        key followed by source keys, such as "sinterstore".
    names: set names without the "keyword:" prefix; the prefix is added
        here.

    The result is written to "keyword:<id>", where <id> is a fresh UUID
    string, and that key is given a 60 second expiry. The bare <id> is
    returned, so it can itself be passed back in as one of *names*.
    """
    temp_id = str(uuid.uuid4())
    target_key = "keyword:" + temp_id
    source_keys = ["keyword:" + name for name in names]
    store_op = getattr(conn, method)
    store_op(target_key, *source_keys)
    conn.expire(target_key, 60)
    return temp_id

第3关:实现搜索

#!/usr/bin/env python  
#-*- coding:utf-8 -*-
import re  
import uuid  
import redis
# Module-level Redis client shared by the functions in this script; it is
# created with no arguments, so the client library's default settings are used.
conn = redis.Redis()
# Run a search
def search(query):
    """Return the ids of documents matching *query*.

    The query is split by parse() into required synonym groups and
    excluded words. Each group with several words is unioned into a
    temporary set, the per-group sets are intersected, and finally the
    sets of the excluded words are subtracted.

    Returns None when the query contains no required words (this
    includes queries made only of "-word" terms); otherwise returns the
    members of the final set as reported by conn.smembers().
    """
    required, excluded = parse(query)
    if not required:
        return None

    # One set name per required group: a temporary union for synonym groups,
    # or the word's own keyword set when the group holds a single word.
    operands = [
        set_common("sunionstore", group) if len(group) > 1 else group[0]
        for group in required
    ]

    if len(operands) > 1:
        matched = set_common("sinterstore", operands)
    else:
        matched = operands[0]

    if excluded:
        # SDIFFSTORE subtracts every later set from the first one, so the
        # current result has to lead the list.
        matched = set_common("sdiffstore", [matched] + excluded)

    return conn.smembers("keyword:" + matched)
# Parse a search query
def parse(query):
    """Split *query* into required synonym groups and excluded words.

    The query is lower-cased and scanned for words of two or more ASCII
    letters, each optionally preceded by "+" or "-":

    * "-word" marks the word as excluded.
    * "+word" adds the word to the current synonym group.
    * a bare word closes the current group (if any) and starts a new one.

    Returns a tuple (wanted, unwanted): wanted is a list of synonym
    groups, each a list of words; unwanted is a list of excluded words.
    Groups and the excluded list are built from sets, so the order of
    words inside them is not defined.
    """
    excluded = set()
    groups = []
    current = set()
    for token in re.findall("[+-]?[a-z]{2,}", query.lower()):
        # Every match has at least two characters, so token[0] always exists.
        sign = token[0] if token[0] in "+-" else None
        word = token[1:] if sign else token
        if sign == "-":
            excluded.add(word)
        else:
            # An unsigned word terminates the synonym group collected so far.
            if current and sign is None:
                groups.append(list(current))
                current = set()
            current.add(word)
    if current:
        groups.append(list(current))
    return groups, list(excluded)
# Run a set intersection/union/difference and keep the result in a temporary set
def set_common(method, names):
    """Combine keyword sets with *method* and return the temporary result id.

    method: name of a method on the Redis client that takes a destination
        key followed by source keys, such as "sinterstore".
    names: set names without the "keyword:" prefix; the prefix is added
        here.

    The result is written to "keyword:<id>", where <id> is a fresh UUID
    string, and that key is given a 60 second expiry. The bare <id> is
    returned, so it can itself be passed back in as one of *names*.
    """
    temp_id = str(uuid.uuid4())
    target_key = "keyword:" + temp_id
    source_keys = ["keyword:" + name for name in names]
    store_op = getattr(conn, method)
    store_op(target_key, *source_keys)
    conn.expire(target_key, 60)
    return temp_id

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值