使用Redis进行搜索

最新推荐文章于 2024-02-27 16:16:01 发布

本人已 run，不再更新；内容保留，如有错误还请见谅。

最新推荐文章于 2024-02-27 16:16:01 发布

阅读量1.8k

点赞数 1

分类专栏： educoder 文章标签： python

本文链接：https://blog.csdn.net/m0_56494324/article/details/124474347

版权

educoder 专栏收录该内容

35 篇文章 67 订阅

订阅专栏

第1关：构建反向索引

#!/usr/bin/env python  
#-*- coding:utf-8 -*-
import re  
import redis
# Module-level Redis client shared by the functions in this script; it is
# constructed with no arguments, so redis-py's default settings apply.
conn = redis.Redis()
# Tokenize text into the set of words to index
def tokenize(content):
    """Return the set of distinct lowercase words found in ``content``.

    The text is lowercased first; a word is any run of two or more
    characters in ``a-z``, so digits, punctuation and single letters never
    appear in the result.
    """
    # Implement the required functionality below
    #********* Begin *********#
    # The pattern itself enforces the two-letter minimum, and building a set
    # collapses repeated words.
    return set(re.findall("[a-z]{2,}", content.lower()))
    #********* End *********#
# Build the inverted index for a document
def index_document(content):
    """Store ``content`` and register it in the inverted index.

    A new document id is taken from the ``content:id`` counter, the text is
    saved in the ``contents`` hash under that id, and the id is added to the
    ``keyword:<word>`` set of every word returned by ``tokenize``.
    """
    # Implement the required functionality below
    #********* Begin *********#
    content_id = conn.incr("content:id")
    # Tokenize before queuing anything so a failure here writes nothing
    # beyond the counter increment.
    words = tokenize(content)
    pipeline = conn.pipeline(True)
    # Queue the document body in the same transactional pipeline as its
    # index entries: they are sent together and applied as one unit, so a
    # stored document cannot end up without its keyword sets.
    pipeline.hset("contents", content_id, content)
    for word in words:
        pipeline.sadd("keyword:" + word, content_id)
    pipeline.execute()
    #********* End *********#

第2关：基本搜索操作

#!/usr/bin/env python  
#-*- coding:utf-8 -*-
import re  
import uuid  
import redis
# Module-level Redis client shared by the functions in this script; it is
# constructed with no arguments, so redis-py's default settings apply.
conn = redis.Redis()
# Parse a search query
def parse(query):
    """Split ``query`` into groups of wanted words and a list of unwanted words.

    The query is lowercased and scanned for words of two or more letters,
    each optionally prefixed with ``+`` or ``-``:

    * ``-word`` excludes ``word`` from the results;
    * ``+word`` marks ``word`` as a synonym of the preceding wanted word;
    * a bare ``word`` starts a new synonym group.

    Returns a ``(wanted, unwanted)`` tuple. ``wanted`` is a list of synonym
    groups (each a list of words, in no particular order because it is built
    from a set); ``unwanted`` is a list of the excluded words.
    """
    # Implement the required functionality below
    #********* Begin *********#
    excluded = set()
    groups = []
    current = set()
    for token in re.findall("[+-]?[a-z]{2,}", query.lower()):
        sign = token[0] if token[0] in "+-" else None
        term = token[1:] if sign else token
        if sign == "-":
            excluded.add(term)
        else:
            # An unprefixed word closes the group collected so far.
            if sign is None and current:
                groups.append(list(current))
                current = set()
            current.add(term)
    # Flush the group that was still open when the query ended.
    if current:
        groups.append(list(current))
    return groups, list(excluded)
    #********* End *********#
# Intersect/union/diff keyword sets and keep the result in a temporary set
def set_common(method, names):
    """Run a set-store command over keyword sets and return the temp set's id.

    ``method`` is the name of the client method to call on ``conn`` (the
    search code passes ``sunionstore``, ``sinterstore`` or ``sdiffstore``).
    ``names`` are set names without the ``keyword:`` prefix. The result is
    stored under ``keyword:<uuid>``, which is given a 60-second expiry; the
    bare uuid string is returned so it can be used as a name in later calls.
    """
    # Implement the required functionality below
    #********* Begin *********#
    token = str(uuid.uuid4())
    destination = "keyword:" + token
    sources = []
    for name in names:
        sources.append("keyword:" + name)
    operation = getattr(conn, method)
    operation(destination, *sources)
    # The temporary set is only needed briefly; let Redis drop it.
    conn.expire(destination, 60)
    return token
    #********* End *********#

第3关：实现搜索

#!/usr/bin/env python  
#-*- coding:utf-8 -*-
import re  
import uuid  
import redis
# Module-level Redis client shared by the functions in this script; it is
# constructed with no arguments, so redis-py's default settings apply.
conn = redis.Redis()
# Run a search
def search(query):
    """Return the ids of the documents that match ``query``.

    ``parse`` splits the query into synonym groups and excluded words. Each
    multi-word group is unioned, the groups are intersected, and the sets of
    the excluded words are subtracted, using temporary sets created by
    ``set_common``.

    Returns ``None`` when the query has no wanted word; otherwise returns
    whatever ``conn.smembers`` yields for the final set.
    """
    # Implement the required functionality below
    #********* Begin *********#
    groups, excluded = parse(query)
    if not groups:
        return None

    # A single word already names a stored keyword set; only groups with
    # several synonyms need a temporary union.
    candidates = [
        set_common("sunionstore", group) if len(group) > 1 else group[0]
        for group in groups
    ]

    if len(candidates) > 1:
        result = set_common("sinterstore", candidates)
    else:
        result = candidates[0]

    if excluded:
        # The first name is the set to subtract from.
        result = set_common("sdiffstore", [result] + excluded)

    return conn.smembers("keyword:" + result)
    #********* End *********#
# Parse a search query
def parse(query):
    """Split ``query`` into groups of wanted words and a list of unwanted words.

    The query is lowercased and scanned for words of two or more letters,
    each optionally prefixed with ``+`` or ``-``:

    * ``-word`` excludes ``word`` from the results;
    * ``+word`` marks ``word`` as a synonym of the preceding wanted word;
    * a bare ``word`` starts a new synonym group.

    Returns a ``(wanted, unwanted)`` tuple. ``wanted`` is a list of synonym
    groups (each a list of words, in no particular order because it is built
    from a set); ``unwanted`` is a list of the excluded words.
    """
    excluded = set()
    groups = []
    current = set()
    for token in re.findall("[+-]?[a-z]{2,}", query.lower()):
        sign = token[0] if token[0] in "+-" else None
        term = token[1:] if sign else token
        if sign == "-":
            excluded.add(term)
        else:
            # An unprefixed word closes the group collected so far.
            if sign is None and current:
                groups.append(list(current))
                current = set()
            current.add(term)
    # Flush the group that was still open when the query ended.
    if current:
        groups.append(list(current))
    return groups, list(excluded)
# Intersect/union/diff keyword sets and keep the result in a temporary set
def set_common(method, names):
    """Run a set-store command over keyword sets and return the temp set's id.

    ``method`` is the name of the client method to call on ``conn`` (the
    search code passes ``sunionstore``, ``sinterstore`` or ``sdiffstore``).
    ``names`` are set names without the ``keyword:`` prefix. The result is
    stored under ``keyword:<uuid>``, which is given a 60-second expiry; the
    bare uuid string is returned so it can be used as a name in later calls.
    """
    token = str(uuid.uuid4())
    destination = "keyword:" + token
    sources = []
    for name in names:
        sources.append("keyword:" + name)
    operation = getattr(conn, method)
    operation(destination, *sources)
    # The temporary set is only needed briefly; let Redis drop it.
    conn.expire(destination, 60)
    return token

本人已 run，不再更新；内容保留，如有错误还请见谅。

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
使用Redis进行搜索

第1关：构建反向索引#!/usr/bin/env python #-*- coding:utf-8 -*-import re import redisconn = redis.Redis()# 文本序列化 def tokenize(content): # 请在下面完成要求的功能 #********* Begin *********# words = set() for word in re.findall("[a-z]{2,}", ..
复制链接

扫一扫