#!/usr/bin/env python
#-*- coding:utf-8 -*-
import re
import redis
# Module-level Redis client used by the functions in this section; it is
# created with the client library's default connection settings because no
# arguments are passed.
conn = redis.Redis()
# 文本序列化
def tokenize(content):
    """Split text into the set of distinct lowercase words used for indexing.

    The text is lowercased and every run of two or more letters ``a-z`` is
    taken as a word. Every other character (digits, punctuation, whitespace)
    acts as a separator, and single letters are dropped.

    Args:
        content: Text to tokenize (a string).

    Returns:
        A set of the unique words found; empty when there are none.
    """
    # The ``{2,}`` quantifier already guarantees a minimum length of two, so
    # no additional length filtering is required.
    return set(re.findall("[a-z]{2,}", content.lower()))
# 创建文本的反向索引
def index_document(content):
    """Store a document and add it to the inverted index.

    A new numeric id is taken from the ``content:id`` counter, the text is
    saved in the ``contents`` hash under that id, and the id is added to the
    ``keyword:<word>`` set of every word returned by ``tokenize``.

    Args:
        content: Text of the document to index.

    Returns:
        The id allocated for the document.
    """
    # Tokenize before touching Redis so that a failure here neither consumes
    # an id nor leaves a stored document without index entries.
    words = tokenize(content)
    content_id = conn.incr("content:id")
    # Queue the document body and all of its index entries on one
    # transactional pipeline so they are applied together.
    pipeline = conn.pipeline(True)
    pipeline.hset("contents", content_id, content)
    for word in words:
        pipeline.sadd('keyword:' + word, content_id)
    pipeline.execute()
    return content_id
# Level 2: basic search operations
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import re
import uuid
import redis
# Module-level Redis client used by the functions in this section; it is
# created with the client library's default connection settings because no
# arguments are passed.
conn = redis.Redis()
# 解析检索式
def parse(query):
    """Parse a search query into required word groups and excluded words.

    The query is lowercased and scanned for words of two or more letters
    ``a-z``, each optionally preceded by ``+`` or ``-``:

    * a plain word closes the group being built and starts a new one;
    * ``+word`` adds a synonym to the group being built (starting one if
      none is open);
    * ``-word`` marks the word as excluded.

    Args:
        query: Query string, e.g. ``"connect +connection -proxy chat"``.

    Returns:
        A ``(wanted, unwanted)`` tuple. ``wanted`` is a list of synonym
        groups in query order, each a sorted list of unique words;
        ``unwanted`` is a sorted list of the unique excluded words. Sorting
        keeps the result deterministic regardless of set iteration order.
    """
    wanted = []
    unwanted = set()
    group = set()
    for token in re.findall("[+-]?[a-z]{2,}", query.lower()):
        marker = token[0]
        if marker == '-':
            unwanted.add(token[1:])
            continue
        if marker == '+':
            # Synonym: extend the group currently being built.
            group.add(token[1:])
            continue
        # Unprefixed word: flush the finished group and begin a new one.
        if group:
            wanted.append(sorted(group))
        group = {token}
    if group:
        wanted.append(sorted(group))
    return wanted, sorted(unwanted)
# 对集合进行交/并/差操作,并暂存至临时集合
def set_common(method, names):
    """Run a Redis set-store command and keep the result in a temporary set.

    Each entry of ``names`` is prefixed with ``keyword:`` to form a source
    key. The command named by ``method`` is called with a freshly generated
    ``keyword:<uuid>`` destination key followed by the source keys, and the
    destination is given a 60-second expiry.

    Args:
        method: Name of the client command to invoke, looked up with
            ``getattr`` (e.g. ``"sunionstore"``, ``"sinterstore"``,
            ``"sdiffstore"``).
        names: Set names without the ``keyword:`` prefix.

    Returns:
        The generated id of the temporary set, without the ``keyword:``
        prefix.
    """
    common_id = str(uuid.uuid4())
    dest_key = "keyword:" + common_id
    source_keys = ["keyword:" + name for name in names]
    # Send the store command and the expiry on one transactional pipeline so
    # the temporary key is never left behind without a TTL.
    pipeline = conn.pipeline(True)
    getattr(pipeline, method)(dest_key, *source_keys)
    pipeline.expire(dest_key, 60)
    pipeline.execute()
    return common_id
# Level 3: implementing search
#!/usr/bin/env python
#-*- coding:utf-8 -*-
import re
import uuid
import redis
# Module-level Redis client used by the functions in this section; it is
# created with the client library's default connection settings because no
# arguments are passed.
conn = redis.Redis()
# 执行搜索
def search(query):
    """Return the ids of the documents matching a search query.

    The query is parsed with ``parse``. Synonym groups with more than one
    word are unioned into a temporary set, the resulting sets are
    intersected when there are several, and the sets of the excluded words
    are then subtracted.

    Args:
        query: Query string understood by ``parse``.

    Returns:
        The members of the final ``keyword:`` set as returned by
        ``conn.smembers``, or ``None`` when the query contains no required
        word.
    """
    wanted, unwanted = parse(query)
    if not wanted:
        return None

    # One set name per group: a single word maps to its own index set, while
    # several synonyms are first unioned into a temporary set.
    group_sets = [
        set_common("sunionstore", group) if len(group) > 1 else group[0]
        for group in wanted
    ]

    # Every group has to match, so intersect them when there are several.
    if len(group_sets) > 1:
        result = set_common("sinterstore", group_sets)
    else:
        result = group_sets[0]

    # The difference is taken against the first set, so the current result
    # goes in front of the excluded words.
    if unwanted:
        result = set_common("sdiffstore", [result] + unwanted)

    return conn.smembers("keyword:" + result)
# 解析检索式
def parse(query):
    """Parse a search query into required word groups and excluded words.

    The query is lowercased and scanned for words of two or more letters
    ``a-z``, each optionally preceded by ``+`` or ``-``:

    * a plain word closes the group being built and starts a new one;
    * ``+word`` adds a synonym to the group being built (starting one if
      none is open);
    * ``-word`` marks the word as excluded.

    Args:
        query: Query string, e.g. ``"connect +connection -proxy chat"``.

    Returns:
        A ``(wanted, unwanted)`` tuple. ``wanted`` is a list of synonym
        groups in query order, each a sorted list of unique words;
        ``unwanted`` is a sorted list of the unique excluded words. Sorting
        keeps the result deterministic regardless of set iteration order.
    """
    wanted = []
    unwanted = set()
    group = set()
    for token in re.findall("[+-]?[a-z]{2,}", query.lower()):
        marker = token[0]
        if marker == '-':
            unwanted.add(token[1:])
            continue
        if marker == '+':
            # Synonym: extend the group currently being built.
            group.add(token[1:])
            continue
        # Unprefixed word: flush the finished group and begin a new one.
        if group:
            wanted.append(sorted(group))
        group = {token}
    if group:
        wanted.append(sorted(group))
    return wanted, sorted(unwanted)
# 对集合进行交/并/差操作,并暂存至临时集合
def set_common(method, names):
    """Run a Redis set-store command and keep the result in a temporary set.

    Each entry of ``names`` is prefixed with ``keyword:`` to form a source
    key. The command named by ``method`` is called with a freshly generated
    ``keyword:<uuid>`` destination key followed by the source keys, and the
    destination is given a 60-second expiry.

    Args:
        method: Name of the client command to invoke, looked up with
            ``getattr`` (e.g. ``"sunionstore"``, ``"sinterstore"``,
            ``"sdiffstore"``).
        names: Set names without the ``keyword:`` prefix.

    Returns:
        The generated id of the temporary set, without the ``keyword:``
        prefix.
    """
    common_id = str(uuid.uuid4())
    dest_key = "keyword:" + common_id
    source_keys = ["keyword:" + name for name in names]
    # Send the store command and the expiry on one transactional pipeline so
    # the temporary key is never left behind without a TTL.
    pipeline = conn.pipeline(True)
    getattr(pipeline, method)(dest_key, *source_keys)
    pipeline.expire(dest_key, 60)
    pipeline.execute()
    return common_id