pip install whoosh jieba
首先,我有一个xiaoshuo文件夹,装了几部小说
直接上代码:
首先是创建索引的文件
from whoosh.filedb.filestore import FileStorage
from whoosh.fields import *
from jieba.analyse import ChineseAnalyzer
import os
# Build (or reopen) a Whoosh index in ./xiaoshuoIndex and add every file
# found in ./xiaoshuo to it as one document (title = file name,
# content = full text of the file).

# jieba's ChineseAnalyzer is used as the analyzer for the `content` field.
analyzer = ChineseAnalyzer()
schema = Schema(
    title=TEXT(stored=True),
    content=TEXT(stored=True, analyzer=analyzer),
)

storage = FileStorage('./xiaoshuoIndex')
if not os.path.exists('./xiaoshuoIndex'):
    # First run: create the directory and a fresh index using the schema.
    os.mkdir('./xiaoshuoIndex')
    ix = storage.create_index(schema)
else:
    # Directory already present: open the index stored in it.
    ix = storage.open_index()

filelist = os.listdir('./xiaoshuo')

# Using the writer as a context manager commits when the block finishes
# normally and cancels the write if an exception escapes, so a failure on
# one file does not leave a half-finished writer behind.
with ix.writer() as writer:
    for file in filelist:
        # `with` guarantees the file handle is closed after reading.
        with open(os.path.join('./xiaoshuo', file), encoding='utf-8') as novel:
            # The content field must receive one string, not a list of
            # lines, so read the whole file at once.
            content = novel.read()
        writer.add_document(title=file, content=content)
        print(file, '索引完成')
print('索引全部完成')
索引创建完成之后,会生成一个文件夹
然后是做个测试
from whoosh.qparser import QueryParser
from whoosh.filedb.filestore import FileStorage
# Point a storage object at the index directory and open the index in it.
storage = FileStorage('./xiaoshuoIndex')
ix = storage.open_index()

# Debugging aid: loop over ix.reader().all_terms() and print each item to
# inspect the terms stored in the index.

# QueryParser(field name to search, index schema).parse(keyword) builds
# the query object that is handed to the searcher.
content_parser = QueryParser('content', ix.schema)

# The searcher is the object used to run searches against the index.
with ix.searcher() as searcher:
    query = content_parser.parse('剑眉')
    # `limit` caps the number of results (10 by default); None returns
    # every match.
    results = searcher.search(query, limit=None)
    for title in (hit['title'] for hit in results):
        print(title)
老规矩,运行看下结果
打开这个小说,搜索一下
把“剑眉”换成“游戏”
有点多,随便找几个看看