# PyLucene 7.6.0 demo: building an index
import sys, os, lucene, threading, time
from datetime import datetime
import glob
from java.io import File
from java.nio.file import Paths
from org.apache.lucene.analysis.miscellaneous import LimitTokenCountAnalyzer
from org.apache.lucene.analysis.core import WhitespaceAnalyzer
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field, FieldType,TextField
from org.apache.lucene.index import FieldInfo, IndexWriter, IndexWriterConfig
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.util import Version
from org.apache.lucene.analysis.cjk import CJKAnalyzer
"""
Example of indexing with PyLucene 7.6.0
"""
def luceneIndexer(docdir, indir, encoding=None):
    """
    Index every ``*.txt`` file found directly in a directory.

    Each matching file becomes one document with a single stored field
    named "text" that holds the file's entire content. Documents are
    analyzed with CJKAnalyzer and written through an IndexWriter that
    targets ``indir``.

    Args:
        docdir: Directory searched (non-recursively) for ``*.txt`` files.
        indir: Path of the directory that holds the index.
        encoding: Text encoding used to read the files. ``None`` (the
            default) keeps the platform's default encoding, matching the
            previous behavior.

    Prints a progress line before and after each file, then the writer's
    document count once all files have been added.
    """
    lucene.initVM()
    index_path = Paths.get(indir)
    indexdir = SimpleFSDirectory(index_path)
    analyzer = CJKAnalyzer()
    config = IndexWriterConfig(analyzer)
    index_writer = IndexWriter(indexdir, config)
    try:
        for tfile in glob.glob(os.path.join(docdir, '*.txt')):
            print("Indexing: ", tfile)
            # Read through a context manager so the file handle is closed
            # promptly instead of being left to the garbage collector.
            with open(tfile, 'r', encoding=encoding) as source:
                content = source.read()
            document = Document()
            document.add(Field("text", content, TextField.TYPE_STORED))
            index_writer.addDocument(document)
            print("Done: ", tfile)
        print(index_writer.numDocs())
        index_writer.commit()
    finally:
        # Always close the writer, even when reading or indexing a file
        # fails, so it is not left open for the rest of the process.
        index_writer.close()
if __name__ == '__main__':
    # Script entry point: index the *.txt files under ./docs into ./indexs.
    source_dir, index_dir = 'docs', 'indexs'
    luceneIndexer(source_dir, index_dir)