1、合并索引库片段文件
IndexWriter的optimize()方法已经过时,因为这个方法的效率很低。合并文件主要是使用IndexWriter的setMergeFactor(int)方法,但是在Lucene3.6版本中,该方法已过时,直接使用LogMergePolicy.setMergeFactor(int)方法代替。
当setMergeFactor(int)的参数值较小的时候,创建索引的速度较慢。当参数值较大的时候,创建索引的速度就比较快。大于10适合批量创建索引。
示例代码
/**
 * Demonstrates merging index segment files by configuring a merge factor
 * on the writer's merge policy and then adding four documents.
 *
 * The writer and the directory are always released: on success the writer
 * is closed (which commits), on failure it is rolled back so that no
 * half-written batch is committed and the write lock is not left behind.
 */
@Test
public void testMergeFactor() {
    try {
        String path = "D:\\LuceneEx\\day03";
        File file = new File(path);
        Directory mDirectory = FSDirectory.open(file);
        try {
            Analyzer mAnalyzer = new IKAnalyzer();
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
                    mAnalyzer);
            LogByteSizeMergePolicy mergePolicy = new LogByteSizeMergePolicy();
            // Other tunables on the policy: setMaxMergeDocs(int), setMaxMergeMB(double).
            mergePolicy.setMergeFactor(3); // merge once 3 segments have accumulated
            config.setMergePolicy(mergePolicy);
            IndexWriter mIndexWriter = new IndexWriter(mDirectory, config);
            boolean committed = false;
            try {
                Book book1 = createBook("Android内核揭秘", "ABC", "2010-07",
                        "android 移动开发", 8.9f);
                Document doc1 = createDocument(book1);
                Book book2 = createBook("Android多媒体开发", "BCD", "2011-07",
                        "android 多媒体", 8.5f);
                Document doc2 = createDocument(book2);
                Book book3 = createBook("Android企业应用开发", "QAB", "2012-05",
                        "android 企业应用", 8.2f);
                Document doc3 = createDocument(book3);
                // Boost multiplies this document's score (here by 1.5) so it ranks higher.
                doc3.setBoost(1.5F);
                Book book4 = createBook("Android内核剖析", "WPS", "2012-09",
                        "android 驱动开发", 9.8f);
                Document doc4 = createDocument(book4);
                mIndexWriter.addDocument(doc1);
                mIndexWriter.addDocument(doc2);
                mIndexWriter.addDocument(doc3);
                mIndexWriter.addDocument(doc4);
                mIndexWriter.close();
                committed = true;
            } finally {
                if (!committed) {
                    // Discard uncommitted changes and release the write lock.
                    mIndexWriter.rollback();
                }
            }
        } finally {
            mDirectory.close();
        }
    } catch (IOException e) {
        // CorruptIndexException and LockObtainFailedException are IOExceptions,
        // so a single handler covers all three cases the same way as before.
        e.printStackTrace();
    }
}
/**
 * Builds a Lucene document from a book.
 *
 * Every property (id, title, author, publishTime, source, category,
 * reputation) becomes a field that is both stored and analyzed. The id and
 * reputation values are converted to text by string concatenation.
 *
 * @param book the book whose properties are copied into the document
 * @return a new document containing one field per book property
 */
public Document createDocument(Book book) {
    Document result = new Document();
    result.add(new Field("id", book.getId() + "", Store.YES, Index.ANALYZED));
    result.add(new Field("title", book.getTitle(), Store.YES, Index.ANALYZED));
    result.add(new Field("author", book.getAuthor(), Store.YES, Index.ANALYZED));
    result.add(new Field("publishTime", book.getPublishTime(), Store.YES,
            Index.ANALYZED));
    result.add(new Field("source", book.getSource(), Store.YES, Index.ANALYZED));
    result.add(new Field("category", book.getCategory(), Store.YES,
            Index.ANALYZED));
    result.add(new Field("reputation", book.getReputation() + "", Store.YES,
            Index.ANALYZED));
    return result;
}
/**
 * Creates a Book populated with the given values.
 *
 * The id is a random integer in the range [0, 10000) and the source is
 * always set to the same fixed publisher name.
 *
 * @param title       the book title
 * @param author      the book author
 * @param publishTime the publication time, as text
 * @param category    the book category
 * @param reputation  the book rating
 * @return the newly created book
 */
public Book createBook(String title, String author, String publishTime,
        String category, float reputation) {
    int randomId = new Random().nextInt(10000);
    Book created = new Book();
    created.setId(randomId);
    created.setAuthor(author);
    created.setTitle(title);
    created.setCategory(category);
    created.setPublishTime(publishTime);
    created.setReputation(reputation);
    created.setSource("清华大学出版社");
    return created;
}
2、内存索引目录和文件系统索引目录结合使用
内存索引目录的操作速度非常快,所以我们在操作索引的时候可以把索引库从文件系统加载到内存中,操作完成后再写回文件系统。
内存中的索引文件写回到文件系统中的时候,我们需要对索引目录进行重建。比如原来文件系统中的索引目录有10个文件,加载到内存目录的时候是把10个文件拷贝一份到内存,然后我们添加了一个索引文件,内存中的索引目录文件数就变成11个,写回到文件系统的时候,内存索引目录文件数(11个)加上原来文件系统索引目录的文件数(10个)就变成21个了,其中有10个文件是重复的,所以我们需要删除原来文件系统中的索引目录重新创建。
但是如果索引库是巨大的,不建议使用,因为所需的内存很大。
示例代码
/**
 * Demonstrates combining an in-memory index with a file-system index:
 * the on-disk index is loaded into a RAMDirectory, two documents are added
 * in memory, and the result is written back over the on-disk index.
 *
 * Both writers and both directories are always released. If either write
 * phase fails, the corresponding writer is rolled back instead of closed,
 * so a failed write-back does not commit a partially rebuilt index.
 */
@Test
public void testRAMFSCombination() {
    try {
        String path = "D:\\LuceneEx\\day03";
        File file = new File(path);
        Directory mDirectory = FSDirectory.open(file);
        try {
            Analyzer mAnalyzer = new IKAnalyzer();
            // Create the in-memory directory as a copy of the on-disk index.
            RAMDirectory mRamDirectory = new RAMDirectory(mDirectory);
            try {
                IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, mAnalyzer);
                IndexWriter ramIndexWriter = new IndexWriter(mRamDirectory, config);
                boolean ramCommitted = false;
                try {
                    Book book1 = createBook("Spring企业开发", "QWA", "2012-07",
                            "spring web开发", 8.5f);
                    Document doc1 = createDocument(book1);
                    Book book2 = createBook("精通SSH三大框架", "SSH", "2012-11",
                            "web开发 SSH", 9.1f);
                    Document doc2 = createDocument(book2);
                    ramIndexWriter.addDocument(doc1);
                    ramIndexWriter.addDocument(doc2);
                    ramIndexWriter.close();
                    ramCommitted = true;
                } finally {
                    if (!ramCommitted) {
                        ramIndexWriter.rollback();
                    }
                }

                // Write the in-memory index back to the file system.
                IndexWriterConfig fsIndexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, mAnalyzer);
                // CREATE replaces the existing on-disk index; APPEND would duplicate
                // the documents that were copied into memory above.
                fsIndexWriterConfig.setOpenMode(OpenMode.CREATE);
                IndexWriter fsIndexWriter = new IndexWriter(mDirectory, fsIndexWriterConfig);
                boolean fsCommitted = false;
                try {
                    fsIndexWriter.addIndexes(mRamDirectory);
                    fsIndexWriter.close();
                    fsCommitted = true;
                } finally {
                    if (!fsCommitted) {
                        // Discard the uncommitted rebuild and release the write lock.
                        fsIndexWriter.rollback();
                    }
                }
            } finally {
                mRamDirectory.close();
            }
        } finally {
            mDirectory.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}