package com.mylucene;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.nio.CharBuffer;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.LucenePackage;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Small Lucene 4.7 demo: reads every regular file in a data directory into an
 * {@link Item}, indexes the items, and runs a multi-field keyword search.
 *
 * <p>{@link Item} properties are mapped to index fields by reflection: each
 * instance field of {@code Item} becomes an index field of the same name, read
 * through its {@code getXxx()} accessor and restored through {@code setXxx(String)}.
 */
public class MyLuceneTest {

    /** Maximum number of hits returned by a single search. */
    private static final int MAX_HITS = 10;

    /**
     * Rebuilds the index from the given items. All documents already in the
     * index are deleted first.
     *
     * @param analyzer  analyzer used to tokenize the field values
     * @param directory directory the index is written to
     * @param items     items to index
     * @return {@code true} if every item was added and the writer was closed
     *         without error; {@code false} otherwise
     */
    private boolean buildIndexer(Analyzer analyzer, Directory directory, List<Item> items) {
        IndexWriter iwriter = null;
        boolean success = false;
        try {
            iwriter = new IndexWriter(directory, new IndexWriterConfig(
                    Version.LUCENE_47, analyzer));
            // Start from an empty index.
            iwriter.deleteAll();
            for (Item item : items) {
                iwriter.addDocument(toDocument(item));
            }
            success = true;
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // The writer is null when its constructor threw; closing it then
            // would raise a NullPointerException that hides the real failure.
            if (iwriter != null) {
                try {
                    iwriter.close();
                } catch (IOException e) {
                    // A failed close means the changes may not have been
                    // written, so the build must not be reported as successful.
                    e.printStackTrace();
                    success = false;
                }
            }
        }
        return success;
    }

    /**
     * Searches all indexed {@link Item} fields for the given keyword and
     * returns at most {@value #MAX_HITS} matching items.
     *
     * <p>Note: this method closes {@code directory} before returning, so the
     * same {@code Directory} instance cannot be used again afterwards.
     *
     * @param analyzer  analyzer used to parse the query
     * @param directory directory holding the index; closed by this method
     * @param keyword   query string, parsed with the classic query parser syntax
     * @return the matching items (possibly empty), or {@code null} if the
     *         search failed for any reason
     */
    public List<Item> searchIndexer(Analyzer analyzer, Directory directory, String keyword) {
        DirectoryReader ireader = null;
        List<Item> result = new ArrayList<Item>();
        try {
            ireader = DirectoryReader.open(directory);
            IndexSearcher isearcher = new IndexSearcher(ireader);
            // Search across every indexed Item property.
            List<String> names = indexedFieldNames();
            String[] multiFields = names.toArray(new String[names.size()]);
            MultiFieldQueryParser parser = new MultiFieldQueryParser(
                    Version.LUCENE_47, multiFields, analyzer);
            Query query = parser.parse(keyword);
            ScoreDoc[] hits = isearcher.search(query, null, MAX_HITS).scoreDocs;
            for (ScoreDoc hit : hits) {
                Document hitDoc = isearcher.doc(hit.doc);
                Item item = new Item();
                for (String field : multiFields) {
                    String setMethodName = "set" + toFirstLetterUpperCase(field);
                    // hitDoc.get returns null for a field that was not stored;
                    // the property is then simply set to null.
                    Item.class.getMethod(setMethodName, String.class)
                            .invoke(item, hitDoc.get(field));
                }
                result.add(item);
            }
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            // Close each resource independently so that a failure (or a reader
            // that was never opened) does not prevent closing the other one.
            closeAndReport(ireader);
            closeAndReport(directory);
        }
        return result;
    }

    /**
     * Returns the given string with its first character converted to upper case.
     *
     * @param str the string to capitalize; may be {@code null}
     * @return the capitalized string, or {@code str} itself when it is
     *         {@code null} or empty
     */
    public static String toFirstLetterUpperCase(String str) {
        if (str == null || str.length() == 0) {
            return str;
        }
        // Locale.ROOT keeps the result independent of the default locale
        // (e.g. under a Turkish locale "id" would otherwise become "İd").
        return str.substring(0, 1).toUpperCase(java.util.Locale.ROOT) + str.substring(1);
    }

    /**
     * Indexes every regular file found in {@code C:/mylucene} into an index
     * stored at {@code c:/lucene}, then searches it and prints the hits.
     *
     * @param args ignored
     * @throws Exception if a data file cannot be read or the index directory
     *                   cannot be opened
     */
    public static void main(String[] args) throws Exception {
        System.out.println(LucenePackage.get());
        MyLuceneTest demo = new MyLuceneTest();
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
        // Alternative analyzer: new SmartChineseAnalyzer(Version.LUCENE_47)

        List<Item> items = new ArrayList<Item>();
        File dataDir = new File("C:/mylucene");
        File[] dataFiles = dataDir.listFiles();
        if (dataFiles == null) {
            // listFiles returns null when the path is not a directory or
            // an I/O error occurs.
            System.err.println("Cannot list files in " + dataDir);
            return;
        }
        for (File dataFile : dataFiles) {
            if (!dataFile.isFile()) {
                // FileReader cannot open sub-directories.
                continue;
            }
            String content = readFully(dataFile);
            System.out.println(content);
            items.add(new Item(dataFile.getCanonicalPath(), dataFile.getName(), content));
        }

        // Store the index on disk.
        File file = new File("c:/lucene");
        Directory directory = FSDirectory.open(file);
        if (!demo.buildIndexer(analyzer, directory, items)) {
            System.err.println("Building the index failed; search skipped.");
            directory.close();
            return;
        }
        // searchIndexer closes the directory when it is done.
        List<Item> result = demo.searchIndexer(analyzer, directory, "中国");
        if (result == null) {
            System.err.println("Search failed.");
            return;
        }
        for (Item item : result) {
            System.out.println(item.toString());
        }
    }

    /**
     * Lists the names of the {@link Item} fields that are mapped to index
     * fields: every declared field that is neither static nor synthetic.
     */
    private static List<String> indexedFieldNames() {
        List<String> names = new ArrayList<String>();
        for (java.lang.reflect.Field field : Item.class.getDeclaredFields()) {
            // Constants and compiler/tool-generated fields have no bean
            // accessors, so looking up their getter/setter would fail.
            if (field.isSynthetic()
                    || java.lang.reflect.Modifier.isStatic(field.getModifiers())) {
                continue;
            }
            names.add(field.getName());
        }
        return names;
    }

    /**
     * Converts an item into a Lucene document with one stored text field per
     * non-null item property.
     */
    private static Document toDocument(Item item)
            throws NoSuchMethodException, IllegalAccessException,
            java.lang.reflect.InvocationTargetException {
        Document doc = new Document();
        for (String fieldName : indexedFieldNames()) {
            String getMethodName = "get" + toFirstLetterUpperCase(fieldName);
            Object value = item.getClass().getMethod(getMethodName).invoke(item);
            if (value == null) {
                // Lucene's Field constructor rejects null values; leave the
                // field out of the document instead of failing the whole run.
                continue;
            }
            doc.add(new Field(fieldName, (String) value, TextField.TYPE_STORED));
        }
        return doc;
    }

    /**
     * Reads the whole file into a string using the platform default charset
     * (the charset {@link FileReader} uses). The complete content is held in
     * memory, so very large files may exhaust the heap.
     */
    private static String readFully(File file) throws IOException {
        Reader reader = new FileReader(file);
        try {
            StringBuilder content = new StringBuilder();
            char[] buffer = new char[8192];
            int count;
            // read() may return fewer chars than requested; append only what
            // was actually read and loop until end of stream.
            while ((count = reader.read(buffer)) != -1) {
                content.append(buffer, 0, count);
            }
            return content.toString();
        } finally {
            reader.close();
        }
    }

    /** Closes the resource if it is non-null, reporting (not propagating) any failure. */
    private static void closeAndReport(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
package com.mylucene;
/**
 * Simple value holder with three string properties: an identifier, a title
 * and a content body. Exposes a getter and a setter for each property.
 */
public class Item {

    private String id;
    private String title;
    private String content;

    /** Creates an item whose three properties are all {@code null}. */
    public Item() {
        this(null, null, null);
    }

    /**
     * Creates an item with the given property values.
     *
     * @param id      identifier of the item
     * @param title   title of the item
     * @param content content of the item
     */
    public Item(String id, String title, String content) {
        this.id = id;
        this.title = title;
        this.content = content;
    }

    /** @return the identifier */
    public String getId() {
        return this.id;
    }

    /** @param id the new identifier */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the new title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the content */
    public String getContent() {
        return this.content;
    }

    /** @param content the new content */
    public void setContent(String content) {
        this.content = content;
    }

    /**
     * Formats the item as {@code [id=...,title=...,content=...]}; a
     * {@code null} property is rendered as the text {@code null}.
     */
    @Override
    public String toString() {
        return "[id=" + id
                + ",title=" + title
                + ",content=" + content
                + "]";
    }
}
这里将文件的三个属性做了抽象,并用另一个类(Item)来表示。在以前的版本中,是用 Reader 读取文件,并在为文件添加索引时直接把 Reader 对象加入索引,不需要先把全部内容读出来再进行封装。现在的做法在文件非常大时,内存可能存不下,从而可能导致内存不足或数组越界。这里应该仍然可以像以前的版本一样直接对文件建立索引,我想只是我还没有找到好的解决办法,所以应该多研究一下 4.8 的 API。