1、工具类
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
/**
 * Helpers for converting a file on disk into a Lucene {@link Document} and
 * for printing the stored fields of such a document.
 */
public class File2DocumentUtils {

    /**
     * Builds a Lucene document from the file at the given path.
     *
     * <p>The document carries four stored fields: {@code name} (file name),
     * {@code content} (the file's text), {@code size} (file length encoded
     * with {@link NumberTools#longToString(long)}) and {@code path}
     * (absolute path).
     *
     * @param filePath path of the file to convert
     * @return the populated document
     * @throws RuntimeException if the file cannot be read
     */
    public static Document file2Document(String filePath) {
        File file = new File(filePath);
        Document doc = new Document();
        doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        doc.add(new Field("content", readFileContent(file), Store.YES, Index.ANALYZED));
        // The encoded size must be indexed as one un-tokenized term: fields
        // used for sorting or range filtering must not be run through an
        // analyzer, which could split or alter the encoded value.
        doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.ANALYZED));
        System.out.println(doc);
        return doc;
    }

    /**
     * Reads the whole file as text, one line at a time, terminating every
     * line with {@code "\n"}. The platform default charset is used.
     *
     * @param file the file to read
     * @return the file content with normalized line terminators
     * @throws RuntimeException wrapping any failure while opening or reading
     */
    private static String readFileContent(File file) {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append("\n");
            }
            return content.toString();
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // Always release the file handle, whether or not reading succeeded.
            if (reader != null) {
                try {
                    reader.close();
                } catch (Exception e) {
                    // The content (or the primary error) is already determined;
                    // report the close failure without masking it.
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Prints the {@code name}, {@code content}, {@code size} and {@code path}
     * fields of the document to standard output. The size is decoded back to
     * a number with {@link NumberTools#stringToLong(String)}.
     *
     * @param doc the document to print
     */
    public static void printDocumentInfo(Document doc) {
        System.out.println("------------------------------");
        System.out.println("name = " + doc.get("name"));
        System.out.println("content = " + doc.get("content"));
        System.out.println("size = " + NumberTools.stringToLong(doc.get("size")));
        System.out.println("path = " + doc.get("path"));
    }
}
2、索引的增删改和搜索
/**
 * Data-access object for a Lucene index: adds, deletes and updates documents
 * and runs paged, highlighted searches against the index at {@link #indexPath}.
 */
public class IndexDao {

    // Location of the index files (placeholder text; replace with a real path).
    String indexPath = "索引文件存放路径";

    // Dictionary-based word segmentation. A StandardAnalyzer could be used instead.
    Analyzer analyzer = new MMAnalyzer();

    /**
     * Adds a document to the index.
     *
     * @param doc the document to add
     * @throws RuntimeException wrapping any failure while opening the index
     *         or adding the document
     */
    public void save(Document doc) {
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED);
            indexWriter.addDocument(doc);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /**
     * Deletes every document containing the given term.
     *
     * <p>A Term is the smallest unit of search and represents one keyword in
     * one field, e.g. {@code new Term("title", "lucene")},
     * {@code new Term("id", "5")} or {@code new Term("id", UUID)}.
     *
     * @param term the term identifying the documents to delete
     * @throws RuntimeException wrapping any failure while opening the index
     *         or deleting
     */
    public void delete(Term term) {
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED);
            indexWriter.deleteDocuments(term);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /**
     * Updates the index by deleting the document(s) containing the term and
     * then adding the new document, equivalent to:
     *
     * <pre>
     * indexWriter.deleteDocuments(term);
     * indexWriter.addDocument(doc);
     * </pre>
     *
     * @param term the term identifying the documents to replace
     * @param doc the replacement document
     * @throws RuntimeException wrapping any failure while opening the index
     *         or updating
     */
    public void update(Term term, Document doc) {
        IndexWriter indexWriter = null;
        try {
            indexWriter = new IndexWriter(indexPath, analyzer, MaxFieldLength.LIMITED);
            indexWriter.updateDocument(term, doc);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            closeQuietly(indexWriter);
        }
    }

    /**
     * Parses the query text against the {@code name} and {@code content}
     * fields and returns one page of results.
     *
     * <p>Callers can derive the page count from the returned record count:
     *
     * <pre>
     * totalPage = recordCount / pageSize;
     * if (recordCount % pageSize > 0)
     *     totalPage++;
     * </pre>
     *
     * @param queryString the text to search for
     * @param firstResult zero-based index of the first record to return
     * @param maxResults maximum number of records to return
     * @return the total hit count together with the requested page
     * @throws RuntimeException wrapping any parse or search failure
     */
    public QueryResult search(String queryString, int firstResult, int maxResults) {
        try {
            // 1. Parse the search text into a Query.
            String[] fields = { "name", "content" };
            // Per-field boosts; left empty here, so no explicit boost is set.
            // Example: boosts.put("name", 3f);
            Map<String, Float> boosts = new HashMap<String, Float>();
            QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer, boosts);
            Query query = queryParser.parse(queryString);
            return search(query, firstResult, maxResults);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Runs the query, restricted to documents whose {@code size} lies in
     * [20, 1000] and sorted by {@code size}, and returns one page of results
     * with the {@code content} field replaced by a highlighted fragment.
     *
     * @param query the query to run
     * @param firstResult zero-based index of the first record to return;
     *        negative values are treated as 0
     * @param maxResults maximum number of records to return
     * @return the total hit count together with the requested page
     * @throws RuntimeException wrapping any search failure
     */
    public QueryResult search(Query query, int firstResult, int maxResults) {
        IndexSearcher indexSearcher = null;
        try {
            // 2. Run the query.
            indexSearcher = new IndexSearcher(indexPath);
            // Original author's note: this filter is slow and not recommended.
            Filter filter = new RangeFilter("size", NumberTools.longToString(20),
                    NumberTools.longToString(1000), true, true);

            // Sort by size; a SortField built this way sorts ascending.
            // For descending order use new SortField("size", true).
            Sort sort = new Sort();
            sort.setSort(new SortField("size"));

            TopDocs topDocs = indexSearcher.search(query, filter, 10000, sort);
            int recordCount = topDocs.totalHits;
            List<Document> recordList = new ArrayList<Document>();

            // Prepare the highlighter: wraps matches in red <font> tags and
            // produces fragments of about 50 characters.
            Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
            Scorer scorer = new QueryScorer(query);
            Highlighter highlighter = new Highlighter(formatter, scorer);
            Fragmenter fragmenter = new SimpleFragmenter(50);
            highlighter.setTextFragmenter(fragmenter);

            // 3. Collect the documents of the requested page.
            // totalHits may exceed the number of ScoreDocs actually returned
            // (capped at 10000 above), so bound the loop by the array length.
            ScoreDoc[] scoreDocs = topDocs.scoreDocs;
            int start = Math.max(firstResult, 0);
            int end = (int) Math.min((long) start + maxResults, (long) scoreDocs.length);
            for (int i = start; i < end; i++) {
                int docSn = scoreDocs[i].doc; // internal document number
                Document doc = indexSearcher.doc(docSn);

                String content = doc.get("content");
                if (content != null) {
                    // getBestFragment returns null when the text contains no
                    // query keyword; fall back to the first 50 characters.
                    String hc = highlighter.getBestFragment(analyzer, "content", content);
                    if (hc == null) {
                        hc = content.substring(0, Math.min(50, content.length()));
                    }
                    doc.getField("content").setValue(hc);
                }

                recordList.add(doc);
            }

            return new QueryResult(recordCount, recordList);
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // The searcher is null when its constructor failed; closing it
            // unconditionally would throw an NPE that hides the real error.
            if (indexSearcher != null) {
                try {
                    indexSearcher.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Closes the writer if it was opened, printing (not propagating) any
     * failure so that an exception already in flight is not replaced.
     */
    private static void closeQuietly(IndexWriter indexWriter) {
        if (indexWriter == null) {
            return;
        }
        try {
            indexWriter.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}