package com.java_min.test;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;
import com.java_min.util.File2DocumentUtils;
/**
 * Minimal Lucene demo: indexes one text file and then runs a keyword search
 * against the resulting index.
 *
 * Both tests work on hard-coded local paths, so {@link #createIndex()} has to
 * be run before {@link #searchWord()} can find anything.
 */
public class HelloWorld {
    /** File whose content gets indexed. */
    private String filePath = "D://workspace//luceneDemo//luceneDateSource//test.txt";
    /** Directory on disk that holds the index. */
    private String indexPath = "D://workspace//luceneDemo//luceneIndex";
    /** Analyzer shared by indexing and query parsing so both tokenize the same way. */
    private Analyzer analyzer = new StandardAnalyzer();

    /**
     * Creates the index.
     * IndexWriter is the class used to modify (add, delete, update) the index.
     *
     * Administrator Jun 15, 2010
     *
     * @throws Exception if the source file cannot be converted or the index
     *         cannot be written
     */
    @Test
    public void createIndex() throws Exception {
        // File-system directory (the index is kept on disk).
        Directory directory = FSDirectory.getDirectory(indexPath);
        // In-memory alternative (the index is kept in RAM):
        // Directory directory = new RAMDirectory();
        Document doc = File2DocumentUtils.file2Document(filePath);
        // The 'true' argument asks the writer to create the index rather than append to it.
        IndexWriter indexWriter = new IndexWriter(directory, analyzer, true, MaxFieldLength.LIMITED);
        try {
            indexWriter.addDocument(doc);
        } finally {
            // Always close the writer, even when addDocument fails, so the
            // writer's resources are not left behind.
            indexWriter.close();
        }
    }

    /**
     * Searches the index for a fixed query string and prints every hit.
     *
     * @throws Exception if the query cannot be parsed or the index cannot be read
     */
    @Test
    public void searchWord() throws Exception {
        String queryStr = "中国足球";

        // 1. Parse the search text into a Query that covers both fields.
        String[] fields = {"name", "context"};
        QueryParser queryParser = new MultiFieldQueryParser(fields, analyzer);
        Query query = queryParser.parse(queryStr);

        // 2. Run the query.
        IndexSearcher indexSearcher = new IndexSearcher(indexPath);
        try {
            Filter filter = null; // no filtering of the results
            TopDocs topDocs = indexSearcher.search(query, filter, 100000);
            System.out.println("当前共搜索到【" + topDocs.totalHits + "】条匹配结果");

            // 3. Print the results.
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                int doc = scoreDoc.doc; // internal document number
                Document document = indexSearcher.doc(doc); // load the document by its number
                File2DocumentUtils.printDocumentInfo(document); // print the document's fields
            }
        } finally {
            // The searcher was opened from a path here, so it must be closed here.
            indexSearcher.close();
        }
    }
}
package com.java_min.util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
/**
 * Helpers for converting between files on disk and Lucene {@link Document}s,
 * plus a small routine for printing a document's fields.
 */
public class File2DocumentUtils {
    /**
     * Converts a file into a Lucene document.
     *
     * The document gets four fields: "name" and "context" (stored and analyzed),
     * "size" (stored, indexed without analysis, encoded with
     * {@link NumberTools#longToString(long)}) and "path" (stored only, not indexed).
     *
     * Administrator Jun 16, 2010
     *
     * @param path path of the file to convert
     * @return a document describing the file
     * @throws RuntimeException if the file content cannot be read
     */
    public static Document file2Document(String path) {
        File file = new File(path);
        Document document = new Document();
        document.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        document.add(new Field("context", getFileContext(file), Store.YES, Index.ANALYZED));
        document.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));
        document.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NO));
        return document;
    }

    /**
     * Converts a document back into a file.
     * Not implemented yet: the method currently does nothing.
     *
     * Administrator Jun 16, 2010
     *
     * @param doc document to convert
     */
    public static void document2File(Document doc) {
    }

    /**
     * Reads the whole content of a file as text.
     *
     * Lines are joined with a newline character, and a newline is appended
     * after the last line as well. The file is decoded with the platform
     * default charset.
     *
     * Administrator Jun 16, 2010
     *
     * @param file file to read
     * @return the file content, or an empty string for an empty file
     * @throws RuntimeException wrapping any failure to open or read the file
     */
    public static String getFileContext(File file) {
        StringBuilder context = new StringBuilder();
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Real line break; the previous "/n" literal put a slash and an 'n' into the text.
                    context.append(line).append("\n");
                }
            } finally {
                // Release the file handle whether or not reading succeeded.
                reader.close();
            }
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return context.toString();
    }

    /**
     * Prints the name, content, size and path fields of a document.
     *
     * There are two ways to read the value of the "name" field:
     * 1. Field f = doc.getField("name"); f.stringValue();
     * 2. doc.get("name")
     *
     * Administrator Jun 16, 2010
     *
     * @param document document to print; its "size" field is decoded with
     *        {@link NumberTools#stringToLong(String)}
     */
    public static void printDocumentInfo(Document document) {
        System.out.println("文件名为--------------" + document.get("name"));
        System.out.println("文件内容为-------------" + document.get("context"));
        System.out.println("文件大小为-------------" + NumberTools.stringToLong(document.get("size")));
        System.out.println("文件路径为-------------" + document.get("path"));
    }
}