import java.io.File;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.nstcrm.person.model.Person;
/**
 * Lucene helper for indexing and searching {@link Person} records.
 *
 * @author Sam, 2011-9-9 2:29:22 PM
 */
public class LuceneTool {
    // Shared analyzer: the SAME analyzer must be used for indexing and
    // searching, or query tokens will not match the indexed tokens.
    public static Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_33);

    /**
     * Adds one Person to the on-disk index at d:\index as a document with
     * chName/enName/nickName fields (all stored and analyzed).
     *
     * @param person source of the field values; its name getters must not return
     *               null (Field's constructor rejects null values)
     * @throws Exception on any index I/O failure
     */
    public static void create(Person person) throws Exception {
        // Index lives on disk so it survives restarts (a RAMDirectory would not).
        Directory dir = FSDirectory.open(new File("d:\\index"));
        IndexWriter indexWriter = new IndexWriter(dir, analyzer, MaxFieldLength.LIMITED);
        try {
            Document doc = new Document();
            doc.add(new Field("chName", person.getChName(), Store.YES, Index.ANALYZED));
            doc.add(new Field("enName", person.getEnName(), Store.YES, Index.ANALYZED));
            doc.add(new Field("nickName", person.getNickName(), Store.YES, Index.ANALYZED));
            indexWriter.addDocument(doc);
            indexWriter.commit();
            indexWriter.optimize(); // merge segments for faster searches
        } finally {
            // Always close, even on failure: an unclosed writer leaves the
            // index write lock held and blocks every later create() call.
            indexWriter.close();
        }
    }

    /**
     * Searches chName/enName/nickName for the given text and prints each hit.
     *
     * @param str query text, parsed across all three name fields
     * @throws Exception on query-parse or index I/O failure
     */
    public static void search(String str) throws Exception {
        // 1. Parse the text into a Query over all three fields.
        String[] fields = {"chName", "enName", "nickName"};
        QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_33, fields, analyzer);
        // FIX: original called queryParser.Query(str), which is not a method on
        // QueryParser (did not compile); parse(String) is the correct API.
        Query query = queryParser.parse(str);
        // 2. Run the query.
        IndexSearcher indexSearcher = new IndexSearcher(FSDirectory.open(new File("d:\\index")));
        try {
            Filter filter = null; // no filtering; kept for the 3-arg search overload
            TopDocs topDocs = indexSearcher.search(query, filter, 10000);
            System.out.println("总共有【" + topDocs.totalHits + "】条匹配结果");
            // 3. Print every matching document's stored fields.
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                int docSn = scoreDoc.doc; // internal document number
                Document doc = indexSearcher.doc(docSn); // load stored fields by number
                System.out.println("chName: " + doc.get("chName"));
                System.out.println("enName: " + doc.get("enName"));
                System.out.println("nickName: " + doc.get("nickName"));
            }
        } finally {
            indexSearcher.close(); // release the reader's file handles
        }
    }
}
**************************************************************************************************************************************************************
**************************************************************************************************************************************************************
package cn.sam.query.junit;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import cn.sam.query.utils.DocumentTools;
/**
 * Smoke tests for building and querying a Lucene index over a text file.
 *
 * @author Sam, 2011-10-4 8:49:52 PM
 */
@SuppressWarnings("deprecation")
public class LuceneTest {
    // Absolute project root; backslashes normalized so the derived paths work
    // in string form on Windows and Unix alike.
    private String rootPath = System.getProperty("user.dir").replace("\\", "/");
    private String filePath = rootPath + "/luceneData/data_test.txt"; // file to index
    private String indexPath = rootPath + "/luceneIndex";             // index directory
    private Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    //private HttpServletRequest request = ServletActionContext.getRequest();

    @Test
    public void test() {
        //filePath = request.getRequestURL().toString();
    }

    /**
     * Builds (recreates) the index from the test file.
     *
     * @author Sam, 2011-10-4 9:46:01 PM
     * @throws Exception on any index I/O failure
     */
    @Test
    public void createIndex() throws Exception {
        // create=true wipes any previous index so the test is repeatable.
        IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
                analyzer, true, MaxFieldLength.LIMITED);
        try {
            Document doc = DocumentTools.getDoc(filePath);
            iw.addDocument(doc);
            iw.commit();
            iw.optimize(); // merge segments
        } finally {
            // FIX: close in finally — an unclosed writer keeps the write lock
            // and makes every later createIndex() run fail.
            iw.close();
        }
        System.out.println("已创建索引!");
    }

    /**
     * Queries the index across name/content and prints each hit's content.
     *
     * @author Sam, 2011-10-4 10:37:38 PM
     * @throws Exception on query-parse or index I/O failure
     */
    @Test
    public void query() throws Exception {
        String keywords = "房间";
        Map<String, String> mapDoc = new HashMap<String, String>();
        long start = System.currentTimeMillis();
        // Parse the search text into a Query over both fields.
        String[] fields = { "name", "content" };
        QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer);
        Query query = qp.parse(keywords);
        // Run the query.
        IndexSearcher is = new IndexSearcher(FSDirectory.open(new File(indexPath)));
        try {
            Filter filter = null; // no filtering; kept for the 3-arg search overload
            TopDocs tds = is.search(query, filter, 10000);
            long end = System.currentTimeMillis();
            System.out.println("找到约 " + tds.totalHits + " 条结果 (用时 " + (double)(end - start)/1000d + " 秒)");
            // Walk the hits and print the stored content of each.
            for (ScoreDoc sd : tds.scoreDocs) {
                int sdId = sd.doc;         // internal document number
                Document doc = is.doc(sdId); // load stored fields by number
                mapDoc = DocumentTools.readDoc(doc);
                System.out.println(mapDoc.get("content"));
            }
        } finally {
            // FIX: close in finally — original leaked the reader's file handles.
            is.close();
        }
    }
}
package cn.sam.query.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumberTools;
/**
 * Helpers for converting a file into a Lucene {@link Document} and for reading
 * a Document's stored fields back into a plain map.
 *
 * @author Sam, 2011-10-4 9:45:13 PM
 */
@SuppressWarnings("deprecation")
public class DocumentTools {
    /**
     * Builds a Document with four fields: name, content, size, path.
     * name/content are analyzed for full-text search; size/path are stored
     * verbatim (NOT_ANALYZED) for exact lookup.
     *
     * @param path file to index
     * @return the populated Document
     */
    public static Document getDoc(String path) {
        File file = new File(path);
        Document doc = new Document();
        doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
        doc.add(new Field("content", readFile(file), Store.YES, Index.ANALYZED));
        // longToString yields a lexicographically sortable encoding of the size.
        doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));
        return doc;
    }

    /**
     * Reads the whole file as text, line by line, joining lines with '\n'.
     * Uses the platform default charset — NOTE(review): consider passing an
     * explicit charset if the data files are not in the default encoding.
     *
     * @param file file to read
     * @return the file contents (each line followed by a newline)
     * @throws RuntimeException wrapping any I/O error, with the cause preserved
     */
    public static String readFile(File file) {
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            // StringBuilder instead of StringBuffer: purely local, no sync needed.
            StringBuilder sb = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                sb.append(line).append("\n");
            }
            return sb.toString();
        } catch (Exception e) {
            throw new RuntimeException(e);
        } finally {
            // FIX: original never closed the reader, leaking a file handle on
            // every call (success or failure).
            if (br != null) {
                try {
                    br.close();
                } catch (Exception ignored) {
                    // best-effort close; the read result (or error) already stands
                }
            }
        }
    }

    /**
     * Copies the four stored fields of a Document into a map.
     *
     * @author Sam, 2011-10-4 10:19:50 PM
     * @param doc document read from the index
     * @return map with keys name, content, size, path (values may be null if a
     *         field was not stored)
     */
    public static Map<String, String> readDoc(Document doc) {
        Map<String, String> map = new HashMap<String, String>();
        map.put("name", doc.get("name"));
        map.put("content", doc.get("content"));
        map.put("size", doc.get("size"));
        map.put("path", doc.get("path"));
        return map;
    }
}