// Lucene 简单实例 (simple Lucene indexing/search example)

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.nstcrm.person.model.Person;

/**
 * @author Sam 时间:2011-9-9 下午2:29:22
 */
/**
 * Simple Lucene indexing/search utility for {@link Person} records.
 *
 * @author Sam 2011-9-9 下午2:29:22
 */
public class LuceneTool {

    // Shared analyzer — the SAME analyzer must be used for indexing and query
    // parsing, otherwise tokenization will not line up between the two.
    public static Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_33);

    // Index directory on disk, shared by create() and search().
    private static final String INDEX_DIR = "d:\\index";

    /**
     * Indexes one person as a Document with chName/enName/nickName fields
     * (all stored and analyzed).
     *
     * @param person the person to index; its three name getters must not return null
     * @throws Exception on any index I/O failure
     */
    public static void create(Person person) throws Exception {
        Directory dir = FSDirectory.open(new File(INDEX_DIR));
        IndexWriter indexWriter = new IndexWriter(dir, analyzer, MaxFieldLength.LIMITED);
        try {
            Document doc = new Document();
            doc.add(new Field("chName", person.getChName(), Store.YES, Index.ANALYZED));
            doc.add(new Field("enName", person.getEnName(), Store.YES, Index.ANALYZED));
            doc.add(new Field("nickName", person.getNickName(), Store.YES, Index.ANALYZED));
            indexWriter.addDocument(doc);
            indexWriter.commit();
            indexWriter.optimize();
        } finally {
            // Always close so the index write lock is released even on failure.
            indexWriter.close();
        }
    }

    /**
     * Searches all three name fields for the given text and prints each hit.
     *
     * @param str the raw query text, parsed with QueryParser syntax
     * @throws Exception on query-parse or index I/O failure
     */
    public static void search(String str) throws Exception {
        // 1. Parse the search text into a Query across all three name fields.
        String[] fields = {"chName", "enName", "nickName"};
        QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_33, fields, analyzer);
        // BUG FIX: original called queryParser.Query(str), which does not exist
        // on QueryParser; parse(String) is the correct API.
        Query query = queryParser.parse(str);

        // 2. Run the query.
        IndexSearcher indexSearcher = new IndexSearcher(FSDirectory.open(new File(INDEX_DIR)));
        try {
            Filter filter = null; // no filtering
            TopDocs topDocs = indexSearcher.search(query, filter, 10000);
            System.out.println("总共有【" + topDocs.totalHits + "】条匹配结果");

            // 3. Print each matching document.
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                int docSn = scoreDoc.doc; // internal document number
                Document doc = indexSearcher.doc(docSn); // fetch the stored document

                System.out.println("chName: " + doc.get("chName"));
                System.out.println("enName: " + doc.get("enName"));
                System.out.println("nickName: " + doc.get("nickName"));
            }
        } finally {
            // Close the searcher to release the underlying reader/directory.
            indexSearcher.close();
        }
    }
}


**************************************************************************************************************************************************************

**************************************************************************************************************************************************************

package cn.sam.query.junit;


import java.io.File;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;

import cn.sam.query.utils.DocumentTools;

/**
 * @author Sam 时间:2011-10-4 下午8:49:52
 */
@SuppressWarnings("deprecation")
/**
 * JUnit tests exercising Lucene index creation and querying over a sample
 * text file, using {@link DocumentTools} for Document conversion.
 *
 * @author Sam 时间:2011-10-4 下午8:49:52
 */
@SuppressWarnings("deprecation")
public class LuceneTest {

	private String rootPath = System.getProperty("user.dir").replace("\\", "/");// absolute project path
	private String filePath = rootPath + "/luceneData/data_test.txt";
	private String indexPath = rootPath + "/luceneIndex";
	private Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
	//private HttpServletRequest request = ServletActionContext.getRequest();

	@Test
	public void test() {
		//filePath = request.getRequestURL().toString();
	}

	/**
	 * Creates (or recreates — the 'true' flag wipes any existing index) the
	 * index from the sample data file.
	 *
	 * @author Sam 时间:2011-10-4 下午9:46:01
	 * @throws Exception on any index I/O failure
	 */
	@Test
	public void createIndex() throws Exception {
		// IndexWriter performs all index mutations (add / delete / update).
		IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
			analyzer, true, MaxFieldLength.LIMITED);
		try {
			Document doc = DocumentTools.getDoc(filePath);
			iw.addDocument(doc);
			iw.commit();
			iw.optimize(); // merge segments for faster reads
		} finally {
			// Close even on failure so the index write lock is released.
			iw.close();
		}
		System.out.println("已创建索引!");
	}

	/**
	 * Queries the index over the name and content fields and prints the
	 * content of every hit, with elapsed time.
	 *
	 * @author Sam 时间:2011-10-4 下午10:37:38
	 * @throws Exception on query-parse or index I/O failure
	 */
	@Test
	public void query() throws Exception {
		String keywords = "房间";

		Map<String, String> mapDoc = new HashMap<String, String>();
		long start = System.currentTimeMillis();
		// Parse the search text into a Query across both fields.
		String[] fields = { "name", "content" };
		QueryParser qp = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer);
		Query query = qp.parse(keywords);

		// Run the query; close the searcher afterwards to release the reader.
		IndexSearcher is = new IndexSearcher(FSDirectory.open(new File(indexPath)));
		try {
			Filter filter = null; // no filtering
			TopDocs tds = is.search(query, filter, 10000);
			long end = System.currentTimeMillis();
			System.out.println("找到约 " + tds.totalHits + " 条结果 (用时 " + (double)(end - start)/1000d + " 秒)");

			// Fetch and print each matching document.
			for (ScoreDoc sd : tds.scoreDocs) {
				int sdId = sd.doc;// internal document number
				Document doc = is.doc(sdId);// fetch the stored document
				mapDoc = DocumentTools.readDoc(doc);
				System.out.println(mapDoc.get("content"));
			}
		} finally {
			is.close();
		}
	}

}

package cn.sam.query.utils;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.NumberTools;

/**
 * @author Sam 时间:2011-10-4 下午9:45:13
 */
@SuppressWarnings("deprecation")
/**
 * Helpers for converting a file on disk to/from a Lucene {@link Document}
 * with fields: name, content, size, path.
 *
 * @author Sam 时间:2011-10-4 下午9:45:13
 */
@SuppressWarnings("deprecation")
public class DocumentTools {

	/** Utility class — not instantiable. */
	private DocumentTools() {
	}

	/**
	 * Builds a Document for the file at the given path.
	 * name/content are analyzed for full-text search; size/path are stored
	 * verbatim (NOT_ANALYZED) for exact retrieval.
	 *
	 * @param path file path to index
	 * @return Document with name, content, size, path fields
	 */
	public static Document getDoc(String path) {
		File file = new File(path);
		Document doc = new Document();
		doc.add(new Field("name", file.getName(), Store.YES, Index.ANALYZED));
		doc.add(new Field("content", readFile(file), Store.YES, Index.ANALYZED));
		// NumberTools pads the length so lexicographic order == numeric order.
		doc.add(new Field("size", NumberTools.longToString(file.length()), Store.YES, Index.NOT_ANALYZED));
		doc.add(new Field("path", file.getAbsolutePath(), Store.YES, Index.NOT_ANALYZED));
		return doc;
	}

	/**
	 * Reads the whole file into a String, one line at a time, normalizing
	 * line endings to "\n".
	 * NOTE(review): uses the platform-default charset — if the data files are
	 * UTF-8 on a non-UTF-8 platform this will garble content; confirm.
	 *
	 * @param file file to read
	 * @return full file content
	 * @throws RuntimeException wrapping any I/O error
	 */
	public static String readFile(File file) {
		BufferedReader br = null;
		try {
			br = new BufferedReader(new InputStreamReader(new FileInputStream(file)));
			StringBuffer sb = new StringBuffer();
			String tem = null;
			while ((tem = br.readLine()) != null) {
				sb.append(tem).append("\n");
			}
			return sb.toString();
		} catch (Exception e) {
			throw new RuntimeException(e);
		} finally {
			// BUG FIX: original leaked the reader/stream; close it best-effort.
			if (br != null) {
				try {
					br.close();
				} catch (Exception ignored) {
					// nothing useful to do if close itself fails
				}
			}
		}
	}

	/**
	 * Extracts the stored fields of a Document into a plain map.
	 *
	 * @param doc document to read
	 * @return map with name, content, size, path entries
	 */
	public static Map<String, String> readDoc(Document doc) {
		Map<String, String> map = new HashMap<String, String>();
		map.put("name", doc.get("name"));
		map.put("content", doc.get("content"));
		map.put("size", doc.get("size"));
		map.put("path", doc.get("path"));

		return map;
	}
}



评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值