Lucene的简单使用

1 篇文章 0 订阅
1 篇文章 0 订阅

Lucene的简单使用:在进行检索的时候是需要原来已经创建了索引才能检索到需要的内容的。所以在使用Lucene的时候大体就是两个方面,建立索引和检索,其次就是一些不能的策略了!

 

 

1.建立索引

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class Writer {
	
	public static void main(String args[]) throws IOException {
		String indexDir = "E:\\index\\test";//索引存放的路径
		String dataDir = "D:\\Backup\\Note";//这个是用来建立索引的数据源,此处是以一个一个的文件为例来建立索引的
		Directory dir = FSDirectory.open(new File(indexDir));
		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);//建立一个对应版本的分析器
		IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, analyzer);//这是用于在建立索引时配置信息的,默认是默认信息
		IndexWriter indexWriter = new IndexWriter(dir, config);
		File files[] = new File(dataDir).listFiles();
		for (File file:files) {
			Document doc = new Document();
			Field field1 = new Field("fileName",file.getName(), Field.Store.YES, Field.Index.ANALYZED);
			FileInputStream fis = new FileInputStream(file);
			int len = fis.available();
			byte[] bytes = new byte[len];
			fis.read(bytes);
			fis.close();
			String content = new String(bytes);
			Field field2 = new Field("content", content, Field.Store.YES, Field.Index.ANALYZED);
			doc.add(field1);
			doc.add(field2);
//现在流行的检索工具都是把每一个信息源看作一个Document来进行处理的
			indexWriter.addDocument(doc);
			indexWriter.optimize();
		}
		int docs = indexWriter.numDocs();
		System.out.println("共索引了"+docs+"个文件!");
		indexWriter.close();
	}
	
}

 

 

2.进行检索

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class Searcher {

	public static void main(String args[]) throws IOException, ParseException {
		
		String indexDir = "E:\\index\\test";
		Directory dir = FSDirectory.open(new File(indexDir));
		IndexSearcher indexSearcher = new IndexSearcher(dir);
		Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
//		QueryParser queryParser = new QueryParser(Version.LUCENE_31, "content", analyzer);
		QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_31, new String[] {"fileName","content"}, analyzer);
		Query query = queryParser.parse("内 的 值");
		TopScoreDocCollector collector = TopScoreDocCollector.create(3, true);//前面一个参数表示一次最多检索多少,这里检索的是永远从第一条开始
		indexSearcher.search(query, collector);
		TopDocs topDocs = collector.topDocs(2, 2);//第一个参数为从第多少个记录开始,第二个参数为每次取多少条记录
//		TopDocs topDocs = indexSearcher.search(query, 10);
		int totalHits = topDocs.totalHits;
		System.out.println("totalHits:"+totalHits);
		System.out.println();
		ScoreDoc scoreDocs[] = topDocs.scoreDocs;
		for (ScoreDoc scoreDoc:scoreDocs) {
			float score = scoreDoc.score;
			Document doc = indexSearcher.doc(scoreDoc.doc);
			System.out.println("fileName:"+doc.get("fileName"));
//			System.out.println("content:"+doc.get("content"));
			System.out.println("score:"+score);
			System.out.println();
		}
		indexSearcher.close();
		
	}
	
}

 

 

 

说明:依赖jar包:lucene-core-2.3.2.jar、IKAnalyzer3.2.8.jar。 一、LuceneUtil 工具类代码: package com.zcm.lucene; import java.io.File; import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.MultiFieldQueryParser; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.wltea.analyzer.IKSegmentation; import org.wltea.analyzer.Lexeme; /** * Apache Lucene全文检索和IKAnalyzer分词工具类 * Company: 91注册码 * time:2014-04-22 * @author www.91zcm.com * @date * @version 1.1 */ public class LuceneUtil { /**索引创建的路径**/ private static String LucenePath = "d://index"; /** * 创建索引 * @throws Exception */ public static int createIndex(List list) throws Exception{ /**这里放索引文件的位置**/ File indexDir = new File(LucenePath); Analyzer luceneAnalyzer = new StandardAnalyzer(); /**注意最后一个boolean类型的参数:表示是否重新创建,true表示新创建(以前存在时回覆盖)**/ IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer,true); for (int i = 0; i < list.size(); i++) { LuceneVO vo = (LuceneVO)list.get(i); Document doc = new Document(); Field FieldId = new Field("aid", String.valueOf(vo.getAid()),Field.Store.YES, Field.Index.NO); Field FieldTitle = new Field("title", vo.getTitle(), Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.WITH_POSITIONS_OFFSETS); Field FieldRemark = new Field("remark", vo.getRemark(), Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(FieldId); doc.add(FieldTitle); doc.add(FieldRemark); indexWriter.addDocument(doc); } /**查看IndexWriter里面有多少个索引**/ int num = indexWriter.docCount(); System.out.println("总共------》" + num); indexWriter.optimize(); indexWriter.close(); return num; } /** * IKAnalyzer分词 * @param word * @return * @throws IOException */ public static List tokenWord(String word) throws IOException{ List tokenArr = new ArrayList(); StringReader reader = new StringReader(word); /**当为true时,分词器进行最大词长切分**/ IKSegmentation ik = new IKSegmentation(reader, true); Lexeme lexeme = null; while ((lexeme = ik.next()) != null){ tokenArr.add(lexeme.getLexemeText()); } return tokenArr; } /** * 创建索引(单个) * @param list * @throws Exception */ public static void addIndex(LuceneVO vo) throws Exception { /**这里放索引文件的位置**/ File indexDir = new File(LucenePath); Analyzer luceneAnalyzer = new StandardAnalyzer(); IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer, false); /**增加document到索引去 **/ Document doc = new Document(); Field FieldId = new Field("aid", String.valueOf(vo.getAid()),Field.Store.YES, Field.Index.NO); Field FieldTitle = new Field("title", vo.getTitle(), Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.WITH_POSITIONS_OFFSETS); Field FieldRemark = new Field("remark", vo.getRemark(), Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(FieldId); doc.add(FieldTitle); doc.add(FieldRemark); indexWriter.addDocument(doc); /**optimize()方法是对索引进行优化 **/ indexWriter.optimize(); indexWriter.close(); } /** * 创建索引(多个) * @param list * @throws Exception */ public static void addIndexs(List list) throws Exception { /**这里放索引文件的位置**/ File indexDir = new File(LucenePath); Analyzer luceneAnalyzer = new StandardAnalyzer(); IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer,false); /**增加document到索引去 **/ for (int i=0; i<list.size();i++){ LuceneVO vo = (LuceneVO)list.get(i); Document doc = new Document(); Field FieldId = new Field("aid", String.valueOf(vo.getAid()),Field.Store.YES, Field.Index.NO); Field FieldTitle = new Field("title", vo.getTitle(), Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.WITH_POSITIONS_OFFSETS); Field FieldRemark = new Field("remark", vo.getRemark(), Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(FieldId); doc.add(FieldTitle); doc.add(FieldRemark); indexWriter.addDocument(doc); } /**optimize()方法是对索引进行优化 **/ indexWriter.optimize(); indexWriter.close(); } /** * 更新索引(单个) * @param list * @throws Exception */ public static void updateIndex(LuceneVO vo) throws Exception { /**这里放索引文件的位置**/ File indexDir = new File(LucenePath); Analyzer luceneAnalyzer = new StandardAnalyzer(); IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer,false); /**增加document到索引去 **/ Document doc = new Document(); Field FieldId = new Field("aid", String.valueOf(vo.getAid()),Field.Store.YES, Field.Index.NO); Field FieldTitle = new Field("title", vo.getTitle(), Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.WITH_POSITIONS_OFFSETS); Field FieldRemark = new Field("remark", vo.getRemark(), Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(FieldId); doc.add(FieldTitle); doc.add(FieldRemark); Term term = new Term("aid",String.valueOf(vo.getAid())); indexWriter.updateDocument(term, doc); /**optimize()方法是对索引进行优化 **/ indexWriter.optimize(); indexWriter.close(); } /** * 创建索引(多个) * @param list * @throws Exception */ public static void updateIndexs(List list) throws Exception { /**这里放索引文件的位置**/ File indexDir = new File(LucenePath); Analyzer luceneAnalyzer = new StandardAnalyzer(); IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer,false); /**增加document到索引去 **/ for (int i=0; i<list.size();i++){ LuceneVO vo = (LuceneVO)list.get(i); Document doc = new Document(); Field FieldId = new Field("aid", String.valueOf(vo.getAid()),Field.Store.YES, Field.Index.NO); Field FieldTitle = new Field("title", vo.getTitle(), Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.WITH_POSITIONS_OFFSETS); Field FieldRemark = new Field("remark", vo.getRemark(), Field.Store.YES,Field.Index.TOKENIZED,Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(FieldId); doc.add(FieldTitle); doc.add(FieldRemark); Term term = new Term("aid",String.valueOf(vo.getAid())); indexWriter.updateDocument(term, doc); } /**optimize()方法是对索引进行优化 **/ indexWriter.optimize(); indexWriter.close(); } /** * 创建索引(单个) * @param list * @throws Exception */ public static void deleteIndex(LuceneVO vo) throws Exception { /**这里放索引文件的位置**/ File indexDir = new File(LucenePath); Analyzer luceneAnalyzer = new StandardAnalyzer(); IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer,false); Term term = new Term("aid",String.valueOf(vo.getAid())); indexWriter.deleteDocuments(term); /**optimize()方法是对索引进行优化 **/ indexWriter.optimize(); indexWriter.close(); } /** * 创建索引(多个) * @param list * @throws Exception */ public static void deleteIndexs(List list) throws Exception { /**这里放索引文件的位置**/ File indexDir = new File(LucenePath); Analyzer luceneAnalyzer = new StandardAnalyzer(); IndexWriter indexWriter = new IndexWriter(indexDir, luceneAnalyzer,false); /**删除索引 **/ for (int i=0; i<list.size();i++){ LuceneVO vo = (LuceneVO)list.get(i); Term term = new Term("aid",String.valueOf(vo.getAid())); indexWriter.deleteDocuments(term); } /**optimize()方法是对索引进行优化 **/ indexWriter.optimize(); indexWriter.close(); } /** * 检索数据 * @param word * @return */ public static List search(String word) { List list = new ArrayList(); Hits hits = null; try { IndexSearcher searcher = new IndexSearcher(LucenePath); String[] queries = {word,word}; String[] fields = {"title", "remark"}; BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD}; Query query = MultiFieldQueryParser.parse(queries, fields, flags, new StandardAnalyzer()); if (searcher != null) { /**hits结果**/ hits = searcher.search(query); LuceneVO vo = null; for (int i = 0; i < hits.length(); i++) { Document doc = hits.doc(i); vo = new LuceneVO(); vo.setAid(Integer.parseInt(doc.get("aid"))); vo.setRemark(doc.get("remark")); vo.setTitle(doc.get("title")); list.add(vo); } } } catch (Exception ex) { ex.printStackTrace(); } return list; } } 二、Lucene用到的JavaBean代码: package com.zcm.lucene; /** * Apache Lucene全文检索用到的Bean * Company: 91注册码 * time:2014-04-22 * @author www.91zcm.com * @date * @version 1.1 */ public class LuceneVO { private Integer aid; /**文章ID**/ private String title; /**文章标题**/ private String remark; /**文章摘要**/ public Integer getAid() { return aid; } public void setAid(Integer aid) { this.aid = aid; } public String getTitle() { return title; } public void setTitle(String title) { this.title = title; } public String getRemark() { return remark; } public void setRemark(String remark) { this.remark = remark; } } 备注:源码来源于www.91zcm.com 开源博客中的全文检索代码。(http://www.91zcm.com/)
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值