前面两篇博文已经介绍过了Lucene,大家也对Lucene有了初步了解,我呢,在这里给出我项目中的一些实际的例子,这些例子中包含了
对索引的增删改查操作,还包括关键字高亮~~~
当然这些例子建立在Lucene3.0的基础之上,是对txt文件创建的索引
好的以下是代码~~
创建索引类:
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
public class Index {
	/**
	 * Creates a Lucene 3.0 index from the plain-text files in a data directory.
	 *
	 * Each file becomes one Document with four fields:
	 *  - "id":        sequential number, stored, not analyzed
	 *  - "contents":  file body, indexed via FileReader (not stored)
	 *  - "path":      absolute file path, stored and analyzed
	 *  - "indexDate": indexing date at day resolution, stored, not analyzed
	 *
	 * @param args unused
	 * @throws IOException if the data directory is missing or indexing fails
	 */
	public static void main(String[] args) throws IOException {
		// Destination directory for the index files
		String indexDir = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index";
		// Directory containing the TXT files to be indexed
		String dataDir = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\data";
		File[] files = new File(dataDir).listFiles();
		if (files == null) {
			// listFiles() returns null when the path does not exist or is not a directory
			throw new IOException("数据目录不存在: " + dataDir);
		}
		// FSDirectory variant that keeps the index on local disk
		Directory dir = new SimpleFSDirectory(new File(indexDir));
		// create=true rebuilds the index from scratch; MaxFieldLength.UNLIMITED
		// puts no cap on the number of terms indexed per field
		IndexWriter indexWriter = new IndexWriter(dir, new StandardAnalyzer(
				Version.LUCENE_30), true, IndexWriter.MaxFieldLength.UNLIMITED);
		try {
			for (int i = 0; i < files.length; i++) {
				Document doc = new Document();
				doc.add(new Field("id", i + "", Field.Store.YES,
						Field.Index.NOT_ANALYZED));
				// Reader-valued field: Lucene consumes and closes the reader
				// during addDocument()
				doc.add(new Field("contents", new FileReader(files[i])));
				doc.add(new Field("path", files[i].getAbsolutePath(),
						Field.Store.YES, Field.Index.ANALYZED));
				doc.add(new Field("indexDate", DateTools.dateToString(new Date(),
						DateTools.Resolution.DAY), Field.Store.YES,
						Field.Index.NOT_ANALYZED));
				indexWriter.addDocument(doc);
			}
			// Report how many documents the writer now holds
			System.out.println("总共------》" + indexWriter.numDocs());
		} finally {
			// Always release the index write lock, even if indexing fails
			indexWriter.close();
		}
	}
}
查找索引类:
import java.io.File;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
* @author ht
* 查询
*
*/
public class Seacher {
	private static String INDEX_DIR = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index";// location of the index
	private static String KEYWORD = " ";// query string to search for
	private static int TOP_NUM = 10;// show at most the top 10 hits

	public static void main(String[] args) throws Exception {
		File indexDir = new File(INDEX_DIR);
		if (!indexDir.exists() || !indexDir.isDirectory()) {
			throw new Exception(indexDir + " 该目录不存在~~");
		}
		search(indexDir, KEYWORD);// run the query
	}

	/**
	 * Searches the "contents" field of the index and prints the path and
	 * index date of every hit, followed by the hit count and elapsed time.
	 *
	 * @param indexDir directory holding the Lucene index
	 * @param q        raw query string, parsed with StandardAnalyzer
	 * @throws Exception on I/O or query-parse failure
	 */
	public static void search(File indexDir, String q) throws Exception {
		// Read-only searcher over the on-disk index
		IndexSearcher is = new IndexSearcher(FSDirectory.open(indexDir), true);
		try {
			String field = "contents";
			QueryParser parser = new QueryParser(Version.LUCENE_30, field,
					new StandardAnalyzer(Version.LUCENE_30));
			Query query = parser.parse(q);
			// Collect the TOP_NUM best-scoring documents;
			// docsScoredInOrder=false matches the default scoring path
			TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM,
					false);
			long start = new Date().getTime();// start time
			is.search(query, collector);
			ScoreDoc[] hits = collector.topDocs().scoreDocs;
			for (int i = 0; i < hits.length; i++) {
				Document doc = is.doc(hits[i].doc);
				System.out.println("路径:" + doc.get("path")
						+ "---------------------" + doc.getField("path")
						+ "\n创建时间:" + doc.get("indexDate"));
			}
			long end = new Date().getTime();// end time
			System.out.println("\n找到" + collector.getTotalHits() +
					"个结果,总共花费时间 : " + (end - start) + "毫秒"
					);
		} finally {
			// Release the searcher's underlying file handles
			is.close();
		}
	}
}
增加索引:
import java.io.File;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/*
* 添加索引
*
*
*/
public class addIndex {
	/**
	 * Appends one hand-built document to an existing index, then reports the
	 * total and deleted document counts.
	 *
	 * @throws IOException    on index access failure
	 * @throws ParseException never thrown here; kept for interface compatibility
	 */
	public static void main(String s[]) throws IOException, ParseException {
		Directory dir = FSDirectory.open(new File(
				"C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"));
		// create=false appends to the existing index; true would wipe it first
		IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(
				Version.LUCENE_30), false, IndexWriter.MaxFieldLength.UNLIMITED);
		try {
			Document doc = new Document();
			doc.add(new Field("contents", "java", Field.Store.YES,
					Field.Index.ANALYZED));
			doc.add(new Field("path", "新添加的路径www.baidu.com", Field.Store.YES,
					Field.Index.ANALYZED));
			doc.add(new Field("indexDate", DateTools.dateToString(new Date(),
					DateTools.Resolution.DAY), Field.Store.YES,
					Field.Index.NOT_ANALYZED));
			iw.addDocument(doc);
			iw.commit();
		} finally {
			// close() releases the write lock (and commits any pending changes)
			iw.close();
		}
		// Open the reader only AFTER the commit, otherwise the counts below
		// would not include the document just added
		IndexReader id = IndexReader.open(dir);
		try {
			System.out.println("总共索引数" + id.maxDoc());
			System.out.println("删除条数:" + id.numDeletedDocs());
		} finally {
			id.close();
		}
	}
}
删除索引:
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/*
* 删除索引
*
*
*/
public class deleteIndex {
	/**
	 * Deletes every document whose "contents" field matches the query, then
	 * reports the total and deleted document counts.
	 *
	 * @throws IOException    on index access failure
	 * @throws ParseException if the delete query cannot be parsed
	 */
	public static void main(String s[]) throws IOException, ParseException {
		Directory dir = FSDirectory.open(new File(
				"C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"));
		// create=false is required here: true would recreate (i.e. erase) the
		// whole index instead of deleting from it
		IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(
				Version.LUCENE_30), false, IndexWriter.MaxFieldLength.UNLIMITED);
		try {
			QueryParser qp = new QueryParser(Version.LUCENE_30, "contents",
					new StandardAnalyzer(Version.LUCENE_30));
			Query p = qp.parse("新添加的contents");
			iw.deleteDocuments(p);
			iw.commit();
		} finally {
			// BUG FIX: the original never closed the writer, leaving the
			// index write lock held after the program exits
			iw.close();
		}
		// Open the reader AFTER the commit so the deletions are visible;
		// the original opened it before committing and saw stale counts
		IndexReader id = IndexReader.open(dir);
		try {
			System.out.println("总共索引数" + id.maxDoc());
			System.out.println("删除条数:" + id.numDeletedDocs());
		} finally {
			id.close();
		}
	}
}
更新索引:
import java.awt.font.OpenType;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/*
* 更新索引
*
*
*/
public class updateIndex {
	/**
	 * "Updates" documents by deleting every document matching a query and
	 * adding one replacement document per deleted hit, so the document count
	 * stays the same.
	 *
	 * Lucene 3.0's IndexWriter.updateDocument(Term, Document) keys on a single
	 * exact Term; this example instead performs an explicit delete-then-add so
	 * an analyzed query can select the victims.
	 *
	 * @throws IOException    on index access failure
	 * @throws ParseException if the selection query cannot be parsed
	 */
	public static void main(String s[]) throws IOException, ParseException {
		Directory dir = FSDirectory.open(new File(
				"C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index"));
		// Read-only searcher used only to count how many documents will be replaced
		IndexSearcher is = new IndexSearcher(dir, true);
		try {
			// create=false: keep the existing index, do not rebuild it
			IndexWriter iw = new IndexWriter(dir, new StandardAnalyzer(
					Version.LUCENE_30), false, IndexWriter.MaxFieldLength.UNLIMITED);
			long start = new Date().getTime();// start time
			try {
				// The replacement document
				Document doc = new Document();
				doc.add(new Field("contents", "android", Field.Store.YES,
						Field.Index.ANALYZED));
				doc.add(new Field("path", "新添加的路径www.baidu.com android", Field.Store.YES,
						Field.Index.ANALYZED));
				// NOT_ANALYZED for consistency with the other examples, which
				// store the date as a single un-tokenized term
				doc.add(new Field("indexDate", DateTools.dateToString(new Date(),
						DateTools.Resolution.DAY), Field.Store.YES,
						Field.Index.NOT_ANALYZED));
				// Find the documents to be replaced
				QueryParser qp = new QueryParser(Version.LUCENE_30, "contents",
						new StandardAnalyzer(Version.LUCENE_30));
				Query p = qp.parse("java");
				TopScoreDocCollector collector = TopScoreDocCollector.create(10,
						false);
				is.search(p, collector);
				ScoreDoc[] hits = collector.topDocs().scoreDocs;
				// One deleteDocuments(p) removes ALL matches; then add one
				// replacement per former hit to keep the count unchanged
				if (hits.length > 0) {
					iw.deleteDocuments(p);
					for (int i = 0; i < hits.length; i++) {
						iw.addDocument(doc);
					}
				}
				iw.optimize();// merge segments (Lucene 3.x API)
			} finally {
				// Release the write lock even if the update fails
				iw.close();
			}
			// Open the reader after the writer is closed so changes are visible
			IndexReader id = IndexReader.open(dir);
			try {
				System.out.println("总共索引数" + id.maxDoc());
				long end = new Date().getTime();// end time
				System.out.println("删除条数:" + id.numDeletedDocs() + "总共花费时间:"
						+ (end - start));
			} finally {
				id.close();
			}
		} finally {
			// Release the searcher's file handles
			is.close();
		}
	}
}
关键字高亮:
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.util.Date;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
public class HighLighter {
	private static String INDEX_DIR = "C:\\Users\\ZEQ\\Desktop\\ZZUluceneTest1\\index";// location of the index
	private static String KEYWORD = "android";// query string to search for
	private static int TOP_NUM = 100;// show at most the top 100 hits

	/**
	 * Wraps every query match inside the given stored field with
	 * &lt;font color="red"&gt;...&lt;/font&gt; tags.
	 *
	 * @param query the parsed query whose terms should be highlighted
	 * @param doc   a result document retrieved from the searcher
	 * @param field name of the stored field to highlight
	 * @return the highlighted fragment, or the raw field value when nothing
	 *         matched or highlighting failed
	 */
	private static String toHighlighter(Query query, Document doc, String field) {
		try {
			SimpleHTMLFormatter simpleHtmlFormatter = new SimpleHTMLFormatter(
					"<font color=\"red\">", "</font>");
			Highlighter highlighter = new Highlighter(simpleHtmlFormatter,
					new QueryScorer(query));
			StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
			// BUG FIX: the original hard-coded "path" here; the token stream
			// must be produced for the same field that is being highlighted
			TokenStream tokenStream = analyzer.tokenStream(field,
					new StringReader(doc.get(field)));
			String highlighted = highlighter.getBestFragment(tokenStream,
					doc.get(field));
			return highlighted == null ? doc.get(field) : highlighted;
		} catch (IOException e) {
			System.out.println(e);
		} catch (InvalidTokenOffsetsException e) {
			System.out.println(e);
		}
		// On failure fall back to the unhighlighted value instead of null,
		// so callers never print the literal string "null"
		return doc.get(field);
	}

	public static void main(String[] args) throws Exception {
		File indexDir = new File(INDEX_DIR);
		if (!indexDir.exists() || !indexDir.isDirectory()) {
			throw new Exception(indexDir + " 该目录不存在~~");
		}
		search(indexDir, KEYWORD);// run the query
	}

	/**
	 * Searches the "contents" field and prints the highlighted "path" value
	 * and index date of every hit, followed by the hit count and elapsed time.
	 *
	 * @param indexDir directory holding the Lucene index
	 * @param q        raw query string, parsed with StandardAnalyzer
	 * @throws Exception on I/O or query-parse failure
	 */
	public static void search(File indexDir, String q) throws Exception {
		// Read-only searcher over the on-disk index
		IndexSearcher is = new IndexSearcher(FSDirectory.open(indexDir), true);
		try {
			String field = "contents";
			QueryParser parser = new QueryParser(Version.LUCENE_30, field,
					new StandardAnalyzer(Version.LUCENE_30));
			Query query = parser.parse(q);
			TopScoreDocCollector collector = TopScoreDocCollector.create(TOP_NUM,
					false);
			long start = new Date().getTime();// start time
			is.search(query, collector);
			ScoreDoc[] hits = collector.topDocs().scoreDocs;
			for (int i = 0; i < hits.length; i++) {
				Document doc = is.doc(hits[i].doc);
				System.out.println("高亮----------------"
						+ toHighlighter(query, doc, "path") + "\n 创建时间:"
						+ doc.get("indexDate"));
			}
			long end = new Date().getTime();// end time
			System.out.println("\n找到" + collector.getTotalHits() + "个结果,总共花费时间 : "
					+ (end - start) + "毫秒");
		} finally {
			// Release the searcher's file handles
			is.close();
		}
	}
}
以上是我自己整理的,亲自调试过,没什么问题~~~对于更新索引操作我目前还没什么好的办法!!!希望大家批评指正!
在这里把源代码以及 jar 包和索引例子给大家贡献出来~~~~
http://download.csdn.net/detail/zeq9069/6571589