用 Lucene 3.0 做了两个简单的例子：创建索引和搜索索引。
创建索引 Lucene 3.0+
- package com.langhua;
- import java.io.File;
- import java.io.FileReader;
- import java.io.IOException;
- import java.util.Date;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.DateTools;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.SimpleFSDirectory;
- import org.apache.lucene.util.Version;
- /**
- * Builds a Lucene 3.0+ index: every entry returned by listing the data
- * directory becomes one Document with a "contents", "filename" and
- * "indexDate" field.
- * @author Administrator
- *
- */
- public class Indexer {
- /**
- * Creates a fresh index under indexDir from the files found in dateDir and
- * prints the resulting document count.
- * NOTE(review): the listFiles() result is used without a null check, and
- * entries are passed to FileReader without checking that they are regular files.
- * @param args not used
- * @throws IOException propagated from the directory, reader and index writer calls
- */
- public static void main(String[] args) throws IOException {
- // Location where the index files are stored.
- String indexDir = "F://indexDir";
- // Location of the TXT files that will be indexed (and later searched).
- String dateDir = "F://dateDir";
- IndexWriter indexWriter = null;
- // Create the Directory object backing the index.
- Directory dir = new SimpleFSDirectory(new File(indexDir));
- // Create the IndexWriter. Arg 1 is the Directory, arg 2 the analyzer, arg 3 whether to create a new index (false means modify the existing one), arg 4 the maximum field length, i.e. the cap on how many terms are indexed per field (it is not a tokenization width). IndexWriter.MaxFieldLength.LIMITED is the usual choice; UNLIMITED is used here.
- indexWriter = new IndexWriter(dir,new StandardAnalyzer(Version.LUCENE_30),true,IndexWriter.MaxFieldLength.UNLIMITED);
- File[] files = new File(dateDir).listFiles();
- for (int i = 0; i < files.length; i++) {
- Document doc = new Document();
- // Create the Field objects and add them to the document.
- doc.add(new Field("contents", new FileReader(files[i])));
- doc.add(new Field("filename", files[i].getName(),
- Field.Store.YES, Field.Index.NOT_ANALYZED));
- doc.add(new Field("indexDate",DateTools.dateToString(new Date(), DateTools.Resolution.DAY),Field.Store.YES,Field.Index.NOT_ANALYZED));
- // Hand the document to the IndexWriter.
- indexWriter.addDocument(doc);
- }
- // Print how many documents the IndexWriter holds, then close it. NOTE(review): close() is not reached if anything above throws.
- System.out.println("numDocs"+indexWriter.numDocs());
- indexWriter.close();
- }
- }
package com.langhua;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
/**
 * Builds a Lucene 3.0+ index from the regular files in a data directory.
 *
 * <p>Each file becomes one {@link Document} with three fields:
 * <ul>
 *   <li>{@code contents} - the file's text, tokenized from a reader (not stored);</li>
 *   <li>{@code filename} - the file name, stored and indexed as a single term;</li>
 *   <li>{@code indexDate} - the indexing date at day resolution, stored and indexed
 *       as a single term.</li>
 * </ul>
 *
 * @author Administrator
 */
public class Indexer {
    /**
     * Recreates the index under {@code F://indexDir} from the files in
     * {@code F://dateDir} and prints the number of indexed documents.
     *
     * @param args not used
     * @throws IOException if the data directory cannot be listed, a file cannot be
     *                     read, or the index cannot be written
     */
    public static void main(String[] args) throws IOException {
        // Location where the index files are stored.
        String indexDir = "F://indexDir";
        // Location of the TXT files that will be indexed (and later searched).
        String dateDir = "F://dateDir";

        // listFiles() returns null when the path does not exist or is not a directory.
        // Check this before opening the writer: create=true discards any existing index,
        // so failing later would leave the user with an empty one.
        File[] files = new File(dateDir).listFiles();
        if (files == null) {
            throw new IOException("Cannot list data directory: " + dateDir);
        }

        Directory dir = new SimpleFSDirectory(new File(indexDir));
        try {
            // Arguments: the Directory, the analyzer, create (true = start a new index,
            // false = modify the existing one), and the maximum field length, i.e. the cap
            // on how many terms are indexed per field (it is not a tokenization width).
            // MaxFieldLength.LIMITED is the usual choice; UNLIMITED indexes every term.
            IndexWriter indexWriter = new IndexWriter(dir,
                    new StandardAnalyzer(Version.LUCENE_30),
                    true,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                // The same date applies to every document of this run.
                String indexDate = DateTools.dateToString(new Date(), DateTools.Resolution.DAY);
                for (File file : files) {
                    // Subdirectories and other non-regular entries cannot be opened
                    // with FileReader, so skip them.
                    if (!file.isFile()) {
                        continue;
                    }
                    // NOTE: FileReader decodes with the platform default charset.
                    FileReader reader = new FileReader(file);
                    try {
                        Document doc = new Document();
                        doc.add(new Field("contents", reader));
                        doc.add(new Field("filename", file.getName(),
                                Field.Store.YES, Field.Index.NOT_ANALYZED));
                        doc.add(new Field("indexDate", indexDate,
                                Field.Store.YES, Field.Index.NOT_ANALYZED));
                        // The reader is consumed while the document is added.
                        indexWriter.addDocument(doc);
                    } finally {
                        // Release the file handle even if indexing this document failed.
                        reader.close();
                    }
                }
                // Report how many documents the index now holds.
                System.out.println("numDocs" + indexWriter.numDocs());
            } finally {
                // Always close the writer so the index write lock is released.
                indexWriter.close();
            }
        } finally {
            dir.close();
        }
    }
}
搜索索引 Lucene 3.0+
- package com.langhua;
- import java.io.File;
- import java.io.IOException;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.queryParser.ParseException;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopDocs;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.SimpleFSDirectory;
- import org.apache.lucene.util.Version;
- /**
- * Searches the Lucene 3.0+ index: parses a fixed query against the
- * "contents" field and prints the "filename" of each of the top 10 hits.
- * NOTE(review): indexSearch.close() is not reached if parsing or searching throws.
- * @author Administrator
- *
- */
- public class Searcher {
- public static void main(String[] args) throws IOException, ParseException {
- // Location where the index files are stored.
- String indexDir = "F://indexDir";
- Directory dir = new SimpleFSDirectory(new File(indexDir));
- // Create the IndexSearcher; unlike IndexWriter, it only needs the index directory.
- IndexSearcher indexSearch = new IndexSearcher(dir);
- // Create the QueryParser: arg 1 is the Lucene version, arg 2 the field to search, arg 3 the analyzer used for the query text.
- QueryParser queryParser = new QueryParser(Version.LUCENE_30,
- "contents", new StandardAnalyzer(Version.LUCENE_30));
- // Build the Query object.
- Query query = queryParser.parse("langhua9527");
- // Run the search (at most 10 results); TopDocs carries a scoreDocs[] array whose entries hold the matching document ids.
- TopDocs hits = indexSearch.search(query, 10);
- // hits.totalHits is the total number of matches found.
- System.out.println("找到了"+hits.totalHits+"个");
- // Loop over hits.scoreDocs, load each Document back via indexSearch.doc, then read the wanted field's value.
- for (int i = 0; i < hits.scoreDocs.length; i++) {
- ScoreDoc sdoc = hits.scoreDocs[i];
- Document doc = indexSearch.doc(sdoc.doc);
- System.out.println(doc.get("filename"));
- }
- indexSearch.close();
- }
- }