Lucene的简单使用:在进行检索之前,必须先建立好索引,才能检索到需要的内容。所以使用Lucene大体上分为两个方面:建立索引和进行检索,其次就是一些不同的策略了!
1.建立索引
- import java.io.File;
- import java.io.FileInputStream;
- import java.io.IOException;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.document.Field;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.util.Version;
- /**
-  * Builds a Lucene index from the files of a data directory.
-  *
-  * Each regular file directly inside the data directory becomes one Document
-  * with two stored, analyzed fields: "fileName" and "content".
-  */
- public class Writer {
-     /**
-      * Indexes every regular file in the data directory and prints the number
-      * of documents in the index.
-      *
-      * @param args unused
-      * @throws IOException if the data directory cannot be listed, a file
-      *         cannot be read, or the index cannot be written
-      */
-     public static void main(String args[]) throws IOException {
-         String indexDir = "E:\\index\\test"; // where the index is stored
-         String dataDir = "D:\\Backup\\Note"; // data source: one document per file
-         // listFiles() returns null when the path is not a readable directory;
-         // fail with a clear message before touching the index.
-         File files[] = new File(dataDir).listFiles();
-         if (files == null) {
-             throw new IOException("Cannot list data directory: " + dataDir);
-         }
-         Directory dir = FSDirectory.open(new File(indexDir));
-         try {
-             // Analyzer matching the Lucene version in use.
-             Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
-             // Index-time configuration; the defaults are used here.
-             IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, analyzer);
-             IndexWriter indexWriter = new IndexWriter(dir, config);
-             try {
-                 for (File file : files) {
-                     // Sub-directories cannot be opened as a FileInputStream; skip them.
-                     if (!file.isFile()) {
-                         continue;
-                     }
-                     // Each information source is treated as one Document.
-                     Document doc = new Document();
-                     doc.add(new Field("fileName", file.getName(), Field.Store.YES, Field.Index.ANALYZED));
-                     doc.add(new Field("content", readContent(file), Field.Store.YES, Field.Index.ANALYZED));
-                     indexWriter.addDocument(doc);
-                 }
-                 // Optimize once after all documents are added; doing it per
-                 // document re-merges the whole index on every iteration.
-                 indexWriter.optimize();
-                 int docs = indexWriter.numDocs();
-                 System.out.println("共索引了"+docs+"个文件!");
-             } finally {
-                 // Always release the index write lock, even on failure.
-                 indexWriter.close();
-             }
-         } finally {
-             dir.close();
-         }
-     }
-
-     /**
-      * Reads the whole file into a String.
-      *
-      * @param file the file to read
-      * @return the file content decoded with the platform default charset
-      * @throws IOException if the file cannot be read or is too large to buffer
-      */
-     private static String readContent(File file) throws IOException {
-         long size = file.length();
-         if (size > Integer.MAX_VALUE) {
-             throw new IOException("File too large to index in memory: " + file);
-         }
-         byte[] bytes = new byte[(int) size];
-         FileInputStream fis = new FileInputStream(file);
-         try {
-             int filled = 0;
-             // A single read() may return fewer bytes than requested, so loop
-             // until the buffer is full or the stream ends.
-             while (filled < bytes.length) {
-                 int n = fis.read(bytes, filled, bytes.length - filled);
-                 if (n < 0) {
-                     break;
-                 }
-                 filled += n;
-             }
-             // NOTE(review): decoding uses the platform default charset, as the
-             // original did; confirm the encoding of the source files.
-             return new String(bytes, 0, filled);
-         } finally {
-             fis.close();
-         }
-     }
- }
2.进行检索
- import java.io.File;
- import java.io.IOException;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.queryParser.MultiFieldQueryParser;
- import org.apache.lucene.queryParser.ParseException;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopDocs;
- import org.apache.lucene.search.TopScoreDocCollector;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.util.Version;
- /**
-  * Searches the index for a query over the "fileName" and "content" fields
-  * and prints one page of results.
-  */
- public class Searcher {
-     /**
-      * Runs a fixed query against the index and prints the total hit count
-      * followed by the file name and score of each hit on the requested page.
-      *
-      * @param args unused
-      * @throws IOException if the index cannot be opened or read
-      * @throws ParseException if the query string cannot be parsed
-      */
-     public static void main(String args[]) throws IOException, ParseException {
-         String indexDir = "E:\\index\\test";
-         int start = 2;    // offset of the first hit to return
-         int pageSize = 2; // number of hits to return
-         Directory dir = FSDirectory.open(new File(indexDir));
-         try {
-             IndexSearcher indexSearcher = new IndexSearcher(dir);
-             try {
-                 Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
-                 // Single-field alternative:
-                 //   new QueryParser(Version.LUCENE_31, "content", analyzer)
-                 QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_31, new String[] {"fileName","content"}, analyzer);
-                 Query query = queryParser.parse("内 的 值");
-                 // The collector always gathers hits from the first one, so it
-                 // must hold start + pageSize hits to serve a full page at the
-                 // offset; a smaller collector silently truncates the page.
-                 TopScoreDocCollector collector = TopScoreDocCollector.create(start + pageSize, true);
-                 indexSearcher.search(query, collector);
-                 // First argument: offset of the first hit; second: how many to take.
-                 TopDocs topDocs = collector.topDocs(start, pageSize);
-                 // Non-paged alternative: indexSearcher.search(query, 10)
-                 int totalHits = topDocs.totalHits;
-                 System.out.println("totalHits:"+totalHits);
-                 System.out.println();
-                 for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
-                     float score = scoreDoc.score;
-                     Document doc = indexSearcher.doc(scoreDoc.doc);
-                     System.out.println("fileName:"+doc.get("fileName"));
-                     // The stored "content" field is also available via doc.get("content").
-                     System.out.println("score:"+score);
-                     System.out.println();
-                 }
-             } finally {
-                 // Close the searcher even if parsing or searching fails.
-                 indexSearcher.close();
-             }
-         } finally {
-             dir.close();
-         }
-     }
- }