Lucene 实例3

各种样式的搜索:不同类型的查询(Query)示例

 

package com.lucene;


import java.io.File;
import java.io.FileReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Lucene 3.5 demo showing different styles of search.
 *
 * <p>{@link #createIndexByPath(File, File)} indexes every regular file directly
 * inside a data directory (fields {@code path}, {@code filename} and
 * {@code contents}); {@link #searchIndexByString(String, String, File)} parses a
 * query expression against one of those fields and prints the top hits.
 * Alternative query types are listed as commented-out examples inside the
 * search method.
 *
 * @author dengyangyang
 */
public class TestIndex3 {

    // static Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35); // built-in analyzer
    /** Analyzer shared by indexing and query parsing: the third-party IK analyzer. */
    static Analyzer analyzer = new IKAnalyzer();

    /**
     * Rebuilds the index in {@code indexDir} from the regular files found
     * directly inside {@code dataDir} (sub-directories are skipped).
     *
     * <p>The index is opened with {@code OpenMode.CREATE}, so any existing index
     * in {@code indexDir} is replaced. If {@code dataDir} cannot be listed, the
     * method reports the problem and returns without touching the index.
     * Any exception raised while indexing is printed, not rethrown.
     *
     * @param indexDir directory in which the index is stored
     * @param dataDir  directory whose files are indexed
     */
    public static void createIndexByPath(File indexDir, File dataDir) {
        try {
            // listFiles() returns null when dataDir is not a directory or cannot
            // be read; check before the existing index is overwritten.
            File[] files = dataDir.listFiles();
            if (files == null) {
                System.err.println("Cannot list data directory: " + dataDir);
                return;
            }

            Directory dir = new SimpleFSDirectory(indexDir);
            try {
                IndexWriterConfig iwConf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
                iwConf.setOpenMode(OpenMode.CREATE);
                IndexWriter indexWriter = new IndexWriter(dir, iwConf);
                long startTime = new Date().getTime();
                try {
                    for (File file : files) {
                        if (!file.isFile()) {
                            continue;
                        }
                        Document document = new Document();
                        document.add(new Field("path", file.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED));
                        document.add(new Field("filename", file.getName(), Field.Store.YES, Field.Index.ANALYZED));
                        // The reader is consumed while addDocument() runs; close it
                        // here so it is released even when indexing this file fails.
                        FileReader contentReader = new FileReader(file);
                        try {
                            document.add(new Field("contents", contentReader));
                            indexWriter.addDocument(document);
                        } finally {
                            contentReader.close();
                        }
                    }
                } finally {
                    // Always close the writer, otherwise the index write lock is
                    // left behind after a failure.
                    indexWriter.close();
                }
                long endTime = new Date().getTime();
                System.out.println("创建索引功耗时:" + (endTime - startTime) / 1000 + "s");
            } finally {
                dir.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses {@code searchStr} as a query against one field of the index in
     * {@code indexDir} and prints the parsed query, the total hit count, the
     * {@code path} and {@code filename} of up to ten hits, and the elapsed time.
     * Any exception is printed, not rethrown.
     *
     * @param searchStr query expression handed to {@code QueryParser}
     * @param fileName  name of the default field to search (for example
     *                  {@code "filename"} or {@code "path"}); despite its name
     *                  this is a field name, not a file name
     * @param indexDir  directory containing the index
     */
    public static void searchIndexByString(String searchStr, String fileName, File indexDir) {
        try {
            // S0: parse a query expression; operators such as
            // +a +b, a OR b, a AND b, *, ~ and *:* can be used.
            QueryParser queryParser = new QueryParser(Version.LUCENE_35, fileName, analyzer);
            Query query = queryParser.parse(searchStr);
            System.out.println("query = " + query);

            // Alternative query types (replace the parsed query above to try them):
            //
            // S1: search for one exact term; results depend on how the text was tokenized
            // Query query = new TermQuery(new Term("filename", "面"));
            //
            // S2: search within a term range
            // TermRangeQuery query = new TermRangeQuery("filename", "a", "d", true, true);
            //
            // S3: search within a numeric range
            // NumericRangeQuery query = NumericRangeQuery.newIntRange("publish_date", 2011, 2012, true, true);
            //
            // S4: search by string prefix
            // PrefixQuery query = new PrefixQuery(new Term("path", "//面试与就业"));
            //
            // S5: combined query that merges sub-queries
            // BooleanQuery query = new BooleanQuery();
            // query.add(term_S1, BooleanClause.Occur.MUST);
            // query.add(term_S3, BooleanClause.Occur.MUST);
            //
            // S6: phrase search
            // PhraseQuery query = new PhraseQuery();
            // query.setSlop(1);
            // query.add(new Term("filename", "性格"));
            //
            // S7: wildcard search
            // Query query = new WildcardQuery(new Term("filename", "面试*"));
            //
            // S8: fuzzy search for similar terms
            // Query query = new FuzzyQuery(new Term("filename", "性格"));
            //
            // S9: match every document
            // Query query = new MatchAllDocsQuery();

            Directory dir = new SimpleFSDirectory(indexDir);
            try {
                IndexReader reader = IndexReader.open(dir);
                try {
                    IndexSearcher searcher = new IndexSearcher(reader);
                    try {
                        long startTime = new Date().getTime();

                        // Default search method: the ten best-scoring hits.
                        TopDocs topDocs = searcher.search(query, 10);
                        System.out.println("共有文件:" + topDocs.totalHits);
                        for (int i = 0; i < topDocs.scoreDocs.length; i++) {
                            Document document = searcher.doc(topDocs.scoreDocs[i].doc);
                            System.out.println("num = " + i + " path=" + document.get("path") + " filename=" + document.get("filename"));
                        }

                        long endTime = new Date().getTime();
                        System.out.println("搜索功耗时:" + (endTime - startTime) + "ms");
                    } finally {
                        searcher.close();
                    }
                } finally {
                    // The searcher was built from this reader and does not close
                    // it, so the reader is closed explicitly.
                    reader.close();
                }
            } finally {
                dir.close();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Rebuilds the index and runs a sample search on the {@code filename} field.
     *
     * @param args optional: {@code args[0]} is the index directory and
     *             {@code args[1]} the data directory; the built-in sample
     *             paths are used for whichever is missing
     */
    public static void main(String[] args) {
        File indexDir = new File(args.length > 0 ? args[0] : "D:\\luceneIndex");
        File dataDir = new File(args.length > 1 ? args[1] : "F:\\面试与就业");

        createIndexByPath(indexDir, dataDir);
        // searchIndexByString("面试与就业", "path", indexDir);
        searchIndexByString("面试", "filename", indexDir);
    }

}

 

关于 Lucene 的高级搜索、排序、分类、过滤、高亮显示、分页等内容,这里就不一一写出了,详情请参考《Lucene in Action》。

学会了我这三个实例,基本流程你就掌握了,所谓万变不离其宗。

祝初学者  学习顺利 

一起加油!

转载于:https://my.oschina.net/dyyweb/blog/42850

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值