Simple queries in Lucene 4, and some differences from Lucene 3

Several APIs changed significantly in Lucene 4; the code below shows the differences from Lucene 3. In particular, an IndexWriter is now configured through an IndexWriterConfig, an IndexReader is opened with DirectoryReader.open, an IndexSearcher wraps an IndexReader instead of a Directory, and a TokenStream must be reset() before it is iterated.

Notes on the code below:

  1. The File2DocumentUtils class is not included here; it simply converts a File into a Lucene Document. A sketch of what it might look like is given right after this list, so the code can be run.
  2. The Lucene version used is 4.1.0.
  3. Two analyzers are used: IKAnalyzer and Lucene's standard StandardAnalyzer.
  4. Each of the simple queries has a @Test method and can be run directly with JUnit.
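
Since File2DocumentUtils is omitted, here is a minimal sketch of what it might look like, so the listing can run as-is. The class name, the two methods file2Document and printDocumentInfo, and the field names name, content, and size come from the listing below; the path field, the UTF-8 encoding, and the method bodies are my assumptions, not the original class.

package utils;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;

public class File2DocumentUtils {

	// Converts a file into a Lucene Document with name/content/size/path fields.
	// "size" is indexed as a LongField so NumericRangeQuery.newLongRange can match it.
	public static Document file2Document(String path) throws IOException {
		File file = new File(path);
		// assumption: files are UTF-8 encoded text
		String content = new String(Files.readAllBytes(file.toPath()), Charset.forName("UTF-8"));
		Document doc = new Document();
		doc.add(new TextField("name", file.getName(), Store.YES));
		doc.add(new TextField("content", content, Store.YES));
		doc.add(new LongField("size", file.length(), Store.YES));
		doc.add(new StoredField("path", file.getAbsolutePath()));
		return doc;
	}

	// Prints the stored field values of a Document.
	public static void printDocumentInfo(Document doc) {
		System.out.println("name    -> " + doc.get("name"));
		System.out.println("path    -> " + doc.get("path"));
		System.out.println("size    -> " + doc.get("size"));
		System.out.println("content -> " + doc.get("content"));
	}
}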

package helloworld;

import java.io.File;
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import org.wltea.analyzer.lucene.IKAnalyzer;

import utils.File2DocumentUtils;

public class HelloWorld {

	String filePath1 = "C:\\Users\\Administrator.chaney-pc\\workspace\\luceneDemo\\luceneDatasource\\README.txt";
	String filePath2 = "C:\\Users\\Administrator.chaney-pc\\workspace\\luceneDemo\\luceneDatasource\\Copy of README.txt";
	File indexFile = new File("C:\\Users\\Administrator.chaney-pc\\workspace\\luceneDemo\\luceneIndex");

	Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);

	// Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_41);

	@Test
	public void createIndex() throws IOException {
		Directory indexPath = FSDirectory.open(indexFile);
		Document document1 = File2DocumentUtils.file2Document(filePath1);
		Document document2 = File2DocumentUtils.file2Document(filePath2);
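		// Lucene 4 change: IndexWriter is configured via IndexWriterConfig;
		// the Lucene 3 constructor IndexWriter(Directory, Analyzer, MaxFieldLength) is gone.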
		IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_41, analyzer);
		IndexWriter indexWriter = new IndexWriter(indexPath, indexWriterConfig);
		indexWriter.addDocument(document1);
		indexWriter.addDocument(document2);
		indexWriter.close();
		indexPath.close();
	}

	public void search(Query query) throws Exception {

		Directory directory = FSDirectory.open(indexFile);
		IndexReader indexReader = DirectoryReader.open(directory);
		IndexSearcher indexSearcher = new IndexSearcher(indexReader);

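		// Highlighter setup: wrap matched terms in red <font> tags, score fragments
		// against the query, and cap each fragment at 100 characters.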
		Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
		Scorer fragmentScorer = new QueryScorer(query);
		Highlighter highlighter = new Highlighter(formatter, fragmentScorer);

		Fragmenter fragmenter = new SimpleFragmenter(100);
		highlighter.setTextFragmenter(fragmenter);

		TopDocs topDocs = indexSearcher.search(query, 1000);
		System.out.println("Total hits: " + topDocs.totalHits);
		for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
			int docID = scoreDoc.doc;
			Document document = indexSearcher.doc(docID);
			String hc = highlighter.getBestFragment(analyzer, "content", document.get("content"));
			if (hc != null) {
				document.removeField("content");
				// TextField supersedes the deprecated 3.x-style Field(name, value, Store, Index)
				document.add(new TextField("content", hc, Store.YES));
			}

			File2DocumentUtils.printDocumentInfo(document);
		}

		indexReader.close();
		directory.close();
	}

	@Test
	public void search() throws Exception {
		String[] fields = { "name", "content" };

		QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_41, fields, analyzer);

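		// parse "Introduction" against both fields; MultiFieldQueryParser ORs the per-field queries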
		Query query = queryParser.parse("Introduction");
		search(query);
	}

	@Test
	public void testTerm() throws Exception {
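		// TermQuery bypasses the analyzer, so the term must match an indexed token
		// exactly; StandardAnalyzer lowercases, hence "introduction"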
		Term t = new Term("content", "introduction");
		Query query = new TermQuery(t);
		search(query);
	}

	@Test
	public void testRange() throws Exception {
		// match docs whose numeric "size" field lies in [400, 1000]; both bounds are
		// inclusive, and the field must have been indexed numerically
		Query query = NumericRangeQuery.newLongRange("size", Long.valueOf(400), Long.valueOf(1000), true, true);
		search(query);
	}
	
	@Test
	public void testWildcardQuery() throws Exception {
		// WildcardQuery: '?' matches exactly one character, '*' matches zero or more
		Term term = new Term("content", "luc?");
		Query query = new WildcardQuery(term);
		search(query);
	}
	
	@Test
	public void testBooleanQuery() throws Exception {
		// combine a numeric range query and a wildcard query with boolean clauses
		Query query1 = NumericRangeQuery.newLongRange("size", Long.valueOf(400), Long.valueOf(1000), true, true);
		Term term = new Term("content", "luc*");
		Query query2 = new WildcardQuery(term);
		BooleanQuery booleanQuery = new BooleanQuery();
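		// MUST_NOT excludes docs whose size is in [400, 1000]; MUST requires the wildcard match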
		booleanQuery.add(query1, Occur.MUST_NOT);
		booleanQuery.add(query2, Occur.MUST);
		search(booleanQuery);
	}
	
	@Test
	public void testPhraseQuery() throws Exception {
		// PhraseQuery matches terms appearing near each other, in order
		Term term1 = new Term("content", "main");
		Term term2 = new Term("content", "page");
		PhraseQuery query = new PhraseQuery();
		query.add(term1);
		query.add(term2);
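		// slop = 1: the two terms may be up to one position apart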
		query.setSlop(1);
		search(query);
	}
	
	@Test
	public void testAnalyzer() throws Exception {
		String string = "我是一个中国人  sdjfkajsdfjak "; // sample text: Chinese plus random Latin characters
		Analyzer analyzer = new IKAnalyzer();
		TokenStream tokenStream = analyzer.tokenStream("content", new StringReader(string));
		// addAttribute needs no cast and registers the attribute if it is not present yet
		CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
		TypeAttribute typeAtt = tokenStream.addAttribute(TypeAttribute.class);
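		// Lucene 4 enforces the TokenStream contract: reset() must be called
		// before incrementToken(), which Lucene 3 code often skipped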
		tokenStream.reset();
		while (tokenStream.incrementToken()) {
			System.out.print("["+termAtt.toString()+"]");
			System.out.println(typeAtt.type());
		}
		tokenStream.end();
		tokenStream.close();
	}
}
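
Note: run createIndex first to build the index under luceneIndex; all of the query tests read from that directory.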

Reposted from: https://my.oschina.net/u/942651/blog/149095
