Lucene相关TextField、StringField、Store.NO/YES及相关搜索代码

main测试代码

package cn.tedu.test2;

import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

import cn.tedu.test.IKAnalyzer6x;

public class SearchIndex {
//单个词项匹配
@Test
public void termquery() throws Exception {
//1.指定文件夹索引位置
FSDirectory dir = FSDirectory.open(Paths.get(“c://index01”));
//2.根据文件夹位置生成reader流
DirectoryReader reader = DirectoryReader.open(dir);
//3.创建搜索对象
IndexSearcher search = new IndexSearcher(reader);
//4.准备搜索条件
Term term = new Term(“productCat”,“小”);
TermQuery query = new TermQuery(term);
/*
* 搜索封装了document大量标识数据对象(没有源数据)
* document 评分等,根据查询条件不同,自动计算评分
* 词项中,根据字符串匹配长度,长度大,评分越高
* 查询的条数一共多少条
*/
//4.查询前10条
TopDocs topDoc = search.search(query, 10);
System.out.println(“最高分:”+topDoc.getMaxScore());
System.out.println(“一共获取数据:”+topDoc.totalHits);
//5.利用浅查询得到的评分对象拿到doucumentId
ScoreDoc[] scoreDocs = topDoc.scoreDocs;
for (ScoreDoc scoreDoc : scoreDocs) {
//每次循环都获取返回结果中一个document评分相关内容
System.out.println(“当前docid:”+scoreDoc.doc);
System.out.println(“当前doc评分:”+scoreDoc.score);
//利用documentId获取源数据, 拿不到Store.NO的数据
Document doc = search.doc(scoreDoc.doc);
System.out.println(“productName”+doc.get(“productName”));
System.out.println(“productImage”+doc.get(“productImage”));

	}
	
}

//根据在url输入的字符,对字符进行分词,匹配luence分好的词
@Test
public void multilFieldQuery() throws Exception{
	Directory dir=FSDirectory
			.open(Paths.get("c://index01"));
	IndexReader reader=DirectoryReader.open(dir);
	IndexSearcher search=new IndexSearcher(reader);
	//生成多域查询条件
		//生成解析器 解析字符串形成多个分词的结果
	String[] fields={"productName","productCat"};
	Analyzer analyzer=new IKAnalyzer6x();
	MultiFieldQueryParser parser=new 
			MultiFieldQueryParser(fields, analyzer);
	//利用解析器生成多域查询条件
	Query query=parser.parse("大小功率的灯泡节能效果不一样");
	/*	productName productCat
	 *  功率 term(productName ,功率) term(productCat,功率)
		...
		以上任何一个词项查询的结果集 做并集处理
	 */
			
	TopDocs topDoc = search.search(query, 10);
	System.out.println("最高分:"+topDoc.getMaxScore());
	System.out.println("一共获取数据:"+topDoc.totalHits);
	ScoreDoc[] scoreDocs = topDoc.scoreDocs;
	for (ScoreDoc scoreDoc : scoreDocs) {
		//每次循环都获取返回结果中一个document评分相关内容
		System.out.println("当前docid:"+scoreDoc.doc);
		System.out.println("当前doc评分:"+scoreDoc.score);
		//利用documentId获取源数据 拿不到Store.NO的数据
		Document doc=search.doc(scoreDoc.doc);
		//解析所有属性值
		System.out.println("productName"+doc.get("productName"));
		System.out.println("productImage"+doc.get("productImage"));
	}
}



@Test
public void booleanQuery() throws Exception{
	Directory dir=FSDirectory
			.open(Paths.get("c://index01"));
	IndexReader reader=DirectoryReader.open(dir);
	IndexSearcher search=new IndexSearcher(reader);
	//准备boolean条件的子条件termQuery
	Query query1=new TermQuery(new Term("productName","灯泡"));
	Query query2=new TermQuery(new Term("productName","小功率灯泡"));
	//利用query1 2 封装子条件
	BooleanClause bc1=new BooleanClause(query1,Occur.FILTER);
	BooleanClause bc2=new BooleanClause(query2,Occur.MUST_NOT);
	//occur决定了查询结果与当前条件的逻辑关系
	/*MUST:查询结果必须包含这个条件的结果
	 *MUST_NOT:查询结果必须不包含这个条件的结果
	 *SHOULD:可包含可不包含,当他与MUST同时存在时,不生效
	 *FILTER:MUST效果一样的,但是通过FILTER子条件查询的结果没有评分
	 */
	Query query=new BooleanQuery.
			Builder().add(bc1).add(bc2).build();
			
	TopDocs topDoc = search.search(query, 10);
	System.out.println("最高分:"+topDoc.getMaxScore());
	System.out.println("一共获取数据:"+topDoc.totalHits);
	ScoreDoc[] scoreDocs = topDoc.scoreDocs;
	for (ScoreDoc scoreDoc : scoreDocs) {
		//每次循环都获取返回结果中一个document评分相关内容
		System.out.println("当前docid:"+scoreDoc.doc);
		System.out.println("当前doc评分:"+scoreDoc.score);
		//利用documentId获取源数据 拿不到Store.NO的数据
		Document doc=search.doc(scoreDoc.doc);
		//解析所有属性值
		System.out.println("productName"+doc.get("productName"));
		System.out.println("productImage"+doc.get("productImage"));
	}
}


//通过范围进行 匹配,比如京东的价格区间
@Test
public void rangeQuery() throws Exception{
	Directory dir=FSDirectory
			.open(Paths.get("c://index01"));
	IndexReader reader=DirectoryReader.open(dir);
	IndexSearcher search=new IndexSearcher(reader);
	//生成对price价钱做范围查询的query
	Query query=DoublePoint.newRangeQuery
			("productPrice", 555, 8000);
	TopDocs topDoc = search.search(query, 10);
	System.out.println("最高分:"+topDoc.getMaxScore());
	System.out.println("一共获取数据:"+topDoc.totalHits);
	ScoreDoc[] scoreDocs = topDoc.scoreDocs;
	for (ScoreDoc scoreDoc : scoreDocs) {
		//每次循环都获取返回结果中一个document评分相关内容
		System.out.println("当前docid:"+scoreDoc.doc);
		System.out.println("当前doc评分:"+scoreDoc.score);
		//利用documentId获取源数据 拿不到Store.NO的数据
		Document doc=search.doc(scoreDoc.doc);
		//解析所有属性值
		System.out.println("productName"+doc.get("productName"));
		System.out.println("productImage"+doc.get("productImage"));
		System.out.println("price"+doc.get("productPrice"));
	}
}

}

生成数据代码

package cn.tedu.test2;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

import cn.tedu.test.IKAnalyzer6x;

/*

  • 创建测试需要的索引文件
    */
    public class CreateIndex {
    @Test
    public void createIndex() throws Exception {
    //1.准备索引文件夹
    Path path = Paths.get(“c://index01”);
    //2.将路径传递给lucene对象使用
    FSDirectory dir = FSDirectory.open(path);
    //3.封装数据doucument
    Document doc1 = new Document();
    Document doc2 = new Document();
    //productName productImage productPrice productCat(属性名称)S
    doc1.add(new TextField(“productName”, “小功率节能灯泡”, Store.YES));
    doc1.add(new StringField(“productImage”, “www.image.com”, Store.YES));
    doc1.add(new DoublePoint(“productPrice”, 5000));
    doc1.add(new TextField(“productCat”, “家居用品”, Store.YES));

     doc2.add(new TextField("productName", "大功率节能灯泡",Store.YES));
     doc2.add(new StringField("productImage", "www.easymall.com", Store.YES));
     doc2.add(new DoublePoint("productPrice", 355));
     doc2.add(new StringField("productPrice", "355元", Store.YES));
     doc2.add(new TextField("productCat", "家居用品",Store.NO));
     
             //TextField和StringField什么区别
     		//TextField分词,stringField不分词
     		//Store.YES/NO什么作用 YES存储值,NO不存储值
     		//数字特性的field和字符串有什么关系;数字特性和stringField配合使用才能保留值
     
     IndexWriterConfig config = new IndexWriterConfig(new IKAnalyzer6x());
     /*
      * create:每次调用都覆盖原有内容
      * append:每次调用都将新数据追加到原有内容索引
      * create_or_append:无则建,有则追加
      */
     config.setOpenMode(OpenMode.CREATE);
     
     
     IndexWriter writer = new IndexWriter(dir, config);
     writer.addDocument(doc1);
     writer.addDocument(doc2);
     writer.commit();
    

    }
    }

Ik分词类工具

package cn.tedu.test;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;

public class IKAnalyzer6x extends Analyzer {

    /** true = IK smart (coarse-grained) segmentation; false = fine-grained. */
    private boolean useSmart;

    /**
     * Creates an analyzer using IK's default fine-grained segmentation.
     */
    public IKAnalyzer6x() {
        this(false);
    }

    /**
     * Creates an analyzer with the given segmentation mode.
     *
     * @param useSmart whether to use smart (coarse-grained) segmentation
     */
    public IKAnalyzer6x(boolean useSmart) {
        super();
        this.useSmart = useSmart;
    }

    /** Returns the current segmentation mode. */
    public boolean useSmart() {
        return useSmart;
    }

    /** Switches the segmentation mode. */
    public void setUseSmart(boolean useSmart) {
        this.useSmart = useSmart;
    }

    /**
     * Builds the token-stream components for a field, backed by the
     * IK tokenizer (overrides the current Analyzer API).
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        return new TokenStreamComponents(new IKTokenizer6x(this.useSmart));
    }
}


package cn.tedu.test;

import java.io.IOException;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

public class IKTokenizer6x extends Tokenizer {

    /** Underlying IK segmenter that produces the lexemes. */
    private IKSegmenter _IKImplement;
    /** Token text attribute. */
    private final CharTermAttribute termAtt;
    /** Token start/end offset attribute. */
    private final OffsetAttribute offsetAtt;
    /** Token type attribute. */
    private final TypeAttribute typeAtt;
    /** End offset of the last emitted lexeme; reported by end(). */
    private int endPosition;

    /**
     * Creates the tokenizer (implements the current Tokenizer API).
     *
     * @param useSmart true for IK smart (coarse-grained) segmentation,
     *                 false for fine-grained segmentation
     */
    public IKTokenizer6x(boolean useSmart) {
        super();
        offsetAtt = addAttribute(OffsetAttribute.class);
        termAtt = addAttribute(CharTermAttribute.class);
        typeAtt = addAttribute(TypeAttribute.class);
        _IKImplement = new IKSegmenter(input, useSmart);
    }

    /**
     * Advances to the next token, copying the lexeme into the
     * term/offset/type attributes.
     *
     * @return true if a token was produced, false when input is exhausted
     */
    @Override
    public final boolean incrementToken() throws IOException {
        // clear all attributes before filling them for the next token
        clearAttributes();
        Lexeme nextLexeme = _IKImplement.next();
        if (nextLexeme != null) {
            termAtt.append(nextLexeme.getLexemeText());
            termAtt.setLength(nextLexeme.getLength());
            // FIX: run offsets through correctOffset(), consistent with end()
            offsetAtt.setOffset(correctOffset(nextLexeme.getBeginPosition()),
                    correctOffset(nextLexeme.getEndPosition()));
            // remember where the last token ended, for end()
            endPosition = nextLexeme.getEndPosition();
            // FIX: original set the token TYPE to the token TEXT; the
            // lexeme's type string (e.g. CN_WORD) is the intended value
            typeAtt.setType(nextLexeme.getLexemeTypeString());
            return true; // more tokens may follow
        }
        return false; // token stream exhausted
    }

    /**
     * Re-binds the segmenter to the (possibly new) input reader.
     */
    @Override
    public void reset() throws IOException {
        super.reset();
        _IKImplement.reset(input);
    }

    /**
     * Reports the final offset once the stream is exhausted.
     */
    @Override
    public final void end() {
        int finalOffset = correctOffset(this.endPosition);
        offsetAtt.setOffset(finalOffset, finalOffset);
    }
}

本地IK分词器jar包依赖配置

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值