Lucene通过修改boost值改善搜索结果的排序

并不是所有的Document和Field都同等重要。Document加权(boost)是能够简单实现这种需求的一个特性。默认情况下,所有的Document都没有额外的加权,或者说它们的加权因子都是1.0。通过改变某个Document的加权因子,可以让Lucene认为它比索引中的其他Document更重要或更不重要。在索引的时候,只需在把Document加入索引之前对它调用setBoost(float)方法即可。

看两个示例,就能明白其中的用法。

示例1,使用默认的boost

package com.cn;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Example 1: indexes five city documents without changing any boost and
 * prints the hits of a single-term query in the order Lucene returns them.
 */
public class TT {

	/**
	 * Builds an in-memory index with one document per city, prints the number
	 * of indexed documents, then searches the "contents" field for "city".
	 *
	 * @param args unused
	 * @throws Exception if indexing or searching fails
	 */
	public static void main(String []args) throws Exception {
		String [] ids = {"1","2","3","4","5"};
		String [] province = {"shanghai","beijing","liaoning","liaoning","zhejiang"};
		String [] contents = {"shanghai is a city","beijing is a city","jinzhou is a city","shenyang is a city","hangzhou is a city"};
		String [] city = {"shanghai","beijing","jinzhou","shenyang","hangzhou"};
		Directory directory = new RAMDirectory();

		IndexWriter indexWriter = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_34, new StandardAnalyzer(Version.LUCENE_34)));
		try {
			// The four arrays are parallel: index i describes one document.
			for(int i = 0;i < ids.length;i++){
				Document doc = new Document();
				doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
				doc.add(new Field("province",province[i],Field.Store.YES,Field.Index.ANALYZED));
				doc.add(new Field("contents",contents[i],Field.Store.YES,Field.Index.ANALYZED));
				doc.add(new Field("city",city[i],Field.Store.YES,Field.Index.ANALYZED));

				indexWriter.addDocument(doc);
			}
			System.out.println("total:"+indexWriter.numDocs());
		} finally {
			// Always close the writer, even if adding a document throws.
			indexWriter.close();
		}

		queryMethod(directory,"contents","city");
	}

	/**
	 * Runs a {@link TermQuery} for {@code txt} against field {@code item} and
	 * prints the total hit count followed by the "city" and "contents" values
	 * of the top 10 hits.
	 *
	 * @param directory index to search
	 * @param item      name of the field to search in
	 * @param txt       term to look for; it is used as-is, not analyzed
	 * @throws Exception if the index cannot be opened or searched
	 */
	public static void queryMethod(Directory directory,String item,String txt)throws Exception {
		Query query = new TermQuery(new Term(item,txt));
		IndexSearcher indexSearcher = new IndexSearcher(directory);
		try {
			TopDocs topDocs = indexSearcher.search(query, 10);
			System.out.println("it has "+topDocs.totalHits+" "+txt+" in "+item);
			for(ScoreDoc hit : topDocs.scoreDocs){
				Document d = indexSearcher.doc(hit.doc);
				System.out.println("city:"+d.get("city"));
				System.out.println("contents:"+d.get("contents"));
			}
		} finally {
			// The searcher was created from a Directory, so it must be closed
			// here to release the reader it opened.
			indexSearcher.close();
		}
	}
}

运行结果为:

total:5
it has 5 city in contents
city:shanghai
contents:shanghai is a city
city:beijing
contents:beijing is a city
city:jinzhou
contents:jinzhou is a city
city:shenyang
contents:shenyang is a city
city:hangzhou
contents:hangzhou is a city


示例2,增加了boost,代码为

package com.cn;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Example 2: same index as example 1, but documents of province "liaoning"
 * get a higher document boost before they are added, which changes the order
 * of the search hits.
 */
public class TT {

	/** Boost for documents that are not singled out; 1.0 is the default factor. */
	private static final float DEFAULT_BOOST = 1.0f;
	/** Boost for "liaoning" documents other than "shenyang". */
	private static final float PROVINCE_BOOST = 2.0f;
	/** Boost for the "shenyang" document in "liaoning". */
	private static final float CITY_BOOST = 5.0f;

	/**
	 * Builds an in-memory index with one document per city, boosting the
	 * "liaoning" documents, prints the number of indexed documents, then
	 * searches the "contents" field for "city".
	 *
	 * @param args unused
	 * @throws Exception if indexing or searching fails
	 */
	public static void main(String []args) throws Exception {
		String [] ids = {"1","2","3","4","5"};
		String [] province = {"shanghai","beijing","liaoning","liaoning","zhejiang"};
		String [] contents = {"shanghai is a city","beijing is a city","jinzhou is a city","shenyang is a city","hangzhou is a city"};
		String [] city = {"shanghai","beijing","jinzhou","shenyang","hangzhou"};
		Directory directory = new RAMDirectory();

		IndexWriter indexWriter = new IndexWriter(directory,new IndexWriterConfig(Version.LUCENE_34, new StandardAnalyzer(Version.LUCENE_34)));
		try {
			// The four arrays are parallel: index i describes one document.
			for(int i = 0;i < ids.length;i++){
				Document doc = new Document();
				doc.add(new Field("id",ids[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
				doc.add(new Field("province",province[i],Field.Store.YES,Field.Index.ANALYZED));
				doc.add(new Field("contents",contents[i],Field.Store.YES,Field.Index.ANALYZED));
				doc.add(new Field("city",city[i],Field.Store.YES,Field.Index.ANALYZED));
				// The boost has to be set before the document is added.
				doc.setBoost(boostFor(province[i], city[i]));
				indexWriter.addDocument(doc);
			}
			System.out.println("total:"+indexWriter.numDocs());
		} finally {
			// Always close the writer, even if adding a document throws.
			indexWriter.close();
		}

		queryMethod(directory,"contents","city");
	}

	/**
	 * Picks the document boost for a province/city pair.
	 *
	 * @param province province value of the document
	 * @param city     city value of the document
	 * @return 5.0 for shenyang in liaoning, 2.0 for any other liaoning city,
	 *         1.0 otherwise
	 */
	private static float boostFor(String province, String city) {
		if (!"liaoning".equals(province)) {
			return DEFAULT_BOOST;
		}
		return "shenyang".equals(city) ? CITY_BOOST : PROVINCE_BOOST;
	}

	/**
	 * Runs a {@link TermQuery} for {@code txt} against field {@code item} and
	 * prints the total hit count followed by the "city" and "contents" values
	 * of the top 10 hits.
	 *
	 * @param directory index to search
	 * @param item      name of the field to search in
	 * @param txt       term to look for; it is used as-is, not analyzed
	 * @throws Exception if the index cannot be opened or searched
	 */
	public static void queryMethod(Directory directory,String item,String txt)throws Exception {
		Query query = new TermQuery(new Term(item,txt));
		IndexSearcher indexSearcher = new IndexSearcher(directory);
		try {
			TopDocs topDocs = indexSearcher.search(query, 10);
			System.out.println("it has "+topDocs.totalHits+" "+txt+" in "+item);
			for(ScoreDoc hit : topDocs.scoreDocs){
				Document d = indexSearcher.doc(hit.doc);
				System.out.println("city:"+d.get("city"));
				System.out.println("contents:"+d.get("contents"));
			}
		} finally {
			// The searcher was created from a Directory, so it must be closed
			// here to release the reader it opened.
			indexSearcher.close();
		}
	}
}


运行结果为:

total:5
it has 5 city in contents
city:shenyang
contents:shenyang is a city
city:jinzhou
contents:jinzhou is a city
city:shanghai
contents:shanghai is a city
city:beijing
contents:beijing is a city
city:hangzhou
contents:hangzhou is a city

对比两个例子的运行结果可以看出:设置boost之后,属于辽宁省(liaoning)的两个Document排到了结果的前面,而且boost为5.0的shenyang排在boost为2.0的jinzhou之前。


  • 1
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值