Lucene3.6 之排序篇

最新推荐文章于 2015-05-23 01:43:22 发布

Ricky_Fung

最新推荐文章于 2015-05-23 01:43:22 发布

阅读量1.6k

点赞数

文章标签： lucene 全文搜索排序

本文链接：https://blog.csdn.net/top_code/article/details/8556073

版权

Lucene的默认排序是按照Document的得分进行排序的。当检索结果集中的两个Document具有相同的得分时，默认按照Document的ID对结果进行排序。

一、使用Sort、SortField类实现排序

Lucene在查询的时候，可以把一个Sort对象作为参数传给检索器IndexSearcher的search方法，并在构造Sort的时候指定排序规则。调用IndexSearcher.search，例如：
IndexSearcher.search(query, filter, n, sort);

关于Sort类，在其内部定义了3种构造方法：

关于SortField类，其构造方法如下：

其中type对应的取值如下：

SortField.SCORE 按得分排序
SortField.DOC 按文档ID排序
SortField.AUTO 域的值为int、long、float都有效
SortField.STRING 域按STRING排序
SortField.FLOAT
SortField.LONG
SortField.DOUBLE
SortField.SHORT
SortField.CUSTOM 通过比较器排序
SortField.BYTE

示例代码

1、对单个字段进行排序

/**
	 * Searches the "title" and "category" fields of the index under
	 * {@code D://LuceneEx/day01} for the keyword "android" and prints up to 50 hits
	 * sorted by the "reputation" field, interpreted as a float.
	 * <p>
	 * The searcher, reader and directory are released in a {@code finally} block so
	 * that they are not leaked when opening the index, parsing the query or running
	 * the search throws.
	 * <p>
	 * NOTE(review): Lucene's Sort documentation requires a sort field to be indexed
	 * as a single, untokenized term per document; verify the index was built that way.
	 */
	@Test
	public void sortSingleField(){
		Directory mdDirectory = null;
		IndexReader reader = null;
		IndexSearcher searcher = null;
		try {
			String path = "D://LuceneEx/day01";
			String keyword = "android";
			mdDirectory = FSDirectory.open(new File(path));
			// Alternative: Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
			// Tokenize the query with IKAnalyzer.
			Analyzer mAnalyzer = new IKAnalyzer();

			reader = IndexReader.open(mdDirectory);
			searcher = new IndexSearcher(reader);

			// Search across several fields at once.
			String[] fields = {"title","category"};
			QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_36, fields, mAnalyzer);
			// Single-field alternative:
			// QueryParser parser = new QueryParser(Version.LUCENE_36, "source", mAnalyzer);
			Query query = parser.parse(keyword);

			Sort sort = new Sort(new SortField("reputation", SortField.FLOAT));
			TopDocs tops = searcher.search(query, 50, sort);

			System.out.println("totalHits="+tops.totalHits);

			for (ScoreDoc hit : tops.scoreDocs) {
				Document doc = searcher.doc(hit.doc);

				int id = Integer.parseInt(doc.get("id"));
				String title = doc.get("title");
				String author = doc.get("author");
				String publishTime = doc.get("publishTime");
				String source = doc.get("source");
				String category = doc.get("category");
				float reputation = Float.parseFloat(doc.get("reputation"));

				System.out.println(id+"\t"+title+"\t"+author+"\t"+publishTime+"\t"+source+"\t"+category+"\t"+reputation);
			}

		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		} finally {
			// Release in reverse order of acquisition; every close is attempted
			// even if an earlier one fails.
			if (searcher != null) {
				try {
					searcher.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (mdDirectory != null) {
				try {
					mdDirectory.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

2、对多个字段进行排序

/**
	 * Searches the "title" and "category" fields of the index under
	 * {@code D://LuceneEx/day01} for the keyword "Android" and prints up to 100 hits
	 * sorted first by "reputation" (as a float) and then by "source" (as a string).
	 * <p>
	 * The searcher, reader and directory are released in a {@code finally} block so
	 * that they are not leaked when opening the index, parsing the query or running
	 * the search throws.
	 * <p>
	 * NOTE(review): Lucene's Sort documentation requires a sort field to be indexed
	 * as a single, untokenized term per document; verify the index was built that way.
	 */
	@Test
	public void sortMultiField(){
		Directory mdDirectory = null;
		IndexReader reader = null;
		IndexSearcher searcher = null;
		try {
			String path = "D://LuceneEx/day01";
			String keyword = "Android";
			mdDirectory = FSDirectory.open(new File(path));
			// Alternative: Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
			// Tokenize the query with IKAnalyzer.
			Analyzer mAnalyzer = new IKAnalyzer();

			reader = IndexReader.open(mdDirectory);
			searcher = new IndexSearcher(reader);

			// Search across several fields at once.
			String[] fields = {"title","category"};
			QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_36, fields, mAnalyzer);
			// Single-field alternative:
			// QueryParser parser = new QueryParser(Version.LUCENE_36, "source", mAnalyzer);
			Query query = parser.parse(keyword);

			// Primary key first, tie-breaker second.
			SortField sortF1 = new SortField("reputation", SortField.FLOAT);
			SortField sortF2 = new SortField("source", SortField.STRING);
			Sort sort = new Sort(new SortField[]{sortF1 , sortF2});

			TopDocs tops = searcher.search(query, null, 100, sort);

			System.out.println("totalHits="+tops.totalHits);

			for (ScoreDoc hit : tops.scoreDocs) {
				Document doc = searcher.doc(hit.doc);

				int id = Integer.parseInt(doc.get("id"));
				String title = doc.get("title");
				String author = doc.get("author");
				String publishTime = doc.get("publishTime");
				String source = doc.get("source");
				String category = doc.get("category");
				float reputation = Float.parseFloat(doc.get("reputation"));

				System.out.println(id+"\t"+title+"\t"+author+"\t"+publishTime+"\t"+source+"\t"+category+"\t"+reputation);
			}

		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		} finally {
			// Release in reverse order of acquisition; every close is attempted
			// even if an earlier one fails.
			if (searcher != null) {
				try {
					searcher.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (mdDirectory != null) {
				try {
					mdDirectory.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

用到的两个工具方法代码

/**
	 * Builds a Lucene {@code Document} from a {@code Book}.
	 * <p>
	 * Every field is stored so it can be read back from search hits. The "id",
	 * "title", "author", "publishTime" and "category" fields are analyzed. The
	 * "source" and "reputation" fields are indexed without analysis because they
	 * are used as sort keys, and Lucene requires a sort field to hold exactly one
	 * untokenized term per document. As a consequence "source" matches only on its
	 * exact full value.
	 *
	 * @param book the book whose properties are copied into the document
	 * @return a new document with one field per book property
	 */
	public Document createDocument(Book book){
		Document doc = new Document();

		Field id = new Field("id", book.getId() + "", Store.YES,
				Index.ANALYZED);
		Field title = new Field("title", book.getTitle(), Store.YES,
				Index.ANALYZED);
		Field author = new Field("author", book.getAuthor(), Store.YES,
				Index.ANALYZED);
		Field publishTime = new Field("publishTime", book.getPublishTime(),
				Store.YES, Index.ANALYZED);
		// Sort key (SortField.STRING): must be a single untokenized term.
		Field source = new Field("source", book.getSource(), Store.YES,
				Index.NOT_ANALYZED);
		Field category = new Field("category", book.getCategory(),
				Store.YES, Index.ANALYZED);
		// Sort key (SortField.FLOAT): must be a single term that parses as a float.
		Field reputation = new Field("reputation", book.getReputation()
				+ "", Store.YES, Index.NOT_ANALYZED);

		doc.add(id);
		doc.add(title);
		doc.add(author);
		doc.add(publishTime);
		doc.add(source);
		doc.add(category);
		doc.add(reputation);

		return doc;
	}
	
	/**
	 * Assembles a {@code Book} from the given properties.
	 * <p>
	 * The id is a random integer in the range [0, 10000) and the source is always
	 * set to the same fixed publisher name.
	 *
	 * @param title the book title
	 * @param author the book author
	 * @param publishTime the publication time as text
	 * @param category the book category
	 * @param reputation the book's reputation rating
	 * @return the populated book
	 */
	public Book createBook(String title,String author,String publishTime,String category,float reputation){
		int randomId = new Random().nextInt(10000);

		Book result = new Book();
		result.setId(randomId);
		result.setAuthor(author);
		result.setTitle(title);
		result.setCategory(category);
		result.setPublishTime(publishTime);
		result.setReputation(reputation);
		result.setSource("清华大学出版社");
		return result;
	}

二、改变boost(激励因子)

1、改变Document的boost(激励因子)
改变boost的大小，会导致Document的得分发生改变，从而在Lucene默认的排序方式下，使该Document在检索结果中的位置提前或者靠后。在计算得分的时候会用到boost的值，默认boost的值为1.0，也就是说默认情况下Document的得分与boost无关。一旦改变了默认的boost值，Document的得分就从与boost无关变为与boost相关：boost值越大，Document的得分越高。

2、改变Field的boost(激励因子)
改变Field的boost值，和改变Document的boost值效果是一样的。因为Document的boost是通过添加到Document中的Field体现的，所以改变Field的boost值，可以改变Document的boost值。

示例代码

/**
	 * Searches the "title" and "category" fields of the index under
	 * {@code D://LuceneEx/day02} for the keyword "android" and prints up to 50 hits
	 * in the default order, together with each hit's score.
	 * <p>
	 * The searcher, reader and directory are released in a {@code finally} block so
	 * that they are not leaked when opening the index, parsing the query or running
	 * the search throws.
	 */
	@Test
	public void testBoost(){
		Directory mdDirectory = null;
		IndexReader reader = null;
		IndexSearcher searcher = null;
		try {
			String path = "D://LuceneEx/day02";
			String keyword = "android";
			mdDirectory = FSDirectory.open(new File(path));
			// Tokenize the query with IKAnalyzer.
			Analyzer mAnalyzer = new IKAnalyzer();

			reader = IndexReader.open(mdDirectory);
			searcher = new IndexSearcher(reader);

			// Search across several fields at once.
			String[] fields = { "title", "category" };
			QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_36,
					fields, mAnalyzer);
			Query query = parser.parse(keyword);

			// No filter and no explicit Sort are passed to this search.
			TopDocs tops = searcher.search(query, null, 50);

			System.out.println("totalHits=" + tops.totalHits);

			for (ScoreDoc hit : tops.scoreDocs) {
				Document doc = searcher.doc(hit.doc);

				float score = hit.score;

				int id = Integer.parseInt(doc.get("id"));
				String title = doc.get("title");
				String author = doc.get("author");
				String publishTime = doc.get("publishTime");
				String source = doc.get("source");
				String category = doc.get("category");
				float reputation = Float.parseFloat(doc.get("reputation"));

				System.out.println(id + "\t" + title + "\t" + author + "\t"
						+ publishTime + "\t" + source + "\t" + category + "\t"
						+ reputation+"\t"+score);
			}

		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} catch (ParseException e) {
			e.printStackTrace();
		} finally {
			// Release in reverse order of acquisition; every close is attempted
			// even if an earlier one fails.
			if (searcher != null) {
				try {
					searcher.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (mdDirectory != null) {
				try {
					mdDirectory.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}
	
	/**
	 * Adds three sample books to the index under {@code D://LuceneEx/day02}, each
	 * with a different document boost (2.0, 2.5 and 1.5).
	 * <p>
	 * The writer and directory are closed in a {@code finally} block so that a
	 * failure while adding documents does not leave the writer, and the lock it
	 * holds on the index, open.
	 */
	@Test
	public void testAdd() {
		Directory mdDirectory = null;
		IndexWriter writer = null;
		try {
			String path = "D://LuceneEx/day02";
			mdDirectory = FSDirectory.open(new File(path));

			// Lucene's built-in analyzer would be:
			// Analyzer mAnalyzer = new StandardAnalyzer(Version.LUCENE_36);
			// Tokenize with IKAnalyzer instead.
			Analyzer mAnalyzer = new IKAnalyzer();
			IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
					mAnalyzer);

			writer = new IndexWriter(mdDirectory, config);

			// Document boost: a larger value raises the document's score.
			Book book1 = createBook("Android内核揭秘", "ABC", "2010-07", "android 移动开发", 8.9f);
			Document doc1 = createDocument(book1);
			doc1.setBoost(2.0F); // boost factor 2.0

			Book book2 = createBook("Android多媒体开发", "BCD", "2011-07", "android 多媒体", 8.5f);
			Document doc2 = createDocument(book2);
			doc2.setBoost(2.5F); // boost factor 2.5, the highest of the three

			Book book3 = createBook("Android企业应用开发", "QAB", "2012-05", "android 企业应用", 8.2f);
			Document doc3 = createDocument(book3);
			doc3.setBoost(1.5F); // boost factor 1.5, the lowest of the three

			writer.addDocument(doc1);
			writer.addDocument(doc2);
			writer.addDocument(doc3);

		} catch (CorruptIndexException e) {
			e.printStackTrace();
		} catch (LockObtainFailedException e) {
			e.printStackTrace();
		} catch (IOException e) {
			e.printStackTrace();
		} finally {
			// Close the writer first, then the directory it writes to.
			if (writer != null) {
				try {
					writer.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
			if (mdDirectory != null) {
				try {
					mdDirectory.close();
				} catch (IOException e) {
					e.printStackTrace();
				}
			}
		}
	}

运行结果

totalHits=3
3383	Android多媒体开发	BCD	2011-07	清华大学出版社	android 多媒体	8.5	1.259212
891	Android内核揭秘	ABC	2010-07	清华大学出版社	android 移动开发	8.9	1.0073696
2919	Android企业应用开发	QAB	2012-05	清华大学出版社	android 企业应用	8.2	0.75552726