lucene(4)——版本5.X写排序

1.元旦后的第一天上班,精神恍惚,什么都不顺,哎!新的一年不是一个好的开头啊!

先上代码,首先是写入索引的部分(写篇文章用来发泄;分词器还是用上一篇文章里我们自己写的那个分词器)

package cn.com.demo.comparator;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;

import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

import cn.com.demo.chnese.MsgAnalyzi;
import cn.com.demo.chnese.MySameContext;

/**
 * Builds the demo index that the sorting examples search against.
 *
 * <p>Every regular file directly inside the source directory becomes one
 * document carrying its length, path, name and full text. The length and the
 * name are additionally written as doc-values fields, because sorting by a
 * field requires doc values for that field.
 */
public class PublicComparator {
	/** Synonym table handed to the custom analyzer via {@code MySameContext}. */
	private static Map<String,String[]> map=new HashMap<String,String[]>();
	static{
		map.put("中国", new String[]{"天朝","中原"});
		map.put("我", new String[]{"朕","俺"});
		map.put("主义", new String[]{"注意","猪亿"});
	}

	/**
	 * Indexes every regular file found in the hard-coded source directory.
	 *
	 * @param args unused
	 * @throws Exception if the analyzer or index writer cannot be constructed
	 *         for a reason other than an I/O failure
	 */
	public static void main(String[] args) throws Exception {
		String spath="D:\\lunece\\chinese";
		String str="D:\\lunece\\file";
		// listFiles() returns null when the path is missing or not a directory.
		File[] files=new File(str).listFiles();
		if(files==null){
			System.err.println("Not a readable directory: "+str);
			return;
		}
		// try-with-resources closes the writer first and the directory second,
		// and never calls close() on a writer that failed to open.
		try (FSDirectory dir=FSDirectory.open(Paths.get(spath));
				IndexWriter write=new IndexWriter(dir, new IndexWriterConfig(new MsgAnalyzi(new MySameContext(map))))) {
			for(File file:files){
				// Sub-directories cannot be opened as a stream; skip them
				// instead of aborting the whole run.
				if(!file.isFile()){
					continue;
				}
				write.addDocument(toDocument(file));
			}
			write.commit();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/** Builds the Lucene document (stored fields plus sort doc values) for one file. */
	private static Document toDocument(File file) throws IOException {
		Document doc=new Document();
		long lo=file.length();
		// Doc-values twin of "fileLength": this is what SortField(Type.LONG) reads.
		doc.add(new NumericDocValuesField("fileLength", lo));
		doc.add(new LongField("fileLength", lo, Store.YES));
		doc.add(new TextField("filePath", file.getPath(), Store.YES));
		doc.add(new TextField("fileName", file.getName(), Store.YES));
		// Doc-values twin of "fileName": this is what SortField(Type.STRING) reads.
		// BytesRef(CharSequence) encodes as UTF-8; getBytes() without a charset
		// would use the platform default and yield platform-dependent sort keys.
		doc.add(new SortedDocValuesField("fileName", new BytesRef(file.getName())));
		// The text is read into a String so it can be stored. The
		// TextField(String, Reader) constructor only indexes the tokens and
		// never stores the value, so doc.get("fileContent") would return null.
		doc.add(new TextField("fileContent", readAll(file), Store.YES));
		return doc;
	}

	/**
	 * Reads the whole file as gb2312 text, concatenating its lines.
	 * NOTE(review): lines are joined without any separator (as in the original
	 * code), so the last word of one line touches the first word of the next.
	 */
	private static String readAll(File file) throws IOException {
		try (FileInputStream in=new FileInputStream(file);
				BufferedReader br=new BufferedReader(new InputStreamReader(in, "gb2312"))) {
			StringBuilder sb=new StringBuilder();
			String line;
			while((line=br.readLine())!=null){
				sb.append(line);
			}
			return sb.toString();
		}
	}
}
package cn.com.demo.comparator;

import java.io.IOException;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;

import cn.com.demo.chnese.MsgAnalyzi;
import cn.com.demo.chnese.MySameContext;

/**
 * Runs the same query against the demo index four times, each time with a
 * different {@link Sort}, and prints the top hits so the orderings can be
 * compared.
 */
public class PublicReader {
	/** Synonym table handed to the custom analyzer via {@code MySameContext}. */
	private static Map<String,String[]> map=new HashMap<String,String[]>();
	static{
		map.put("中国", new String[]{"天朝","中原"});
		map.put("我", new String[]{"朕","俺"});
		map.put("主义", new String[]{"注意","猪亿"});
	}

	/**
	 * Opens the hard-coded index directory and prints the results of the query
	 * under four sort orders, separated by a dashed line.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		String spath="D:\\lunece\\chinese";
		// try-with-resources closes the reader first and the directory second,
		// and never calls close() on a reader that failed to open.
		try (FSDirectory dir=FSDirectory.open(Paths.get(spath));
				DirectoryReader reader=DirectoryReader.open(dir)) {
			IndexSearcher searcher=new IndexSearcher(reader);
			QueryParser parser=new QueryParser("fileContent", new MsgAnalyzi(new MySameContext(map)));
			// Parse once; the same Query object is reused for every search.
			Query query=parser.parse("健康");
			// Descending score; ties are broken by ascending doc id.
			que(query, searcher, Sort.RELEVANCE);
			System.out.println("----------------------------------------");
			// Ascending doc id (index order).
			que(query, searcher, Sort.INDEXORDER);
			System.out.println("----------------------------------------");
			// Ascending value of the "fileLength" numeric doc values; ties by doc id.
			que(query, searcher, new Sort(new SortField("fileLength", Type.LONG)));
			System.out.println("----------------------------------------");
			// Orders by the bytes stored in the "fileName" sorted doc values;
			// the third argument (reverse=true) makes the order descending.
			que(query, searcher, new Sort(new SortField("fileName", Type.STRING,true)));
		} catch (Exception e) {
			e.printStackTrace();
		}
	}

	/**
	 * Prints the top 10 hits of {@code query} ordered by {@code sort}, one per
	 * line as {@code docId,,,score,,,fileLength,,,filePath,,,fileName}.
	 * An {@link IOException} during the search is printed, not rethrown.
	 *
	 * @param query    the query to run
	 * @param searcher the searcher over the demo index
	 * @param sort     the ordering to apply to the hits
	 */
	public static void que(Query query,IndexSearcher searcher,Sort sort){
		try {
			// doDocScores=true so score.score is filled in even when sorting
			// by a field; doMaxScore=false because the max score is not used.
			TopDocs top=searcher.search(query, 10,sort,true,false);
			for(ScoreDoc score:top.scoreDocs){
				Document doc=searcher.doc(score.doc);
				System.out.println(score.doc+",,,"+score.score+",,,"+doc.get("fileLength")+",,,"+doc.get("filePath")+",,,"+doc.get("fileName"));
			}
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

一月 03, 2017 4:28:26 下午 com.chenlb.mmseg4j.Dictionary getInstance
信息: try to load dir=C:\Users\Administrator\Desktop\mmseg4j-core-1.10.0\data
一月 03, 2017 4:28:26 下午 com.chenlb.mmseg4j.Dictionary loadDic
信息: chars loaded time=53ms, line=12638, on file=C:\Users\Administrator\Desktop\mmseg4j-core-1.10.0\data\chars.dic
一月 03, 2017 4:28:26 下午 com.chenlb.mmseg4j.Dictionary loadWord
信息: words loaded time=182ms, line=149853, on file=file:\E:\eclipse\MyEclipse\java1\lib\mmseg4j-core-1.10.0.jar!\data\words.dic
一月 03, 2017 4:28:26 下午 com.chenlb.mmseg4j.Dictionary loadWord
信息: words loaded time=44ms, line=149853, on file=C:\Users\Administrator\Desktop\mmseg4j-core-1.10.0\data\words.dic
一月 03, 2017 4:28:26 下午 com.chenlb.mmseg4j.Dictionary loadDic
信息: load all dic use time=282ms
一月 03, 2017 4:28:26 下午 com.chenlb.mmseg4j.Dictionary loadUnit
信息: unit loaded time=1ms, line=22, on file=C:\Users\Administrator\Desktop\mmseg4j-core-1.10.0\data\units.dic
2,,,0.13087946,,,420,,,D:\lunece\file\偶尔俗气.txt,,,偶尔俗气.txt
12,,,0.13087946,,,420,,,D:\lunece\file\偶尔俗气.txt,,,偶尔俗气.txt
22,,,0.13087946,,,420,,,D:\lunece\file\偶尔俗气.txt,,,偶尔俗气.txt
9,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
19,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
29,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
5,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
15,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
25,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
0,,,0.0755633,,,473,,,D:\lunece\file\不要想如果当初.txt,,,不要想如果当初.txt
----------------------------------------
0,,,0.0755633,,,473,,,D:\lunece\file\不要想如果当初.txt,,,不要想如果当初.txt
1,,,0.0755633,,,461,,,D:\lunece\file\保持单纯.txt,,,保持单纯.txt
2,,,0.13087946,,,420,,,D:\lunece\file\偶尔俗气.txt,,,偶尔俗气.txt
3,,,0.0755633,,,396,,,D:\lunece\file\偶尔的出离轨道.txt,,,偶尔的出离轨道.txt
4,,,0.0755633,,,442,,,D:\lunece\file\发生意见.txt,,,发生意见.txt
5,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
6,,,0.0755633,,,496,,,D:\lunece\file\悄悄悄悄地回归平静.txt,,,悄悄悄悄地回归平静.txt
7,,,0.0755633,,,534,,,D:\lunece\file\抓住最好的时机绝不错过.txt,,,抓住最好的时机绝不错过.txt
8,,,0.0755633,,,488,,,D:\lunece\file\控制情绪别浪费了.txt,,,控制情绪别浪费了.txt
9,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
----------------------------------------
9,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
19,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
29,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
3,,,0.0755633,,,396,,,D:\lunece\file\偶尔的出离轨道.txt,,,偶尔的出离轨道.txt
13,,,0.0755633,,,396,,,D:\lunece\file\偶尔的出离轨道.txt,,,偶尔的出离轨道.txt
23,,,0.0755633,,,396,,,D:\lunece\file\偶尔的出离轨道.txt,,,偶尔的出离轨道.txt
5,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
15,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
25,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
2,,,0.13087946,,,420,,,D:\lunece\file\偶尔俗气.txt,,,偶尔俗气.txt
----------------------------------------
9,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
19,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
29,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
8,,,0.0755633,,,488,,,D:\lunece\file\控制情绪别浪费了.txt,,,控制情绪别浪费了.txt
18,,,0.0755633,,,488,,,D:\lunece\file\控制情绪别浪费了.txt,,,控制情绪别浪费了.txt
28,,,0.0755633,,,488,,,D:\lunece\file\控制情绪别浪费了.txt,,,控制情绪别浪费了.txt
7,,,0.0755633,,,534,,,D:\lunece\file\抓住最好的时机绝不错过.txt,,,抓住最好的时机绝不错过.txt
17,,,0.0755633,,,534,,,D:\lunece\file\抓住最好的时机绝不错过.txt,,,抓住最好的时机绝不错过.txt
27,,,0.0755633,,,534,,,D:\lunece\file\抓住最好的时机绝不错过.txt,,,抓住最好的时机绝不错过.txt
6,,,0.0755633,,,496,,,D:\lunece\file\悄悄悄悄地回归平静.txt,,,悄悄悄悄地回归平静.txt

我写的可能不够详细,读者可以参考下面这篇文章:

http://iamyida.iteye.com/blog/2197839

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值