1. 元旦后第一天上班,精神恍惚,什么都不顺,哎!新的一年不是一个好的开头啊!
先上代码:先写入索引内容(写这篇文章是为了发泄一下;分词器仍然使用上一篇文章中我们自己写的分词器)。
package cn.com.demo.comparator;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import cn.com.demo.chnese.MsgAnalyzi;
import cn.com.demo.chnese.MySameContext;
/**
 * Builds the demo Lucene index: every regular file in the source directory
 * becomes one document carrying its length, path, name and text content.
 *
 * <p>The length and the name are each added twice under the same field name:
 * once as a stored/indexed field (so it can be read back from a hit) and once
 * as a doc-values field (so it can be used as a sort key).
 */
public class PublicComparator {
    /**
     * Term table passed to the custom analyzer via {@code MySameContext}.
     * NOTE(review): presumably a synonym table (term -> alternatives) - confirm
     * against MySameContext.
     */
    private static final Map<String,String[]> map=new HashMap<String,String[]>();
    static{
        map.put("中国", new String[]{"天朝","中原"});
        map.put("我", new String[]{"朕","俺"});
        map.put("主义", new String[]{"注意","猪亿"});
    }

    /**
     * Indexes all files found in the hard-coded source directory into the
     * hard-coded index directory.
     *
     * @param args unused
     * @throws Exception declared for compatibility; I/O failures are caught and printed
     */
    public static void main(String[] args) throws Exception {
        String spath="D:\\lunece\\chinese";
        String str="D:\\lunece\\file";
        // try-with-resources closes the writer first and the directory second,
        // and never dereferences a writer that failed to open.
        try (FSDirectory dir=FSDirectory.open(Paths.get(spath));
             IndexWriter write=new IndexWriter(dir, new IndexWriterConfig(new MsgAnalyzi(new MySameContext(map))))) {
            File[] files=new File(str).listFiles();
            if(files==null){
                // listFiles() returns null when the path is missing or not a directory.
                throw new IOException("Not a readable directory: "+str);
            }
            for(File file:files){
                if(!file.isFile()){
                    // Sub-directories cannot be opened as a stream; skip them
                    // instead of aborting the whole run.
                    continue;
                }
                write.addDocument(buildDocument(file));
            }
            write.commit();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the Lucene document for one file.
     *
     * @param file regular file to index
     * @return document with fileLength, filePath, fileName and fileContent fields
     * @throws IOException if the file content cannot be read
     */
    private static Document buildDocument(File file) throws IOException {
        Document doc=new Document();
        long lo=file.length();
        // Doc-values copy of the length: this is what a LONG sort on "fileLength" reads.
        doc.add(new NumericDocValuesField("fileLength", lo));
        doc.add(new LongField("fileLength", lo, Store.YES));
        doc.add(new TextField("filePath", file.getPath(), Store.YES));
        doc.add(new TextField("fileName", file.getName(), Store.YES));
        // Doc-values copy of the name: this is what a STRING sort on "fileName" reads.
        // BytesRef(CharSequence) encodes as UTF-8, so the sort key does not depend
        // on the platform default charset.
        doc.add(new SortedDocValuesField("fileName", new BytesRef(file.getName())));
        // The content is read into a String so that it can be stored (Store.YES).
        // A TextField built from a Reader is indexed but not stored, so reading it
        // back from a hit would yield nothing.
        doc.add(new TextField("fileContent", readContent(file), Store.YES));
        return doc;
    }

    /**
     * Reads a file decoded as gb2312 and concatenates its lines.
     * Line terminators are dropped and nothing is inserted between lines.
     *
     * @param file file to read
     * @return the concatenated text of all lines
     * @throws IOException if the file cannot be opened or read
     */
    private static String readContent(File file) throws IOException {
        StringBuilder sb=new StringBuilder();
        try (FileInputStream in=new FileInputStream(file);
             BufferedReader br=new BufferedReader(new InputStreamReader(in, "gb2312"))) {
            String line;
            while((line=br.readLine())!=null){
                sb.append(line);
            }
        }
        return sb.toString();
    }
}
package cn.com.demo.comparator;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import cn.com.demo.chnese.MsgAnalyzi;
import cn.com.demo.chnese.MySameContext;
/**
 * Runs the same query against the demo index four times, each time with a
 * different {@link Sort}, and prints the top hits so the orderings can be compared.
 */
public class PublicReader {
    /**
     * Term table passed to the custom analyzer via {@code MySameContext}.
     * NOTE(review): presumably a synonym table (term -> alternatives) - confirm
     * against MySameContext.
     */
    private static final Map<String,String[]> map=new HashMap<String,String[]>();
    static{
        map.put("中国", new String[]{"天朝","中原"});
        map.put("我", new String[]{"朕","俺"});
        map.put("主义", new String[]{"注意","猪亿"});
    }

    /**
     * Opens the hard-coded index directory and prints the hits for "健康"
     * under four sort orders, separated by a dashed line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String spath="D:\\lunece\\chinese";
        // try-with-resources closes the reader and then the directory, and does
        // not touch a reader that failed to open.
        try (FSDirectory dir=FSDirectory.open(Paths.get(spath));
             DirectoryReader reader=DirectoryReader.open(dir)) {
            IndexSearcher searcher=new IndexSearcher(reader);
            QueryParser parser=new QueryParser("fileContent", new MsgAnalyzi(new MySameContext(map)));
            // The query text never changes, so parse it once and reuse it.
            Query query=parser.parse("健康");
            String separator="----------------------------------------";

            // By score, descending; equal scores fall back to ascending doc id.
            que(query, searcher, Sort.RELEVANCE);
            System.out.println(separator);
            // By doc id (index order), ascending.
            que(query, searcher, Sort.INDEXORDER);
            System.out.println(separator);
            // By the numeric doc values of "fileLength", ascending; ties fall back to doc id.
            que(query, searcher, new Sort(new SortField("fileLength", Type.LONG)));
            System.out.println(separator);
            // By the sorted doc values of "fileName" compared as bytes; the third
            // argument (reverse=true) flips the order to descending.
            que(query, searcher, new Sort(new SortField("fileName", Type.STRING,true)));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Executes a sorted search for at most 10 hits and prints one line per hit:
     * doc id, score, fileLength, filePath and fileName, separated by ",,,".
     * I/O errors are caught and printed rather than propagated.
     *
     * @param query    query to run
     * @param searcher searcher over the index
     * @param sort     ordering to apply to the hits
     */
    public static void que(Query query,IndexSearcher searcher,Sort sort){
        try {
            // doDocScores=true so score.score is filled in even for non-relevance
            // sorts; doMaxScore=false because the maximum score is not printed.
            TopDocs top=searcher.search(query, 10,sort,true,false);
            for(ScoreDoc score:top.scoreDocs){
                Document doc=searcher.doc(score.doc);
                System.out.println(score.doc+",,,"+score.score+",,,"+doc.get("fileLength")+",,,"+doc.get("filePath")+",,,"+doc.get("fileName"));
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
一月 03, 2017 4:28:26 下午 com.chenlb.mmseg4j.Dictionary getInstance
信息: try to load dir=C:\Users\Administrator\Desktop\mmseg4j-core-1.10.0\data
一月 03, 2017 4:28:26 下午 com.chenlb.mmseg4j.Dictionary loadDic
信息: chars loaded time=53ms, line=12638, on file=C:\Users\Administrator\Desktop\mmseg4j-core-1.10.0\data\chars.dic
一月 03, 2017 4:28:26 下午 com.chenlb.mmseg4j.Dictionary loadWord
信息: words loaded time=182ms, line=149853, on file=file:\E:\eclipse\MyEclipse\java1\lib\mmseg4j-core-1.10.0.jar!\data\words.dic
一月 03, 2017 4:28:26 下午 com.chenlb.mmseg4j.Dictionary loadWord
信息: words loaded time=44ms, line=149853, on file=C:\Users\Administrator\Desktop\mmseg4j-core-1.10.0\data\words.dic
一月 03, 2017 4:28:26 下午 com.chenlb.mmseg4j.Dictionary loadDic
信息: load all dic use time=282ms
一月 03, 2017 4:28:26 下午 com.chenlb.mmseg4j.Dictionary loadUnit
信息: unit loaded time=1ms, line=22, on file=C:\Users\Administrator\Desktop\mmseg4j-core-1.10.0\data\units.dic
2,,,0.13087946,,,420,,,D:\lunece\file\偶尔俗气.txt,,,偶尔俗气.txt
12,,,0.13087946,,,420,,,D:\lunece\file\偶尔俗气.txt,,,偶尔俗气.txt
22,,,0.13087946,,,420,,,D:\lunece\file\偶尔俗气.txt,,,偶尔俗气.txt
9,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
19,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
29,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
5,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
15,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
25,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
0,,,0.0755633,,,473,,,D:\lunece\file\不要想如果当初.txt,,,不要想如果当初.txt
----------------------------------------
0,,,0.0755633,,,473,,,D:\lunece\file\不要想如果当初.txt,,,不要想如果当初.txt
1,,,0.0755633,,,461,,,D:\lunece\file\保持单纯.txt,,,保持单纯.txt
2,,,0.13087946,,,420,,,D:\lunece\file\偶尔俗气.txt,,,偶尔俗气.txt
3,,,0.0755633,,,396,,,D:\lunece\file\偶尔的出离轨道.txt,,,偶尔的出离轨道.txt
4,,,0.0755633,,,442,,,D:\lunece\file\发生意见.txt,,,发生意见.txt
5,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
6,,,0.0755633,,,496,,,D:\lunece\file\悄悄悄悄地回归平静.txt,,,悄悄悄悄地回归平静.txt
7,,,0.0755633,,,534,,,D:\lunece\file\抓住最好的时机绝不错过.txt,,,抓住最好的时机绝不错过.txt
8,,,0.0755633,,,488,,,D:\lunece\file\控制情绪别浪费了.txt,,,控制情绪别浪费了.txt
9,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
----------------------------------------
9,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
19,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
29,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
3,,,0.0755633,,,396,,,D:\lunece\file\偶尔的出离轨道.txt,,,偶尔的出离轨道.txt
13,,,0.0755633,,,396,,,D:\lunece\file\偶尔的出离轨道.txt,,,偶尔的出离轨道.txt
23,,,0.0755633,,,396,,,D:\lunece\file\偶尔的出离轨道.txt,,,偶尔的出离轨道.txt
5,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
15,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
25,,,0.09067595,,,401,,,D:\lunece\file\学会沉默.txt,,,学会沉默.txt
2,,,0.13087946,,,420,,,D:\lunece\file\偶尔俗气.txt,,,偶尔俗气.txt
----------------------------------------
9,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
19,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
29,,,0.10578861,,,271,,,D:\lunece\file\至少平静.txt,,,至少平静.txt
8,,,0.0755633,,,488,,,D:\lunece\file\控制情绪别浪费了.txt,,,控制情绪别浪费了.txt
18,,,0.0755633,,,488,,,D:\lunece\file\控制情绪别浪费了.txt,,,控制情绪别浪费了.txt
28,,,0.0755633,,,488,,,D:\lunece\file\控制情绪别浪费了.txt,,,控制情绪别浪费了.txt
7,,,0.0755633,,,534,,,D:\lunece\file\抓住最好的时机绝不错过.txt,,,抓住最好的时机绝不错过.txt
17,,,0.0755633,,,534,,,D:\lunece\file\抓住最好的时机绝不错过.txt,,,抓住最好的时机绝不错过.txt
27,,,0.0755633,,,534,,,D:\lunece\file\抓住最好的时机绝不错过.txt,,,抓住最好的时机绝不错过.txt
6,,,0.0755633,,,496,,,D:\lunece\file\悄悄悄悄地回归平静.txt,,,悄悄悄悄地回归平静.txt
我写得可能不够详细,读者可以参考相关资料。