// Practiced per-document boosting (weighting) today; it turned out to be fairly simple.
package jim.testLucene2;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;
/**
 * Lucene 3.5 demo: indexes a few documents with per-document boosts based on
 * the e-mail domain, deletes one document by term, then searches the
 * "content" field and prints the stored fields of each hit.
 */
public class testLcuene2 {
    // Boost factor per e-mail domain; domains not listed fall back to 0.5f.
    private static Map<String, Float> scores = new HashMap<String, Float>();

    public static void main(String[] args) throws IOException, ParseException {
        scores.put("tom.com", 2.0f);
        String[] num = {"1", "2", "3", "4"};
        String[] email = {"jim@tom1.com", "zhai@tom1.com", "jimzhai@tom.com", "love@tom.com"};
        String[] name = {"jim", "zhai", "john", "tom"};
        // FIX: "jhon" -> "john" to stay consistent with the name array above.
        String[] contents = {"Hello jim", "Hello zhai", "Hello john", "Hello tom"};
        Directory directory = FSDirectory.open(new File("index"));
        IndexWriter writer = new IndexWriter(directory,
                new IndexWriterConfig(Version.LUCENE_35, new IKAnalyzer()));
        Document doc = null;
        Field field = null;
        String domain = null;
        for (int i = 0; i < num.length; i++) {
            doc = new Document();
            // The mail domain is everything after the '@'; it selects the boost.
            domain = email[i].substring(email[i].lastIndexOf("@") + 1);
            if (scores.containsKey(domain)) {
                doc.setBoost(scores.get(domain)); // favored domain: boost above default 1.0
            } else {
                doc.setBoost(0.5f); // other domains: demote below default 1.0
            }
            field = new Field("num", num[i], Store.YES, Index.NOT_ANALYZED_NO_NORMS);
            doc.add(field);
            field = new Field("email", email[i], Store.YES, Index.NOT_ANALYZED);
            doc.add(field);
            field = new Field("name", name[i], Store.YES, Index.NOT_ANALYZED_NO_NORMS);
            doc.add(field);
            field = new Field("content", contents[i], Store.YES, Index.ANALYZED);
            doc.add(field);
            writer.addDocument(doc);
            System.out.println(num[i] + " has indexed");
        }
        writer.close();
        IndexReader reader = IndexReader.open(directory);
        // numDocs() excludes deleted documents; maxDoc() counts them as well.
        System.out.println("reader.numDocs():" + reader.numDocs()
                + " reader.maxDocs(): " + reader.maxDoc());
        writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new IKAnalyzer()));
        writer.deleteDocuments(new Term("num", "1"));
        // NOTE: this reader is a point-in-time snapshot opened BEFORE the delete
        // (and the delete is not committed until writer.close()), so the counts
        // below do not change; a reader must be reopened to observe deletions.
        System.out.println("reader.numDocs():" + reader.numDocs()
                + " reader.maxDocs(): " + reader.maxDoc());
        System.out.println("reader.numDeletedDocs(): " + reader.numDeletedDocs());
        writer.close();
        reader.close();
        IndexReader Reader = IndexReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(Reader);
        // NOTE(review): parsing uses StandardAnalyzer while indexing used
        // IKAnalyzer; harmless for the ASCII term "Hello" but would mismatch
        // on CJK text.
        QueryParser parser = new QueryParser(Version.LUCENE_35, "content",
                new StandardAnalyzer(Version.LUCENE_35));
        Query query = parser.parse("Hello");
        TopDocs tps = searcher.search(query, 10);
        ScoreDoc[] sds = tps.scoreDocs;
        Document hit = null;
        for (ScoreDoc sd : sds) {
            hit = searcher.doc(sd.doc);
            // FIX: the stored field is named "num", not "id" — get("id")
            // always returned null.
            System.out.println("NUM: " + hit.get("num") + " name: " + hit.get("name")
                    + " email " + hit.get("email") + " content: " + hit.get("content"));
        }
        searcher.close(); // FIX: release the searcher before closing its reader
        Reader.close();
    }
}