Solr 源码 SolrIndexSearcher 类中的几个方法


//SolrIndexSearcher.java

/**
* Obtains the document ids for a query command and stores them on the query result.
*
* Visible flow of this method:
* 1. A fresh DocListAndSet is created and attached to {@code qr}.
* 2. When a query result cache exists and the command carries no DocSet filter, a
*    QueryResultKey is built and (unless NO_CHECK_QCACHE is set) the cache is consulted.
*    On a usable hit the requested slice is taken from the cached list, the DocSet is
*    computed if GET_DOCSET is set, and the method returns early.
* 3. Still inside that cache branch, the number of documents to collect is rounded up
*    to a multiple of queryResultWindowSize.
* 4. The result is generated either by sorting a DocSet (filter-cache path) or by running
*    the query through getDocListNC / getDocListAndSetNC.
* 5. The generated superset is stored in the query result cache when a key was built,
*    the superset is small enough and the result is not partial.
*
* @param qr  result holder; receives the DocListAndSet created here
* @param cmd query command supplying query, filters, sort, offset, length and flags
* @throws IOException propagated from the helper methods called here
*/
private void getDocListC(QueryResult qr, QueryCommand cmd) throws IOException {
// old parameters: DocListAndSet out, Query query, List<Query> filterList, DocSet filter, Sort lsort, int offset, int len, int flags, long timeAllowed, NamedList<Object> responseHeader
DocListAndSet out = new DocListAndSet();
qr.setDocListAndSet(out);
QueryResultKey key=null;

// Highest result position the request asks for (offset + len).
// Per the original author's note the requested row count defaults to 10;
// that default is not visible in this method.
int maxDocRequested = cmd.getOffset() + cmd.getLen();

// check for overflow, and check for # docs in index
if (maxDocRequested < 0 || maxDocRequested > maxDoc()) maxDocRequested = maxDoc();
int supersetMaxDoc= maxDocRequested;
DocList superset;

// we can try and look up the complete query in the cache.
// we can't do that if filter!=null though (we don't want to
// do hashCode() and equals() for a big DocSet).
if (queryResultCache != null && cmd.getFilter()==null) {
// all of the current flags can be reused during warming,
// so set all of them on the cache key.
/**
* Key derived from the query the user submitted (query, filter list, sort, flags);
* it is the key under which the result is stored in queryResultCache.
*/
key = new QueryResultKey(cmd.getQuery(), cmd.getFilterList(), cmd.getSort(), cmd.getFlags());
if ((cmd.getFlags() & NO_CHECK_QCACHE)==0) {
superset = (DocList)queryResultCache.get(key);

if (superset != null) {
// check that the cache entry has scores recorded if we need them
if ((cmd.getFlags() & GET_SCORES)==0 || superset.hasScores()) {
// NOTE: subset() returns null if the DocList has fewer docs than
// requested
out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
}
}
if (out.docList != null) {
// found the docList in the cache... now check if we need the docset too.
// OPT: possible future optimization - if the doclist contains all the matches,
// use it to make the docset instead of rerunning the query.
if (out.docSet==null && ((cmd.getFlags() & GET_DOCSET)!=0) ) {
if (cmd.getFilterList()==null) {
out.docSet = getDocSet(cmd.getQuery());
} else {
// query first, then every filter: the DocSet is computed over all of them together
List<Query> newList = new ArrayList<Query>(cmd.getFilterList()
.size()+1);
newList.add(cmd.getQuery());
newList.addAll(cmd.getFilterList());
out.docSet = getDocSet(newList);
}
}
// cache hit fully served: nothing is generated and nothing is re-cached
return;
}
}

// If we are going to generate the result, bump up to the
// next resultWindowSize for better caching.
// Note: this rounding only happens inside the cache branch; without a usable
// query result cache supersetMaxDoc stays equal to maxDocRequested.

// handle 0 special case as well as avoid idiv in the common case.
if (maxDocRequested < queryResultWindowSize) {
supersetMaxDoc=queryResultWindowSize;
} else {
supersetMaxDoc = ((maxDocRequested -1)/queryResultWindowSize + 1)*queryResultWindowSize;
// a negative value means the multiplication overflowed; fall back to the unrounded count
if (supersetMaxDoc < 0) supersetMaxDoc=maxDocRequested;
}
}


// OK, so now we need to generate an answer.
// One way to do that would be to check if we have an unordered list
// of results for the base query. If so, we can apply the filters and then
// sort by the resulting set. This can only be used if:
// - the sort doesn't contain score
// - we don't want score returned.

// check if we should try and use the filter cache
boolean useFilterCache=false;
if ((cmd.getFlags() & (GET_SCORES|NO_CHECK_FILTERCACHE))==0 && useFilterForSortedQuery && cmd.getSort() != null && filterCache != null) {
useFilterCache=true;
SortField[] sfields = cmd.getSort().getSort();
for (SortField sf : sfields) {
// any sort criterion on score rules out the filter-cache path
if (sf.getType() == SortField.SCORE) {
useFilterCache=false;
break;
}
}
}

if (useFilterCache) {
// now actually use the filter cache.
// for large filters that match few documents, this may be
// slower than simply re-executing the query.
if (out.docSet == null) {
out.docSet = getDocSet(cmd.getQuery(),cmd.getFilter());
DocSet bigFilt = getDocSet(cmd.getFilterList());
if (bigFilt != null) out.docSet = out.docSet.intersection(bigFilt);
}
// todo: there could be a sortDocSet that could take a list of
// the filters instead of anding them first...
// perhaps there should be a multi-docset-iterator
superset = sortDocSet(out.docSet,cmd.getSort(),supersetMaxDoc);
out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
} else {
// do it the normal way...
cmd.setSupersetMaxDoc(supersetMaxDoc);
if ((cmd.getFlags() & GET_DOCSET)!=0) {
DocSet qDocSet = getDocListAndSetNC(qr,cmd);
// cache the docSet matching the query w/o filtering
if (filterCache!=null && !qr.isPartialResults()) filterCache.put(cmd.getQuery(),qDocSet);
} else {

/**
* This call collects the document ids; they end up in the docList of the
* DocListAndSet object.
*/
getDocListNC(qr,cmd);
//Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader);
}
// NOTE(review): out.docList is read here without being assigned in this method on this
// path; presumably the NC helpers fill it through qr (which holds out) - confirm.
superset = out.docList;
out.docList = superset.subset(cmd.getOffset(),cmd.getLen());
}

// lastly, put the superset in the cache if the size is less than or equal
// to queryResultMaxDocsCached

if (key != null && superset.size() <= queryResultMaxDocsCached && !qr.isPartialResults()) {

// store the key generated from the user's query together with the doc id list
// found for that query in queryResultCache
queryResultCache.put(key, superset);

}
}

--------------------------------------------------------------------

/**
* Runs the query of the given command without consulting any cache and stores the
* collected document ids as a DocSlice on the query result (qr.setDocList).
*
* Three collection strategies are visible below:
* - nothing requested (lastDocRequested <= 0): only the hit count and top score are gathered;
* - a sort is present: hits go through a FieldSortedHitQueue;
* - no sort: hits go through a ScorePriorityQueue.
* In every strategy the collector is wrapped in a TimeLimitedCollector when
* cmd.getTimeAllowed() is positive; if the time limit is exceeded a warning is logged
* and the result is flagged as partial.
*
* @param qr  result holder; receives the DocSlice and the partial-results flag
* @param cmd query command supplying query, filters, sort, flags, time limit and
*            the number of documents to collect (getSupersetMaxDoc)
* @throws IOException propagated from the search and filter helpers
*/
private void getDocListNC(QueryResult qr,QueryCommand cmd) throws IOException {
//Parameters: cmd.getQuery(),theFilt,cmd.getSort(),0,supersetMaxDoc,cmd.getFlags(),cmd.getTimeAllowed(),responseHeader);
//Query query, DocSet filter, Sort lsort, int offset, int len, int flags, long timeAllowed, NamedList<Object> responseHeader
// an explicit DocSet filter on the command wins; otherwise one is built from the filter list
DocSet filter = cmd.getFilter()!=null ? cmd.getFilter() : getDocSet(cmd.getFilterList());
final long timeAllowed = cmd.getTimeAllowed();
int len = cmd.getSupersetMaxDoc();
int last = len;
// clamp to the index size; a negative value is treated the same way
if (last < 0 || last > maxDoc()) last=maxDoc();
final int lastDocRequested = last;
int nDocsReturned;
int totalHits;
float maxScore;
int[] ids;
float[] scores;

/**
* Produce the query object that is actually executed.
*/
Query query = QueryUtils.makeQueryable(cmd.getQuery());

// handle zero case...
if (lastDocRequested<=0) {
final DocSet filt = filter;
// single-element arrays so the anonymous collector can update them
final float[] topscore = new float[] { Float.NEGATIVE_INFINITY };
final int[] numHits = new int[1];

HitCollector hc = new HitCollector() {
public void collect(int doc, float score) {
// skip documents rejected by the filter
if (filt!=null && !filt.exists(doc)) return;
numHits[0]++;
if (score > topscore[0]) topscore[0]=score;
}
};
if( timeAllowed > 0 ) {
hc = new TimeLimitedCollector( hc, timeAllowed );
}
try {
searcher.search(query, hc );
}
catch( TimeLimitedCollector.TimeExceededException x ) {
// time limit hit: keep what was collected and mark the result as partial
log.warning( "Query: " + query + "; " + x.getMessage() );
qr.setPartialResults(true);
}

// no documents are returned in this branch, only the totals
nDocsReturned=0;
ids = new int[nDocsReturned];
scores = new float[nDocsReturned];
totalHits = numHits[0];
maxScore = totalHits>0 ? topscore[0] : 0.0f;
} else if (cmd.getSort() != null) {
// can't use TopDocs if there is a sort since it
// will do automatic score normalization.
// NOTE: this changed late in Lucene 1.9

final DocSet filt = filter;
final int[] numHits = new int[1];
// NOTE(review): the queue is sized with len, not the clamped lastDocRequested used by
// the unsorted branch - confirm this is intended.
final FieldSortedHitQueue hq = new FieldSortedHitQueue(reader, cmd.getSort().getSort(), len);

HitCollector hc = new HitCollector() {
public void collect(int doc, float score) {
if (filt!=null && !filt.exists(doc)) return;
numHits[0]++;
hq.insert(new FieldDoc(doc, score));
}
};
if( timeAllowed > 0 ) {
hc = new TimeLimitedCollector( hc, timeAllowed );
}
try {
searcher.search(query, hc );
}
catch( TimeLimitedCollector.TimeExceededException x ) {
log.warning( "Query: " + query + "; " + x.getMessage() );
qr.setPartialResults(true);
}

totalHits = numHits[0]; // total number of hits
maxScore = totalHits>0 ? hq.getMaxScore() : 0.0f;

nDocsReturned = hq.size();
ids = new int[nDocsReturned];
// scores are only materialised when the caller asked for them
scores = (cmd.getFlags()&GET_SCORES)!=0 ? new float[nDocsReturned] : null;
// arrays are filled back to front; presumably pop() yields the lowest-ranked
// entry first - confirm against the queue implementation
for (int i = nDocsReturned -1; i >= 0; i--) {
FieldDoc fieldDoc = (FieldDoc)hq.pop();
// fillFields is the point where score normalization happens
// hq.fillFields(fieldDoc)
ids[i] = fieldDoc.doc;
if (scores != null) scores[i] = fieldDoc.score;
}
} else {
// No Sort specified (sort by score descending)
// This case could be done with TopDocs, but would currently require
// getting a BitSet filter from a DocSet which may be inefficient.

final DocSet filt = filter;
final ScorePriorityQueue hq = new ScorePriorityQueue(lastDocRequested);
final int[] numHits = new int[1];
HitCollector hc = new HitCollector() {
float minScore=Float.NEGATIVE_INFINITY; // minimum score in the priority queue
public void collect(int doc, float score) {
if (filt!=null && !filt.exists(doc)) return;
// always insert the first lastDocRequested hits; afterwards only hits that
// score at least as high as the current queue minimum
if (numHits[0]++ < lastDocRequested || score >= minScore) {
// TODO: if docs are always delivered in order, we could use "score>minScore"
// instead of "score>=minScore" and avoid tiebreaking scores
// in the priority queue.
// but might BooleanScorer14 might still be used and deliver docs out-of-order?
hq.insert(new ScoreDoc(doc, score));
minScore = ((ScoreDoc)hq.top()).score;
}
}
};
if( timeAllowed > 0 ) {
hc = new TimeLimitedCollector( hc, timeAllowed );
}
try {
/**
* Run the search; matching hits are placed into hq by the collector.
*/
searcher.search(query, hc );


}
catch( TimeLimitedCollector.TimeExceededException x ) {
log.warning( "Query: " + query + "; " + x.getMessage() );
qr.setPartialResults(true);
}

totalHits = numHits[0];
nDocsReturned = hq.size();
ids = new int[nDocsReturned];
scores = (cmd.getFlags()&GET_SCORES)!=0 ? new float[nDocsReturned] : null;
ScoreDoc sdoc =null;
// filled back to front, so after the loop sdoc is the entry stored at index 0
for (int i = nDocsReturned -1; i >= 0; i--) {
sdoc = (ScoreDoc)hq.pop();

ids[i] = sdoc.doc;
if (scores != null) scores[i] = sdoc.score;
}
// score of the entry at index 0, or 0 when nothing was collected
maxScore = sdoc ==null ? 0.0f : sdoc.score;
}


// the slice always starts at 0; its length is bounded by both the request and what was collected
int sliceLen = Math.min(lastDocRequested,nDocsReturned);
if (sliceLen < 0) sliceLen=0;
qr.setDocList(new DocSlice(0,sliceLen,ids,scores,totalHits,maxScore));


/**************** older implementation using TopDocs *******************


Filter lfilter=null;
if (filter != null) {
final BitSet bits = filter.getBits(); // avoid if possible
lfilter = new Filter() {
public BitSet bits(IndexReader reader) {
return bits;
}
};
}

int lastDocRequested=offset+len;

// lucene doesn't allow 0 to be passed for nDocs
if (lastDocRequested==0) lastDocRequested=1;

// TopFieldDocs sortedDocs; // use TopDocs so both versions can use it
TopDocs sortedDocs;
if (lsort!=null) {
sortedDocs = searcher.search(query, lfilter, lastDocRequested, lsort);
} else {
sortedDocs = searcher.search(query, lfilter, lastDocRequested);
}

int nDocsReturned = sortedDocs.scoreDocs.length;
int[] docs = new int[nDocsReturned];
for (int i=0; i<nDocsReturned; i++) {
docs[i] = sortedDocs.scoreDocs[i].doc;
}
float[] scores=null;
float maxScore=0.0f;
if ((flags & GET_SCORES) != 0) {
scores = new float[nDocsReturned];
for (int i=0; i<nDocsReturned; i++) {
scores[i] = sortedDocs.scoreDocs[i].score;
}
if (nDocsReturned>0) {
maxScore=sortedDocs.scoreDocs[0].score;
}
}
int sliceLen = Math.min(offset+len,nDocsReturned) - offset;
if (sliceLen < 0) sliceLen=0;
return new DocSlice(offset,sliceLen,docs,scores,sortedDocs.totalHits, maxScore);

**********************************************************************************/

}





//SolrIndexSearcher.java

/**
 * Returns the {@link Document} stored under the given document id.
 *
 * <p>The document cache is consulted first. On a miss (or when no cache is
 * configured) the document is read through the searcher's index reader and,
 * if a cache exists, stored in it before being returned. A cached document is
 * returned as-is, regardless of the {@code fields} argument.
 *
 * <p>Note: the document will have all fields accessible, but if lazy field
 * loading is enabled and a field set is provided, only the provided fields
 * will be loaded (the remainder will be available lazily).
 *
 * @param i      id of the document to fetch
 * @param fields names of the fields to load up front, or {@code null} to read
 *               the document without a field selector
 * @return the cached document, or the one just read from the index
 * @throws IOException propagated from the index reader
 */
public Document doc(int i, Set<String> fields) throws IOException {
  log.info("docId: " + i);

  // Fast path: answer from the document cache when it holds this id.
  if (documentCache != null) {
    final Document cached = (Document) documentCache.get(i);
    if (cached != null) {
      return cached;
    }
  }

  // Slow path: go to the index; restrict eager loading only when lazy field
  // loading is switched on and the caller named the fields it needs.
  final Document loaded;
  if (enableLazyFieldLoading && fields != null) {
    loaded = searcher.getIndexReader().document(i, new SetNonLazyFieldSelector(fields));
  } else {
    loaded = searcher.getIndexReader().document(i);
  }

  // Remember the freshly read document for later lookups.
  if (documentCache != null) {
    documentCache.put(i, loaded);
  }
  return loaded;
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值