FACET 案例!
添加对应的字段.
&facet=true &facet.field=menu &facet.field=camera_type
返回的结果:
"facet_fields" : { "menu" : [ "Canon USA" , 25, "Olympus" , 21, "Sony" , 12, "Panasonic" , 9, "Nikon" , 4 ], "camera_type" : [ "Compact" , 17, "Ultracompact" , 11, "SLR" , 9, "Full body" , 8 ] }
对于facet对应的源码,可以从FacetComponent这个类分析.
其类结构的说明可以浏览--
https://lucene.apache.org/solr/5_3_0/solr-core/org/apache/solr/handler/component/FacetComponent.html
FacetParams
http://www.docjar.com/docs/api/org/apache/solr/common/params/FacetParams.html
源码笔记如下--
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.handler.component;

import java.io.IOException;
import java.net.URL;
import java.util.*; // paste had truncated "import java.util;" -- restored wildcard import

import org.apache.lucene.queryParser.ParseException;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.util.OpenBitSet;

/**
 * Computes facet counts for a request.  Locally it delegates to
 * {@link SimpleFacets}; for distributed search it merges per-shard counts
 * and issues a second "refinement" phase for terms whose global count is
 * uncertain.
 *
 * @version $Id: FacetComponent.java 692551 2008-09-05 21:02:35Z yonik $
 * @since solr 1.3
 */
public class FacetComponent extends SearchComponent // extends SearchComponent
{
  public static final String COMPONENT_NAME = "facet";

  /**
   * Called for every incoming request.  If facet=true, ask for a DocSet
   * and flag the builder so the later stages actually run faceting.
   */
  @Override
  public void prepare(ResponseBuilder rb) throws IOException
  {
    if (rb.req.getParams().getBool(FacetParams.FACET, false)) {
      rb.setNeedDocSet(true);
      rb.doFacets = true;
    }
  }

  /**
   * Actually run the query (local, non-distributed faceting).
   * @param rb the response builder carrying the request and its DocSet
   */
  @Override
  public void process(ResponseBuilder rb) throws IOException
  {
    if (rb.doFacets) {
      SolrParams params = rb.req.getParams();
      SimpleFacets f = new SimpleFacets(rb.req,
                                        rb.getResults().docSet,
                                        params);

      // TODO ???? add this directly to the response, or to the builder?
      rb.rsp.add("facet_counts", f.getFacetCounts());
    }
  }

  /** Distributed request handling: piggyback facet-refinement queries on outgoing shard requests. */
  @Override
  public int distributedProcess(ResponseBuilder rb) throws IOException {
    if (!rb.doFacets) {
      return ResponseBuilder.STAGE_DONE;
    }

    if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
      // Overlap facet refinement requests (those shards that we need a count
      // for particular facet values from), where possible, with the requests
      // to get fields (because we know that is the only other required phase).
      // We do this in distributedProcess so we can look at all of the
      // requests in the outgoing queue at once.

      for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
        List<String> fqueries = rb._facetInfo._toRefine[shardNum];
        if (fqueries == null || fqueries.size() == 0) continue;

        String shard = rb.shards[shardNum];

        ShardRequest refine = null;
        boolean newRequest = false;

        // Try to find a request that is already going out to that shard.
        // If nshards becomes too great, we may want to move to hashing for
        // better scalability.
        for (ShardRequest sreq : rb.outgoing) {
          // NOTE: the paste used non-short-circuit '&' between the null check
          // and 'sreq.shards.length' -- that evaluates both sides and NPEs
          // when shards == null; '&&' restores the intended guard.
          if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS) != 0
              && sreq.shards != null && sreq.shards.length == 1
              && sreq.shards[0].equals(shard))
          {
            refine = sreq;
            break;
          }
        }

        if (refine == null) {
          // We didn't find any other suitable requests going out to that
          // shard, so create one ourselves.
          newRequest = true;
          refine = new ShardRequest();
          refine.shards = new String[]{rb.shards[shardNum]};
          refine.params = new ModifiableSolrParams(rb.req.getParams());
          // don't request any documents
          refine.params.remove(CommonParams.START);
          refine.params.set(CommonParams.ROWS, "0");
        }

        refine.purpose |= ShardRequest.PURPOSE_REFINE_FACETS;
        refine.params.set(FacetParams.FACET, "true");
        refine.params.remove(FacetParams.FACET_FIELD);
        // TODO: perhaps create a more compact facet.terms method?
        refine.params.set(FacetParams.FACET_QUERY,
                          fqueries.toArray(new String[fqueries.size()]));

        if (newRequest) {
          rb.addRequest(this, refine);
        }
      }
    }

    return ResponseBuilder.STAGE_DONE;
  }

  /**
   * Rewrites the facet params on each outgoing shard request: strips
   * mincount/offset, over-requests the limit for accuracy, and disables
   * faceting entirely on non-top-ids requests.
   */
  @Override
  public void modifyRequest(ResponseBuilder rb, SearchComponent who, ShardRequest sreq) {
    if (!rb.doFacets) return;

    if ((sreq.purpose & ShardRequest.PURPOSE_GET_TOP_IDS) != 0) {
      sreq.purpose |= ShardRequest.PURPOSE_GET_FACETS;

      FacetInfo fi = rb._facetInfo;
      if (fi == null) {
        rb._facetInfo = fi = new FacetInfo();
        fi.parse(rb.req.getParams(), rb);
        // should already be true...
        // sreq.params.set(FacetParams.FACET, "true");
      }

      sreq.params.remove(FacetParams.FACET_MINCOUNT);
      sreq.params.remove(FacetParams.FACET_OFFSET);
      sreq.params.remove(FacetParams.FACET_LIMIT);

      for (DistribFieldFacet dff : fi.topFacets.values()) {
        String paramStart = "f." + dff.field + '.';
        sreq.params.remove(paramStart + FacetParams.FACET_MINCOUNT);
        sreq.params.remove(paramStart + FacetParams.FACET_OFFSET);

        if (dff.limit > 0) {
          // set the initial limit higher to increase accuracy
          dff.initialLimit = dff.offset + dff.limit;
          dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10;
        } else {
          dff.initialLimit = dff.limit;
        }

        // TEST: Uncomment the following line when testing to suppress
        // over-requesting facets and thus cause more facet refinement queries.
        // if (dff.limit > 0) dff.initialLimit = dff.offset + dff.limit;

        sreq.params.set(paramStart + FacetParams.FACET_LIMIT, dff.initialLimit);
      }
    } else {
      // turn off faceting on other requests
      sreq.params.set(FacetParams.FACET, "false");
      // we could optionally remove faceting params
    }
  }

  /** Dispatches shard responses to the count-merging or refinement handler. */
  @Override
  public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
    if (!rb.doFacets) return;

    if ((sreq.purpose & ShardRequest.PURPOSE_GET_FACETS) != 0) {
      countFacets(rb, sreq);
    } else if ((sreq.purpose & ShardRequest.PURPOSE_REFINE_FACETS) != 0) {
      refineFacets(rb, sreq);
    }
  }

  /**
   * Count and sort: merge per-shard facet counts and decide which terms
   * need a refinement query on which shards.
   */
  private void countFacets(ResponseBuilder rb, ShardRequest sreq) {
    FacetInfo fi = rb._facetInfo;

    for (ShardResponse srsp : sreq.responses) {
      int shardNum = rb.getShardNum(srsp.getShard());
      NamedList facet_counts =
          (NamedList) srsp.getSolrResponse().getResponse().get("facet_counts");

      // handle facet queries
      NamedList facet_queries = (NamedList) facet_counts.get("facet_queries");
      if (facet_queries != null) {
        for (int i = 0; i < facet_queries.size(); i++) {
          String facet_q = (String) facet_queries.getName(i);
          long count = ((Number) facet_queries.getVal(i)).longValue();
          Long prevCount = fi.queryFacets.get(facet_q);
          if (prevCount != null) count += prevCount;
          fi.queryFacets.put(facet_q, count);
        }
      }

      // step through each facet.field, adding results from this shard
      NamedList facet_fields = (NamedList) facet_counts.get("facet_fields");
      for (DistribFieldFacet dff : fi.topFacets.values()) {
        dff.add(shardNum, (NamedList) facet_fields.get(dff.field), dff.initialLimit);
      }
    }

    //
    // This code currently assumes that there will be only a single
    // request (with responses from all shards) sent out to get facets...
    // otherwise we would need to wait until all facet responses were received.
    //

    // list of queries to send each shard
    List<String>[] toRefine = new List[rb.shards.length];
    fi._toRefine = toRefine;
    for (int i = 0; i < toRefine.length; i++) {
      toRefine[i] = new ArrayList<String>();
    }

    for (DistribFieldFacet dff : fi.topFacets.values()) {
      if (dff.limit <= 0) continue; // no need to check these facets for refinement
      ShardFacetCount[] counts = dff.getSorted();
      int ntop = Math.min(counts.length, dff.offset + dff.limit);
      long smallestCount = counts.length == 0 ? 0 : counts[ntop - 1].count;

      for (int i = 0; i < counts.length; i++) {
        ShardFacetCount sfc = counts[i];
        String query = null;
        boolean needRefinement = false;

        if (i < ntop) {
          // automatically flag the top values for refinement
          needRefinement = true;
        } else {
          // calculate the maximum value that this term may have
          // and if it is >= smallestCount, then flag for refinement
          long maxCount = sfc.count;
          for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
            OpenBitSet obs = dff.counted[shardNum];
            if (!obs.get(sfc.termNum)) {
              // if missing from this shard, add the max it could be
              maxCount += dff.maxPossible(sfc, shardNum);
            }
          }
          if (maxCount >= smallestCount) {
            // TODO: on a tie, we could check the term values
            needRefinement = true;
          }
        }

        if (needRefinement) {
          // add a query for each shard missing the term that needs refinement
          for (int shardNum = 0; shardNum < rb.shards.length; shardNum++) {
            OpenBitSet obs = dff.counted[shardNum];
            if (!obs.get(sfc.termNum) && dff.maxPossible(sfc, shardNum) > 0) {
              dff.needRefinements = true;
              if (query == null) query = dff.makeQuery(sfc);
              toRefine[shardNum].add(query);
            }
          }
        }
      }
    }
  }

  /** Refine results: fold second-phase per-term counts back into the merged totals. */
  private void refineFacets(ResponseBuilder rb, ShardRequest sreq) {
    FacetInfo fi = rb._facetInfo;

    for (ShardResponse srsp : sreq.responses) {
      // int shardNum = rb.getShardNum(srsp.shard);
      NamedList facet_counts =
          (NamedList) srsp.getSolrResponse().getResponse().get("facet_counts");
      NamedList facet_queries = (NamedList) facet_counts.get("facet_queries");

      // These are single term queries used to fill in missing counts
      // for facet.field queries
      for (int i = 0; i < facet_queries.size(); i++) {
        try {
          String facet_q = (String) facet_queries.getName(i);
          long count = ((Number) facet_queries.getVal(i)).longValue();

          // expect {!field f=field}value style params
          SolrParams qparams = QueryParsing.getLocalParams(facet_q, null);
          if (qparams == null) continue; // not a refinement
          String field = qparams.get(QueryParsing.F);
          String val = qparams.get(QueryParsing.V);

          // Find the right field.facet for this field
          DistribFieldFacet dff = fi.topFacets.get(field);
          if (dff == null) continue; // maybe this wasn't for facet count refinement

          // Find the right constraint count for this value
          ShardFacetCount sfc = dff.counts.get(val);

          if (sfc == null) {
            continue;
            // Just continue, since other components might have added
            // this facet.query for other purposes.  But if there are charset
            // issues then the values coming back may not match the values sent.
          }

          // TODO REMOVE
          // System.out.println("Got " + facet_q + " , refining count: " + sfc + " += " + count);

          sfc.count += count;

        } catch (ParseException e) {
          // shouldn't happen, so fail for now rather than covering it up
          throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e);
        }
      }
    }
  }

  /** Build the final facet_counts NamedList once all shard responses are merged. */
  @Override
  public void finishStage(ResponseBuilder rb) {
    if (!rb.doFacets || rb.stage != ResponseBuilder.STAGE_GET_FIELDS) return;
    // wait until STAGE_GET_FIELDS
    // so that "result" is already stored in the response (for aesthetics)

    FacetInfo fi = rb._facetInfo;

    NamedList facet_counts = new SimpleOrderedMap();
    NamedList facet_queries = new SimpleOrderedMap();
    facet_counts.add("facet_queries", facet_queries);
    for (Map.Entry<String, Long> entry : fi.queryFacets.entrySet()) {
      facet_queries.add(entry.getKey(), num(entry.getValue()));
    }

    NamedList facet_fields = new SimpleOrderedMap();
    facet_counts.add("facet_fields", facet_fields);

    for (DistribFieldFacet dff : fi.topFacets.values()) {
      NamedList fieldCounts = new NamedList(); // order is more important for facets
      facet_fields.add(dff.field, fieldCounts);

      ShardFacetCount[] counts = dff.countSorted;
      if (counts == null || dff.needRefinements) {
        counts = dff.getSorted();
      }

      int end = dff.limit < 0 ? counts.length
                              : Math.min(dff.offset + dff.limit, counts.length);
      for (int i = dff.offset; i < end; i++) {
        if (counts[i].count < dff.minCount) break;
        fieldCounts.add(counts[i].name, num(counts[i].count));
      }

      if (dff.missing) {
        fieldCounts.add(null, num(dff.missingCount));
      }
    }

    // TODO: list facets (sorted by natural order)
    // TODO: facet dates
    facet_counts.add("facet_dates", new SimpleOrderedMap());

    rb.rsp.add("facet_counts", facet_counts);

    rb._facetInfo = null; // could be big, so release asap
  }

  // use <int> tags for smaller facet counts (better back compatibility)
  private Number num(long val) {
    if (val < Integer.MAX_VALUE) return (int) val;
    else return val;
  }

  private Number num(Long val) {
    if (val.longValue() < Integer.MAX_VALUE) return val.intValue();
    else return val;
  }

  /////////////////////////////////////////////
  /// SolrInfoMBean
  /////////////////////////////////////////////

  @Override
  public String getDescription() {
    return "Handle Faceting";
  }

  @Override
  public String getVersion() {
    return "$Revision: 692551 $";
  }

  @Override
  public String getSourceId() {
    return "$Id: FacetComponent.java 692551 2008-09-05 21:02:35Z yonik $";
  }

  @Override
  public String getSource() {
    return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.3/src/java/org/apache/solr/handler/component/FacetComponent.java $";
  }

  @Override
  public URL[] getDocs() {
    return null;
  }
}

/** Per-request bookkeeping for distributed faceting. */
class FacetInfo {
  // refinement queries to send to each shard, indexed by shard number
  List<String>[] _toRefine;

  LinkedHashMap<String, Long> queryFacets;
  LinkedHashMap<String, DistribFieldFacet> topFacets;  // field facets that order by constraint count (sort=true)
  LinkedHashMap<String, DistribFieldFacet> listFacets; // field facets that list values in term order

  /** Parse facet.query / facet.field params into the maps above. */
  void parse(SolrParams params, ResponseBuilder rb) {
    queryFacets = new LinkedHashMap<String, Long>();
    topFacets = new LinkedHashMap<String, DistribFieldFacet>();
    listFacets = new LinkedHashMap<String, DistribFieldFacet>();

    String[] facetQs = params.getParams(FacetParams.FACET_QUERY);
    if (facetQs != null) {
      for (String query : facetQs) {
        queryFacets.put(query, 0L);
      }
    }

    String[] facetFs = params.getParams(FacetParams.FACET_FIELD);
    if (facetFs != null) {
      for (String field : facetFs) {
        DistribFieldFacet ff = new DistribFieldFacet(rb, field);
        ff.fillParams(params, field);
        if (ff.sort) {
          topFacets.put(field, ff);
        } else {
          listFacets.put(field, ff);
        }
      }
    }
  }
}

/** Parsed per-field facet parameters. */
class FieldFacet {
  String field;
  int offset;
  int limit;
  int minCount;
  boolean sort;
  boolean missing;
  String prefix;
  long missingCount;

  /** Read the f.&lt;field&gt;.facet.* params (with defaults) for this field. */
  void fillParams(SolrParams params, String field) {
    this.field = field;
    this.offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
    this.limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
    Integer mincount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
    if (mincount == null) {
      Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
      // mincount = (zeros!=null && zeros) ? 0 : 1;
      mincount = (zeros != null && !zeros) ? 1 : 0;
      // current default is to include zeros.
    }
    this.minCount = mincount;
    this.missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);
    // default to sorting if there is a limit.
    this.sort = params.getFieldBool(field, FacetParams.FACET_SORT, limit > 0);
    this.prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
  }
}

/**
 * A FieldFacet that additionally tracks, per shard, which terms were seen
 * and the largest count a missing term could still have.
 */
class DistribFieldFacet extends FieldFacet {
  SchemaField sf;

  // the max possible count for a term appearing on no list
  long missingMaxPossible;
  // the max possible count for a missing term for each shard (indexed by shardNum)
  long[] missingMax;
  OpenBitSet[] counted; // a bitset for each shard, keeping track of which terms seen
  HashMap<String, ShardFacetCount> counts = new HashMap<String, ShardFacetCount>(128);
  int termNum;
  String queryPrefix;

  int initialLimit; // how many terms requested in first phase
  boolean needRefinements;
  ShardFacetCount[] countSorted;

  DistribFieldFacet(ResponseBuilder rb, String field) {
    sf = rb.req.getSchema().getField(field);
    missingMax = new long[rb.shards.length];
    counted = new OpenBitSet[rb.shards.length];
    queryPrefix = "{!field f=" + field + '}';
  }

  /** Merge one shard's term/count list into the global tallies. */
  void add(int shardNum, NamedList shardCounts, int numRequested) {
    int sz = shardCounts.size();
    int numReceived = sz;

    OpenBitSet terms = new OpenBitSet(termNum + sz);

    long last = 0;
    for (int i = 0; i < sz; i++) {
      String name = shardCounts.getName(i);
      long count = ((Number) shardCounts.getVal(i)).longValue();
      if (name == null) {
        missingCount += count;
        numReceived--;
      } else {
        ShardFacetCount sfc = counts.get(name);
        if (sfc == null) {
          sfc = new ShardFacetCount();
          sfc.name = name;
          sfc.termNum = termNum++;
          counts.put(name, sfc);
        }
        sfc.count += count;
        terms.fastSet(sfc.termNum);
        last = count;
      }
    }

    // the largest possible missing term is 0 if we received less
    // than the number requested (provided mincount==0 like it should be for
    // a shard request)
    if (numRequested < 0 || (numRequested != 0 && numReceived < numRequested)) {
      last = 0;
    }

    missingMaxPossible += last;
    missingMax[shardNum] = last;
    counted[shardNum] = terms;
  }

  /** Sort merged counts descending by count, breaking ties by term name. */
  ShardFacetCount[] getSorted() {
    ShardFacetCount[] arr = counts.values().toArray(new ShardFacetCount[counts.size()]);
    Arrays.sort(arr, new Comparator<ShardFacetCount>() {
      public int compare(ShardFacetCount o1, ShardFacetCount o2) {
        if (o2.count < o1.count) return -1;
        else if (o1.count < o2.count) return 1;
        // TODO: handle tiebreaks for types other than strings
        return o1.name.compareTo(o2.name);
      }
    });
    countSorted = arr;
    return arr;
  }

  /** Build the {!field f=field}value refinement query for one term. */
  String makeQuery(ShardFacetCount sfc) {
    return queryPrefix + sfc.name;
  }

  // returns the max possible value this ShardFacetCount could have for this shard
  // (assumes the shard did not report a count for this value)
  long maxPossible(ShardFacetCount sfc, int shardNum) {
    return missingMax[shardNum];
    // TODO: could store the last term in the shard to tell if this term
    // comes before or after it.  If it comes before, we could subtract 1
  }
}

/** One term's merged count across shards. */
class ShardFacetCount {
  String name;
  long count;
  int termNum; // term number starting at 0 (used in bit arrays)

  public String toString() {
    return "{term=" + name + ",termNum=" + termNum + ",count=" + count + "}";
  }
}
要了解一点就是其分布式处理相关的类:ResponseBuilder内部</span>的几个状态,顾名思义。
1.STAGE_START
2.STAGE_PARSE_QUERY
3.STAGE_EXECUTE_QUERY
4.STAGE_GET_FIELDS
5.STAGE_DONE
对于DistribFieldFacet这个类,继承于FieldFacet,除了本身提取对应查询的参数能力外,还有一个特点就是存在一个HashMap对每个shard对应字段的计算有一个映射存储。
下次推出group by和solr parallel SQL的心得文章。