Lucene 中 norms 文件（.nvm / .nvd）的生成与 BM25 分数计算

最新推荐文章于 2024-07-19 17:37:28 发布

chuanyangwang

最新推荐文章于 2024-07-19 17:37:28 发布

阅读量163

点赞数

分类专栏： ES 文章标签： java

本文链接：https://blog.csdn.net/chuanyangwang/article/details/121098275

版权

ES 专栏收录该内容

50 篇文章 0 订阅

订阅专栏

    /**
     * Finishes this field: unless the field omits norms, records a norm value
     * under {@code docState.docID}, then finishes the per-field terms hash.
     *
     * @throws IOException declared by the signature; not raised directly here
     * @throws IllegalStateException if the similarity computes a norm of 0 for
     *         a field whose invert state has a non-zero length
     */
    public void finish() throws IOException {
      if (!fieldInfo.omitsNorms()) {
        // A field that exists in the document but produced no indexed tokens
        // keeps the default norm of zero.
        long computedNorm = 0;
        if (invertState.length != 0) {
          // Let the similarity derive the norm from the invert state.
          computedNorm = similarity.computeNorm(invertState);
          if (computedNorm == 0) {
            throw new IllegalStateException("Similarity " + similarity + " return 0 for non-empty field");
          }
        }
        norms.addValue(docState.docID, computedNorm);
      }

      termsHashPerField.finish();
    }

BM25 中 norm 的计算方式（computeNorm）以及 scorer 的创建：

  /**
   * Computes the norm for a field from its invert state.
   *
   * <p>The term count is chosen as follows: the unique term count when the
   * field's index options are {@code DOCS} and the index was created with
   * major version 8 or later; otherwise the field length, reduced by the
   * number of overlaps when {@code discountOverlaps} is set. The count is then
   * passed through {@link SmallFloat#intToByte4(int)}.
   *
   * @param state invert state of the field being indexed
   * @return the result of {@code SmallFloat.intToByte4} for the chosen count
   */
  @Override
  public final long computeNorm(FieldInvertState state) {
    final boolean countUniqueTerms =
        state.getIndexOptions() == IndexOptions.DOCS && state.getIndexCreatedVersionMajor() >= 8;

    final int termCount;
    if (countUniqueTerms) {
      termCount = state.getUniqueTermCount();
    } else {
      final int fieldLength = state.getLength();
      termCount = discountOverlaps ? fieldLength - state.getNumOverlap() : fieldLength;
    }
    return SmallFloat.intToByte4(termCount);
  }

  /**
   * Builds a {@code BM25Scorer} for the given boost and statistics.
   *
   * <p>The idf explanation comes from the single-term or the multi-term
   * {@code idfExplain} overload, depending on how many term statistics were
   * passed. A 256-entry table is filled up front with one precomputed value
   * per index of {@code LENGTH_TABLE}.
   *
   * @param boost boost handed to the scorer
   * @param collectionStats statistics used for idf and average field length
   * @param termStats statistics of the term(s) being scored
   * @return a new {@code BM25Scorer}
   */
  @Override
  public final SimScorer scorer(float boost, CollectionStatistics collectionStats, TermStatistics... termStats) {
    final Explanation idf;
    if (termStats.length == 1) {
      idf = idfExplain(collectionStats, termStats[0]);
    } else {
      idf = idfExplain(collectionStats, termStats);
    }
    final float avgdl = avgFieldLength(collectionStats);

    // Precompute the value for every table index and cache it, so scoring
    // only needs an array lookup.
    final float[] cache = new float[256];
    for (int code = 0; code < cache.length; code++) {
      cache[code] = 1f / (k1 * ((1 - b) + b * LENGTH_TABLE[code] / avgdl));
    }
    return new BM25Scorer(boost, k1, b, idf, avgdl, cache);
  }

计算分数：BM25Scorer 利用缓存计算分数，LeafSimScorer 负责读取每个文档的 norm：

    /**
     * Stores the scoring parameters and derives the combined weight.
     *
     * @param boost boost applied to the score
     * @param k1 BM25 {@code k1} parameter
     * @param b BM25 {@code b} parameter
     * @param idf explanation whose value is used as the idf factor
     * @param avgdl average field length
     * @param cache precomputed per-norm values, indexed by the low byte of the norm
     */
    BM25Scorer(float boost, float k1, float b, Explanation idf, float avgdl, float[] cache) {
      // Similarity parameters.
      this.k1 = k1;
      this.b = b;

      // Statistics for this scorer.
      this.boost = boost;
      this.idf = idf;
      this.avgdl = avgdl;
      this.cache = cache;

      // Boost and idf are constant for the scorer, so multiply them once.
      this.weight = boost * idf.getValue().floatValue();
    }

    /**
     * Computes the score from a term frequency and an encoded norm.
     *
     * @param freq term frequency
     * @param encodedNorm norm value; only its low 8 bits are used, as an index
     *        into {@code cache}
     * @return {@code weight - weight / (1 + freq * cache[norm byte])}
     */
    @Override
    public float score(float freq, long encodedNorm) {
      // In order to guarantee monotonicity with both freq and norm without
      // promoting to doubles, we rewrite freq / (freq + norm) to
      // 1 - 1 / (1 + freq * 1/norm).
      // freq * 1/norm is guaranteed to be monotonic for both freq and norm due
      // to the fact that multiplication and division round to the nearest
      // float. And then monotonicity is preserved through composition via
      // x -> 1 + x and x -> 1 - 1/x.
      // Finally we expand weight * (1 - 1 / (1 + freq * 1/norm)) to
      // weight - weight / (1 + freq * 1/norm), which runs slightly faster.
      // The cast to byte followed by & 0xFF yields an unsigned index in 0..255.
      float normInverse = cache[((byte) encodedNorm) & 0xFF];
      return weight - weight / (1f + freq * normInverse);
    }


  /**
   * Scores the given document for the supplied term document frequency by
   * reading the document's norm and delegating to the wrapped scorer.
   * This method must be called on non-decreasing sequences of doc ids.
   *
   * @param doc document id to score
   * @param freq term document frequency to assume for {@code doc}
   * @return the score produced by the wrapped scorer
   * @throws IOException if reading the norm fails
   * @see SimScorer#score(float, long)
   */
  public float score(int doc, float freq) throws IOException {
    final long norm = getNormValue(doc);
    return scorer.score(freq, norm);
  }

  /**
   * Returns the norm of {@code doc}, or {@code 1L} when no norms are available.
   *
   * @param doc document id to position the norms iterator on
   * @return the norm value at {@code doc}, or the default norm {@code 1L}
   * @throws IOException if advancing the norms iterator fails
   */
  private long getNormValue(int doc) throws IOException {
    if (norms == null) {
      return 1L; // default norm
    }
    final boolean positioned = norms.advanceExact(doc);
    assert positioned;
    return norms.longValue();
  }



---------------

  /** Norms of the scored field; {@code null} when scores are not needed. */
  private final NumericDocValues norms;

  /**
   * Sole constructor: Score documents of {@code reader} with {@code scorer}.
   *
   * @param scorer scorer to delegate to; must not be {@code null}
   * @param reader reader the norms of {@code field} are fetched from
   * @param field name of the field whose norms are read
   * @param needsScores when {@code false}, norms are not fetched at all
   * @throws IOException if fetching the norms fails
   */
  public LeafSimScorer(SimScorer scorer, LeafReader reader, String field, boolean needsScores) throws IOException {
    this.scorer = Objects.requireNonNull(scorer);
    if (needsScores) {
      this.norms = reader.getNormValues(field);
    } else {
      this.norms = null;
    }
  }





  /**
   * Returns the norms of {@code field}.
   *
   * @param field name of the field to look up
   * @return the norms obtained from the norms reader, or {@code null} when the
   *         field does not exist or does not index norms
   * @throws IOException if the norms cannot be obtained
   */
  @Override
  public final NumericDocValues getNormValues(String field) throws IOException {
    ensureOpen();
    final FieldInfo info = getFieldInfos().fieldInfo(field);
    final boolean normsIndexed = info != null && info.hasNorms();
    if (!normsIndexed) {
      // Field does not exist or does not index norms
      return null;
    }

    return getNormsReader().getNorms(info);
  }



      // A norms producer is created through the codec only when the field
      // infos report norms; otherwise none is kept.
      if (!coreFieldInfos.hasNorms()) {
        normsProducer = null;
      } else {
        normsProducer = codec.normsFormat().normsProducer(segmentReadState);
        assert normsProducer != null;
      }






  /** Norms format instance handed out by {@link #normsFormat()}. */
  private final NormsFormat normsFormat = new Lucene80NormsFormat();

  /**
   * Returns the norms format held by this instance.
   *
   * @return the {@code Lucene80NormsFormat} created at construction time
   */
  @Override
  public final NormsFormat normsFormat() {
    return this.normsFormat;
  }







/**
 * Norms format that creates {@code Lucene80NormsConsumer} and
 * {@code Lucene80NormsProducer} instances, using the {@code nvd} extension for
 * data and the {@code nvm} extension for metadata.
 */
public class Lucene80NormsFormat extends NormsFormat {

  /** Codec name passed for the norms data file. */
  private static final String DATA_CODEC = "Lucene80NormsData";
  /** Extension passed for the norms data file. */
  private static final String DATA_EXTENSION = "nvd";
  /** Codec name passed for the norms metadata file. */
  private static final String METADATA_CODEC = "Lucene80NormsMetadata";
  /** Extension passed for the norms metadata file. */
  private static final String METADATA_EXTENSION = "nvm";

  /** First version of this format. */
  static final int VERSION_START = 0;
  /** Current version of this format; equal to {@link #VERSION_START}. */
  static final int VERSION_CURRENT = VERSION_START;

  /** Sole Constructor */
  public Lucene80NormsFormat() {}

  /**
   * Creates the consumer for the given write state.
   *
   * @param state segment write state passed to the consumer
   * @return a new {@code Lucene80NormsConsumer}
   * @throws IOException if the consumer cannot be created
   */
  @Override
  public NormsConsumer normsConsumer(SegmentWriteState state) throws IOException {
    return new Lucene80NormsConsumer(
        state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
  }

  /**
   * Creates the producer for the given read state.
   *
   * @param state segment read state passed to the producer
   * @return a new {@code Lucene80NormsProducer}
   * @throws IOException if the producer cannot be created
   */
  @Override
  public NormsProducer normsProducer(SegmentReadState state) throws IOException {
    return new Lucene80NormsProducer(
        state, DATA_CODEC, DATA_EXTENSION, METADATA_CODEC, METADATA_EXTENSION);
  }
}

chuanyangwang

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
nvm，nvd生成和分数计算

public void finish() throws IOException { if (fieldInfo.omitsNorms() == false) { long normValue; if (invertState.length == 0) { // the field exists in this document, but it did not have // any indexed tokens, ...
复制链接

扫一扫