Lucene 源码分析集------- BitSet & OpenBitSet

BitSet 主要是对 boost 库中的 dynamic_bitset 做了一层封装,在其基础上补充了区间操作、越界时自动 resize,以及判断两个位集是否相等等功能。

/// Bit set whose storage is a boost::dynamic_bitset with 64-bit blocks.
///
/// Only declarations are visible here. Any note below that is not implied directly by a
/// signature is marked as an assumption to be confirmed against the implementation.
class LPPAPI BitSet : public LuceneObject {
public:
    /// Constructs a bit set.
    /// @param size defaults to 0. NOTE(review): presumably the initial number of bits - confirm.
    BitSet(uint32_t size = 0);
    virtual ~BitSet();

    LUCENE_CLASS(BitSet);

protected:
    /// Storage type: dynamic bitset using uint64_t blocks.
    typedef boost::dynamic_bitset<uint64_t> bitset_type;
    bitset_type bitSet;

public:
    /// Returns a read-only pointer to 64-bit words.
    /// NOTE(review): presumably the raw blocks of bitSet; lifetime/invalidation rules are
    /// not visible here - confirm before holding the pointer across mutations.
    const uint64_t* getBits();

    // Each mutator below comes as a pair: name() and fastName().
    // NOTE(review): presumably the plain variant validates/grows the set for out-of-range
    // indices while the fast variant assumes the index is already in range - confirm.
    // NOTE(review): for the (fromIndex, toIndex) overloads, confirm whether toIndex is
    // inclusive or exclusive.

    /// Clears bits: all of them, a single bit, or a range.
    void clear();
    void clear(uint32_t bitIndex);
    void fastClear(uint32_t bitIndex);
    void clear(uint32_t fromIndex, uint32_t toIndex);
    void fastClear(uint32_t fromIndex, uint32_t toIndex);

    /// Sets a single bit or a range, optionally to an explicit value.
    void set(uint32_t bitIndex);
    void fastSet(uint32_t bitIndex);
    void set(uint32_t bitIndex, bool value);
    void fastSet(uint32_t bitIndex, bool value);
    void set(uint32_t fromIndex, uint32_t toIndex);
    void fastSet(uint32_t fromIndex, uint32_t toIndex);
    void set(uint32_t fromIndex, uint32_t toIndex, bool value);
    void fastSet(uint32_t fromIndex, uint32_t toIndex, bool value);

    /// Flips a single bit or a range.
    void flip(uint32_t bitIndex);
    void fastFlip(uint32_t bitIndex);
    void flip(uint32_t fromIndex, uint32_t toIndex);
    void fastFlip(uint32_t fromIndex, uint32_t toIndex);

    /// Size of the set. NOTE(review): presumably in bits - confirm.
    uint32_t size() const;

    /// Number of storage blocks. NOTE(review): presumably 64-bit blocks of bitSet - confirm.
    uint32_t numBlocks() const;

    /// NOTE(review): confirm whether "empty" means zero size or no bits set.
    bool isEmpty() const;

    /// Reads the bit at bitIndex (see the plain/fast note above).
    bool get(uint32_t bitIndex) const;
    bool fastGet(uint32_t bitIndex) const;

    /// Finds a set bit searching from fromIndex.
    /// NOTE(review): the signed return type suggests a negative "not found" sentinel
    /// (presumably -1), and whether fromIndex itself is included is not visible - confirm.
    int32_t nextSetBit(uint32_t fromIndex) const;

    /// In-place bitwise combinations with another set (AND, OR, XOR, AND-NOT by name).
    /// NOTE(review): handling of operands with different sizes is not visible here.
    void _and(const BitSetPtr& set);
    void _or(const BitSetPtr& set);
    void _xor(const BitSetPtr& set);
    void andNot(const BitSetPtr& set);

    /// NOTE(review): presumably true when this set and `set` share at least one set bit.
    bool intersectsBitSet(const BitSetPtr& set) const;

    /// NOTE(review): presumably the number of bits set to true.
    uint32_t cardinality();

    /// Resizes the set to `size`. NOTE(review): presumably in bits - confirm.
    void resize(uint32_t size);

    virtual bool equals(const LuceneObjectPtr& other);
    virtual int32_t hashCode();
    virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr());
};

 

相对于 BitSet,OpenBitSet 提供了更多的操作接口,用于访问和操作底层存储的 bits 数据。

/// DocIdSet implementation that stores its bits in a LongArray of 64-bit words and exposes
/// that storage (getBits/setBits, getNumWords/setNumWords) to callers.
class LPPAPI OpenBitSet : public DocIdSet {
public:
    /// Constructs an OpenBitSet large enough to hold numBits.
    OpenBitSet(int64_t numBits = 64);

    /// Constructs an OpenBitSet from an existing LongArray.
    ///
    /// The first 64 bits are in long[0], with bit index 0 at the least significant bit, and bit
    /// index 63 at the most significant.  Given a bit index, the word containing it is long[index/64],
    /// and it is at bit number index%64 within that word.
    ///
    /// numWords are the number of elements in the array that contain set bits (non-zero longs).
    /// numWords should be <= bits.length(), and any existing words in the array at position >=
    /// numWords should be zero.
    OpenBitSet(LongArray bits, int32_t numWords);

    virtual ~OpenBitSet();

    LUCENE_CLASS(OpenBitSet);

protected:
    /// Word storage for the bits.
    LongArray bits;
    int32_t wlen; // number of words (elements) used in the array

public:
    /// Returns an iterator for this DocIdSet.
    /// NOTE(review): presumably iterates the indices of set bits - confirm.
    virtual DocIdSetIteratorPtr iterator();

    /// This DocIdSet implementation is cacheable.
    virtual bool isCacheable();

    /// Returns the current capacity in bits (1 greater than the index of the last bit)
    int64_t capacity();

    /// Returns the current capacity of this set.  Included for compatibility.  This is *not*
    /// equal to {@link #cardinality}
    int64_t size();

    /// Returns true if there are no set bits
    bool isEmpty();

    /// Returns the long[] storing the bits
    LongArray getBits();

    /// Sets a new long[] to use as the bit storage
    void setBits(LongArray bits);

    /// Gets the number of longs in the array that are in use
    int32_t getNumWords();

    /// Sets the number of longs in the array that are in use
    void setNumWords(int32_t numWords);

    /// Returns true or false for the specified bit index.
    bool get(int32_t index);

    /// Returns true or false for the specified bit index.
    /// The index should be less than the OpenBitSet size
    bool fastGet(int32_t index);

    /// Returns true or false for the specified bit index
    bool get(int64_t index);

    /// Returns true or false for the specified bit index.
    /// The index should be less than the OpenBitSet size.
    bool fastGet(int64_t index);

    /// Returns 1 if the bit is set, 0 if not.
    /// The index should be less than the OpenBitSet size
    int32_t getBit(int32_t index);

    /// Sets a bit, expanding the set size if necessary
    void set(int64_t index);

    /// Sets the bit at the specified index.
    /// The index should be less than the OpenBitSet size.
    void fastSet(int32_t index);

    /// Sets the bit at the specified index.
    /// The index should be less than the OpenBitSet size.
    void fastSet(int64_t index);

    /// Sets a range of bits, expanding the set size if necessary
    /// @param startIndex lower index
    /// @param endIndex one-past the last bit to set
    void set(int64_t startIndex, int64_t endIndex);

    /// Clears a bit.
    /// The index should be less than the OpenBitSet size.
    void fastClear(int32_t index);

    /// Clears a bit.
    /// The index should be less than the OpenBitSet size.
    void fastClear(int64_t index);

    /// Clears a bit, allowing access beyond the current set size without changing the size.
    void clear(int64_t index);

    /// Clears a range of bits.  Clearing past the end does not change the size of the set.
    /// @param startIndex lower index
    /// @param endIndex one-past the last bit to clear
    void clear(int32_t startIndex, int32_t endIndex);

    /// Clears a range of bits.  Clearing past the end does not change the size of the set.
    /// @param startIndex lower index
    /// @param endIndex one-past the last bit to clear
    void clear(int64_t startIndex, int64_t endIndex);

    /// Sets a bit and returns the previous value.
    /// The index should be less than the OpenBitSet size.
    bool getAndSet(int32_t index);

    /// Sets a bit and returns the previous value.
    /// The index should be less than the OpenBitSet size.
    bool getAndSet(int64_t index);

    /// Flips a bit.
    /// The index should be less than the OpenBitSet size.
    void fastFlip(int32_t index);

    /// Flips a bit.
    /// The index should be less than the OpenBitSet size.
    void fastFlip(int64_t index);

    /// Flips a bit, expanding the set size if necessary
    void flip(int64_t index);

    /// Flips a bit and returns the resulting bit value.
    /// The index should be less than the OpenBitSet size.
    bool flipAndGet(int32_t index);

    /// Flips a bit and returns the resulting bit value.
    /// The index should be less than the OpenBitSet size.
    bool flipAndGet(int64_t index);

    /// Flips a range of bits, expanding the set size if necessary
    /// @param startIndex lower index
    /// @param endIndex one-past the last bit to flip
    void flip(int64_t startIndex, int64_t endIndex);

    /// @return the number of set bits
    int64_t cardinality();

    /// Returns the popcount or cardinality of the intersection of the two sets.
    /// Neither set is modified.
    static int64_t intersectionCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b);

    /// Returns the popcount or cardinality of the union of the two sets.
    /// Neither set is modified.
    static int64_t unionCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b);

    /// Returns the popcount or cardinality of "a and not b" or "intersection(a, not(b))".
    /// Neither set is modified.
    static int64_t andNotCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b);

    /// Returns the popcount or cardinality of the exclusive-or of the two sets.
    /// Neither set is modified.
    static int64_t xorCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b);

    /// Returns the index of the first set bit starting at the index specified.
    /// -1 is returned if there are no more set bits.
    int32_t nextSetBit(int32_t index);

    /// Returns the index of the first set bit starting at the index specified.
    /// -1 is returned if there are no more set bits.
    int64_t nextSetBit(int64_t index);

    /// Returns a copy of this object.
    /// NOTE(review): the role of `other` (defaults to an empty pointer) is not visible here;
    /// presumably an optional pre-allocated target - confirm.
    virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr());

    /// this = this AND other
    void intersect(const OpenBitSetPtr& other);

    /// this = this OR other
    void _union(const OpenBitSetPtr& other);

    /// Remove all elements set in other. this = this AND_NOT other
    void remove(const OpenBitSetPtr& other);

    /// this = this XOR other
    void _xor(const OpenBitSetPtr& other);

    /// see {@link intersect}
    void _and(const OpenBitSetPtr& other);

    /// see {@link _union}
    void _or(const OpenBitSetPtr& other);

    /// see {@link remove}
    void andNot(const OpenBitSetPtr& other);

    /// Returns true if the sets have any elements in common
    bool intersects(const OpenBitSetPtr& other);

    /// Expand the LongArray with the size given as a number of words (64 bit longs).
    /// getNumWords() is unchanged by this call.
    void ensureCapacityWords(int32_t numWords);

    /// Ensure that the LongArray is big enough to hold numBits, expanding it if necessary.
    /// getNumWords() is unchanged by this call.
    void ensureCapacity(int64_t numBits);

    /// Lowers numWords, the number of words in use, by checking for trailing zero words.
    void trimTrailingZeros();

    /// Returns the number of 64 bit words it would take to hold numBits.
    static int32_t bits2words(int64_t numBits);

    /// Returns true if both sets have the same bits set
    virtual bool equals(const LuceneObjectPtr& other);

    /// Hash code for this set.
    /// NOTE(review): presumably consistent with equals() - confirm.
    virtual int32_t hashCode();

protected:
    /// NOTE(review): presumably returns the index of the word containing bit `index`,
    /// growing the storage / words-in-use count when needed - confirm against the
    /// implementation.
    int32_t expandingWordNum(int64_t index);
};
Lucene是一个全文检索引擎,它的核心数据结构包括倒排索引和正排索引。其中,倒排索引是Lucene最重要的数据结构之一,它通过将文档中的每个词都映射到包含该词的文档列表来实现快速的文本搜索。 Lucene中的Term Dictionary和Term Index是倒排索引中的两个重要组成部分。Term Dictionary用于存储所有唯一的词项(term),而Term Index则用于快速定位某个词项的位置。 在Lucene中,Term Dictionary和Term Index通常存储在磁盘上。Term Dictionary通常使用一种称为Trie树的数据结构来实现。Trie树是一种树形数据结构,它可以快速地查找某个字符串是否存在,以及在字符串集合中查找前缀匹配的字符串。 Term Index则通常存储在一个称为倒排索引表(Inverted Index Table)的结构中。倒排索引表是由一系列的倒排索引条目(Inverted Index Entry)组成的,每个倒排索引条目包含了一个词项及其在倒排索引中的位置信息,例如该词项在文档列表中出现的次数、该词项在哪些文档中出现等。 当进行文本搜索时,Lucene会首先在Term Dictionary中查找搜索关键词是否存在,然后通过Term Index快速定位到包含该词的文档列表,最后根据文档列表中的文档ID查找正排索引中具体的文档内容。这种基于倒排索引的搜索方式可以实现非常高效的文本搜索,是Lucene等全文检索引擎的核心技术之一。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值