DiskIndexQword_c::GetNextDoc()


/// Small fixed-size bit vector of iTOTALBITS (256) bits, stored as an array
/// of IELEMENTS machine words of type ELTYPE.
///
/// The class deliberately declares no constructor, destructor, copy
/// constructor or assignment operator so that it can live inside unions.
/// As a consequence the class itself never initializes its storage; owners
/// are expected to call Unset(), Set() or Assign32() before reading bits.
class CSphSmallBitvec
{
public:
	static const int iTOTALBITS = 256;	///< total number of bits in the vector

private:
	typedef unsigned long ELTYPE;								///< storage word type
	static const int iELEMBITS = sizeof ( ELTYPE ) * 8;			///< bits per storage word
	static const int iBYTESIZE = iTOTALBITS / 8;				///< size of the vector, in bytes
	static const int IELEMENTS = iTOTALBITS / iELEMBITS;		///< number of storage words
	static const ELTYPE uALLBITS = ~(0UL);						///< storage word with every bit set
	STATIC_ASSERT ( IELEMENTS>=1, 8_BITS_MINIMAL_SIZE_OF_VECTOR );

private:
	ELTYPE m_dFieldsMask[IELEMENTS];

public:
	// no custom ctor and dtor - to be usable from inside unions
	// memberwise copy is fine for it - so, no explicit copy constructor and operator=

	/// legacy layer to work with a DWORD (32-bit) mask:
	/// clears the whole vector, then stores uMask into the first storage word,
	/// so that all the bits above the low 32 end up unset
	void Assign32 ( DWORD uMask )
	{
		Unset();
		m_dFieldsMask[0] = uMask;
	}

	/// returns the low 32 bits of the vector as a DWORD mask
	DWORD GetMask32 () const
	{
		return (DWORD) ( m_dFieldsMask[0] & 0xFFFFFFFFUL );
	}

	/// set the iIdx-th bit; any negative index (the default) sets ALL the bits
	void Set ( int iIdx=-1 )
	{
		assert ( iIdx < iTOTALBITS );
		if ( iIdx<0 )
		{
			for ( int i=0; i<IELEMENTS; i++ )
				m_dFieldsMask[i] = uALLBITS;
			return;
		}
		// iIdx/iELEMBITS picks the word, the low bits of iIdx pick the bit within it
		m_dFieldsMask[iIdx/iELEMBITS] |= 1UL << ( iIdx & ( iELEMBITS-1 ) );
	}

	/// unset the iIdx-th bit; any negative index (the default) unsets ALL the bits
	void Unset ( int iIdx=-1 )
	{
		assert ( iIdx < iTOTALBITS );
		if ( iIdx<0 )
		{
			for ( int i=0; i<IELEMENTS; i++ )
				m_dFieldsMask[i] = 0UL;
			return;
		}
		m_dFieldsMask[iIdx/iELEMBITS] &= ~(1UL << ( iIdx & ( iELEMBITS-1 ) ));
	}

	/// test whether the iIdx-th bit is set; iIdx must be within [0, iTOTALBITS)
	bool Test ( int iIdx ) const
	{
		assert ( iIdx>=0 && iIdx<iTOTALBITS );
		return ( m_dFieldsMask[iIdx/iELEMBITS] & ( 1UL << ( iIdx & ( iELEMBITS-1 ) ) ) )!=0;
	}

	/// test against the given mask (with &-operator);
	/// returns true if at least one bit is set in both vectors
	bool Test ( const CSphSmallBitvec& dParam ) const
	{
		for ( int i=0; i<IELEMENTS; i++ )
			if ( m_dFieldsMask[i] & dParam.m_dFieldsMask[i] )
				return true;
		return false;
	}

	/// test whether all the bits are set (bSet=true) or all are unset (bSet=false)
	bool TestAll ( bool bSet=false ) const
	{
		ELTYPE uTest = bSet?uALLBITS:0;
		for ( int i=0; i<IELEMENTS; i++ )
			if ( m_dFieldsMask[i]!=uTest )
				return false;
		return true;
	}

	/// returns the number of set bits within the low 32 bits,
	/// as counted by sphBitCount() over GetMask32()
	unsigned short NumOfBits32 () const
	{
		return (unsigned short) sphBitCount ( GetMask32() );
	}

	// bitwise and comparison operators; declared as friends here, defined elsewhere
	friend CSphSmallBitvec operator & ( const CSphSmallBitvec& dFirst, const CSphSmallBitvec& dSecond );
	friend CSphSmallBitvec operator | ( const CSphSmallBitvec& dFirst, const CSphSmallBitvec& dSecond );
	friend bool operator == ( const CSphSmallBitvec& dFirst, const CSphSmallBitvec& dSecond );

	/// OR all the bits of dSecond into this vector; self-assignment is a no-op
	CSphSmallBitvec& operator |= ( const CSphSmallBitvec& dSecond )
	{
		if ( &dSecond!=this )
			for ( int i=0; i<IELEMENTS; i++ )
				m_dFieldsMask[i] |= dSecond.m_dFieldsMask[i];
		return *this;
	}

	/// cut out all the bits over the given number, ie. keep bits [0, iBits)
	/// and unset the rest; a limit of iTOTALBITS or more changes nothing,
	/// a negative limit is treated as 0 and unsets everything
	void LimitBits ( int iBits )
	{
		if ( iBits>=iTOTALBITS )
			return;

		// a negative limit would index before the array and/or shift by
		// a negative amount (both undefined behavior), so clamp it
		if ( iBits<0 )
			iBits = 0;

		int iMaskPos = iBits / iELEMBITS;
		ELTYPE uMask = ( 1UL << ( iBits % iELEMBITS ) ) - 1;
		m_dFieldsMask[iMaskPos++] &= uMask; // partially kept word
		for ( ; iMaskPos < IELEMENTS; iMaskPos++ )
			m_dFieldsMask[iMaskPos] = 0UL; // words entirely over the limit
	}

	/// flip every bit in the vector
	void Negate()
	{
		for ( int i=0; i<IELEMENTS; i++ )
			m_dFieldsMask[i] = ~m_dFieldsMask[i];
	}
};


/// a single query term as the searcher sees it: the word data, its
/// per-wordlist statistics, and the state of iterating over its matches
class ISphQword
{
public:
	// filled in by the query parser
	CSphString		m_sWord;		///< own copy of the word
	CSphString		m_sDictWord;	///< the word after dictionary processing (eg. stemming)
	SphWordID_t		m_iWordID;		///< dictionary word ID
	int				m_iTermPos;
	int				m_iAtomPos;		///< position of the word in the query
	bool			m_bExpanded;	///< set when the word was added by prefix expansion
	bool			m_bExcluded;	///< set when the query excludes the word (rval to operator NOT)

	// filled in by QwordSetup()
	int				m_iDocs;		///< wordlist document count
	int				m_iHits;		///< wordlist hit count
	bool			m_bHasHitlist;	///< whether a hitlist is present

	// iteration state
	CSphSmallBitvec m_dFields;		///< fields of the current match
	bool			m_bAllFieldsKnown; ///< whether m_dFields covers all the match fields, or only the low 32
	DWORD			m_uMatchHits;	///< hits count of the current match
	SphOffset_t		m_iHitlistPos;	///< current hitlist position, as read from the doclist

public:
	/// zeroes all the counters and state; a hitlist is assumed present by default
	ISphQword ()
		: m_iWordID ( 0 )
		, m_iTermPos ( 0 )
		, m_iAtomPos ( 0 )
		, m_bExpanded ( false )
		, m_bExcluded ( false )
		, m_iDocs ( 0 )
		, m_iHits ( 0 )
		, m_bHasHitlist ( true )
		, m_bAllFieldsKnown ( false )
		, m_uMatchHits ( 0 )
		, m_iHitlistPos ( 0 )
	{
		// the bitvector has no ctor of its own, so clear it by hand
		m_dFields.Unset();
	}

	virtual ~ISphQword ()
	{
	}

	virtual const CSphMatch &	GetNextDoc ( DWORD * pInlineDocinfo ) = 0;
	virtual void				SeekHitlist ( SphOffset_t uOff ) = 0;
	virtual Hitpos_t			GetNextHit () = 0;

	/// default implementation collects nothing and only raises the "all fields known" flag
	virtual void CollectHitMask ()
	{
		m_bAllFieldsKnown = true;
	}

	/// drop the wordlist statistics and the iteration state back to their initial values
	virtual void Reset ()
	{
		// statistics
		m_iHits = 0;
		m_iDocs = 0;

		// iteration state
		m_iHitlistPos = 0;
		m_uMatchHits = 0;
		m_bAllFieldsKnown = false;
		m_dFields.Unset();
	}
};



/// query word from the searcher's point of view
class DiskIndexQwordTraits_c : public ISphQword
{
	static const int	MINIBUFFER_LEN = 1024;

public:
	SphOffset_t		m_uHitPosition;
	Hitpos_t		m_uInlinedHit;
	DWORD			m_uHitState;

	bool			m_bDupe;		///< whether the word occurs only once in current query

	CSphMatch		m_tDoc;			///< current match (partial)
	Hitpos_t		m_iHitPos;		///< current hit postition, from hitlist

	BYTE			m_dDoclistBuf [ MINIBUFFER_LEN ];
	BYTE			m_dHitlistBuf [ MINIBUFFER_LEN ];
	CSphReader		m_rdDoclist;	///< my doclist reader
	CSphReader		m_rdHitlist;	///< my hitlist reader

	SphDocID_t		m_iMinID;		///< min ID to fixup
	int				m_iInlineAttrs;	///< inline attributes count
	CSphRowitem *	m_pInlineFixup;	///< inline attributes fixup (POINTER TO EXTERNAL DATA, NOT MANAGED BY THIS CLASS!)

#ifndef NDEBUG
	bool			m_bHitlistOver;
#endif

public:
	explicit DiskIndexQwordTraits_c ( bool bUseMini, bool bExcluded )
		: m_uHitPosition ( 0 )
		, m_uHitState ( 0 )
		, m_bDupe ( false )
		, m_iHitPos ()
		, m_rdDoclist ( bUseMini ? m_dDoclistBuf : NULL, bUseMini ? MINIBUFFER_LEN : 0 )
		, m_rdHitlist ( bUseMini ? m_dHitlistBuf : NULL, bUseMini ? MINIBUFFER_LEN : 0 )
		, m_iMinID ( 0 )
		, m_iInlineAttrs ( 0 )
		, m_pInlineFixup ( NULL )
#ifndef NDEBUG
		, m_bHitlistOver ( true )
#endif
	{
		m_iHitPos = EMPTY_HIT;
		m_bExcluded = bExcluded;
	}
};


/// query word from the searcher's point of view
template < bool INLINE_HITS, bool INLINE_DOCINFO, bool DISABLE_HITLIST_SEEK >
class DiskIndexQword_c : public DiskIndexQwordTraits_c
{
public:
	explicit DiskIndexQword_c ( bool bUseMinibuffer, bool bExcluded )
		: DiskIndexQwordTraits_c ( bUseMinibuffer, bExcluded )
	{
	}

	virtual void Reset ()
	{
		m_uHitPosition = 0;
		m_uHitState = 0;
		m_rdDoclist.Reset ();
		m_rdHitlist.Reset ();
		ISphQword::Reset();
		m_iHitPos = EMPTY_HIT;
		m_iInlineAttrs = 0;
	}

	void GetHitlistEntry ()
	{
		assert ( !m_bHitlistOver );
		DWORD iDelta = m_rdHitlist.UnzipInt ();
		if ( iDelta )
		{
			m_iHitPos += iDelta;
		} else
		{
			m_iHitPos = EMPTY_HIT;
#ifndef NDEBUG
			m_bHitlistOver = true;
#endif
		}
	}

	virtual void CollectHitMask()
	{
		SeekHitlist ( m_iHitlistPos );
		for ( Hitpos_t uHit = GetNextHit(); uHit!=EMPTY_HIT; uHit = GetNextHit() )
			m_dFields.Set ( HITMAN::GetField ( uHit ) );
		m_bAllFieldsKnown = true;
	}

	virtual const CSphMatch & GetNextDoc ( DWORD * pDocinfo )
	{
		SphDocID_t iDelta = m_rdDoclist.UnzipDocid();
		if ( iDelta )
		{
			m_bAllFieldsKnown = false;
			m_tDoc.m_iDocID += iDelta;
			if ( INLINE_DOCINFO )
			{
				assert ( pDocinfo );
				for ( int i=0; i<m_iInlineAttrs; i++ )
					pDocinfo[i] = m_rdDoclist.UnzipInt() + m_pInlineFixup[i];
			}

			if ( INLINE_HITS )
			{
				m_uMatchHits = m_rdDoclist.UnzipInt();
				const DWORD uFirst = m_rdDoclist.UnzipInt();
				if ( m_uMatchHits==1 )
				{
					const DWORD uField = m_rdDoclist.UnzipInt(); // field and end marker
					m_iHitlistPos = uFirst | ( uField << 23 ) | ( U64C(1)<<63 );
					m_dFields.Unset();
					m_dFields.Set ( uField >> 1 );
					m_bAllFieldsKnown = true;
				} else
				{
					m_dFields.Assign32 ( uFirst );
					m_uHitPosition += m_rdDoclist.UnzipOffset();
					m_iHitlistPos = m_uHitPosition;
				}
			} else
			{
				SphOffset_t iDeltaPos = m_rdDoclist.UnzipOffset();
				assert ( iDeltaPos>=0 );

				m_iHitlistPos += iDeltaPos;

				m_dFields.Assign32 ( m_rdDoclist.UnzipInt() );
				m_uMatchHits = m_rdDoclist.UnzipInt();
			}
		} else
		{
			m_tDoc.m_iDocID = 0;
		}
		return m_tDoc;
	}

	virtual void SeekHitlist ( SphOffset_t uOff )
	{
		if ( uOff >> 63 )
		{
			m_uHitState = 1;
			m_uInlinedHit = (DWORD)uOff; // truncate high dword
		} else
		{
			m_uHitState = 0;
			m_iHitPos = EMPTY_HIT;
			if ( DISABLE_HITLIST_SEEK )
				assert ( m_rdHitlist.GetPos()==uOff ); // make sure we're where caller thinks we are.
			else
				m_rdHitlist.SeekTo ( uOff, READ_NO_SIZE_HINT );
		}
#ifndef NDEBUG
		m_bHitlistOver = false;
#endif
	}

	virtual Hitpos_t GetNextHit ()
	{
		assert ( m_bHasHitlist );
		switch ( m_uHitState )
		{
			case 0: // read hit from hitlist
				GetHitlistEntry ();
				return m_iHitPos;

			case 1: // return inlined hit
				m_uHitState = 2;
				return m_uInlinedHit;

			case 2: // return end-of-hitlist marker after inlined hit
				#ifndef NDEBUG
				m_bHitlistOver = true;
				#endif
				m_uHitState = 0;
				return EMPTY_HIT;
		}
		sphDie ( "INTERNAL ERROR: impossible hit emitter state" );
		return EMPTY_HIT;
	}
};



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值