/// Small fixed-size bit vector of iTOTALBITS (256) bits.
///
/// Bits are packed into an array of ELTYPE words: bit N lives in word
/// N/iELEMBITS, at position N%iELEMBITS inside that word.
///
/// The class intentionally declares no constructor, destructor, copy
/// constructor or operator=, so that it can be placed inside unions; the
/// implicit member-wise copy is a correct deep copy. As a consequence the
/// class never initializes its own storage: call Unset(), Set() or
/// Assign32() before reading from a new instance.
class CSphSmallBitvec
{
public:
	static const int iTOTALBITS = 256;

private:
	typedef unsigned long ELTYPE;
	static const int iELEMBITS = sizeof ( ELTYPE ) * 8;		///< bits per storage word
	static const int iBYTESIZE = iTOTALBITS / 8;			///< size of the whole vector, in bytes
	static const int IELEMENTS = iTOTALBITS / iELEMBITS;	///< number of storage words
	static const ELTYPE uALLBITS = ~ELTYPE(0);				///< storage word with every bit set
	STATIC_ASSERT ( IELEMENTS>=1, 8_BITS_MINIMAL_SIZE_OF_VECTOR );

private:
	ELTYPE m_dFieldsMask[IELEMENTS];

public:
	// no custom cstr and d-tor - to be usable from inside unions
	// deep copy for it is ok - so, no explicit copying constructor and operator=

	// old-fashion layer to work with DWORD (32-bit) mask.
	// all bits above 32 assumed to be unset.

	/// Replace the whole vector with a 32-bit mask; bits 32 and above are cleared.
	void Assign32 ( DWORD uMask )
	{
		Unset();
		m_dFieldsMask[0] = uMask;
	}

	/// Return the lowest 32 bits of the vector as a DWORD mask.
	DWORD GetMask32 () const
	{
		return (DWORD) ( m_dFieldsMask[0] & 0xFFFFFFFFUL );
	}

	/// Set the n-th bit; a negative index (the default) sets ALL bits.
	/// The index must be below iTOTALBITS (checked by assert only).
	void Set ( int iIdx=-1 )
	{
		assert ( iIdx < iTOTALBITS );
		if ( iIdx<0 )
		{
			for ( int i=0; i<IELEMENTS; i++ )
				m_dFieldsMask[i] = uALLBITS;
			return;
		}
		m_dFieldsMask[iIdx/iELEMBITS] |= ELTYPE(1) << ( iIdx & ( iELEMBITS-1 ) );
	}

	/// Clear the n-th bit; a negative index (the default) clears ALL bits.
	/// The index must be below iTOTALBITS (checked by assert only).
	void Unset ( int iIdx=-1 )
	{
		assert ( iIdx < iTOTALBITS );
		if ( iIdx<0 )
		{
			for ( int i=0; i<IELEMENTS; i++ )
				m_dFieldsMask[i] = 0UL;
			return;
		}
		m_dFieldsMask[iIdx/iELEMBITS] &= ~( ELTYPE(1) << ( iIdx & ( iELEMBITS-1 ) ) );
	}

	/// Test whether the n-th bit is set; the index must be in [0, iTOTALBITS).
	bool Test ( int iIdx ) const
	{
		assert ( iIdx>=0 && iIdx<iTOTALBITS );
		return ( m_dFieldsMask[iIdx/iELEMBITS] & ( ELTYPE(1) << ( iIdx & ( iELEMBITS-1 ) ) ) )!=0;
	}

	/// Test against another vector (with &-operator):
	/// returns true when at least one bit is set in both vectors.
	bool Test ( const CSphSmallBitvec& dParam ) const
	{
		for ( int i=0; i<IELEMENTS; i++ )
			if ( m_dFieldsMask[i] & dParam.m_dFieldsMask[i] )
				return true;
		return false;
	}

	/// Test whether all bits share one state:
	/// with bSet=true returns true only when every bit is set,
	/// with bSet=false (the default) only when every bit is unset.
	bool TestAll ( bool bSet=false ) const
	{
		ELTYPE uTest = bSet?uALLBITS:0;
		for ( int i=0; i<IELEMENTS; i++ )
			if ( m_dFieldsMask[i]!=uTest )
				return false;
		return true;
	}

	/// Return the number of set bits among the lowest 32 bits.
	unsigned short NumOfBits32 () const
	{
		return (unsigned short) sphBitCount ( GetMask32() );
	}

	friend CSphSmallBitvec operator & ( const CSphSmallBitvec& dFirst, const CSphSmallBitvec& dSecond );
	friend CSphSmallBitvec operator | ( const CSphSmallBitvec& dFirst, const CSphSmallBitvec& dSecond );
	friend bool operator == ( const CSphSmallBitvec& dFirst, const CSphSmallBitvec& dSecond );

	/// In-place bitwise OR with another vector.
	CSphSmallBitvec& operator |= ( const CSphSmallBitvec& dSecond )
	{
		if ( &dSecond!=this )
			for ( int i=0; i<IELEMENTS; i++ )
				m_dFieldsMask[i] |= dSecond.m_dFieldsMask[i];
		return *this;
	}

	/// Keep only the lowest iBits bits and clear everything above them.
	/// Values of iTOTALBITS or more leave the vector untouched;
	/// zero or negative values clear the whole vector.
	void LimitBits ( int iBits )
	{
		if ( iBits>=iTOTALBITS )
			return;

		// a negative count would shift by a negative amount and index before
		// the array start (undefined behavior), so handle "keep nothing" up front
		if ( iBits<=0 )
		{
			Unset();
			return;
		}

		// mask the word containing the boundary, then wipe all the words above it
		int iMaskPos = iBits / iELEMBITS;
		ELTYPE uMask = ( ELTYPE(1) << ( iBits % iELEMBITS ) ) - 1;
		m_dFieldsMask[iMaskPos++] &= uMask;
		for ( ; iMaskPos < IELEMENTS; iMaskPos++ )
			m_dFieldsMask[iMaskPos] = 0UL;
	}

	/// Invert every bit of the vector.
	void Negate()
	{
		for ( int i=0; i<IELEMENTS; i++ )
			m_dFieldsMask[i] = ~m_dFieldsMask[i];
	}
};
/// Query term as seen by the searcher.
///
/// Holds the term identity (filled in by the query parser), wordlist
/// statistics (filled in by QwordSetup()), and the per-document iterator
/// state that concrete implementations update while walking the doclist.
class ISphQword
{
public:
	// --- filled in by the query parser ---
	CSphString		m_sWord;		///< own copy of the word
	CSphString		m_sDictWord;	///< the word after dictionary processing (eg. stemming)
	SphWordID_t		m_iWordID;		///< dictionary word ID
	int				m_iTermPos;
	int				m_iAtomPos;		///< position of the word within the query
	bool			m_bExpanded;	///< the term was added by prefix expansion
	bool			m_bExcluded;	///< the term is excluded by the query (rval to operator NOT)

	// --- filled in by QwordSetup() ---
	int				m_iDocs;		///< document count, taken from the wordlist
	int				m_iHits;		///< hit count, taken from the wordlist
	bool			m_bHasHitlist;	///< whether a hitlist is present

	// --- iterator state ---
	CSphSmallBitvec	m_dFields;			///< fields matched by the current document
	bool			m_bAllFieldsKnown;	///< true when m_dFields covers all fields, false when only the low 32 are known
	DWORD			m_uMatchHits;		///< hit count of the current match
	SphOffset_t		m_iHitlistPos;		///< hitlist position of the current match, taken from the doclist

public:
	/// Start with zeroed IDs, positions and counters, an empty field mask,
	/// and the hitlist assumed to be present.
	ISphQword ()
		: m_iWordID ( 0 ), m_iTermPos ( 0 ), m_iAtomPos ( 0 ),
		m_bExpanded ( false ), m_bExcluded ( false ),
		m_iDocs ( 0 ), m_iHits ( 0 ), m_bHasHitlist ( true ),
		m_bAllFieldsKnown ( false ), m_uMatchHits ( 0 ), m_iHitlistPos ( 0 )
	{
		// the bit vector has no constructor of its own, so clear it by hand
		m_dFields.Unset();
	}

	virtual ~ISphQword () {}

	/// Advance to the next document of this term.
	virtual const CSphMatch &	GetNextDoc ( DWORD * pInlineDocinfo ) = 0;

	/// Position the hit iterator at the given hitlist offset.
	virtual void				SeekHitlist ( SphOffset_t uOff ) = 0;

	/// Fetch the next hit of the current document.
	virtual Hitpos_t			GetNextHit () = 0;

	/// Make the field mask of the current document complete.
	/// The base version collects nothing and only raises the "all known" flag.
	virtual void CollectHitMask ()
	{
		m_bAllFieldsKnown = true;
	}

	/// Drop the wordlist statistics and the iterator state.
	/// Term identity fields and m_bHasHitlist are left as they are.
	virtual void Reset ()
	{
		// iterator state
		m_iHitlistPos = 0;
		m_uMatchHits = 0;
		m_bAllFieldsKnown = false;
		m_dFields.Unset();

		// wordlist statistics
		m_iHits = 0;
		m_iDocs = 0;
	}
};
/// query word from the searcher's point of view
class DiskIndexQwordTraits_c : public ISphQword
{
static const int MINIBUFFER_LEN = 1024;
public:
SphOffset_t m_uHitPosition;
Hitpos_t m_uInlinedHit;
DWORD m_uHitState;
bool m_bDupe; ///< whether the word occurs only once in current query
CSphMatch m_tDoc; ///< current match (partial)
Hitpos_t m_iHitPos; ///< current hit postition, from hitlist
BYTE m_dDoclistBuf [ MINIBUFFER_LEN ];
BYTE m_dHitlistBuf [ MINIBUFFER_LEN ];
CSphReader m_rdDoclist; ///< my doclist reader
CSphReader m_rdHitlist; ///< my hitlist reader
SphDocID_t m_iMinID; ///< min ID to fixup
int m_iInlineAttrs; ///< inline attributes count
CSphRowitem * m_pInlineFixup; ///< inline attributes fixup (POINTER TO EXTERNAL DATA, NOT MANAGED BY THIS CLASS!)
#ifndef NDEBUG
bool m_bHitlistOver;
#endif
public:
explicit DiskIndexQwordTraits_c ( bool bUseMini, bool bExcluded )
: m_uHitPosition ( 0 )
, m_uHitState ( 0 )
, m_bDupe ( false )
, m_iHitPos ()
, m_rdDoclist ( bUseMini ? m_dDoclistBuf : NULL, bUseMini ? MINIBUFFER_LEN : 0 )
, m_rdHitlist ( bUseMini ? m_dHitlistBuf : NULL, bUseMini ? MINIBUFFER_LEN : 0 )
, m_iMinID ( 0 )
, m_iInlineAttrs ( 0 )
, m_pInlineFixup ( NULL )
#ifndef NDEBUG
, m_bHitlistOver ( true )
#endif
{
m_iHitPos = EMPTY_HIT;
m_bExcluded = bExcluded;
}
};
/// query word from the searcher's point of view
///
/// Concrete doclist/hitlist iterator built on top of DiskIndexQwordTraits_c.
/// The three compile-time flags select how entries are decoded:
/// - INLINE_HITS: picks the doclist entry layout read by GetNextDoc(); in this
///   layout a document with exactly one hit carries that hit inside the doclist
/// - INLINE_DOCINFO: GetNextDoc() additionally reads m_iInlineAttrs values per
///   document into the caller-supplied buffer
/// - DISABLE_HITLIST_SEEK: SeekHitlist() does not seek the hitlist reader and
///   only asserts that it already is at the requested offset
template < bool INLINE_HITS, bool INLINE_DOCINFO, bool DISABLE_HITLIST_SEEK >
class DiskIndexQword_c : public DiskIndexQwordTraits_c
{
public:
/// Both arguments are passed straight to the DiskIndexQwordTraits_c constructor.
explicit DiskIndexQword_c ( bool bUseMinibuffer, bool bExcluded )
: DiskIndexQwordTraits_c ( bUseMinibuffer, bExcluded )
{
}
/// Reset hit decoding state, both readers, the base ISphQword state,
/// the current hit position and the inline attributes count.
virtual void Reset ()
{
m_uHitPosition = 0;
m_uHitState = 0;
m_rdDoclist.Reset ();
m_rdHitlist.Reset ();
ISphQword::Reset();
m_iHitPos = EMPTY_HIT;
m_iInlineAttrs = 0;
}
/// Read one delta-encoded entry from the hitlist reader into m_iHitPos.
/// A zero delta ends the list: m_iHitPos becomes EMPTY_HIT.
void GetHitlistEntry ()
{
assert ( !m_bHitlistOver );
DWORD iDelta = m_rdHitlist.UnzipInt ();
if ( iDelta )
{
m_iHitPos += iDelta;
} else
{
m_iHitPos = EMPTY_HIT;
#ifndef NDEBUG
m_bHitlistOver = true;
#endif
}
}
/// Walk all the hits of the current document, set the field bit of every hit
/// in m_dFields, then mark the field mask as complete.
virtual void CollectHitMask()
{
SeekHitlist ( m_iHitlistPos );
for ( Hitpos_t uHit = GetNextHit(); uHit!=EMPTY_HIT; uHit = GetNextHit() )
m_dFields.Set ( HITMAN::GetField ( uHit ) );
m_bAllFieldsKnown = true;
}
/// Decode the next doclist entry and return the (partial) current match.
/// A zero docid delta sets the docid to 0 and decodes nothing else
/// (presumably the end-of-doclist marker - confirm against the index format).
/// pDocinfo receives the inline attributes and must be non-NULL when INLINE_DOCINFO is set.
virtual const CSphMatch & GetNextDoc ( DWORD * pDocinfo )
{
SphDocID_t iDelta = m_rdDoclist.UnzipDocid();
if ( iDelta )
{
// a new document: its full field mask is not known yet
m_bAllFieldsKnown = false;
m_tDoc.m_iDocID += iDelta;
if ( INLINE_DOCINFO )
{
assert ( pDocinfo );
// every stored attribute value is offset by the matching fixup entry
for ( int i=0; i<m_iInlineAttrs; i++ )
pDocinfo[i] = m_rdDoclist.UnzipInt() + m_pInlineFixup[i];
}
if ( INLINE_HITS )
{
// layout: hit count, then a value whose meaning depends on that count
m_uMatchHits = m_rdDoclist.UnzipInt();
const DWORD uFirst = m_rdDoclist.UnzipInt();
if ( m_uMatchHits==1 )
{
// single hit: it is stored right here instead of in the hitlist
const DWORD uField = m_rdDoclist.UnzipInt(); // field and end marker
// pack the hit into the low 32 bits and raise bit 63,
// which SeekHitlist() checks to recognize an inlined hit
m_iHitlistPos = uFirst | ( uField << 23 ) | ( U64C(1)<<63 );
m_dFields.Unset();
// the field number sits above the lowest bit of uField
// (presumably that lowest bit is the end marker - confirm)
m_dFields.Set ( uField >> 1 );
m_bAllFieldsKnown = true;
} else
{
// several hits: uFirst is the 32-bit field mask,
// followed by a hitlist offset delta
m_dFields.Assign32 ( uFirst );
m_uHitPosition += m_rdDoclist.UnzipOffset();
m_iHitlistPos = m_uHitPosition;
}
} else
{
// layout: hitlist offset delta, 32-bit field mask, hit count
SphOffset_t iDeltaPos = m_rdDoclist.UnzipOffset();
assert ( iDeltaPos>=0 );
m_iHitlistPos += iDeltaPos;
m_dFields.Assign32 ( m_rdDoclist.UnzipInt() );
m_uMatchHits = m_rdDoclist.UnzipInt();
}
} else
{
m_tDoc.m_iDocID = 0;
}
return m_tDoc;
}
/// Prepare hit iteration for the given hitlist position.
/// When bit 63 of uOff is set, the low 32 bits hold a single inlined hit
/// and the hitlist reader is not touched; otherwise uOff is a hitlist offset.
virtual void SeekHitlist ( SphOffset_t uOff )
{
if ( uOff >> 63 )
{
// state 1: GetNextHit() will return the inlined hit
m_uHitState = 1;
m_uInlinedHit = (DWORD)uOff; // truncate high dword
} else
{
// state 0: GetNextHit() will read from the hitlist reader
m_uHitState = 0;
m_iHitPos = EMPTY_HIT;
if ( DISABLE_HITLIST_SEEK )
assert ( m_rdHitlist.GetPos()==uOff ); // make sure we're where caller thinks we are.
else
m_rdHitlist.SeekTo ( uOff, READ_NO_SIZE_HINT );
}
#ifndef NDEBUG
m_bHitlistOver = false;
#endif
}
/// Return the next hit of the current document, or EMPTY_HIT when there are no more.
/// Driven by m_uHitState: 0 reads from the hitlist, 1 returns the inlined hit,
/// 2 returns the end marker that follows an inlined hit and goes back to state 0.
/// Any other state value is reported through sphDie().
virtual Hitpos_t GetNextHit ()
{
assert ( m_bHasHitlist );
switch ( m_uHitState )
{
case 0: // read hit from hitlist
GetHitlistEntry ();
return m_iHitPos;
case 1: // return inlined hit
m_uHitState = 2;
return m_uInlinedHit;
case 2: // return end-of-hitlist marker after inlined hit
#ifndef NDEBUG
m_bHitlistOver = true;
#endif
m_uHitState = 0;
return EMPTY_HIT;
}
sphDie ( "INTERNAL ERROR: impossible hit emitter state" );
return EMPTY_HIT;
}
};