关闭

快速查询大量数据的hash_table

829人阅读 评论(0) 收藏 举报

      记得有一次面试题目:求一张图片中出现次数最多的RGB值。当时我将RGB值作为索引,将该RGB值出现的次数作为值放到std::map中,但百万级的数据插入显得有些慢,感觉不够好。前些日子我要处理100万条数据,使用了hash_table,并对hash_table做了一些研究。今天针对这个题目花了1个多小时又写了一个hash_table:将一张图片中的所有RGB值插入到hash_table中,查询速度在毫秒级,很适合处理百万级的数据。

头文件:

// Hash functor for int keys.
//
// Folds the bytes of the value, least-significant byte first, into an
// FNV-1 style hash (multiply by the prime 16777619, then XOR the byte).
// The previous version formatted the value with the non-standard _itoa and
// read each character through an `unsigned short&`, which accessed two bytes
// through a char lvalue; hashing the integer's bytes directly avoids both.
struct hashFun
{
    // Returns the hash of nVal. Equal inputs always give equal results.
    size_t operator()(int nVal) const
    {
        // Work on the unsigned bit pattern so shifting is well defined for
        // negative inputs.
        unsigned int uBits = static_cast<unsigned int>(nVal);

        unsigned long _hash = 2166136261UL;   // FNV offset basis
        for (unsigned int i = 0; i < sizeof(uBits); ++i)
        {
            _hash *= 16777619;                // FNV prime
            _hash ^= static_cast<unsigned long>(uBits & 0xFFu);
            uBits >>= 8;
        }
        return _hash;
    }
};

// One entry of a bucket's singly linked chain: a stored value, its hash,
// and how many times that value has been inserted.
struct tableNode
{
 int    nValue;            // the stored value (Insert() copies its argument here)
 DWORD    nKey;            // hash of nValue, produced by hashFun in Insert()
 int    nCount;            // number of occurrences of this element
 tableNode*  pNext;        // next node in the same bucket's chain; NULL at the end
};
// Per-bucket header: the head of the bucket's node chain plus summary fields
// that FindMaxCount() reads instead of walking the chain.
struct BoxVec
{
 int     nCount;        // number of distinct values in this bucket (different values may land in the same bucket)
 int     nMaxNum;       // occurrence count associated with the value recorded in nVal
 int     nVal;          // value reported for this bucket; FindMaxCount() returns it for the bucket with the largest nMaxNum
 tableNode*  pHead;     // first node of the bucket's chain; zeroed (empty) when the bucket array is created
};

class CMyHasTable 
{
public:
 CMyHasTable(const int* pBoxSize, int nBoxCount, int nLevel);
 ~CMyHasTable();
private:
 int        m_nBoxCount;        //桶增长级数个数
 const int*       m_pBoxSize;         //桶级数的值
 
 int        m_nBox;           //桶的大小
 int        m_nLow;           //桶个数的最小值
 int        m_nHight;         //桶个数的最大值
 int        m_nLevel;         //当前桶级数的索引
 
 BoxVec*    m_pBoxs;
 int        m_nSize;         //元素的个数
public:
 void  Insert(int nVale);
 int   FindMaxCount();
private:
 inline DWORD GetBoxIndex(DWORD key) const
 {
  return key & (m_nBox - 1);
 }

 inline DWORD GetBoxIndex(DWORD key, int nNewBox) const
 {
  return key & (nNewBox - 1);
 }
private:
 tableNode* Find(tableNode node);
 void IncreaseLevel();
 void SwitchLevel(int nNewLevel);
 void UpBoxMaxCount(int nIndex);
};

.cpp文件

// Builds an empty table whose bucket count is pBoxSize[nLevel].
//   pBoxSize  : array of bucket counts per growth level (pointer is stored, not copied)
//   nBoxCount : number of entries in pBoxSize
//   nLevel    : index of the initial level within pBoxSize
CMyHasTable::CMyHasTable(const int* pBoxSize, int nBoxCount, int nLevel)
    : m_nBoxCount(nBoxCount)
    , m_pBoxSize(pBoxSize)
    , m_nLevel(nLevel)
    , m_nSize(0)
{
    // The growth threshold starts out equal to the bucket count.
    const int nInitialBuckets = pBoxSize[nLevel];
    m_nBox = nInitialBuckets;
    m_nHight = nInitialBuckets;

    // Lower bound: half of the previous level's size, or 0 at the first level.
    if (nLevel > 0)
        m_nLow = pBoxSize[nLevel - 1] / 2;
    else
        m_nLow = 0;

    // Every bucket starts with a null head and zeroed summary fields.
    m_pBoxs = new BoxVec[nInitialBuckets];
    memset(m_pBoxs, 0, sizeof(BoxVec) * nInitialBuckets);
}

// Frees every node of every bucket chain, then the bucket array itself.
CMyHasTable::~CMyHasTable()
{
    for (int nBucket = 0; nBucket < m_nBox; ++nBucket)
    {
        for (tableNode* pCur = m_pBoxs[nBucket].pHead; pCur != NULL; )
        {
            // Step past the node before destroying it.
            tableNode* pDoomed = pCur;
            pCur = pCur->pNext;
            delete pDoomed;
        }
    }
    delete[] m_pBoxs;
}

void CMyHasTable::Insert(int nVale)
{
 tableNode valNode;
 valNode.nValue = nVale;
 valNode.nKey = hashFun()(nVale);
 valNode.pNext = NULL;

 tableNode* pNode = Find(valNode);
 if (pNode == NULL)
 {
   m_nSize ++;

         tableNode* pNew = new tableNode;
   *pNew = valNode;

   pNew->nCount = 1;
  
   DWORD dwBoxIndex = GetBoxIndex(valNode.nKey);
   pNew->pNext = m_pBoxs[dwBoxIndex].pHead;
   m_pBoxs[dwBoxIndex].pHead = pNew;

   m_pBoxs[dwBoxIndex].nCount++;
   m_pBoxs[dwBoxIndex].nMaxNum = 1;
   m_pBoxs[dwBoxIndex].nVal = nVale;

   if (m_nSize >= m_nHight)
   IncreaseLevel();

 }
 else
 {
  pNode->nCount++;
  DWORD dwBoxIndex = GetBoxIndex(pNode->nKey);
  UpBoxMaxCount(dwBoxIndex);  
 }

}

int CMyHasTable::FindMaxCount()
{
   int nMaxCount = m_pBoxs[0].nMaxNum;
   int nIndex = 0;
   for (int i = 1; i < m_nBox; i++)
   {
    if (nMaxCount < m_pBoxs[i].nMaxNum)
    {
     nMaxCount = m_pBoxs[i].nMaxNum;
     nIndex = i;
    }
   }
   return m_pBoxs[nIndex].nVal;
}


void CMyHasTable::UpBoxMaxCount(int nIndex)
{
    tableNode* p = m_pBoxs[nIndex].pHead;
 tableNode* q = p;
    int nMax = p->nCount;
 while(p->pNext)
 {
  if (nMax < p->nCount)
  {
   nMax = p->nCount;
   q = p;
  }
  p = p->pNext;
 }
 m_pBoxs[nIndex].nMaxNum = nMax;
 m_pBoxs[nIndex].nVal = q->nValue;
}

// Looks up the node that stores node.nValue.
// Walks the chain of the bucket selected by node.nKey and returns the first
// entry whose key and value both match, or NULL if there is none.
tableNode* CMyHasTable::Find(tableNode node)
{
    const DWORD dwBucket = GetBoxIndex(node.nKey);
    for (tableNode* pCur = m_pBoxs[dwBucket].pHead; pCur != NULL; pCur = pCur->pNext)
    {
        // Compare the hash first, then the value itself.
        if (pCur->nKey == node.nKey && pCur->nValue == node.nValue)
            return pCur;
    }
    return NULL;
}


void CMyHasTable::IncreaseLevel()
{
  if (m_nLevel < m_nBoxCount - 1)
   SwitchLevel(m_nLevel + 1);
  else
   m_nHight = 0x20000000;
}

void CMyHasTable::SwitchLevel(int nNewLevel)
{
    m_nLevel = nNewLevel;
 int nNewBox = m_pBoxSize[m_nLevel];
 BoxVec* pNewBoxs = new BoxVec[nNewBox]; 
 memset(pNewBoxs, 0, sizeof(BoxVec) * nNewBox);


    for (int i = 0; i < m_nBox; ++i)
 {
  tableNode* p = m_pBoxs[i].pHead;
  tableNode*  q = NULL;;
        int nCout = m_pBoxs[i].nCount;
  int nMax = m_pBoxs[i].nMaxNum;
  while (p != NULL)
  {
   q = p->pNext;
   DWORD sht_key = GetBoxIndex(p->nKey, nNewBox);
   p->pNext = pNewBoxs[sht_key].pHead;
   pNewBoxs[sht_key].pHead = p;
   p = q;

   pNewBoxs[sht_key].nCount = nCout;
   pNewBoxs[sht_key].nMaxNum = nMax;

  }
 }
 
 m_nHight = m_nBox = m_pBoxSize[m_nLevel];
 m_nLow = m_nLevel > 0 ? m_pBoxSize[m_nLevel - 1]/2 : 0;
 
 delete m_pBoxs;
 m_pBoxs = pNewBoxs;
}

0
0

查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:33902次
    • 积分:566
    • 等级:
    • 排名:千里之外
    • 原创:21篇
    • 转载:3篇
    • 译文:0篇
    • 评论:7条
    最新评论