c++ 封装哈希表（Hash）以及实现迭代器（iterator）

最新推荐文章于 2024-08-09 22:42:55 发布

sumkee911

最新推荐文章于 2024-08-09 22:42:55 发布

阅读量3.5k

点赞数

分类专栏： c/c++ 文章标签： c++ c hash iterator 迭代器

本文链接：https://blog.csdn.net/sumkee911/article/details/50326439

版权

c/c++ 专栏收录该内容

6 篇文章 0 订阅

订阅专栏

程序功能:

1.封装哈希表(Hash)

2.以及实现了迭代器(iterator)

3.允许string类、整型变量、浮点型变量、字符型变量等作为key值

废话不多说，马上上源码

Hash.h

/*
 *  Name: Hash
 *  Date: 14-12-2015
 *  Author: Sumkee
 *  Brief: Hash table module
 *         1. Accepts string objects, integer, floating-point and character values as keys
 *         2. Also implements its own iterator
 *
 */ 

#ifndef __HASH_H_
#define __HASH_H_

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <stdint.h>
#include <iterator>


using namespace std;


/*
 * Hash: key/value table built on three parallel arrays (values, keys and
 * per-slot occupancy flags) that share one index space. Slot indices are
 * derived from the key through the hash_keyindex() overloads and then
 * resolved with the __hash_null_keyindex() / __hash_keyindex_by_key()
 * probing helpers declared below.
 */
template<typename __Key, typename __Value>
class Hash {
public:
    enum {
        MaxKey = 10000000,    // Maximum amount of data that can be stored
        MaxCharLenght = 128,  // Longest string
    };

    Hash();
    Hash(Hash<__Key, __Value> &src);
    ~Hash();

    // Accessors
    // Number of entries currently stored.
    inline size_t count() 
    {   return m_val_count; }
    
    // Number of slots in the table.
    inline size_t table_size() 
    {   return m_tb_size;   };   
    
    // Look up the value stored for `key`.
    // Returns a copy of the stored value, or a copy of *m_empty when the key
    // is not found.
    const __Value operator[](__Key key) 
    {   long index;
        if((index = __hash_keyindex_by_key(key, hash_keyindex(key))) > -1) return m_table[index];
        else return *m_empty;    }

    // Same as operator[] above.
    const __Value value(__Key key) 
    {   long index;
        if((index = __hash_keyindex_by_key(key, hash_keyindex(key))) > -1) return m_table[index];
        else return *m_empty;    }
    // Check whether `key` exists in the table.
    bool has_key(__Key key)
    {   return __hash_keyindex_by_key(key, hash_keyindex(key)) > -1;    }

    // Reverse lookup: find the key that corresponds to a value.
    __Key key(__Value val);
    
    // Insertion / modification functions
    void insert(__Key key, __Value val);
    void insert(pair<__Key, __Value> &key_and_val);
    void remove(__Key key);
    void operator=(Hash<__Key,__Value> &src);
    void clear();

    // Iterator (used the same way as the ones provided by the STL)
    class __Iterator;
    typedef __Iterator iterator;
    
    // Iterator to the first occupied slot; equals end() when no slot is
    // occupied, because the scan stops at m_tb_size.
    __Iterator begin() 
    {   int i=0;
        for(; i< m_tb_size && !m_is_setted[i]; ++i);
        return iterator(this, i);   }
    
    // Past-the-end iterator (index == m_tb_size).
    __Iterator end()
    {   return iterator(this, m_tb_size);   }

private:
    __Value *m_table;           // Values, indexed by slot
    __Key *m_keys;              // Keys, indexed by slot (parallel to m_table)
    bool *m_is_setted;          // Whether a given slot of m_table / m_keys holds inserted data
    long m_tb_size;             // Table size, i.e. the length of m_table / m_keys
    long m_val_count;           // Total number of inserted entries
    __Value *m_oldtable;      
    __Key *m_oldkeys;
    bool *m_old_issetted;
    long m_old_tbsize;
    __Value *m_empty;           // Empty value object, returned when a lookup finds nothing

    /*  Name: hash_keyindex
     *  Brief: Maps a key to the index where its data lives, i.e. m_table[index].
     *         These overloads mainly select on the key type; key types other
     *         than the ones listed below will fail.
     *         The index itself is computed by __hash_keyindex.
     *  return: the index corresponding to the key
     */
    long hash_keyindex(int8_t);
    long hash_keyindex(int16_t);
    long hash_keyindex(int32_t);
    long hash_keyindex(int64_t);
    long hash_keyindex(uint8_t);
    long hash_keyindex(uint16_t);
    long hash_keyindex(uint32_t);
    long hash_keyindex(uint64_t);
    long hash_keyindex(float);
    long hash_keyindex(double);
    long hash_keyindex(string &);
    long __hash_keyindex(u_char *first, size_t size);
    
    /*  Name:  __hash_null_keyindex 
     *  Brief: hash_keyindex only computes an index; it does not tell whether
     *         that index is already occupied. To obtain an index that is
     *         guaranteed to be usable, call hash_keyindex first and then
     *         pass its result to __hash_null_keyindex.
     *  return: the index corresponding to the key
     */
    long __hash_null_keyindex(__Key &key, long key_index);

    /*  Name: __hash_keyindex_by_key
     *  Brief: Finds the index at which an existing key is stored; this is the
     *         core of the lookup functions (value(), operator[]).
     *         hash_keyindex must be called first to obtain key_index.
     *  return: the index if the key exists, otherwise -1
     */ 
    long __hash_keyindex_by_key(__Key &key, long key_index);

    /*  Name: reallocate
     *  Brief: Re-allocates the memory behind m_table, m_keys, m_is_setted, etc.
     */ 
    void reallocate(long size);
    
    /*  Name: need_rehash, rehash
     *  Brief: need_rehash decides whether a larger allocation is required and,
     *         if so, calls reallocate. After allocating, reallocate calls
     *         rehash to re-insert the old entries into the new storage so
     *         that no data is lost.
     */ 
    void need_rehash();
    void rehash();
    
    /*  Name: put_error
     *  Brief: Prints an error message
     */ 
    void put_error(string str) 
    {   cout << "Hash:" << str << endl;  }
};

#endif

Hash.cpp

#include "hash.h"

/*
 * Default constructor: builds an empty table with 10 slots.
 *
 * Every data member is initialised, in declaration order, before
 * reallocate() runs. The m_old* scratch members therefore never hold
 * indeterminate values, and the initialiser list no longer disagrees with
 * the real initialisation order.
 */
template<typename __Key, typename __Value>
Hash<__Key, __Value>::Hash()
    : m_table(0), m_keys(0), m_is_setted(0), m_tb_size(0), m_val_count(0),
      m_oldtable(0), m_oldkeys(0), m_old_issetted(0), m_old_tbsize(0),
      m_empty(0) {
    // Default table size is 10 slots.
    reallocate(10);
}

/*
 * Copy constructor: allocates a table of the same size as `src` and
 * re-inserts every occupied slot of `src` into it.
 *
 * Every data member is initialised, in declaration order, before
 * reallocate() runs. The slot index is a long, matching m_tb_size.
 */
template<typename __Key, typename __Value>
Hash<__Key, __Value>::Hash(Hash<__Key, __Value> &src)
    : m_table(0), m_keys(0), m_is_setted(0), m_tb_size(0), m_val_count(0),
      m_oldtable(0), m_oldkeys(0), m_old_issetted(0), m_old_tbsize(0),
      m_empty(0) {
    reallocate(src.m_tb_size);
    for(long i=0; i<src.m_tb_size; ++i) {
        if(src.m_is_setted[i]) {
            insert(src.m_keys[i], src.m_table[i]);
        }
    }
}

/*
 * Destructor: releases the three table arrays and the "empty value" object.
 *
 * It deliberately does NOT call clear(): clear() re-allocates a fresh
 * 10-slot table and a new m_empty after freeing the old ones, and those
 * would be leaked every time a Hash is destroyed.
 * The m_old* members are not freed here because reallocate() already
 * releases them before it returns.
 */
template<typename __Key, typename __Value>
Hash<__Key, __Value>::~Hash() {
    delete [] m_keys;
    delete [] m_table;
    delete [] m_is_setted;
    delete m_empty;
}

/*
 * Assignment: replaces the contents of this table with a copy of `src`.
 *
 * Self-assignment returns immediately. Without that check, clear() would
 * discard the entries before they could be copied and the table would end
 * up empty.
 */
template<typename __Key, typename __Value>
void 
Hash<__Key, __Value>::operator=(Hash<__Key,__Value> &src) {
    if(this == &src) {
        return;
    }

    // Drop the current contents.
    clear();

    // Copy every occupied slot of the source table.
    reallocate(src.m_tb_size);
    for(long i=0; i<src.m_tb_size; ++i) {
        if(src.m_is_setted[i]) {
            insert(src.m_keys[i], src.m_table[i]);
        }
    }
}

/*
 * Removes every entry: frees the key, value and flag arrays and the
 * "empty value" object, resets the counters, then allocates a fresh
 * 10-slot table through reallocate().
 */
template<typename __Key, typename __Value>
void 
Hash<__Key, __Value>::clear() {
    // delete / delete[] on a null pointer is a no-op, so no null checks are needed.
    delete [] m_keys;
    delete [] m_table;
    delete [] m_is_setted;
    delete m_empty;

    m_empty = 0;
    m_is_setted = 0;
    m_table = 0;
    m_keys = 0;

    m_val_count = 0;
    m_tb_size = 0;

    reallocate(10);
}

/*
 * Reverse lookup: scans the table for the first occupied slot whose value
 * equals `val` and returns that slot's key.
 *
 * Returns a value-initialised __Key when no slot matches. Previously control
 * fell off the end of this non-void function, which is undefined behaviour.
 * A value-initialised key cannot be told apart from a real key with the same
 * value, so callers that need certainty should check has_key() on the result.
 */
template<typename __Key, typename __Value>
__Key 
Hash<__Key, __Value>::key(__Value val) {
    for(long i=0; i<m_tb_size; ++i) {
        if(m_is_setted[i] && val == m_table[i]) {
            return m_keys[i];
        }
    }
    return __Key();
}

/*
 * Slot index for an int8_t key: the bytes of the (by-value) key are handed
 * to __hash_keyindex, whose result is returned unchanged.
 */
template<typename __Key, typename __Value>
long 
Hash<__Key,__Value>::hash_keyindex(int8_t key) {
    return __hash_keyindex(reinterpret_cast<u_char*>(&key), sizeof(key));
}

/*
 * Slot index for an int16_t key: the bytes of the (by-value) key are handed
 * to __hash_keyindex, whose result is returned unchanged.
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::hash_keyindex(int16_t key) {
    return __hash_keyindex(reinterpret_cast<u_char*>(&key), sizeof(key));
}

/*
 * Slot index for an int32_t key: the bytes of the (by-value) key are handed
 * to __hash_keyindex, whose result is returned unchanged.
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::hash_keyindex(int32_t key) {
    return __hash_keyindex(reinterpret_cast<u_char*>(&key), sizeof(key));
}

/*
 * Slot index for an int64_t key: the bytes of the (by-value) key are handed
 * to __hash_keyindex, whose result is returned unchanged.
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::hash_keyindex(int64_t key) {
    return __hash_keyindex(reinterpret_cast<u_char*>(&key), sizeof(key));
}

/*
 * Slot index for a uint8_t key: the bytes of the (by-value) key are handed
 * to __hash_keyindex, whose result is returned unchanged.
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::hash_keyindex(uint8_t key) {
    return __hash_keyindex(reinterpret_cast<u_char*>(&key), sizeof(key));
}

/*
 * Slot index for a uint16_t key: the bytes of the (by-value) key are handed
 * to __hash_keyindex, whose result is returned unchanged.
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::hash_keyindex(uint16_t key) {
    return __hash_keyindex(reinterpret_cast<u_char*>(&key), sizeof(key));
}

/*
 * Slot index for a uint32_t key: the bytes of the (by-value) key are handed
 * to __hash_keyindex, whose result is returned unchanged.
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::hash_keyindex(uint32_t key) {
    return __hash_keyindex(reinterpret_cast<u_char*>(&key), sizeof(key));
}

/*
 * Slot index for a uint64_t key: the bytes of the (by-value) key are handed
 * to __hash_keyindex, whose result is returned unchanged.
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::hash_keyindex(uint64_t key) {
    return __hash_keyindex(reinterpret_cast<u_char*>(&key), sizeof(key));
}

/*
 * Slot index for a float key, computed from the key's bytes by
 * __hash_keyindex.
 *
 * -0.0f and +0.0f compare equal with ==, which is how the probing helpers
 * match keys, but their byte patterns differ. The key is normalised to
 * +0.0f first so that keys which compare equal also start probing from the
 * same slot.
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::hash_keyindex(float key) {
    if(key == 0.0f) {
        key = 0.0f;     // collapses -0.0f onto +0.0f
    }
    u_char *p = reinterpret_cast<u_char*>(&key);
    return __hash_keyindex(p, sizeof(float));
}

/*
 * Slot index for a double key, computed from the key's bytes by
 * __hash_keyindex.
 *
 * -0.0 and +0.0 compare equal with ==, which is how the probing helpers
 * match keys, but their byte patterns differ. The key is normalised to +0.0
 * first so that keys which compare equal also start probing from the same
 * slot.
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::hash_keyindex(double key) {
    if(key == 0.0) {
        key = 0.0;      // collapses -0.0 onto +0.0
    }
    u_char *p = reinterpret_cast<u_char*>(&key);
    return __hash_keyindex(p, sizeof(double));
}

/*
 * Slot index for a string key, computed over the string's bytes by
 * __hash_keyindex.
 *
 * The bytes are hashed in place. The previous version first copied them into
 * a MaxCharLenght-byte stack buffer with memcpy(key.size()), which overflowed
 * the buffer for any key longer than MaxCharLenght. Hashing in place yields
 * the same index for keys that used to fit, and removes the length limit.
 *
 * The const_cast is needed only because __hash_keyindex takes a non-const
 * pointer; it reads the bytes and never writes through it.
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::hash_keyindex(string &key) {
    u_char *bytes = reinterpret_cast<u_char*>(const_cast<char*>(key.data()));
    return __hash_keyindex(bytes, key.size());
}

/*
 * The hash function itself: folds the `size` bytes starting at `first` into
 * an unsigned accumulator and reduces it modulo the current table size.
 *
 * return: an index in [0, m_tb_size)
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::__hash_keyindex(u_char *first, size_t size) {
    ulong acc = 0;
    u_char *const stop = first + size;
    for(u_char *cur = first; cur != stop; ++cur) {
        // XOR the next byte into the scaled accumulator, then scale again.
        acc = *cur ^ acc*0x1579532586542ul;
        acc *= 0x35715ul;
    }
    return acc%m_tb_size; 
}

/*
 * Linear probe for a slot in which `key` can be stored. Starting at the slot
 * after `key_index` and wrapping at the end of the table, it returns the
 * first slot that is either unoccupied or already holds `key`.
 *
 * The loop has no other exit, so it relies on such a slot existing.
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::__hash_null_keyindex(__Key &key, long key_index) {
    long slot = key_index;
    do {
        slot = (slot + 1) % m_tb_size;
        // Keep going while the slot is taken by a different key.
    } while(m_is_setted[slot] && !(m_keys[slot] == key));
    return slot;
}

/*
 * Linear probe for an existing key. Starting at the slot after `key_index`
 * and wrapping at the end of the table, it walks occupied slots until it
 * finds `key`, reaches an unoccupied slot, or has come back round to
 * `key_index`.
 *
 * return: the slot holding `key`, or -1 if the key was not found
 */
template<typename __Key, typename __Value>
long 
Hash<__Key, __Value>::__hash_keyindex_by_key(__Key &key, long key_index) {
    const long start = key_index;
    long slot = start;
    do {
        slot = (slot + 1) % m_tb_size;
        if(!m_is_setted[slot]) {
            return -1;          // hit a free slot: the probe chain ends here
        }
        if(m_keys[slot] == key) {
            return slot;
        }
    } while(slot != start);
    return -1;
}

/*
 * Replaces the key / value / flag arrays with freshly allocated arrays of
 * `size` slots and re-inserts the previous contents through rehash().
 *
 * size: requested number of slots. Requests above MaxKey are reported
 *       through put_error() and ignored.
 *
 * Changes from the previous version:
 *  - All three arrays are allocated into locals before any member is touched.
 *    If an allocation (or a __Key/__Value constructor) throws, the partial
 *    allocations are released, the error is reported, the exception is
 *    rethrown and the table is left as it was. The old `catch(string)` could
 *    never match either std::bad_alloc or the thrown string literal, and a
 *    failure after the first `new` left the members half replaced.
 *  - The old flag array comes from new[] and is now released with delete[].
 *  - Arrays and the "empty value" object are value-initialised. For built-in
 *    types this means insert() no longer compares against an indeterminate
 *    key, and a failed lookup returns a defined value.
 *  - The m_old* scratch pointers are reset after the old arrays are freed so
 *    they do not dangle.
 */
template<typename __Key, typename __Value>
void 
Hash<__Key, __Value>::reallocate(long size) {
    if(size > MaxKey) {
        put_error("too many key");
        return;
    }

    // Allocate the value handed back by failed lookups.
    if(m_empty == 0) {
        m_empty = new __Value();
    }

    // Allocate the new storage first; no member is modified until all three
    // allocations have succeeded.
    __Key *new_keys = 0;
    __Value *new_table = 0;
    bool *new_flags = 0;
    try {
        new_keys = new __Key[size]();
        new_table = new __Value[size]();
        new_flags = new bool[size]();     // every slot starts unoccupied
    } catch(...) {
        delete [] new_keys;
        delete [] new_table;
        delete [] new_flags;
        put_error("memory allocation error");
        throw;
    }

    // Park the current storage where rehash() expects to find it.
    m_oldkeys = m_keys;
    m_oldtable = m_table;
    m_old_issetted = m_is_setted;
    m_old_tbsize = m_tb_size;

    // Switch over to the new, empty storage.
    m_keys = new_keys;
    m_table = new_table;
    m_is_setted = new_flags;
    m_tb_size = size;
    m_val_count = 0;

    // Re-insert the old entries into the new storage.
    rehash();

    // Release the old storage.
    delete [] m_oldkeys;
    delete [] m_oldtable;
    delete [] m_old_issetted;
    m_oldkeys = 0;
    m_oldtable = 0;
    m_old_issetted = 0;
    m_old_tbsize = 0;
}

/*
 * Grows the table when it is getting full: once the occupied fraction
 * reaches 0.7 the table size is doubled (capped at MaxKey) through
 * reallocate(). Does nothing when the table is already at MaxKey slots.
 */
template<typename __Key, typename __Value> 
void 
Hash<__Key, __Value>::need_rehash() {
    if(m_tb_size >= MaxKey) {
        return;
    }

    const double load = static_cast<double>(m_val_count) / m_tb_size;
    if(!(load >= 0.7)) {
        return;
    }

    long grown = m_tb_size*2;
    if(grown >= MaxKey) {
        grown = MaxKey;
    }
    reallocate(grown);
}

/*
 * Re-inserts every occupied slot of the old storage (m_oldkeys / m_oldtable /
 * m_old_issetted, m_old_tbsize slots) into the current table via insert().
 */
template<typename __Key, typename __Value> 
void 
Hash<__Key, __Value>::rehash() {
    long slot = 0;
    while(slot < m_old_tbsize) {
        if(m_old_issetted[slot]) {
            insert(m_oldkeys[slot], m_oldtable[slot]);
        }
        ++slot;
    }
}

/*
 * Stores `val` under `key`. If the key is already present its value is
 * overwritten; otherwise a new entry is created and the entry count grows
 * by one.
 *
 * When the table already holds MaxKey entries the call is reported through
 * put_error() and ignored.
 */
template<typename __Key, typename __Value>
void 
Hash<__Key, __Value>::insert(__Key key, __Value val) {
    if(m_val_count >= MaxKey) {
        put_error("too many key");
        return;
    }

    // Grow the table first if it is getting full.
    need_rehash();

    const long slot = __hash_null_keyindex(key, hash_keyindex(key));
    const bool already_present = (m_keys[slot] == key) && m_is_setted[slot];

    // The value is written in both cases.
    m_table[slot] = val;
    if(already_present) {
        return;
    }

    // New entry: record the key and mark the slot as occupied.
    m_keys[slot] = key;
    m_is_setted[slot] = true;
    ++m_val_count;
}

/*
 * Convenience overload: inserts the pair's `first` as the key and its
 * `second` as the value by forwarding to insert(key, val).
 */
template<typename __Key, typename __Value>
void 
Hash<__Key, __Value>::insert(pair<__Key, __Value> &key_and_val) {
    __Key &k = key_and_val.first;
    __Value &v = key_and_val.second;
    insert(k, v);
}

/*
 * Removes `key` and its value from the table; does nothing if the key is
 * absent.
 *
 * Lookups stop at the first unoccupied slot they meet. Simply clearing the
 * occupancy flag therefore made every key stored later in the same run of
 * occupied slots unreachable, and allowed insert() to create duplicates of
 * them. After freeing the slot, each entry in the run that follows it is
 * taken out and placed again with the normal probing helper, so no entry is
 * left stranded behind the new gap.
 */
template<typename __Key, typename __Value>
void 
Hash<__Key, __Value>::remove(__Key key) {
    long key_index = __hash_keyindex_by_key(key, hash_keyindex(key));
    if(key_index == -1) {
        return;
    }

    // Free the slot that held the key.
    m_is_setted[key_index] = false;
    --m_val_count;

    // Re-place the entries that follow in the same run of occupied slots.
    // The slot freed above guarantees the walk reaches an unoccupied slot.
    long cur = (key_index + 1) % m_tb_size;
    while(m_is_setted[cur]) {
        // Take the entry out so that its own slot counts as free while probing.
        m_is_setted[cur] = false;
        long dest = __hash_null_keyindex(m_keys[cur], hash_keyindex(m_keys[cur]));
        if(dest != cur) {
            m_keys[dest] = m_keys[cur];
            m_table[dest] = m_table[cur];
        }
        m_is_setted[dest] = true;
        cur = (cur + 1) % m_tb_size;
    }
}

/*
 * Iterator over the occupied slots of a Hash, in slot order.
 *
 * Dereferencing yields a copy of the current (key, value) pair, so the table
 * cannot be modified through the iterator. The past-the-end position is
 * hash_index == m_tb_size of the owning table.
 *
 * Changes from the previous version:
 *  - operator== compared iter.hash_index with itself, so any two iterators
 *    on the same table compared equal; it now compares both indices.
 *  - operator== / operator!= are const and take their argument by const
 *    reference.
 *  - The "find next occupied slot" loop, previously duplicated in both
 *    increment operators, lives in one private helper and uses a long index
 *    to match hash_index / m_tb_size.
 *  - Prefix ++ returns a reference to the iterator instead of a copy.
 */
template<typename __Key, typename __Value>
class Hash<__Key, __Value>::__Iterator : 
public std::iterator<std::input_iterator_tag, std::pair<__Key, __Value> > {
    Hash<__Key, __Value> *phash;            // table being iterated
    long hash_index;                        // current slot index
    pair<__Key, __Value> hash_data;         // storage backing the pointer returned by operator->

    // Moves hash_index to the next occupied slot, or to m_tb_size if there is none.
    void seek_next() {
        long i = hash_index + 1;
        while(i < phash->m_tb_size && !phash->m_is_setted[i]) {
            ++i;
        }
        hash_index = i;
    }
public:
    __Iterator() : phash(0) , hash_index(0) {}
    __Iterator(Hash<__Key, __Value> *p, long i) : phash(p) , hash_index(i) {}

    // Prefix increment: advance to the next occupied slot.
    __Iterator& operator++() {
        seek_next();
        return *this;
    }
    // Postfix increment: advance, and return the iterator as it was before.
    const __Iterator operator++(int) {
        __Iterator old(*this);
        seek_next();
        return old;
    }

    // Two iterators are equal when they refer to the same table and slot.
    bool operator==(const __Iterator &iter) const
    { return (iter.phash==phash) && (hash_index==iter.hash_index); }

    bool operator!=(const __Iterator &iter) const
    { return !(*this == iter); }

    // Returns a copy of the key and value at the current slot.
    const pair<__Key,__Value> operator*() 
    { return pair<__Key, __Value>(phash->m_keys[hash_index], phash->m_table[hash_index]); }
    
    // Copies the current key and value into hash_data and returns its
    // address; the pointed-to pair is overwritten by the next call.
    const pair<__Key,__Value>* operator->() 
    {   hash_data.first = phash->m_keys[hash_index];
        hash_data.second = phash->m_table[hash_index];
        return &hash_data; }
};

/***************************************************/
// 测试
// Demo driver: fills a string->string table, looks up and removes entries,
// then prints everything that is left by walking the table with an iterator.
int main() {
    Hash<string, string> hash;

    // Populate
    hash.insert("a", "apple");
    hash.insert("b", "boy");
    hash.insert("c", "cat");
    hash.insert("d", "dog");
    hash.insert("f", "fire");

    // Lookup
    cout << hash["a"] << endl;
    cout << hash["c"] << endl;
    cout << hash["f"] << endl;

    // Removal: looking up "c" afterwards yields the empty value
    hash.remove("c");
    cout << "not cat:" << hash["c"] << endl; 

    // Traverse the remaining entries with the iterator
    Hash<string,string>::iterator iter = hash.begin();
    while(iter != hash.end()) {
        cout << iter->first << ":" << iter->second << endl;
        ++iter;
    }
    return 0;
}

备注：本人编程新手，有错误请多多指点，谢谢。