技术在于交流、沟通。本文为博主原创文章,转载请注明出处,并保持作品的完整性。
在前面我介绍过hash的使用,本次主要介绍一下Hash Function
Hash Function 即计算 hash code 的函数。元素会根据其 hash code 被放入对应的 bucket 中;为了保证 hash 容器的效率,应尽量避免碰撞,因此 hash function 产生的 hash code 应当足够“乱”(即分布足够均匀)。
下面介绍一个万用的hash function及其测试代码
首先我们创建一个客户类 Customer,它有三个成员变量:名(mFirstName)、姓(mLastName)、年龄(mAge),类型均为 string。
/// Customer record used as the key type in the hashing example.
///
/// All three fields -- first name, last name and age -- are stored as text.
class Customer {
public:
    std::string mFirstName;
    std::string mLastName;
    std::string mAge;

    /// Builds a customer from its first name, last name and age.
    /// Arguments are taken by const reference so each string is copied once.
    Customer(const std::string& firstName,
             const std::string& lastName,
             const std::string& age)
        : mFirstName(firstName), mLastName(lastName), mAge(age) {}

    /// Two customers are equal when all three fields match.
    ///
    /// Unordered containers compare keys with std::equal_to by default,
    /// which requires this operator; without it the class cannot be used
    /// as a key in std::unordered_set / std::unordered_multiset.
    bool operator==(const Customer& other) const {
        return mFirstName == other.mFirstName
            && mLastName == other.mLastName
            && mAge == other.mAge;
    }
};
下面我们来创建hash function
/// Hash functor for Customer, built from a general-purpose "combine the
/// hashes of any number of values" helper.
///
/// operator() feeds the three Customer fields to hash_val; the remaining
/// member templates work for any argument types that std::hash supports.
class CustomerHash {
public:
    /// Folds the std::hash of `val` into the running hash code `seed`.
    template <typename T>
    void hash_combine(std::size_t& seed, const T& val) const
    {
        const std::size_t valueHash = std::hash<T>()(val);
        // 0x9e3779b9 is the golden-ratio constant noted by the original author;
        // the shifted copies of the seed are added in so that earlier values
        // keep influencing the result.
        const std::size_t mix = valueHash + 0x9e3779b9 + (seed << 6) + (seed >> 2);
        seed ^= mix;
    }

    /// Recursion terminator: exactly one value is left to combine.
    template <typename T>
    void hash_value(std::size_t& seed, const T& val) const
    {
        hash_combine(seed, val);
    }

    /// Combines the first value, then recurses on the rest of the pack.
    /// (Variadic templates are C++11 syntax; the author introduces them at
    /// http://www.cnblogs.com/LearningTheLoad/p/7208680.html)
    template <typename T, typename... Types>
    void hash_value(std::size_t& seed, const T& firstArg, const Types&... args) const
    {
        hash_combine(seed, firstArg);
        hash_value(seed, args...);
    }

    /// Returns one hash code computed from all arguments, combined in order.
    template <typename... Types>
    std::size_t hash_val(const Types&... args) const
    {
        std::size_t code = 0;  // the hash code that will be returned
        hash_value(code, args...);
        return code;
    }

    /// Hashes a Customer from its first name, last name and age.
    std::size_t operator()(const Customer& c) const
    {
        return hash_val(c.mFirstName, c.mLastName, c.mAge);
    }
};
测试代码
int main(int argc, char *argv[]) { unordered_multiset<Customer, CustomerHash> set; set.insert(Customer("a", "b", "1")); set.insert(Customer("c", "d", "2")); set.insert(Customer("e", "f", "3")); set.insert(Customer("g", "h", "4")); int myBucket_count = set.bucket_count();//返回有多少个篮子 cout << set.bucket_count() << endl; CustomerHash hh; cout << "bucket postion of " << hh(Customer("a", "b", "1")) %myBucket_count << endl;//取余后 得出落在哪个篮子上 cout << "bucket postion of " << hh(Customer("c", "d", "2")) %myBucket_count << endl; cout << "bucket postion of " << hh(Customer("e", "f", "3")) %myBucket_count << endl; cout << "bucket postion of " << hh(Customer("a", "b", "4")) %myBucket_count << endl; for (int i = 0; i< myBucket_count; i++) {//检测落在哪个篮子上 cout << "bucket at #: " << i << "has: " << set.bucket_size(i) << endl; } return 0; }
测试结果
全部测试代码
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_set>

using namespace std;  // kept from the original listing; names below are qualified explicitly

/// Customer record used as the key type in the hashing example.
/// All three fields -- first name, last name and age -- are stored as text.
class Customer {
public:
    std::string mFirstName;
    std::string mLastName;
    std::string mAge;

    /// Builds a customer from its first name, last name and age.
    Customer(const std::string& firstName,
             const std::string& lastName,
             const std::string& age)
        : mFirstName(firstName), mLastName(lastName), mAge(age) {}

    /// Two customers are equal when all three fields match.
    /// The original declaration had no return type, which does not compile;
    /// the unordered container needs this operator through std::equal_to.
    bool operator==(const Customer& c) const
    {
        return mFirstName == c.mFirstName
            && mLastName == c.mLastName
            && mAge == c.mAge;
    }
};

/// Hash functor for Customer, built from a general-purpose helper that
/// combines the std::hash values of any number of arguments.
class CustomerHash {
public:
    /// Hashes a Customer from its first name, last name and age.
    std::size_t operator()(const Customer& c) const
    {
        return hash_val(c.mFirstName, c.mLastName, c.mAge);
    }

    /// Returns one hash code computed from all arguments, combined in order.
    template <typename... Types>
    std::size_t hash_val(const Types&... args) const
    {
        std::size_t seed = 0;  // the hash code that will be returned
        hash_value(seed, args...);
        return seed;
    }

    /// Combines the first value, then recurses on the rest of the pack.
    template <typename T, typename... Types>
    void hash_value(std::size_t& seed, const T& firstArg, const Types&... args) const
    {
        hash_combine(seed, firstArg);
        hash_value(seed, args...);
    }

    /// Recursion terminator: exactly one value is left to combine.
    template <typename T>
    void hash_value(std::size_t& seed, const T& val) const
    {
        hash_combine(seed, val);
    }

    /// Folds the std::hash of `val` into the running hash code `seed`.
    template <typename T>
    void hash_combine(std::size_t& seed, const T& val) const
    {
        seed ^= std::hash<T>()(val) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    }
};

/// Demo driver: inserts four customers, prints the bucket count, prints
/// `hash % bucket_count` for each inserted customer, then every bucket's size.
int main(int /*argc*/, char* /*argv*/[])
{
    // One shared list, so the hash lookups below use exactly the customers
    // that were inserted (the original hashed ("a","b","4"), which was never
    // put into the container).
    const Customer customers[] = {
        Customer("a", "b", "1"),
        Customer("c", "d", "2"),
        Customer("e", "f", "3"),
        Customer("g", "h", "4"),
    };

    std::unordered_multiset<Customer, CustomerHash> customerSet;
    for (const Customer& c : customers) {
        customerSet.insert(c);
    }

    // bucket_count() returns an unsigned size type; keep it unsigned instead
    // of narrowing to int.
    const std::size_t bucketCount = customerSet.bucket_count();
    std::cout << bucketCount << '\n';

    CustomerHash hasher;
    for (const Customer& c : customers) {
        std::cout << "bucket postion of " << hasher(c) % bucketCount << '\n';
    }

    // Show how many elements ended up in each bucket.
    for (std::size_t i = 0; i < bucketCount; ++i) {
        std::cout << "bucket at #: " << i << "has: " << customerSet.bucket_size(i) << '\n';
    }
    return 0;
}
这是一个万用的 hash function:为自定义类型编写 hash function 时,可以直接套用上面的写法,只需在 operator() 中把需要参与散列的成员传给 hash_val 即可。
参考:侯捷《STL源码剖析》