hash_set的源码

hash_set以hashtable为底层机制。由于hash_set所供应的操作接口,hashtable都
提供了,所以几乎所有的hash_set操作行为,都只是转调用hashtable的操作而已。
RB-tree有自动排序功能,但是hashtable没有,相应的hash_set也没有
set不像map那样可以同时拥有实值value和键值key,set元素的键值就是实值,实值就是键值
set的元素有自动排序功能,但是hash_set没有

// hash_set: a container of unique values implemented as a thin adapter over
// hashtable. The stored value doubles as its own key (identity<Value> is the
// key extractor), and nearly every member simply forwards to the underlying
// table `rep`. Unlike set, no ordering of the elements is maintained.
//
// Template parameters:
//   Value    - element type (also the key type)
//   HashFcn  - hash function object, defaults to hash<Value>
//   EqualKey - key equality predicate, defaults to equal_to<Value>
//   Alloc    - allocator handed to the underlying hashtable
template <class Value,class HashFcn=hash<Value>,
	class EqualKey=equal_to<Value>,class Alloc=alloc>
class hash_set{
private:
	// identity<> is defined in <stl_function.h>
	typedef hashtable<Value,Value,HashFcn,identity<Value>,EqualKey,Alloc>ht;
	ht rep;// the underlying hash table that does all the work
public:
	typedef typename ht::key_type key_type;
	typedef typename ht::value_type value_type;
	typedef typename ht::hasher hasher;
	typedef typename ht::key_equal key_equal;

	typedef typename ht::size_type size_type;
	typedef typename ht::difference_type difference_type;
	// pointer, reference and iterator all alias the table's const variants,
	// so both the "mutable" and the const names expose read-only access.
	typedef typename ht::const_pointer pointer;
	typedef typename ht::const_pointer const_pointer;
	typedef typename ht::const_reference reference;
	typedef typename ht::const_reference const_reference;
	typedef typename ht::const_iterator iterator;
	typedef typename ht::const_iterator const_iterator;

	// Accessors for the hash function and the key-equality predicate in use.
	hasher hash_funct()const{return rep.hash_funct();}
	key_equal key_eq()const{return rep.key_eq();}
public:
	// The default table size is 100; the hash table adjusts it to the
	// nearest larger prime.
	hash_set():rep(100,hasher(),key_equal()){}
	// n is the requested table size; the hasher and predicate are
	// default-constructed.
	explicit hash_set(size_type n):rep(n,hasher(),key_equal()){}
	hash_set(size_type n,const hasher&hf,const key_equal&eql):rep(n,hf,eql){}
	// All range constructors insert through insert_unique(), so duplicate
	// keys are not allowed.
	template<class InputIterator>
	hash_set(InputIterator f,InputIterator l):rep(100,hasher(),key_equal()){rep.insert_unique(f,l);}
	template<class InputIterator>
	hash_set(InputIterator f,InputIterator l,size_type n,const hasher&hf):rep(n,hf,key_equal()){rep.insert_unique(f,l);}
	template<class InputIterator>
	hash_set(InputIterator f,InputIterator l,size_type n,const hasher&hf,const key_equal&eql):rep(n,hf,eql){rep.insert_unique(f,l);}
public:
	// Almost every operation has a hashtable counterpart; just forward to it.
	size_type size()const{return rep.size();}
	size_type max_size()const{return rep.max_size();}
	bool empty()const{return rep.empty();}
	void swap(hash_set&hs){rep.swap(hs.rep);}
	friend bool operator==__STL_NULL_TMPL_ARGS(const hash_set&,const hash_set&);
	iterator begin()const{return rep.begin();}
	iterator end()const{return rep.end();}
public:
	// Inserts obj via insert_unique(). The table returns a pair holding its
	// own iterator type, which is repackaged into this class's iterator type.
	pair<iterator,bool>insert(const value_type&obj){
		pair<typename ht::iterator,bool>p=rep.insert_unique(obj);
		return pair<iterator,bool>(p.first,p.second);
	}
	// Inserts every element of [f,l) via insert_unique().
	template<class InputIterator>
	void insert(InputIterator f,InputIterator l){rep.insert_unique(f,l);}
	// Same as insert(obj), but forwards to insert_unique_noresize().
	pair<iterator,bool>insert_noresize(const value_type&obj){
		pair<typename ht::iterator,bool>p=rep.insert_unique_noresize(obj);
		return pair<iterator,bool>(p.first,p.second);
	}
	iterator find(const key_type&key)const{return rep.find(key);}
	size_type count(const key_type&key)const{return rep.count(key);}
	pair<iterator,iterator>equal_range(const key_type&key)const{
		return rep.equal_range(key);
	}
	size_type erase(const key_type&key){return rep.erase(key);}
	void erase(iterator it){rep.erase(it);}
	void erase(iterator f,iterator l){rep.erase(f,l);}
	void clear(){rep.clear();}
public:
	// Bucket-level interface, forwarded unchanged to the hash table.
	void resize(size_type hint){rep.resize(hint);}
	size_type bucket_count()const{return rep.bucket_count();}
	size_type max_bucket_count()const{return rep.max_bucket_count();}
	size_type elems_in_bucket(size_type n)const
	{
		return rep.elems_in_bucket(n);
	}
};

// Two hash_sets compare equal exactly when their underlying tables do.
// Declared a friend of hash_set so it can reach the private member `rep`.
template <class Value,class HashFcn,class EqualKey,class Alloc>
inline bool operator==(const hash_set<Value,HashFcn,EqualKey,Alloc>&hs1,
	const hash_set<Value,HashFcn,EqualKey,Alloc>&hs2){
	return hs1.rep==hs2.rep;
}

下面给出一个测试实例

#include<iostream>
#include<hash_set>
#include<cstring>
using namespace std;
// Equality predicate for C strings: compares the characters the pointers
// refer to rather than the pointer values themselves.
struct eqstr{
	bool operator()(const char*s1,const char*s2)const{
		const int order=std::strcmp(s1,s2);
		return !order;// strcmp yields 0 when both strings match
	}
};
void lookup(const hash_set<const char*,hash<const char*>,eqstr>&Set,const char*word)
{
	hash_set<const char*,hash<const char*>,eqstr>::const_iterator it=Set.find(word);
	cout<<" "<<word<<" :"<<(it!=Set.end()?"present":"not present")<<endl;
}
// Demo driver: fills a hash_set of C strings, looks up three words (two that
// were inserted and one that was not), then prints every stored element
// separated by spaces.
int main(){
	hash_set<const char*,hash<const char*>,eqstr>Set;
	Set.insert("kiwi");
	Set.insert("plum");
	Set.insert("apple");
	Set.insert("mango");
	Set.insert("apricot");
	Set.insert("banana");
	lookup(Set,"mango");
	lookup(Set,"apple");
	lookup(Set,"durian");
	// Fixed: the type name previously contained a stray "<>" and did not compile.
	hash_set<const char*,hash<const char*>,eqstr>::iterator ite1=Set.begin();
	hash_set<const char*,hash<const char*>,eqstr>::iterator ite2=Set.end();
	// hash_set keeps no sorted order, so the traversal order is whatever the
	// underlying hash table yields.
	for(;ite1!=ite2;++ite1)
		cout<<*ite1<<' ';
	return 0;
}

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
hashjoin 是一种常用的关系型数据库的查询算法,用于将两个表中共同的列进行匹配并连接起来。具体实现可以参考 PostgreSQL 数据库中的代码实现。 在 PostgreSQL 中,hashjoin 的实现主要包括三个部分:构建哈希表、扫描哈希表和匹配结果。 构建哈希表: ```c static HashJoinTable ExecHashTableCreate(PlanState *parent, List *hashOperators, /* hash function to use for each join key */ long nbuckets, /* # buckets in hashtable */ Size entrysize, /* size of each entry in hashtable */ bool use_variable_hash_iv) { HashJoinTable hashtable; int nbuckets_est = nbuckets; int log2_nbuckets; /* Limit nbuckets to at most INT_MAX; must do this before sizing to power of 2 */ if ((double) nbuckets_est * (double) entrysize > (double) INT_MAX) nbuckets_est = (int) floor((double) INT_MAX / (double) entrysize); /* Size hash table to a power of 2 */ log2_nbuckets = my_log2(nbuckets_est); hashtable = (HashJoinTable) palloc0(HJTUPLE_OVERHEAD + sizeof(HashJoinTableData) + (entrysize * (1 << log2_nbuckets))); hashtable->nbuckets = nbuckets_est; hashtable->log2_nbuckets = log2_nbuckets; hashtable->buckets = (HashJoinTuple *) (((char *) hashtable) + HJTUPLE_OVERHEAD + sizeof(HashJoinTableData)); hashtable->hash_iv = GetPerTupleExprContext(parent)->ecxt_hashjoin_outer; /* Initialize all hash bucket headers to empty */ MemSet(hashtable->buckets, 0, sizeof(HashJoinTuple) << log2_nbuckets); /* Set up array containing OIDs of hash operators */ ExecChooseHashFuncs(hashOperators, hashtable->hashfunctions, hashtable->nbuckets, use_variable_hash_iv); return hashtable; } ``` 扫描哈希表: ```c static TupleTableSlot * ExecScanHashBucket(HashJoinState *hjstate, ExprContext *econtext) { HashJoinTable hashtable = hjstate->hj_HashTable; AttrNumber *hj_OuterHashKeys = hjstate->hj_OuterHashKeys; TupleTableSlot *innerTupleSlot = hjstate->hj_InnerTupleSlot; TupleTableSlot *outerTupleSlot = hjstate->hj_OuterTupleSlot; HashJoinTuple hashTuple; uint32 hashvalue; int bucketno; /* loop until we find a join tuple */ for (;;) { hashvalue = ExecHashGetBucket(hjstate, hashtable, hj_OuterHashKeys, econtext, false); bucketno = 
ExecHashGetBucketNumber(hashvalue, hashtable->log2_nbuckets); /* * Scan the bucket for matching tuples. */ for (hashTuple = hashtable->buckets[bucketno]; hashTuple != NULL; hashTuple = hashTuple->next) { if (hashTuple->hashvalue != hashvalue) continue; /* Found a match? Then report and save tuple */ if (ExecQualAndReset(hashTuple->hashressupport, econtext)) { /* save the matching tuple */ ExecStoreMinimalTuple(HJTUPLE_MINTUPLE(hashTuple), innerTupleSlot, false); /* set up for next join tuple, if any */ hjstate->hj_CurHashValue = hashvalue; hjstate->hj_CurBucketNo = bucketno; return outerTupleSlot; } } /* * No match in this bucket; check for additional matches in outer * batches. */ if (!ExecScanHashTableForUnmatched(hjstate, econtext)) return NULL; /* need new outer tuple */ } } ``` 匹配结果: ```c static TupleTableSlot * ExecHashJoin(HashJoinState *node) { PlanState *outerNode = outerPlanState(node); HashJoinTable hashtable = node->hj_HashTable; TupleTableSlot *innerTupleSlot = node->hj_InnerTupleSlot; TupleTableSlot *outerTupleSlot = node->hj_OuterTupleSlot; ExprContext *econtext = node->js.ps.ps_ExprContext; TupleTableSlot *result; MinimalTuple tuple; /* * Reset per-tuple memory context to free any expression evaluation * storage allocated in the previous tuple cycle. */ ResetExprContext(econtext); /* * if first time through, read all inner tuples into hashtable */ if (!node->hj_CurHashValue) { /* Reset hash table to empty */ ExecHashTableReset(hashtable); /* Load hashtable with inner tuples */ ExecHashJoinNewBatch(node); /* If inner relation is completely empty, return no rows */ if (hashtable->totalTuples == 0) return NULL; } /* * We read the outer tuple in the previous iteration, which means that we * have to check for additional join matches for it before continuing. 
*/ if (node->hj_JoinState == HJ_NEED_NEW_OUTER) { if (!ExecScanHashTableForUnmatched(node, econtext)) return NULL; /* need new outer tuple */ } /* * Now check for any matches */ for (;;) { /* * If we've run out of inner tuples, then the current outer tuple * can't have a match, so we're done with it. */ if (node->hj_CurTuple == NULL) { if (!ExecScanHashTableForUnmatched(node, econtext)) break; /* need new outer tuple */ continue; /* search next hash bucket */ } /* * Check for join match. */ if (ExecQual(node->js.ps.qual, econtext)) { /* * qualification was satisfied so we project and return the * slot containing joined tuples, making sure that the slot is * labeled with the join's rowtype. */ ExecProject(node->js.ps.ps_ProjInfo); result = node->js.ps.ps_ProjInfo->pi_slot; /* * We return the first (and only) qualifying join tuple. The * executor doesn't support the idea of generating multiple * join rows from one outer tuple when there are multiple * matching inner tuples (compare the semantics of a nested * loops join). */ if (hashtable->nbatch == 1) { /* In single-batch case, just return the result */ return result; } else { /* * Before returning the first join tuple, force the * other tuples in the same join group to be fetched and * appended to the result list. */ tuple = ExecFetchSlotMinimalTuple(innerTupleSlot); ExecHashTableMarkCurBucket(hjstate); ExecHashTableGetBucketAndBatch(hashtable, node->hj_CurHashValue, &node->hj_CurBucketNo, &node->hj_CurTuple, &node->hj_CurBucketBuf); /* * Set the next tuple to return, if any. Done in this order * so that if there is only one tuple in the group, we don't * advance the pointers at all. */ if (node->hj_CurTuple != NULL) node->hj_NextTuple = node->hj_CurTuple->next; else node->hj_NextTuple = NULL; /* Remember there's a join tuple available */ node->hj_JoinState = HJ_NEED_NEW_OUTER; /* And return the first tuple */ return result; } } /* * Didn't match this time. Try next tuple in inner relation. 
*/ node->hj_CurTuple = node->hj_CurTuple->next; } /* * no more matches */ return NULL; } ``` 以上代码是 PostgreSQL 中 hashjoin 的基本实现,可以作为参考。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值