原子化字符串

在编译器或者其它语言处理软件的开发过程中,字符串管理是非常重要的。
原子化(intern)字符串是指让内容相同的字符串在内存中只保存一份;这样判断两个字符串是否相等只需比较指针,从而提升符号管理和文件名管理的效率。

下面是区区编写的一个简单实现,intern一词来源于Emacs Lisp的intern函数:
/// Pimpl declaration: AtomsImpl is only forward-declared here, and Atoms
/// reaches it solely through a pointer.
class AtomsImpl;

/// String-interning table (public facade).
///
/// Both intern() overloads forward to the AtomsImpl object that the
/// constructor allocates and the destructor deletes. Interning the same
/// contents twice yields the same pointer.
class Atoms{
public:
    /// Interns the contents of s and returns a pointer to the stored copy.
    const char * intern(const std::string &s);
    /// Interns the NUL-terminated string s and returns a pointer to the
    /// stored copy.
    const char * intern(const char *s);
    Atoms();
    ~Atoms();
private:
    // Copying is disabled (declared private and intentionally never defined).
    // A compiler-generated copy would duplicate the raw pimpl pointer, and the
    // destructors of both objects would then delete the same AtomsImpl.
    Atoms(const Atoms &);
    Atoms &operator=(const Atoms &);

    AtomsImpl *pimpl;   // owned; created in Atoms(), destroyed in ~Atoms()
};



///Atoms string container实现

/// One entry of a hash-bucket chain: an interned string plus cached metadata.
struct AtomsImplNode{
    size_t len;             // number of characters in s, terminator excluded
    size_t hash_val;        // full hash of the string, cached so rehashing need not rescan s
    char * s;               // heap buffer holding the characters followed by '\0'
    AtomsImplNode * next;   // next node in the same bucket, or NULL at the end

    /// Starts every node in a well-defined empty state, so a freshly allocated
    /// node never carries indeterminate pointers.
    AtomsImplNode() : len(0), hash_val(0), s(NULL), next(NULL) {}
};

class AtomsImpl{
public:
const char * intern(const string &s);
const char * intern(const char *s);
AtomsImpl();
~AtomsImpl();
private:
vector<AtomsImplNode*> buckets;
size_t atom_count;
};

/// Builds an empty table: one bucket holding a null chain head, zero atoms.
AtomsImpl::AtomsImpl()
    : buckets(1, static_cast<AtomsImplNode*>(NULL)),
      atom_count(0)
{
}

/// Releases every chain node together with the character buffer it owns.
AtomsImpl::~AtomsImpl()
{
    for (size_t bucket = 0; bucket < buckets.size(); ++bucket) {
        AtomsImplNode *node = buckets[bucket];
        while (node != NULL) {
            // Remember the successor before the current node is freed.
            AtomsImplNode *following = node->next;
            delete [] node->s;
            delete node;
            node = following;
        }
    }
}

const char *AtomsImpl::intern(const string &s){
const char *ret = NULL;
if (atom_count > buckets.size()) { //rehash
size_t new_buckets_size = buckets.size() * 2;
vector<AtomsImplNode*> new_buckets(new_buckets_size);
vector<AtomsImplNode*>::iterator beg = buckets.begin(), end = buckets.end();
for(; beg!=end; ++beg) {
AtomsImplNode *head = *beg, *last;
while(head) {
last = head;
head = head->next;
size_t idx = last->hash_val % new_buckets_size;
if (new_buckets[idx]) {
AtomsImplNode *tail = new_buckets[idx];
while(tail->next)
tail = tail->next;
tail->next = last;
last->next = NULL;
}else{
new_buckets[idx] = last;
last->next = NULL;
}
}
}
buckets.swap(new_buckets);
}
size_t hash_val = 7, len = s.size(), hidx = 0;
while(hidx<len){
hash_val = hash_val * 31 + s[hidx++];
}
size_t idx = hash_val % buckets.size();
AtomsImplNode *tail = NULL; //tricky to reduce insert code
if (buckets[idx]){
AtomsImplNode *head = buckets[idx];
while(head){
if (head->len == len &&
head->hash_val == hash_val &&
!strcmp(s.c_str(), head->s))
return head->s;
tail = head;
head = head->next;
}
}
AtomsImplNode *newNode = new AtomsImplNode;
newNode->len = len;
newNode->hash_val = hash_val;
newNode->next = NULL;
newNode->s = new char[len+1];
strcpy(newNode->s, s.c_str());
if (tail){
tail->next = newNode;
} else {
buckets[idx] = newNode;
}
++atom_count;
return newNode->s;

}
const char *AtomsImpl::intern(const char *s){
return intern(string(s));
}

/// Creates the table by allocating its private AtomsImpl.
Atoms::Atoms()
    : pimpl(new AtomsImpl)
{
}
/// Destroys the private AtomsImpl that the constructor allocated.
Atoms::~Atoms()
{
    delete pimpl;
}
/// Interns s by forwarding to the implementation object; returns whatever
/// AtomsImpl::intern returns.
const char * Atoms::intern(const string &s)
{
    const char *atom = pimpl->intern(s);
    return atom;
}
/// Interns the C string s by forwarding to the implementation object; returns
/// whatever AtomsImpl::intern returns.
const char * Atoms::intern(const char *s)
{
    const char *atom = pimpl->intern(s);
    return atom;
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值