之前在程序中看到这样一段代码
%% Convert a string to an atom.
%% First asks erlang:list_to_existing_atom/1 for an atom with this name;
%% if that call exits, falls back to erlang:list_to_atom/1.
list_to_atom(List) when is_list(List) ->
    Lookup = (catch erlang:list_to_existing_atom(List)),
    case Lookup of
        Existing when is_atom(Existing) ->
            %% The lookup succeeded: hand back the atom it returned.
            Existing;
        {'EXIT', _} ->
            %% The lookup failed: let the creating BIF handle the list.
            erlang:list_to_atom(List)
    end.
一直没有去深究其中的问题,直到有个同事提到这个函数存在问题,里面的list_to_existing_atom是多余的,才关注到这点。
直接看源码
list_to_existing_atom
/*
 * BIF list_to_existing_atom/1.
 *
 * Copies the argument list into a temporary buffer and looks the name up
 * with erts_atom_get(). Returns the atom when the lookup succeeds; fails
 * with BADARG when the list cannot be copied into the buffer or when the
 * lookup reports no match. The temporary buffer is released on every path
 * before returning.
 */
BIF_RETTYPE list_to_existing_atom_1(BIF_ALIST_1)
{
    Eterm atom;
    char *name = (char *) erts_alloc(ERTS_ALC_T_TMP, MAX_ATOM_CHARACTERS);
    int name_len = intlist_to_buf(BIF_ARG_1, name, MAX_ATOM_CHARACTERS);
    int found = 0;

    /* Only consult the atom table when the list was copied successfully. */
    if (name_len >= 0) {
        found = erts_atom_get(name, name_len, &atom, ERTS_ATOM_ENC_LATIN1);
    }

    erts_free(ERTS_ALC_T_TMP, (void *) name);

    if (!found) {
        BIF_ERROR(BIF_P, BADARG);
    }
    BIF_RET(atom);
}
再看看list_to_atom的实现
/*
 * BIF list_to_atom/1.
 *
 * Copies the argument list into a temporary buffer and passes it to
 * erts_atom_put() as Latin-1 with truncation enabled, returning the
 * resulting atom.
 *
 * When the list cannot be copied, the error is classified by the list
 * length: SYSTEM_LIMIT if it exceeds MAX_ATOM_CHARACTERS, BADARG otherwise.
 * The temporary buffer is released on every path before returning.
 */
BIF_RETTYPE list_to_atom_1(BIF_ALIST_1)
{
    char *name = (char *) erts_alloc(ERTS_ALC_T_TMP, MAX_ATOM_CHARACTERS);
    int name_len = intlist_to_buf(BIF_ARG_1, name, MAX_ATOM_CHARACTERS);

    if (name_len >= 0) {
        Eterm atom = erts_atom_put((byte *) name, name_len,
                                   ERTS_ATOM_ENC_LATIN1, 1);
        ASSERT(is_atom(atom));
        erts_free(ERTS_ALC_T_TMP, (void *) name);
        BIF_RET(atom);
    } else {
        int list_len;

        /* The buffer is no longer needed once the copy has failed. */
        erts_free(ERTS_ALC_T_TMP, (void *) name);

        /* Distinguish "too long" from any other reason for the failure. */
        list_len = erts_list_length(BIF_ARG_1);
        if (list_len > MAX_ATOM_CHARACTERS) {
            BIF_ERROR(BIF_P, SYSTEM_LIMIT);
        }
        BIF_ERROR(BIF_P, BADARG);
    }
}
几乎是一样的,差别在于list_to_atom多了参数长度的判断,并且list_to_existing_atom使用了erts_atom_get,而list_to_atom使用了erts_atom_put
继续看这两个函数的实现
/*
 * Look up an atom by name without creating it.
 *
 * name/len  the name bytes and their count
 * ap        receives the atom term when the lookup succeeds
 * enc       encoding of name; Latin-1 input is passed through
 *           latin1_to_utf8() before the lookup
 *
 * Returns 1 and stores the atom in *ap when index_get() finds the name in
 * erts_atom_table; returns 0 and leaves *ap untouched otherwise. The lookup
 * and the store to *ap both happen under the atom read lock.
 */
int
erts_atom_get(const char *name, int len, Eterm* ap, ErtsAtomEncoding enc)
{
    byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
    Atom key;
    int slot;
    int found = 0;

    key.name = (byte *) name;
    if (enc == ERTS_ATOM_ENC_LATIN1) {
        /* May update both key.name and len to describe the converted text. */
        latin1_to_utf8(utf8_copy, (const byte **) &key.name, &len);
    }
    key.len = (Sint16) len;

    atom_read_lock();
    slot = index_get(&erts_atom_table, (void *) &key);
    if (slot >= 0) {
        *ap = make_atom(slot);
        found = 1;
    }
    atom_read_unlock();

    return found;
}
/*
 * Return the atom for name/len, inserting it into erts_atom_table when it
 * is not already there.
 *
 * name/len  the name bytes and their count
 * enc       encoding of name (7-bit ASCII, Latin-1 or UTF-8)
 * trunc     non-zero: a negative or over-long length is clamped instead of
 *           rejected; zero: such input yields THE_NON_VALUE
 *
 * Returns the atom term, or THE_NON_VALUE when the input is rejected
 * (bad length with trunc == 0, or invalid UTF-8).
 */
Eterm
erts_atom_put(const byte *name, int len, ErtsAtomEncoding enc, int trunc)
{
byte utf8_copy[MAX_ATOM_SZ_FROM_LATIN1];
const byte *text = name;
int tlen = len;
Sint no_latin1_chars;
Atom a;
int aix;
/* Optional statistics: count every call when the build enables it. */
#ifdef ERTS_ATOM_PUT_OPS_STAT
erts_smp_atomic_inc_nob(&atom_put_ops);
#endif
/* A negative length is treated as empty when truncation is allowed. */
if (tlen < 0) {
if (trunc)
tlen = 0;
else
return THE_NON_VALUE;
}
/* Per-encoding length check; ASCII and Latin-1 also record the character count. */
switch (enc) {
case ERTS_ATOM_ENC_7BIT_ASCII:
if (tlen > MAX_ATOM_CHARACTERS) {
if (trunc)
tlen = MAX_ATOM_CHARACTERS;
else
return THE_NON_VALUE;
}
/*
 * Debug builds verify that no byte has the high bit set. Note that the
 * loop runs over the caller-supplied len, not the clamped tlen.
 */
#ifdef DEBUG
for (aix = 0; aix < len; aix++) {
ASSERT((name[aix] & 0x80) == 0);
}
#endif
no_latin1_chars = tlen;
break;
case ERTS_ATOM_ENC_LATIN1:
if (tlen > MAX_ATOM_CHARACTERS) {
if (trunc)
tlen = MAX_ATOM_CHARACTERS;
else
return THE_NON_VALUE;
}
/* Record the character count before the conversion can change tlen. */
no_latin1_chars = tlen;
/* NOTE(review): presumably repoints text/tlen at utf8_copy when conversion is needed -- confirm. */
latin1_to_utf8(utf8_copy, &text, &tlen);
break;
case ERTS_ATOM_ENC_UTF8:
/* First sanity check; need to verify later */
if (tlen > MAX_ATOM_SZ_LIMIT && !trunc)
return THE_NON_VALUE;
break;
}
/* Look for an existing entry first, holding only the read lock. */
a.len = tlen;
a.name = (byte *) text;
atom_read_lock();
aix = index_get(&erts_atom_table, (void*) &a);
atom_read_unlock();
if (aix >= 0) {
/* Already in table no need to verify it */
return make_atom(aix);
}
/* Not found: UTF-8 input is validated only now, right before insertion. */
if (enc == ERTS_ATOM_ENC_UTF8) {
/* Need to verify encoding and length */
byte *err_pos;
Uint no_chars;
switch (erts_analyze_utf8_x((byte *) text,
(Uint) tlen,
&err_pos,
&no_chars, NULL,
&no_latin1_chars,
MAX_ATOM_CHARACTERS)) {
case ERTS_UTF8_OK:
ASSERT(no_chars <= MAX_ATOM_CHARACTERS);
break;
case ERTS_UTF8_OK_MAX_CHARS:
/* Truncated... */
if (!trunc)
return THE_NON_VALUE;
ASSERT(no_chars == MAX_ATOM_CHARACTERS);
/* Cut the name at the position reported by the analyzer. */
tlen = err_pos - text;
break;
default:
/* Bad utf8... */
return THE_NON_VALUE;
}
}
ASSERT(tlen <= MAX_ATOM_SZ_LIMIT);
ASSERT(-1 <= no_latin1_chars && no_latin1_chars <= MAX_ATOM_CHARACTERS);
/*
 * Insert under the write lock. The read lock was released after the lookup
 * above, so the table may have changed in between.
 * NOTE(review): presumably index_put returns the existing index if another
 * thread inserted the same name meanwhile -- confirm.
 */
a.len = tlen;
a.latin1_chars = (Sint16) no_latin1_chars;
a.name = (byte *) text;
atom_write_lock();
aix = index_put(&erts_atom_table, (void*) &a);
atom_write_unlock();
return make_atom(aix);
}
两个函数都会先进行index_get查找atom表里已经存在的数据,区别仅仅在于list_to_existing_atom不会创建新的atom,和文档中的描述一致。
那么为什么开头的函数要这样写?
俗话说实践是检验真理的唯一标准,所以进行测试
先创建1000000个atom,再分别通过list_to_existing_atom和list_to_atom对这些已存在的atom进行转换,两者消耗的时间几乎没有差别。
所以,此处的list_to_existing_atom没有作用,还多做了一次模式匹配和is_atom判断,没有必要。