创建ETS表
ets:new/2 用于创建一个 ETS 表。表创建时,需要指定表名和选项,选项决定了表的类型、访问权限等属性。
@spec ets:new(Name, [Opt]) -> TableId
Name: 表的名称,必须是一个原子(例如 my_table)。
Opt: 选项列表,用于配置表的行为。常见的选项有:
- set | ordered_set | bag | duplicate_bag: 表类型,分别对应集合(键唯一,无序)、有序集合(键唯一,按键排序)、包(同一个键可以对应多个不同的对象)、重复包(允许完全相同的对象重复出现)。默认类型为 set。
- private: 私有表,仅所有者进程可读写。
- public: 公共表,所有进程可读写。
- protected: 受保护表,所有进程可读,只有所有者可写。
- named_table: 命名表,允许通过名称引用表。
- {keypos, K}: 指定元组中作为键的位置,通常是第一个元素。
表创建后,无法修改其选项(例如无法改变表类型或访问权限)。
表的所有者进程负责管理该表。若所有者进程终止或调用 ets:delete(TableId),表会被自动删除并释放内存。
测试案例:用三字母组合(trigram)索引比较不同类型表的效率
-module(lib_trigrams).
-author("81391").
-export([for_each_trigram_in_the_english_language/2, make_tables/0, timer_tests/0, open/0, close/1, is_word/2,
how_many_trigrams/0, make_ets_set/0, make_ets_ordered_set/0, make_mod_set/0, lookup_all_ets/2, lookup_all_set/2]).
%% @doc Builds the three trigram stores and prints size/time statistics.
%%
%% First counts every trigram with how_many_trigrams/0, then builds the
%% ETS ordered_set file, the ETS set file and the `sets' module file, timing
%% each step with timer:tc/3. Every "time/trigram" figure is divided by the
%% count from how_many_trigrams/0; every "size" figure is the file size
%% divided by the value returned by make_ets_ordered_set/0.
%% Returns the result of the final io:format/2 call.
make_tables() ->
    {CountMicros, Total} = timer:tc(?MODULE, how_many_trigrams, []),
    io:format("Counting - No of trigrams=~p time/trigram=~p~n", [Total, CountMicros / Total]),
    {OrdMicros, Stored} = timer:tc(?MODULE, make_ets_ordered_set, []),
    %% One reporting routine for all three stores: bytes per stored entry
    %% and build microseconds per scanned trigram.
    Report = fun(Format, File, Micros) ->
        Bytes = filelib:file_size(File),
        io:format(Format, [Bytes / Stored, Micros / Total])
    end,
    Report("Ets ordered Set size=~p time/trigram=~p~n", "trigramsOS.tab", OrdMicros),
    {SetMicros, _} = timer:tc(?MODULE, make_ets_set, []),
    Report("Ets set size=~p time/trigram=~p~n", "trigramsS.tab", SetMicros),
    {ModMicros, _} = timer:tc(?MODULE, make_mod_set, []),
    Report("Module sets size=~p time/trigram=~p~n", "trigrams.set", ModMicros).
%% @doc Builds the trigram table as an ETS `ordered_set' and dumps it to
%% "trigramsOS.tab". Returns whatever make_a_set/2 returns.
make_ets_ordered_set() ->
    Target = "trigramsOS.tab",
    make_a_set(ordered_set, Target).
%% @doc Builds the trigram table as an ETS `set' and dumps it to
%% "trigramsS.tab". Returns whatever make_a_set/2 returns.
make_ets_set() ->
    Target = "trigramsS.tab",
    make_a_set(set, Target).
%% @doc Builds an ETS table of the given type containing every trigram as a
%% one-element tuple `{Binary}', dumps it to FileName and returns the number
%% of objects stored in the table.
%%
%% Type     - ETS table type passed straight to ets:new/2.
%% FileName - destination file for ets:tab2file/2.
%%
%% Crashes with a badmatch if the dump fails, so a caller never goes on to
%% measure a missing or stale file. The table is always deleted, including
%% when the scan or the dump raises.
make_a_set(Type, FileName) ->
    Tab = ets:new(table, [Type]),
    try
        %% The accumulator is unused; the table itself collects the result.
        Insert = fun(Str, _) -> ets:insert(Tab, {list_to_binary(Str)}) end,
        for_each_trigram_in_the_english_language(Insert, 0),
        ok = ets:tab2file(Tab, FileName),
        ets:info(Tab, size)
    after
        ets:delete(Tab)
    end.
%% @doc Collects every trigram (as a binary) into a `sets' set and writes the
%% term_to_binary/1 encoding of that set to "trigrams.set".
%% Returns the result of file:write_file/2.
make_mod_set() ->
    AddTrigram = fun(Str, Acc) -> sets:add_element(list_to_binary(Str), Acc) end,
    Trigrams = for_each_trigram_in_the_english_language(AddTrigram, sets:new()),
    file:write_file("trigrams.set", [term_to_binary(Trigrams)]).
%% @doc Runs the lookup benchmarks: first the two ETS dump files, then the
%% `sets' module file. Returns the result of time_lookup_module_sets/0.
timer_tests() ->
    EtsCases = [
        {"Ets ordered Set", "trigramsOS.tab"},
        {"Ets set", "trigramsS.tab"}
    ],
    lists:foreach(
        fun({Label, File}) -> time_lookup_ets_set(Label, File) end,
        EtsCases
    ),
    time_lookup_module_sets().
%% @doc Loads an ETS table from File, looks up every stored key once and
%% prints the average time per lookup in microseconds.
%%
%% Type - label printed in front of the result (formatted with ~s).
%% File - table dump readable by ets:file2tab/1.
%%
%% The loaded table is deleted before returning.
time_lookup_ets_set(Type, File) ->
    {ok, Tab} = ets:file2tab(File),
    Rows = ets:tab2list(Tab),
    {Micros, _} = timer:tc(?MODULE, lookup_all_ets, [Tab, Rows]),
    PerLookup = Micros / length(Rows),
    io:format("~s lookup=~p micro seconds~n", [Type, PerLookup]),
    ets:delete(Tab).
%% @doc Performs one ets:lookup/2 in Tab for the key of every `{Key}' tuple
%% in the list. The lookup results are discarded; returns `ok'.
lookup_all_ets(Tab, [{Key} | Rest]) ->
    ets:lookup(Tab, Key),
    lookup_all_ets(Tab, Rest);
lookup_all_ets(_Tab, []) ->
    ok.
%% @doc Reads the set stored in "trigrams.set", tests membership of every
%% element once and prints the average time per lookup in microseconds.
time_lookup_module_sets() ->
    {ok, Encoded} = file:read_file("trigrams.set"),
    Trigrams = binary_to_term(Encoded),
    Members = sets:to_list(Trigrams),
    {Micros, _} = timer:tc(?MODULE, lookup_all_set, [Trigrams, Members]),
    io:format("Module set lookup=~p micro seconds~n", [Micros / length(Members)]).
%% @doc Calls sets:is_element/2 on Set for every key in the list.
%% The membership results are discarded; returns `ok'.
lookup_all_set(Set, [Key | Rest]) ->
    sets:is_element(Key, Set),
    lookup_all_set(Set, Rest);
lookup_all_set(_Set, []) ->
    ok.
%% @doc Counts the trigrams produced by the word-list scan, adding one for
%% every callback invocation (repeated trigrams are counted each time).
how_many_trigrams() ->
    Count = fun(_Trigram, Seen) -> Seen + 1 end,
    for_each_trigram_in_the_english_language(Count, 0).
%% @doc Folds F over every trigram of every word in the gzipped word list
%% "354984si.ngl.gz" (opened by that relative path).
%%
%% F  - fun(Trigram, Acc) -> NewAcc, where Trigram is a three-character list.
%% A0 - initial accumulator.
%%
%% Returns the final accumulator. Crashes with a badmatch if the file cannot
%% be read.
for_each_trigram_in_the_english_language(F, A0) ->
    {ok, Compressed} = file:read_file("354984si.ngl.gz"),
    Chars = binary_to_list(zlib:gunzip(Compressed)),
    scan_word_list(Chars, F, A0).
%% @doc Walks the character list word by word. Each word comes back from
%% get_next_word/2 with a trailing space and gets a leading space prepended
%% here before its trigrams are fed to F. Returns the final accumulator.
scan_word_list([], _F, Acc) ->
    Acc;
scan_word_list(Chars, F, Acc) ->
    {Word, Remaining} = get_next_word(Chars, []),
    scan_word_list(Remaining, F, scan_trigrams([$\s | Word], F, Acc)).
%% @doc Splits the next word off the front of a character list.
%%
%% First argument  - remaining input characters.
%% Second argument - characters of the current word collected so far, in
%%                   reverse order (callers start with []).
%%
%% Returns {Word, Rest}, where Word is the collected word with one trailing
%% space appended and Rest is the input after the line terminator. A word
%% ends at "\r\n", at a bare "\n", or at the end of input, so word lists
%% with either CRLF or LF line endings are split correctly.
get_next_word([$\r, $\n | T], L) ->
    {lists:reverse([$\s | L]), T};
get_next_word([$\n | T], L) ->
    %% LF-only files: without this clause the whole file is read as one word.
    {lists:reverse([$\s | L]), T};
get_next_word([H | T], L) ->
    get_next_word(T, [H | L]);
get_next_word([], L) ->
    {lists:reverse([$\s | L]), []}.
%% @doc Slides a three-character window over the list, calling
%% F(Window, Acc) at each position from left to right, and returns the final
%% accumulator. Input shorter than three characters leaves the accumulator
%% unchanged.
scan_trigrams([C1, C2, C3 | Rest], F, Acc) ->
    %% Once fewer than three characters remain, the clause below ends the
    %% walk and returns the accumulator produced by the last window.
    scan_trigrams([C2, C3 | Rest], F, F([C1, C2, C3], Acc));
scan_trigrams(_Short, _F, Acc) ->
    Acc.
%% @doc Checks whether Str looks like a word: pads it with one space on each
%% side and tests, via is_word1/2, that every trigram of the padded string
%% is present in Tab.
is_word(Tab, Str) ->
    Padded = [$\s | Str] ++ [$\s],
    is_word1(Tab, Padded).
%% @doc Returns true only if every consecutive three-character window of the
%% list is found in Tab; stops at the first missing trigram. Anything
%% shorter than three characters yields false.
is_word1(Tab, [_, _, _] = Last) ->
    is_this_a_trigram(Tab, Last);
is_word1(Tab, [A, B, C | Rest]) ->
    is_this_a_trigram(Tab, [A, B, C]) andalso is_word1(Tab, [B, C | Rest]);
is_word1(_Tab, _TooShort) ->
    false.
%% @doc Returns true when the binary form of the character list X is a key
%% in the ETS table Tab, false otherwise.
is_this_a_trigram(Tab, X) ->
    Key = list_to_binary(X),
    ets:lookup(Tab, Key) =/= [].
%% @doc Loads the trigram table from "trigramsS.tab" located in the
%% directory that code:which/1 reports for this module, and returns the
%% table identifier. Crashes with a badmatch if the file cannot be loaded.
open() ->
    BeamDir = filename:dirname(code:which(?MODULE)),
    {ok, Tab} = ets:file2tab(BeamDir ++ "/trigramsS.tab"),
    Tab.
%% @doc Deletes the ETS table Tab. Returns `true', the only value
%% ets:delete/1 returns on success.
close(Tab) ->
    true = ets:delete(Tab).
测试:
1> c(lib_trigrams).
{ok,lib_trigrams}
2> lib_trigrams:make_tables().
Counting - No of trigrams=3357707 time/trigram=0.07445557340172922
Ets ordered Set size=19.0346696570414 time/trigram=0.5167216794080007
Ets set size=19.033922063358563 time/trigram=0.3210524325082564
Module sets size=9.433978132884777 time/trigram=0.7445557340172921
ok
3> lib_trigrams:timer_tests().
Ets ordered Set lookup=0.2729273160851379 micro seconds
Ets set lookup=0.1233418989814396 micro seconds
Module set lookup=0.105123610858793208 micro seconds
ok