ETS(二)

创建ETS表

ets:new/2 用于创建一个 ETS 表。表创建时,需要指定表名和选项,选项决定了表的类型、访问权限等属性。
@spec ets:new(Name, [Opt]) -> TableId
Name: 表的名称,必须是一个原子(例如 my_table)。
Opt: 选项列表,用于配置表的行为。常见的选项有:

  • set | ordered_set | bag | duplicate_bag: 表类型,分别对应集合(键唯一)、有序集合(键唯一,并按键排序)、包(同一个键可以对应多个不同的对象)、重复包(允许完全相同的对象重复出现)。
  • private: 私有表,仅所有者进程可读写。
  • public: 公共表,所有进程可读写。
  • protected: 受保护表,所有进程可读,只有所有者可写(这是默认的访问权限)。
  • named_table: 命名表,允许通过名称引用表。
  • {keypos, K}: 指定元组中作为键的位置,默认为 1(即第一个元素)。

表创建后,无法修改其选项(例如无法改变表类型或访问权限)。
表的所有者进程负责管理该表。若所有者进程终止或调用 ets:delete(TableId),表会被自动删除并释放内存。

测试案例:三字索引测试不同类型表的效率

-module(lib_trigrams).
-author("81391").

-export([for_each_trigram_in_the_english_language/2, make_tables/0, timer_tests/0, open/0, close/1, is_word/2,
  how_many_trigrams/0, make_ets_set/0, make_ets_ordered_set/0, make_mod_set/0, lookup_all_ets/2, lookup_all_set/2]).

%% @doc Builds every trigram store and prints a benchmark line for each.
%%
%% First counts all trigrams in the word list, then builds the ordered_set ETS
%% table, the set ETS table and the `sets' module set. For each store it prints
%% the file size divided by the number of stored trigrams, and the build time
%% (as reported by timer:tc/3) divided by the total number of scanned trigrams.
%% Side effect: "trigramsOS.tab", "trigramsS.tab" and "trigrams.set" are
%% written through the builder functions.
make_tables() ->
  {CountTime, Total} = timer:tc(?MODULE, how_many_trigrams, []),
  io:format("Counting - No of trigrams=~p time/trigram=~p~n", [Total, CountTime / Total]),

  {OrderedTime, Unique} = timer:tc(?MODULE, make_ets_ordered_set, []),
  %% Shared reporting step: size per stored trigram, time per scanned trigram.
  Report = fun(Format, File, Micro) ->
    Bytes = filelib:file_size(File),
    io:format(Format, [Bytes / Unique, Micro / Total])
  end,
  Report("Ets ordered Set size=~p time/trigram=~p~n", "trigramsOS.tab", OrderedTime),

  {SetTime, _} = timer:tc(?MODULE, make_ets_set, []),
  Report("Ets set size=~p time/trigram=~p~n", "trigramsS.tab", SetTime),

  {ModTime, _} = timer:tc(?MODULE, make_mod_set, []),
  Report("Module sets size=~p time/trigram=~p~n", "trigrams.set", ModTime).

%% @doc Builds the trigram table as an `ordered_set' ETS table and dumps it to
%% "trigramsOS.tab". Returns whatever make_a_set/2 returns.
make_ets_ordered_set() ->
  TableType = ordered_set,
  make_a_set(TableType, "trigramsOS.tab").
%% @doc Builds the trigram table as a `set' ETS table and dumps it to
%% "trigramsS.tab". Returns whatever make_a_set/2 returns.
make_ets_set() ->
  TableType = set,
  make_a_set(TableType, "trigramsS.tab").

%% @doc Fills a fresh ETS table of the given Type with every trigram of the
%% word list, dumps it to FileName and returns the number of stored objects.
%%
%% Each trigram is stored as a 1-tuple `{Binary}', so the binary is the key.
%% The dump must succeed: a failing ets:tab2file/2 crashes here instead of
%% leaving a missing or stale file for later steps to measure.
%% The temporary table is always deleted, even if scanning or dumping fails.
make_a_set(Type, FileName) ->
  Tab = ets:new(table, [Type]),
  try
    %% The accumulator is unused; the table itself collects the trigrams.
    Insert = fun(Str, _) -> ets:insert(Tab, {list_to_binary(Str)}) end,
    for_each_trigram_in_the_english_language(Insert, 0),
    ok = ets:tab2file(Tab, FileName),
    ets:info(Tab, size)
  after
    ets:delete(Tab)
  end.

%% @doc Collects every trigram of the word list (as binaries) into a `sets'
%% set and writes the serialized set to "trigrams.set".
%% Returns the result of file:write_file/2.
make_mod_set() ->
  AddTrigram = fun(Str, Acc) -> sets:add_element(list_to_binary(Str), Acc) end,
  Trigrams = for_each_trigram_in_the_english_language(AddTrigram, sets:new()),
  file:write_file("trigrams.set", [term_to_binary(Trigrams)]).

%% @doc Runs the lookup benchmarks: both dumped ETS tables first, then the
%% `sets' module set. Returns the result of the last benchmark.
timer_tests() ->
  EtsCases = [{"Ets ordered Set", "trigramsOS.tab"}, {"Ets set", "trigramsS.tab"}],
  lists:foreach(fun({Label, File}) -> time_lookup_ets_set(Label, File) end, EtsCases),
  time_lookup_module_sets().

%% @doc Loads the ETS table dumped in File, looks up every object it contains
%% and prints the average time per lookup, labelled with Type.
%% The loaded table is deleted afterwards; returns the result of ets:delete/1.
time_lookup_ets_set(Type, File) ->
  {ok, Table} = ets:file2tab(File),
  Rows = ets:tab2list(Table),
  RowCount = length(Rows),
  {Elapsed, _} = timer:tc(?MODULE, lookup_all_ets, [Table, Rows]),
  PerLookup = Elapsed / RowCount,
  io:format("~s lookup=~p micro seconds~n", [Type, PerLookup]),
  ets:delete(Table).


%% @doc Looks up the key of every 1-tuple `{Key}' in L in table Tab.
%% The lookup results are discarded; returns `ok'.
lookup_all_ets(Tab, L) ->
  Probe = fun({Key}) -> ets:lookup(Tab, Key) end,
  lists:foreach(Probe, L).

%% @doc Reads the serialized set from "trigrams.set", checks membership of
%% every element it contains and prints the average time per lookup.
time_lookup_module_sets() ->
  {ok, Serialized} = file:read_file("trigrams.set"),
  Trigrams = binary_to_term(Serialized),
  Elements = sets:to_list(Trigrams),
  ElementCount = length(Elements),
  {Elapsed, _} = timer:tc(?MODULE, lookup_all_set, [Trigrams, Elements]),
  PerLookup = Elapsed / ElementCount,
  io:format("Module set lookup=~p micro seconds~n", [PerLookup]).

%% @doc Tests every key in L for membership in Set.
%% The membership results are discarded; returns `ok'.
lookup_all_set(Set, L) ->
  Probe = fun(Key) -> sets:is_element(Key, Set) end,
  lists:foreach(Probe, L).


%% @doc Counts the trigrams produced from the word list, one per callback
%% invocation (repeated trigrams are counted every time they occur).
how_many_trigrams() ->
  for_each_trigram_in_the_english_language(fun(_Trigram, Count) -> Count + 1 end, 0).

%% @doc Folds F over every trigram of the gzipped word list "354984si.ngl.gz".
%%
%% F is called as F(Trigram, Acc), where Trigram is a three-character list,
%% and must return the new accumulator. A0 is the initial accumulator; the
%% final accumulator is returned.
for_each_trigram_in_the_english_language(F, A0) ->
  {ok, Compressed} = file:read_file("354984si.ngl.gz"),
  Chars = binary_to_list(zlib:gunzip(Compressed)),
  scan_word_list(Chars, F, A0).


%% @doc Walks the character list word by word, folding F over the trigrams of
%% each word. A space is prepended to every word before it is scanned
%% (get_next_word/2 already appends one), so word boundaries take part in the
%% trigrams. Returns the final accumulator.
scan_word_list([], _F, Acc) ->
  Acc;
scan_word_list(Chars, F, Acc) ->
  {Word, Rest} = get_next_word(Chars, []),
  scan_word_list(Rest, F, scan_trigrams([$\s | Word], F, Acc)).


%% @doc Splits the next word off the front of a character list.
%%
%% The second argument accumulates the word's characters in reverse order.
%% Returns `{Word, Rest}' where Word has a trailing space appended and Rest is
%% the input after the line terminator. A word ends at "\r\n", at a bare "\n"
%% (so word lists with Unix line endings are split correctly too) or at the
%% end of input.
get_next_word([$\r, $\n | T], L) ->
  {lists:reverse([$\s | L]), T};
get_next_word([$\n | T], L) ->
  {lists:reverse([$\s | L]), T};
get_next_word([H | T], L) ->
  get_next_word(T, [H | L]);
get_next_word([], L) ->
  {lists:reverse([$\s | L]), []}.


%% @doc Folds F over every run of three consecutive characters in the list,
%% left to right, threading the accumulator through each call.
%% A list with fewer than three characters leaves the accumulator unchanged.
scan_trigrams([X, Y, Z | Rest], F, Acc) ->
  scan_trigrams([Y, Z | Rest], F, F([X, Y, Z], Acc));
scan_trigrams(_, _, Acc) ->
  Acc.

%% @doc Decides whether Str looks like a word: Str is padded with one space on
%% each side, and every trigram of the padded string must be present in Tab.
is_word(Tab, Str) ->
  is_word1(Tab, [$\s | Str ++ [$\s]]).

%% Checks the trigrams left to right and stops at the first one not in Tab.
%% Anything shorter than three characters is not a word.
is_word1(Tab, [_, _, _] = Trigram) ->
  is_this_a_trigram(Tab, Trigram);
is_word1(Tab, [A, B, C | Rest]) ->
  is_this_a_trigram(Tab, [A, B, C]) andalso is_word1(Tab, [B, C | Rest]);
is_word1(_, _) ->
  false.


%% @doc Returns `true' if the trigram X (a character list) is a key in Tab,
%% `false' otherwise. Keys in the table are binaries, so X is converted first.
%% ets:member/2 answers the membership question directly, without copying the
%% stored object out of the table.
is_this_a_trigram(Tab, X) ->
  ets:member(Tab, list_to_binary(X)).

%% @doc Loads the trigram table from "trigramsS.tab" in the directory that
%% code:which/1 reports for this module, and returns the table identifier.
%% Crashes with a badmatch if the file cannot be loaded.
open() ->
  Dir = filename:dirname(code:which(?MODULE)),
  %% filename:join/2 inserts the separator, so the path is not built by hand.
  {ok, Tab} = ets:file2tab(filename:join(Dir, "trigramsS.tab")),
  Tab.

%% @doc Deletes the ETS table Tab. Returns the result of ets:delete/1.
close(Tab) ->
  ets:delete(Tab).

测试:

1> c(lib_trigrams).            
{ok,lib_trigrams}
2> lib_trigrams:make_tables().
Counting - No of trigrams=3357707 time/trigram=0.07445557340172922
Ets ordered Set size=19.0346696570414 time/trigram=0.5167216794080007
Ets set size=19.033922063358563 time/trigram=0.3210524325082564
Module sets size=9.433978132884777 time/trigram=0.7445557340172921
ok
3> lib_trigrams:timer_tests().
Ets ordered Set lookup=0.2729273160851379 micro seconds
Ets set lookup=0.1233418989814396 micro seconds
Module set lookup=0.105123610858793208 micro seconds
ok
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值