mnesia 的 disc 单表有 4GB 的大小限制,因此在项目开始之初就要评估数据量大小,避免以后修改、扩容的麻烦。而且使用了 mnesia 分片之后,读写将不能再直接使用 mnesia:read、mnesia:write,需要统一用 mnesia:activity 来包装,并声明使用 mnesia_frag 作为访问模块。
mnesia 默认使用 Linear Hashing(线性哈希)算法进行分片的管理与查询。
核心代码mnesia_frag_hash.erl:
%% Build the initial hash state for a freshly fragmented table:
%% one fragment, split pointer at fragment 1, zero doubling rounds,
%% and erlang:phash2 as the key-hashing function.
init_state(_Tab, undefined) ->
    #hash_state{function        = phash2,
                n_fragments     = 1,
                next_n_to_split = 1,
                n_doubles       = 0}.
%% Add one fragment using linear hashing: fragment SplitN is split and
%% roughly half of its keys migrate to the brand-new fragment NewN.
%% Returns {NewState, [FragToSplit], [NewFrag]}.
add_frag(#hash_state{next_n_to_split = SplitN, n_doubles = L, n_fragments = N} = State) ->
P = SplitN + 1,
NewN = N + 1,
State2 = case power2(L) + 1 of
%% Split pointer has walked past the last fragment of the current
%% round (2^L fragments): start a new doubling round — increment
%% n_doubles and reset the split pointer to fragment 1.
P2 when P2 == P ->
State#hash_state{n_fragments = NewN,
n_doubles = L + 1,
next_n_to_split = 1};
%% Still inside the current round: just advance the split pointer.
_ ->
State#hash_state{n_fragments = NewN,
next_n_to_split = P}
end,
{State2, [SplitN], [NewN]}.
%% Remove the last fragment (number N): ALL of its keys are merged back
%% into fragment MergeN — the exact inverse of the split that created it.
%% Returns {NewState, [FragToDelete], [FragReceivingData]}.
del_frag(#hash_state{next_n_to_split = SplitN, n_doubles = L, n_fragments = N} = State) ->
P = SplitN - 1,
if
%% Split pointer is already at the start of the round: step back to
%% the previous doubling round; the merge target is the last
%% fragment of that smaller round, i.e. 2^(L-1).
P < 1 ->
L2 = L - 1,
MergeN = power2(L2),
State2 = State#hash_state{n_fragments = N - 1,
next_n_to_split = MergeN,
n_doubles = L2},
{State2, [N], [MergeN]};
%% Otherwise move the split pointer back one step; fragment P
%% absorbs the deleted fragment's data.
true ->
MergeN = P,
State2 = State#hash_state{n_fragments = N - 1,
next_n_to_split = MergeN},
{State2, [N], [MergeN]}
end.
%% Map a key to its fragment number under linear hashing: hash into the
%% next-round slot space of 2^(L+1) buckets; slots beyond the fragments
%% that actually exist fold back onto their not-yet-split parent (2^L
%% positions earlier).
key_to_frag_number(#hash_state{function = phash2, n_fragments = N, n_doubles = L}, Key) ->
    Slot = erlang:phash2(Key, power2(L + 1)) + 1,
    case Slot > N of
        true  -> Slot - power2(L);
        false -> Slot
    end.
按照上述代码,下表列出从 1 个分片逐步扩充到 8 个分片的过程中,每一步之后各状态变量的值,以及该次扩充中哪个分片被分裂(split_frag)、数据迁往哪个新分片(write_frag):
n_fragments | next_n_to_split | n_doubles | split_frag(分离分片) | write_frag(新增分片) |
---|---|---|---|---|
1 | 1 | 0 | ||
2 | 1 | 1 | 1 | 2 |
3 | 2 | 1 | 1 | 3 |
4 | 1 | 2 | 2 | 4 |
5 | 2 | 2 | 1 | 5 |
6 | 3 | 2 | 2 | 6 |
7 | 4 | 2 | 3 | 7 |
8 | 1 | 3 | 4 | 8 |
根据源码,我们可以总结出
- 每次只增加或减少一个分片
- 扩充时受影响分片中有将近一半数据迁到新分片中
- 缩减时一个分片中的数据都迁到另一个分片中
- 在大多数情况下,各分片中数据数量不均衡。因此推荐分片数量为2的N次方个,刚好分出一半数据来做分片,数据能均衡分布。