过程,由master的tick定时器定期往所有worker发送定时事件,systemstat和membasestat订阅这些事件然后定时上报状态
该tick守护进程只在master节点上启动
参与的类有
采集执行模块:
system_stats_collector 采集CPU,内存,磁盘情况
stats_collector 通过二进制命令往membase采集bucket数据
stats_archiver把采集的数据存储到mnesia中
stats_reader 读取模块
它们除了stats_reader外,其他的都是gen_server
辅助模块:
ns_tick 定时器守护进程,只在master运行
mb_master 逻辑模块,根据情况决定当前节点是master,candidate或者worker
mb_master_sup master节点的监控模块,把master节点需要的服务启动,如ns_tick
ns_pubsub 订阅辅助模块,采集执行体中相关的gen_server模块都是通过它从而获得订阅通知功能。
代码细节分析
mb_master判断当前节点是否成为master,并在成为master时启动相关服务
Master节点启动
%% Initialise the process and choose the role this node starts in.
%%
%% Side effects, in order:
%%   * subscribes to ns_config_events so that every {nodes_wanted, Nodes}
%%     event is forwarded to this process as a {peers, Nodes} message
%%     (all other events are ignored);
%%   * turns on trap_exit;
%%   * arms a repeating send_heartbeat message every ?HEARTBEAT_INTERVAL.
%%
%% Returns {ok, Role, #state{}} where Role is
%%   master    - the wanted-node list is exactly [node()];
%%   candidate - node() is a member of any other wanted-node list;
%%   worker    - node() is not in the wanted-node list.
%% A non-list result from ns_node_disco:nodes_wanted/0 is not handled and
%% crashes with case_clause.
init([]) ->
    Owner = self(),
    ForwardPeers =
        fun ({nodes_wanted, Nodes}, Acc) ->
                Owner ! {peers, Nodes},
                Acc;
            (_Other, Acc) ->
                Acc
        end,
    ns_pubsub:subscribe(ns_config_events, ForwardPeers, empty),
    erlang:process_flag(trap_exit, true),
    {ok, _} = timer:send_interval(?HEARTBEAT_INTERVAL, send_heartbeat),
    Me = node(),
    case ns_node_disco:nodes_wanted() of
        [Me] = Only ->
            ?log_info("I'm the only node, so I'm the master.", []),
            {ok, master, start_master(#state{last_heard=now(), peers=Only})};
        Peers when is_list(Peers) ->
            IsCandidate = lists:member(Me, Peers),
            if
                IsCandidate ->
                    %% We're a candidate
                    ?log_info("Starting as candidate. Peers: ~p", [Peers]),
                    {ok, candidate, #state{last_heard=now(), peers=Peers}};
                true ->
                    %% We're a worker, but don't know who the master is yet
                    ?log_info("Starting as worker. Peers: ~p", [Peers]),
                    {ok, worker, #state{last_heard=now()}}
            end
    end.
自动接管:
%% Heartbeat timer fired while this node is a candidate.
%%
%% Calls send_heartbeat/3 with the current peer list, then measures the time
%% elapsed since last_heard with timer:now_diff/2. When that gap has reached
%% ?TIMEOUT the node starts mb_master_sup, records the supervisor pid and its
%% own node name in the state, and moves to the master state; otherwise it
%% stays a candidate with the state unchanged.
handle_info(send_heartbeat, candidate,
            #state{peers=Peers, last_heard=LastHeard} = StateData) ->
    send_heartbeat(Peers, candidate, StateData),
    SilentFor = timer:now_diff(now(), LastHeard),
    case SilentFor < ?TIMEOUT of
        true ->
            {next_state, candidate, StateData};
        false ->
            %% Take over
            ?log_info("Haven't heard from a higher priority node or "
                      "a master, so I'm taking over.", []),
            {ok, Pid} = mb_master_sup:start_link(),
            {next_state, master, StateData#state{child=Pid, master=node()}}
    end;
ns_tick对所有的节点发送统计命令
系统运行一个tick服务(gen_server),它定义了一个定时器(timer:send_interval(Interval, tick)),在定时器触发时对所有的node发出tick事件
%% Timer tick: notify ?EVENT_MANAGER on the local node and on every node
%% returned by nodes() with {tick, Now}, where Now is the current time
%% converted by misc:time_to_epoch_ms_int/1. The same timestamp is stored in
%% the state's time field.
handle_info(tick, State) ->
    misc:verify_name(?MODULE), % MB-3180: make sure we're still registered
    Now = misc:time_to_epoch_ms_int(now()),
    Targets = [node() | nodes()],
    _ = [gen_event:notify({?EVENT_MANAGER, Node}, {tick, Now})
         || Node <- Targets],
    {noreply, State#state{time=Now}};
通用的no embed事件订阅
-module(ns_pubsub).
-behaviour(gen_event).
%% Subscribe the calling process to event manager Name: every event is
%% forwarded to the caller as a plain message. Returns the reference
%% produced by subscribe/3.
subscribe(Name) ->
    Subscriber = self(),
    Forwarder = msg_fun(Subscriber),
    subscribe(Name, Forwarder, ignored).
%% Install this module as a supervised gen_event handler on Name, passing
%% Fun and its initial State as the handler's init argument.
%%
%% The handler id is {?MODULE, Ref} with a fresh reference, so the same
%% module can be attached to one manager many times. Returns Ref; crashes
%% with badmatch if gen_event:add_sup_handler/3 does not return ok.
subscribe(Name, Fun, State) ->
    Ref = make_ref(),
    HandlerId = {?MODULE, Ref},
    InitArg = #state{func=Fun, func_state=State},
    ok = gen_event:add_sup_handler(Name, HandlerId, InitArg),
    Ref.
%% Build a subscriber fun that relays each event to Pid unchanged.
%% The fun only accepts the atom `ignored' as its state and hands the same
%% atom back, so it carries no state of its own.
msg_fun(Pid) ->
    fun (Msg, ignored = Unused) ->
            erlang:send(Pid, Msg),
            Unused
    end.
%% gen_event callback: run the stored subscriber fun on Event together with
%% its current private state, and keep whatever the fun returns as the new
%% private state.
handle_event(Event, #state{func=Callback, func_state=Before} = State) ->
    After = Callback(Event, Before),
    {ok, State#state{func_state=After}};
系统监控:依赖于portsigar(sigar system-level stats for erlang);RabbitMQ则是直接调用/usr/bin/vm_stat、/usr/sbin/prtconf,以及读取文件"/proc/meminfo"
%% Launch the sigar_port helper executable and start listening for ticks.
%%
%% The helper is opened as a binary stream port whose arg0 is
%% "portsigar for <node name>". Only when the port opens does the process
%% subscribe to ns_tick_event. If open_port raises error:enoent (the
%% executable is missing) a warning is logged, no subscription is made, and
%% the state carries port = undefined. Any other open_port failure
%% propagates.
init([]) ->
    Path = path_config:component_path(bin, "sigar_port"),
    %% arg0 must be a flat string, hence the flatten.
    Arg0 = lists:flatten(io_lib:format("portsigar for ~s", [node()])),
    PortOpts = [stream, use_stdio, exit_status, binary, eof, {arg0, Arg0}],
    Port =
        try open_port({spawn_executable, Path}, PortOpts) of
            Opened ->
                %% Not covered by the catch below: only open_port is.
                ns_pubsub:subscribe(ns_tick_event),
                Opened
        catch
            error:enoent ->
                ?log_warning("~s is missing. Will not collect system-level stats", [Path]),
                undefined
        end,
    {ok, #state{port = Port}}.
%% Tick handler: take one system-stats sample from the sigar port and
%% publish it.
%%
%% Steps:
%%   1. flush_ticks(0) is called first and a warning is logged when it
%%      reports a non-zero count (presumably ticks that piled up in the
%%      mailbox -- confirm against flush_ticks/1).
%%   2. A 32-bit zero is written to the port and ?STATS_BLOCK_SIZE is passed
%%      to recv_data/3 to read the reply.
%%   3. unpack_data/2 turns the reply into {Stats, NewPrevSample}.
%%   4. Unless Stats is undefined, the sorted stats are published on
%%      ns_stats_event under the "@system" key with the tick's timestamp.
%%
%% The new state is built from scratch with the same port and the new
%% previous sample.
handle_info({tick, TS}, #state{port = Port, prev_sample = PrevSample}) ->
    case flush_ticks(0) of
        0 -> ok;
        N -> ?log_warning("lost ~p ticks", [N])
    end,
    port_command(Port, <<0:32/native>>),
    Binary = recv_data(Port, [], ?STATS_BLOCK_SIZE),
    {Stats0, NewPrevSample} = unpack_data(Binary, PrevSample),
    case Stats0 of
        undefined ->
            %% Nothing to publish for this tick.
            ok;
        _ ->
            %% Sort exactly once; the original sorted the list twice.
            gen_event:notify(ns_stats_event,
                             {stats, "@system",
                              #stat_entry{timestamp = TS,
                                          values = lists:sort(Stats0)}})
    end,
    {noreply, #state{port = Port, prev_sample = NewPrevSample}};
协议解析
%% Decode one stats block and derive per-interval values.
%%
%% Bin layout (all fields native-endian): a 32-bit version, a 32-bit struct
%% size, then ten 64-bit counters: cpu local ms, cpu idle ms, swap total,
%% swap used, swap page-in, swap page-out, mem total, mem used, mem actual
%% used, mem actual free. The two swap page counters are read but dropped.
%% The struct size must equal the byte size of Bin and the version must be
%% 0; anything else crashes with badmatch.
%%
%% PrevSample is either undefined or the raw list returned by a previous
%% call; it must then contain cpu_local_ms and cpu_idle_ms entries.
%%
%% Returns {Derived, Raw}:
%%   Raw     - the eight kept counters as a {Key, Value} list;
%%   Derived - undefined when PrevSample is undefined; otherwise Raw with
%%             the two cpu counters replaced by their difference from
%%             PrevSample, prefixed with mem_free (total - used) and
%%             cpu_utilization_rate (a percentage, or 0 when the local-ms
%%             difference is zero).
unpack_data(Bin, PrevSample) ->
    <<Version:32/native,
      StructSize:32/native,
      CpuLocal:64/native,
      CpuIdle:64/native,
      SwapTotal:64/native,
      SwapUsed:64/native,
      _SwapPageIn:64/native,
      _SwapPageOut:64/native,
      MemTotal:64/native,
      MemUsed:64/native,
      MemActualUsed:64/native,
      MemActualFree:64/native>> = Bin,
    StructSize = byte_size(Bin),
    Version = 0,
    %% Entries that are reported as-is in both the raw and derived lists.
    %% swap_page_in / swap_page_out are intentionally not reported.
    Passthrough = [{swap_total, SwapTotal},
                   {swap_used, SwapUsed},
                   {mem_total, MemTotal},
                   {mem_used, MemUsed},
                   {mem_actual_used, MemActualUsed},
                   {mem_actual_free, MemActualFree}],
    Raw = [{cpu_local_ms, CpuLocal}, {cpu_idle_ms, CpuIdle} | Passthrough],
    Derived =
        if
            PrevSample =:= undefined ->
                undefined;
            true ->
                {_, PrevLocal} = lists:keyfind(cpu_local_ms, 1, PrevSample),
                {_, PrevIdle} = lists:keyfind(cpu_idle_ms, 1, PrevSample),
                LocalDelta = CpuLocal - PrevLocal,
                IdleDelta = CpuIdle - PrevIdle,
                %% Division by a zero delta raises badarith; report 0 then.
                Rate = try
                           100 * (LocalDelta - IdleDelta) / LocalDelta
                       catch
                           error:badarith -> 0
                       end,
                [{mem_free, MemTotal - MemUsed},
                 {cpu_utilization_rate, Rate},
                 {cpu_local_ms, LocalDelta},
                 {cpu_idle_ms, IdleDelta}
                 | Passthrough]
        end,
    {Derived, Raw}.
stats_collector
连接本地memcached采集memcached状态数据。
持久化,stats_archiver
%% Persist one stats sample and announce it.
%%
%% This clause only matches when the bucket in the message equals the bucket
%% held in the state. The sample is written to the table returned by
%% table(Bucket, minute) inside an mnesia transaction that is retried up to
%% ?RETRIES times; anything other than {atomic, ok} crashes with badmatch.
%% Afterwards {sample_archived, Bucket, Sample} is published on
%% ns_stats_event. The state is returned unchanged.
do_handle_info({stats, Bucket, Sample}, #state{bucket=Bucket} = State) ->
    MinuteTab = table(Bucket, minute),
    Store = fun () -> mnesia:write(MinuteTab, Sample, write) end,
    {atomic, ok} = mnesia:transaction(Store, ?RETRIES),
    gen_event:notify(ns_stats_event, {sample_archived, Bucket, Sample}),
    {noreply, State};
stats_reader读取数据
%% Read recent samples for Bucket/Period and collapse them into Step-sized
%% groups.
%%
%% The newest key in the table is taken as the reference point. Only entries
%% whose timestamp is greater than
%%     NewestKey - (N * Step) * 1000 + 500
%% are read (the *1000 suggests keys are milliseconds and Step is seconds;
%% the +500 looks like half-second slack -- NOTE(review): confirm both).
%%
%% Entries are folded in table-traversal order. Each timestamp is mapped to
%% a slot by misc:trunc_ts(Timestamp, Step); consecutive entries with the
%% same slot are collected into one chunk, and each finished chunk is passed
%% to avg(Slot, Chunk) (defined elsewhere).
%%
%% Returns
%%   {ok, []}           - the table is empty or no entry passed the filter;
%%   {ok, Results}      - one avg/2 result per chunk, in fold order;
%%   {error, _, _}      - passed through unchanged from the qlc fold.
resample(Bucket, Period, Step, N) ->
    Seconds = N * Step,
    Tab = stats_archiver:table(Bucket, Period),
    case mnesia:dirty_last(Tab) of
        '$end_of_table' ->
            {ok, []};
        Newest ->
            Cutoff = Newest - Seconds * 1000 + 500,
            Recent = qlc:q([Entry || #stat_entry{timestamp=Stamp} = Entry
                                         <- mnesia:table(Tab),
                                     Stamp > Cutoff]),
            %% Accumulator: {CurrentSlot, FinishedAverages, CurrentChunk}.
            %% Clause order matters: "same slot" must be tried before the
            %% "very first entry" and "slot changed" cases.
            Group =
                fun (#stat_entry{timestamp = At} = Item,
                     {Slot, Finished, Chunk}) ->
                        case misc:trunc_ts(At, Step) of
                            Slot ->
                                {Slot, Finished, [Item | Chunk]};
                            Next when Slot == undefined ->
                                {Next, Finished, [Item]};
                            Next ->
                                {Next, [avg(Slot, Chunk) | Finished], [Item]}
                        end
                end,
            FoldArgs = [Group, {undefined, [], []}, Recent],
            case mnesia:activity(async_dirty, fun qlc:fold/3, FoldArgs) of
                {error, _, _} = Error ->
                    Error;
                {undefined, [], []} ->
                    {ok, []};
                {LastSlot, Averages, LastChunk} ->
                    %% The final chunk is still open after the fold; close
                    %% it before restoring fold order.
                    {ok, lists:reverse([avg(LastSlot, LastChunk) | Averages])}
            end
    end.