Erlang/OTP 的监督者(supervisor)支持多种重启策略,包括 one_for_one(一对一)、one_for_all(一对多)、rest_for_one,以及 simple_one_for_one(简单一对一)。不同的监督策略适用于不同的故障处理场景。下面说明前三种策略。
one_for_one
如果一个子进程终止了,仅该进程被重启。
one_for_all
如果一个子进程终止了,那么所有其他的子进程都会被终止,然后所有的子进程(包括最先终止的那个)都被重启。
rest_for_one
如果一个子进程终止了,那么后面的子进程——即在启动顺序上在这个终止了的进程后面的子进程——都被终止。然后该终止的进程和后面的子进程都被重启。
-module(sellaprime_supervisor).
-behaviour(supervisor). % see erl -man supervisor
-export([start/0, start_in_shell_for_testing/0, start_link/1, init/1]).
%% @doc Starts the supervisor from a throw-away helper process and returns
%% the helper's pid.
%%
%% `supervisor:start_link/3' links the new supervisor to the process that
%% calls it, and a supervisor shuts down as soon as that parent exits, even
%% with reason `normal'. The helper therefore unlinks from the supervisor
%% before it finishes, so the supervisor outlives it.
%%
%% If the supervisor cannot be started, the helper crashes with `badmatch'
%% (visible in the error log); the caller still receives the helper's pid.
start() ->
    spawn(fun() ->
        {ok, SupPid} = supervisor:start_link({local, ?MODULE}, ?MODULE, _Arg = []),
        unlink(SupPid)
    end).
%% @doc Starts the supervisor, registered locally under the module name, and
%% then removes the link between it and the calling process.
%%
%% Presumably meant to be run from the Erlang shell so that the supervisor
%% is not tied to the fate of the shell process -- confirm against usage.
%% Crashes with `badmatch' if the supervisor does not start.
%% Returns `true' (the result of `unlink/1').
start_in_shell_for_testing() ->
    SupName = {local, ?MODULE},
    {ok, SupPid} = supervisor:start_link(SupName, ?MODULE, []),
    unlink(SupPid).
%% @doc Starts the supervisor linked to the caller and registers it locally
%% under the module name. `Args' is handed unchanged to `init/1'.
%% Returns whatever `supervisor:start_link/3' returns.
start_link(Args) ->
    SupName = {local, ?MODULE},
    supervisor:start_link(SupName, ?MODULE, Args).
%% @doc Supervisor callback. Swaps the standard `alarm_handler' event handler
%% for `my_alarm_handler' and returns the supervision specification.
%%
%% Only the empty argument list is accepted; any other argument crashes with
%% `function_clause'.
%%
%% Restart strategy: `one_for_all' -- when one child terminates, all children
%% are terminated and restarted. At most 3 restarts are tolerated within
%% 10 seconds before the supervisor itself gives up.
%%
%% Both children are permanent workers with a 10000 ms shutdown timeout.
init([]) ->
    gen_event:swap_handler(
        alarm_handler,
        {alarm_handler, swap},
        {my_alarm_handler, xyz}
    ),
    RestartStrategy = {one_for_all, 3, 10},
    %% The last element of a child spec names the callback module(s) of the
    %% child; it is consulted during release upgrades. It must match the
    %% module that is actually started, not an unrelated one.
    TcpServer =
        {tag1, {tcp_server, start, []}, permanent, 10000, worker, [tcp_server]},
    %% NOTE(review): start_tcp_client is not visible here; [start_tcp_client]
    %% assumes it is its own callback module -- confirm.
    TcpClient =
        {tag2, {start_tcp_client, start_link, []}, permanent, 10000, worker,
            [start_tcp_client]},
    {ok, {RestartStrategy, [TcpServer, TcpClient]}}.
-module(tcp_server).
-author("chen").
-behaviour(gen_server).
-export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3, start/0]).
-define(PORT,8003).
-record(state, {socket}).
%% @doc Starts the TCP listener as a gen_server linked to the caller and
%% registered locally under the module name. The port number `?PORT' is
%% passed to `init/1' as its argument.
start() ->
    ServerName = {local, ?MODULE},
    gen_server:start_link(ServerName, ?MODULE, ?PORT, []).
%% @doc gen_server callback. Opens a listening socket on `Port' and queues a
%% `{to_accept, ListenSocket}' message to itself so that accepting starts
%% once initialisation has returned.
%%
%% The socket is opened in binary mode with a 4-byte length header per
%% packet, active delivery, and address reuse. Crashes with `badmatch' if
%% the port cannot be opened.
init(Port) ->
    ListenOpts = [binary, {packet, 4}, {active, true}, {reuseaddr, true}],
    {ok, ListenSocket} = gen_tcp:listen(Port, ListenOpts),
    self() ! {to_accept, ListenSocket},
    io:format("start tcp_server ~n"),
    {ok, #state{socket = ListenSocket}}.
%% @doc Waits up to one second for a client to connect on `LSocket'.
%%
%% When a client connects, a linked `server_rec' gen_server is started with
%% the new socket as its init argument and is made the socket's controlling
%% process. Returns `ok' both after a connection was handed over and after
%% the wait timed out. Any other accept error crashes the caller.
%%
%% The wait is bounded so that the calling gen_server gets back to its
%% message loop regularly instead of being blocked forever.
do_accept(LSocket) ->
    case gen_tcp:accept(LSocket, 1000) of
        {ok, Socket} ->
            io:fwrite("Socket connected: ~w ~n", [Socket]),
            {ok, Pid} = gen_server:start_link(server_rec, Socket, []),
            %% The result is left unchecked, as before: a failed handoff
            %% (for instance when the peer has already gone away) should
            %% not take the listener down.
            gen_tcp:controlling_process(Socket, Pid),
            ok;
        {error, timeout} ->
            ok
    end.

%% @doc gen_server callback for out-of-band messages.
%%
%% `{to_accept, LSocket}' performs one bounded accept attempt and then
%% re-queues the same message, so the server keeps accepting connections
%% while still being able to serve calls (such as `stop') and system
%% messages between attempts.
%%
%% Any other message is logged and dropped so that it neither piles up in
%% the mailbox nor crashes the listener.
handle_info({to_accept, LSocket}, State) ->
    ok = do_accept(LSocket),
    self() ! {to_accept, LSocket},
    {noreply, State};
handle_info(Unexpected, State) ->
    error_logger:warning_msg("tcp_server: unexpected message ~p~n", [Unexpected]),
    {noreply, State}.
%% @doc gen_server callback for synchronous requests. Only `stop' is
%% supported: the caller receives `stopped' and the server terminates with
%% reason `normal', keeping its state unchanged. Any other request crashes
%% with `function_clause'.
handle_call(stop, _Caller, State) ->
    {stop, normal, stopped, State}.
%% @doc gen_server callback for asynchronous requests. Every cast is ignored
%% and the state is returned unchanged.
handle_cast(_Request, Unchanged) ->
    {noreply, Unchanged}.
%% @doc gen_server callback invoked on shutdown. Performs no cleanup of its
%% own and always returns `ok'.
terminate(_Why, _FinalState) ->
    ok.
%% @doc gen_server callback for hot code upgrades. No state conversion is
%% needed, so the current state is returned as-is.
code_change(_FromVsn, Current, _Extras) ->
    {ok, Current}.
启动监督者之后,它会同时启动两个子进程(tcp_server 和 start_tcp_client)。
客户端发送消息,TCP 服务端接收消息后崩溃。
由于采用了 one_for_all 策略,监督者会把所有子进程重新启动,服务随之恢复。