我没有尝试过,但我建议让顶层监督者(supervisor)使用重启策略 simple_one_for_one,并以 transient 作为子规范(child spec)的重启类型,为每个进程各启动一个下级监督者。
然后,这个下级监督者使用重启策略 one_for_one,并以 permanent 作为子规范的重启类型来启动进程本身,其 maxrestarts 和 maxtime 按您的需要设置。
你的问题里有一点让我觉得奇怪:你说当某个有缺陷的子进程达到 maxrestart 时,监督者会杀死它启动的所有子进程;而我原以为 simple_one_for_one 策略只会让工作进程各自终止。
[编辑]
因为我很想验证这个想法,所以我写了几个小模块来测试它。
以下是顶层监督者的代码:
%% @doc Top-level "factory" supervisor.
%%
%% A simple_one_for_one supervisor whose dynamically added children are
%% proc_sup supervisors; each call to start_process/1 adds one of them.
-module(factory).
-behaviour(supervisor).

-export([start_link/0]).
-export([init/1, start_process/1]).

%% Child specification for a child that is itself a supervisor.
%% The shutdown value is `infinity' rather than a finite timeout: the OTP
%% supervisor documentation asks for `infinity' on children of type
%% `supervisor', because a finite timeout allows the child supervisor to be
%% killed before it has finished terminating its own children.
-define(CHILD(I, Arglist),
        {I, {I, start_link, [Arglist]}, temporary, infinity, supervisor, [I]}).

%% @doc Starts the factory supervisor, registered locally under the module
%% name so that start_process/1 can address it.
-spec start_link() -> supervisor:startlink_ret().
start_link() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).

%% @doc supervisor callback. Returns the simple_one_for_one flags (restart
%% intensity 0 within a 10 second period) and the single proc_sup child
%% template.
init([]) ->
    {ok, {{simple_one_for_one, 0, 10}, [?CHILD(proc_sup, [])]}}.

%% @doc Adds one proc_sup child. `Arglist' is appended to the template's start
%% arguments (`[[]]'), so the child is started as
%% `proc_sup:start_link([], Arglist)'.
-spec start_process(term()) -> supervisor:startchild_ret().
start_process(Arglist) ->
    supervisor:start_child(?MODULE, [Arglist]).
然后是中间层监督者的代码,它负责在进程出问题时将其重启若干次:
%% @doc Intermediate supervisor that owns a single worker.
%%
%% The worker is described by a `[Mod, Start, Args]' list and is supervised
%% one_for_one as a permanent child with restart intensity 5 in 10 seconds.
-module(proc_sup).
-behaviour(supervisor).

-export([start_link/2]).
-export([init/1]).

%% @doc Starts an unregistered proc_sup. The first argument is ignored.
%% `Arglist' is printed and then handed to init/1 wrapped in a one-element
%% list.
start_link(_Ignored, Arglist) ->
    io:format("proc_sup arg = ~p~n", [Arglist]),
    InitArg = [Arglist],
    supervisor:start_link(?MODULE, InitArg).

%% @doc supervisor callback. Expects `[[Mod, Start, Arglist]]' and builds the
%% one worker child that is started as `apply(Mod, Start, Arglist)'.
init([[Mod, Start, Arglist]]) ->
    Flags = {one_for_one, 5, 10},
    {ok, {Flags, [worker_spec(Mod, Start, Arglist)]}}.

%% Builds the permanent worker child specification, identified by `Mod'.
worker_spec(Mod, Start, Arglist) ->
    {Mod, {Mod, Start, Arglist}, permanent, 5000, worker, [Mod]}.
接着是一个用于测试该机制的小模块的代码:它可以被停止、可以接收消息,也可以设定为在一段时间后自行终止。
%% @doc Throwaway worker used to exercise the supervisors.
%%
%% Depending on its start argument it either ends by itself after a delay
%% (`{die, T}') or counts incoming messages until it receives `stop'.
-module(dumb).

-export([start_link/1, loop/2]).

%% @doc Prints the start argument, spawns a linked process running
%% `loop(Arg, init)' and returns `{ok, Pid}'.
start_link(Arg) ->
    io:format("dumb start param = ~p~n", [Arg]),
    Worker = spawn_link(?MODULE, loop, [Arg, init]),
    {ok, Worker}.

%% @doc Process body.
%% `{die, T}': wait T milliseconds without reading any message, then return ok.
%% `init' as second argument: announce the pid once, then start counting at 0.
%% Otherwise: print the current cycle and wait for the next message.
loop({die, T}, _) ->
    receive
    after T ->
        ok
    end;
loop(Arg, init) ->
    io:format("loop pid ~p with arg ~p~n", [self(), Arg]),
    loop(Arg, 0);
loop(Arg, N) ->
    io:format("loop ~p (~p) cycle ~p~n", [Arg, self(), N]),
    receive
        Msg -> handle(Msg, Arg, N)
    end.

%% `stop' ends the loop by returning an atom, so the process finishes;
%% any other message starts the next cycle.
handle(stop, _Arg, _N) ->
    'restart_:o)';
handle(_Other, Arg, N) ->
    loop(Arg, N + 1).
最后是在 shell 中执行的记录:
1> factory:start_link().
{ok,<0.37.0>}
2>
2> factory:start_process([dumb,start_link,[loop_1]]).
proc_sup arg = [dumb,start_link,[loop_1]]
dumb start param = loop_1
loop pid <0.40.0> with arg loop_1
loop loop_1 (<0.40.0>) cycle 0
{ok,<0.39.0>}
3>
3> factory:start_process([dumb,start_link,[loop_1]]).
proc_sup arg = [dumb,start_link,[loop_1]]
dumb start param = loop_1
loop pid <0.43.0> with arg loop_1
loop loop_1 (<0.43.0>) cycle 0
{ok,<0.42.0>}
4>
4> factory:start_process([dumb,start_link,[loop_2]]).
proc_sup arg = [dumb,start_link,[loop_2]]
dumb start param = loop_2
loop pid <0.46.0> with arg loop_2
loop loop_2 (<0.46.0>) cycle 0
{ok,<0.45.0>}
5>
5> pid(0, 2310, 0) ! hello.
hello
6>
6> pid(0, 40, 0) ! hello.
loop loop_1 (<0.40.0>) cycle 1
hello
7> pid(0, 40, 0) ! hello.
loop loop_1 (<0.40.0>) cycle 2
hello
8> pid(0, 40, 0) ! hello.
loop loop_1 (<0.40.0>) cycle 3
hello
9> pid(0, 43, 0) ! hello.
loop loop_1 (<0.43.0>) cycle 1
hello
10> pid(0, 43, 0) ! hello.
loop loop_1 (<0.43.0>) cycle 2
hello
11> pid(0, 40, 0) ! stop.
dumb start param = loop_1
stop
loop pid <0.54.0> with arg loop_1
loop loop_1 (<0.54.0>) cycle 0
12> pid(0, 40, 0) ! stop.
stop
13> pid(0, 54, 0) ! stop.
dumb start param = loop_1
stop
loop pid <0.57.0> with arg loop_1
loop loop_1 (<0.57.0>) cycle 0
14> pid(0, 57, 0) ! hello.
loop loop_1 (<0.57.0>) cycle 1
hello
15> factory:start_process([dumb,start_link,[{die,5}]]).
proc_sup arg = [dumb,start_link,[{die,5}]]
dumb start param = {die,5}
{ok,<0.60.0>}
16>
dumb start param = {die,5}
dumb start param = {die,5}
dumb start param = {die,5}
dumb start param = {die,5}
dumb start param = {die,5}
16> factory:start_process([dumb,start_link,[{die,50000}]]).
proc_sup arg = [dumb,start_link,[{die,50000}]]
dumb start param = {die,50000}
{ok,<0.68.0>}
17>
dumb start param = {die,50000}
17>