riak_test/tests/repl_process_leak.erl
Micah Warren 7539ad53c8 Made test more reliable to check for leaks.
rpc or other processes will cause one or two persistent processes to start up during
the test, but the number should not increase after that. Altered the test to check for
a massive increase in processes over a larger number of faults rather than an equality
of process_count over a relatively small number of faults.
2015-01-08 14:36:32 -06:00

144 lines
5.2 KiB
Erlang

%% @doc The purpose of this test is to ensure the realtime helpers on both
%% the source and sink sides properly exit when a connection is flaky; i.e.
%% when there are errors and not outright closes of the connection.
-module(repl_process_leak).
%% Declares the riak_test behaviour; confirm/0 is the only exported function.
-behavior(riak_test).
-export([confirm/0]).
%% Provides the ?assert macro used by the process-count checks.
-include_lib("eunit/include/eunit.hrl").
%% Milliseconds to sleep before re-polling when no connection process is
%% found, and after each injected TCP error before the process count is sampled.
-define(SEND_ERROR_INTERVAL, 500).
%% @doc Test entry point. Deploys two nodes with fullsync disabled, names
%% them "source" and "sink", connects the source to the sink's cluster
%% manager port, enables and starts realtime replication, and then runs the
%% flakey-sink check followed by the flakey-source check.
%% Returns `pass' when both checks complete without crashing.
confirm() ->
    ReplSettings = [{fullsync_on_connect, false},
                    {fullsync_interval, disabled}],

    lager:info("deploying 2 nodes"),
    [Source, Sink] =
        rt:deploy_nodes(2, [{riak_repl, ReplSettings}], [riak_kv, riak_repl]),

    lager:info("nameing clusters"),
    lists:foreach(fun({Node, ClusterName}) ->
                          repl_util:name_cluster(Node, ClusterName)
                  end,
                  [{Source, "source"}, {Sink, "sink"}]),

    %% The sink's cluster manager address is read from its riak_core app env;
    %% only the port is used, the connection always goes through 127.0.0.1.
    {ok, {_Addr, MgrPort}} =
        rpc:call(Sink, application, get_env, [riak_core, cluster_mgr]),
    lager:info("connecting clusters using port ~p", [MgrPort]),
    repl_util:connect_cluster(Source, "127.0.0.1", MgrPort),
    repl_util:wait_for_connection(Source, "sink"),

    lager:info("enabling and starting realtime"),
    repl_util:enable_realtime(Source, "sink"),
    repl_util:start_realtime(Source, "sink"),

    lager:info("testing for leaks on flakey sink"),
    flakey_sink(Source, Sink),

    lager:info("testing for leaks on flakey source"),
    flakey_source(Source, Sink),

    pass.
%% @doc Injects 20 TCP errors into the sink-side realtime connection on
%% `SinkNode' and asserts that the node's process count does not keep growing.
%%
%% The process count is sampled once before the first fault and once after
%% each fault. The spread between the largest and smallest sample must not
%% exceed `MaxSpread': a couple of persistent processes (rpc and the like)
%% may appear during the run, but a process leaked on every fault would push
%% the spread towards the number of faults.
%%
%% Returns `true'; fails with an eunit assertion error if the spread is too
%% large.
flakey_sink(_SourceNode, SinkNode) ->
    MaxSpread = 2,
    InitialCount = rpc:call(SinkNode, erlang, system_info, [process_count]),
    ProcCounts = send_sink_tcp_errors(SinkNode, 20, [InitialCount]),
    Spread = lists:max(ProcCounts) - lists:min(ProcCounts),
    lager:info("sink process count spread: ~p (samples: ~p)",
               [Spread, ProcCounts]),
    %% The previous check was `2 =< Spread', which only passed when the
    %% count DID move by two or more, i.e. the opposite of a leak check.
    ?assert(Spread =< MaxSpread),
    true.
%% @doc Sends `N' fake `tcp_error' messages, one at a time, to the first
%% process reported by `riak_repl2_rtsink_conn_sup:started/0' on `SinkNode',
%% waiting after each one for that process and its helper to exit.
%%
%% `Acc' is a list of process-count samples; one sample from `SinkNode' is
%% prepended after every injected error, and the final list is returned
%% (newest sample first).
%%
%% While no sink connection process is reported the function sleeps for
%% ?SEND_ERROR_INTERVAL and polls again without consuming a fault.
%%
%% Throws a string if either the connection process or its helper is still
%% alive 10 seconds after the error was injected.
send_sink_tcp_errors(_SinkNode, 0, Acc) ->
    Acc;
send_sink_tcp_errors(SinkNode, N, Acc) ->
    case rpc:call(SinkNode, riak_repl2_rtsink_conn_sup, started, []) of
        [] ->
            timer:sleep(?SEND_ERROR_INTERVAL),
            send_sink_tcp_errors(SinkNode, N, Acc);
        [P | _] ->
            {status, P, _Module, [_PDict, _Status, _, _, Data]} =
                sys:get_status(P),
            [_Header, _Data1, {data, [{"State", StateRec}]}] = Data,
            %% The first pid held in the connection's state record is taken
            %% to be its helper process.
            [Helper | _] = [E || E <- tuple_to_list(StateRec), is_pid(E)],
            %% Both monitors are in place before the fault is injected so the
            %% 'DOWN' messages reflect the exits triggered by it.
            Mon = erlang:monitor(process, P),
            HelpMon = erlang:monitor(process, Helper),
            P ! {tcp_error, <<>>, test},
            %% Bounded wait: a connection that survives the error must fail
            %% the test instead of hanging it forever.
            receive
                {'DOWN', Mon, process, P, _} ->
                    ok
            after 10000 ->
                throw("sink connection didn't die")
            end,
            receive
                {'DOWN', HelpMon, process, Helper, _} ->
                    ok
            after 10000 ->
                throw("helper didn't die")
            end,
            %% Give the node time to settle before sampling.
            timer:sleep(?SEND_ERROR_INTERVAL),
            Procs = rpc:call(SinkNode, erlang, system_info, [process_count]),
            send_sink_tcp_errors(SinkNode, N - 1, [Procs | Acc])
    end.
%% @doc Injects 20 TCP errors into the source-side realtime connection on
%% `SourceNode' and asserts that the node's process count does not keep
%% growing.
%%
%% The process count is sampled once before the first fault and once after
%% each fault. The spread between the largest and smallest sample must not
%% exceed `MaxSpread': a couple of persistent processes (rpc and the like)
%% may appear during the run, but a process leaked on every fault would push
%% the spread towards the number of faults.
%%
%% Returns `true'; fails with an eunit assertion error if the spread is too
%% large.
flakey_source(SourceNode, _SinkNode) ->
    MaxSpread = 2,
    InitialProcCount = rpc:call(SourceNode, erlang, system_info, [process_count]),
    ProcCounts = send_source_tcp_errors(SourceNode, 20, [InitialProcCount]),
    Spread = lists:max(ProcCounts) - lists:min(ProcCounts),
    lager:info("source process count spread: ~p (samples: ~p)",
               [Spread, ProcCounts]),
    %% The previous check was `2 =< Spread', which only passed when the
    %% count DID move by two or more, i.e. the opposite of a leak check.
    ?assert(Spread =< MaxSpread),
    true.
%% @doc Sends `N' fake `tcp_error' messages, one at a time, to the process
%% listed under "sink" by `riak_repl2_rtsource_conn_sup:enabled/0' on
%% `SourceNode', waiting after each one for that process and its helper to
%% exit.
%%
%% `Acc' is a list of process-count samples; one sample from `SourceNode' is
%% prepended after every round, and the final list is returned (newest
%% sample first).
%%
%% While no "sink" entry is listed the function sleeps for
%% ?SEND_ERROR_INTERVAL and polls again without consuming a fault.
%%
%% Throws a string if either the connection process or its helper is still
%% alive 10 seconds after the error was injected.
send_source_tcp_errors(_SourceNode, 0, Acc) ->
    Acc;
send_source_tcp_errors(SourceNode, N, Acc) ->
    List = rpc:call(SourceNode, riak_repl2_rtsource_conn_sup, enabled, []),
    case proplists:get_value("sink", List) of
        undefined ->
            timer:sleep(?SEND_ERROR_INTERVAL),
            send_source_tcp_errors(SourceNode, N, Acc);
        Pid ->
            lager:debug("Get the status"),
            %% If the status call fails (for instance because Pid is already
            %% gone), fall back to a fabricated status whose state tuple holds
            %% only Pid. The helper lookup below then resolves to Pid itself
            %% and the round still completes.
            SysStatus =
                try
                    sys:get_status(Pid)
                catch
                    W:Y ->
                        lager:info("Sys failed due to ~p:~p", [W,Y]),
                        {status, Pid, undefined,
                         [undefined, undefined, undefined, undefined,
                          [undefined, undefined, {data, [{"State", {Pid}}]}]]}
                end,
            {status, Pid, _Module, [_PDict, _Status, _, _, Data]} = SysStatus,
            [_Header, _Data1, {data, [{"State", StateRec}]}] = Data,
            %% The first pid held in the connection's state record is taken
            %% to be its helper process.
            [Helper | _] = [E || E <- tuple_to_list(StateRec), is_pid(E)],
            lager:debug("mon the hlepr"),
            %% Both monitors are in place before the fault is injected so the
            %% 'DOWN' messages reflect the exits triggered by it.
            HelperMon = erlang:monitor(process, Helper),
            Mon = erlang:monitor(process, Pid),
            lager:debug("Send the murder"),
            Pid ! {tcp_error, <<>>, test},
            lager:debug("Wait for deaths"),
            %% Bounded wait: a connection that survives the error must fail
            %% the test instead of hanging it forever.
            receive
                {'DOWN', Mon, process, Pid, _} ->
                    ok
            after 10000 ->
                throw("Source connection didn't die")
            end,
            receive
                {'DOWN', HelperMon, process, Helper, _} ->
                    ok
            after 10000 ->
                throw("Helper didn't die")
            end,
            %% Give the node time to settle before sampling.
            timer:sleep(?SEND_ERROR_INTERVAL),
            Count = rpc:call(SourceNode, erlang, system_info, [process_count]),
            send_source_tcp_errors(SourceNode, N - 1, [Count | Acc])
    end.