2014-12-30 22:37:23 +00:00
|
|
|
%% @doc The purpose of this test is to ensure the realtime helpers on both
|
|
|
|
%% the source and sink sides properly exit when a connection is flakey; ie
|
|
|
|
%% when there are errors and not outright closes of the connection.
|
|
|
|
|
|
|
|
-module(repl_process_leak).
|
|
|
|
-behavior(riak_test).
|
|
|
|
-export([confirm/0]).
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
|
|
|
|
-define(SEND_ERROR_INTERVAL, 500).
|
|
|
|
|
|
|
|
%% @doc riak_test entry point.
%%
%% Deploys two nodes with fullsync disabled, names them "source" and "sink",
%% connects the source to the sink's cluster manager port, enables and starts
%% realtime replication, and then runs the flakey-sink and flakey-source
%% checks in that order. Returns `pass' when both checks complete.
confirm() ->
    %% Fullsync is switched off so only realtime replication is in play.
    ReplSettings = [
        {fullsync_on_connect, false},
        {fullsync_interval, disabled}
    ],
    NodeConfig = [{riak_repl, ReplSettings}],

    lager:info("deploying 2 nodes"),
    [SourceNode, SinkNode] = rt:deploy_nodes(2, NodeConfig, [riak_kv, riak_repl]),

    lager:info("nameing clusters"),
    repl_util:name_cluster(SourceNode, "source"),
    repl_util:name_cluster(SinkNode, "sink"),

    %% Only the port of the sink's cluster manager is used; the connection
    %% below always targets 127.0.0.1.
    {ok, {_SinkIP, SinkPort}} =
        rpc:call(SinkNode, application, get_env, [riak_core, cluster_mgr]),

    lager:info("connecting clusters using port ~p", [SinkPort]),
    repl_util:connect_cluster(SourceNode, "127.0.0.1", SinkPort),
    repl_util:wait_for_connection(SourceNode, "sink"),

    lager:info("enabling and starting realtime"),
    repl_util:enable_realtime(SourceNode, "sink"),
    repl_util:start_realtime(SourceNode, "sink"),

    lager:info("testing for leaks on flakey sink"),
    flakey_sink(SourceNode, SinkNode),

    lager:info("testing for leaks on flakey source"),
    flakey_source(SourceNode, SinkNode),

    pass.
|
2014-12-30 22:37:23 +00:00
|
|
|
|
|
|
|
%% @doc Injects 20 simulated TCP errors into the sink-side realtime
%% connection and checks the spread of the process counts sampled on
%% `SinkNode' (one sample before the first error, one after each error).
%%
%% The helper's death is verified inside send_sink_tcp_errors/3; here only
%% the process counts are examined. Strict equality between the first and
%% last count is deliberately not asserted.
flakey_sink(_SourceNode, SinkNode) ->
    Baseline = rpc:call(SinkNode, erlang, system_info, [process_count]),
    Samples = send_sink_tcp_errors(SinkNode, 20, [Baseline]),
    {Smallest, Biggest} = {lists:min(Samples), lists:max(Samples)},
    %% NOTE(review): this passes only when the counts differ by at least 2,
    %% i.e. it expects the process count to move. That looks inverted for a
    %% leak check -- confirm whether `Biggest - Smallest =< 2' was intended.
    ?assert(2 =< Biggest - Smallest),
    true.
|
2014-12-30 22:37:23 +00:00
|
|
|
|
2015-01-08 20:36:32 +00:00
|
|
|
%% @doc Sends `N' simulated `tcp_error' messages, one at a time, to the
%% realtime sink connection process on `SinkNode'.
%%
%% For each error it waits for both the connection process and its helper to
%% exit, then samples the node's process count and prepends it to `Acc'.
%% Returns the accumulated samples (most recent first) once `N' reaches 0.
%%
%% Throws a string if the connection process or its helper has not exited
%% within 10 seconds of the error being sent.
send_sink_tcp_errors(_SinkNode, 0, Acc) ->
    Acc;
send_sink_tcp_errors(SinkNode, N, Acc) ->
    case rpc:call(SinkNode, riak_repl2_rtsink_conn_sup, started, []) of
        [] ->
            %% No sink connection is running yet: wait and retry without
            %% using up one of the N attempts.
            timer:sleep(?SEND_ERROR_INTERVAL),
            send_sink_tcp_errors(SinkNode, N, Acc);
        [P | _] ->
            %% Dig the connection's state tuple out of the sys status.
            SysStatus = sys:get_status(P),
            {status, P, _Modul, [_PDict, _Status, _, _, Data]} = SysStatus,
            [_Header, _Data1, Data2] = Data,
            {data, [{"State", StateRec}]} = Data2,
            %% The first pid found in the state tuple is taken to be the
            %% helper process (depends on the connection's state layout).
            [Helper | _] = [E || E <- tuple_to_list(StateRec), is_pid(E)],
            HelpMon = erlang:monitor(process, Helper),
            P ! {tcp_error, <<>>, test},
            %% Monitoring after the send is safe: if P is already gone the
            %% monitor delivers an immediate 'DOWN'.
            Mon = erlang:monitor(process, P),
            %% Bounded wait so a connection that ignores the error fails the
            %% test instead of hanging it indefinitely.
            receive
                {'DOWN', Mon, process, P, _} ->
                    ok
            after 10000 ->
                throw("connection didn't die")
            end,
            receive
                {'DOWN', HelpMon, process, Helper, _} ->
                    ok
            after 10000 ->
                throw("helper didn't die")
            end,
            %% Pause before sampling the process count for this round.
            timer:sleep(?SEND_ERROR_INTERVAL),
            Procs = rpc:call(SinkNode, erlang, system_info, [process_count]),
            send_sink_tcp_errors(SinkNode, N - 1, [Procs | Acc])
    end.
|
|
|
|
|
|
|
|
%% @doc Injects 20 simulated TCP errors into the source-side realtime
%% connection and checks the spread of the process counts sampled on
%% `SourceNode' (one sample before the first error, one after each error).
%%
%% The helper's death is verified inside send_source_tcp_errors/3; here only
%% the process counts are examined. Strict equality between the first and
%% last count is deliberately not asserted.
flakey_source(SourceNode, _SinkNode) ->
    Baseline = rpc:call(SourceNode, erlang, system_info, [process_count]),
    Samples = send_source_tcp_errors(SourceNode, 20, [Baseline]),
    {Biggest, Smallest} = {lists:max(Samples), lists:min(Samples)},
    %% NOTE(review): this passes only when the counts differ by at least 2,
    %% i.e. it expects the process count to move. That looks inverted for a
    %% leak check -- confirm whether `Biggest - Smallest =< 2' was intended.
    ?assert(2 =< Biggest - Smallest),
    true.
|
2014-12-30 22:37:23 +00:00
|
|
|
|
2015-01-08 20:36:32 +00:00
|
|
|
%% @doc Sends `N' simulated `tcp_error' messages, one at a time, to the
%% realtime source connection for the "sink" cluster on `SourceNode'.
%%
%% For each error it waits for both the connection process and its helper to
%% exit, then samples the node's process count and prepends it to `Acc'.
%% Returns the accumulated samples (most recent first) once `N' reaches 0.
%%
%% Throws a string if the connection process or its helper has not exited
%% within 10 seconds of the error being sent.
send_source_tcp_errors(_SourceNode, 0, Acc) ->
    Acc;
send_source_tcp_errors(SourceNode, N, Acc) ->
    List = rpc:call(SourceNode, riak_repl2_rtsource_conn_sup, enabled, []),
    case proplists:get_value("sink", List) of
        undefined ->
            %% No source connection for "sink" yet: wait and retry without
            %% using up one of the N attempts.
            timer:sleep(?SEND_ERROR_INTERVAL),
            send_source_tcp_errors(SourceNode, N, Acc);
        Pid ->
            lager:debug("Get the status"),
            SysStatus =
                try sys:get_status(Pid) of
                    S -> S
                catch
                    W:Y ->
                        lager:info("Sys failed due to ~p:~p", [W, Y]),
                        %% Fabricate a status whose state holds only Pid, so
                        %% the code below treats the connection itself as
                        %% the helper and still waits for it to go down.
                        {status, Pid, undefined,
                         [undefined, undefined, undefined, undefined,
                          [undefined, undefined, {data, [{"State", {Pid}}]}]]}
                end,
            {status, Pid, _Module, [_PDict, _Status, _, _, Data]} = SysStatus,
            [_Header, _Data1, Data2] = Data,
            {data, [{"State", StateRec}]} = Data2,
            %% The first pid found in the state tuple is taken to be the
            %% helper process (depends on the connection's state layout).
            [Helper | _] = [E || E <- tuple_to_list(StateRec), is_pid(E)],
            lager:debug("mon the hlepr"),
            HelperMon = erlang:monitor(process, Helper),
            lager:debug("Send the murder"),
            Pid ! {tcp_error, <<>>, test},
            %% Monitoring after the send is safe: if Pid is already gone the
            %% monitor delivers an immediate 'DOWN'.
            Mon = erlang:monitor(process, Pid),
            lager:debug("Wait for deaths"),
            %% Bounded wait so a connection that ignores the error fails the
            %% test instead of hanging it indefinitely.
            receive
                {'DOWN', Mon, process, Pid, _} ->
                    ok
            after 10000 ->
                throw("Connection didn't die")
            end,
            receive
                {'DOWN', HelperMon, process, Helper, _} ->
                    ok
            after 10000 ->
                throw("Helper didn't die")
            end,
            %% Pause before sampling the process count for this round.
            timer:sleep(?SEND_ERROR_INTERVAL),
            Count = rpc:call(SourceNode, erlang, system_info, [process_count]),
            send_source_tcp_errors(SourceNode, N - 1, [Count | Acc])
    end.
|
|
|
|
|