%% riak_test/tests/proxy_overload_recovery.erl
%%
%% QuickCheck test to verify that the vnode proxy signals overload once the
%% mailbox estimate crosses the overload threshold, and that it recovers
%% from the overload state once the vnode has processed its message queue.
%% In the real system, messages are also sent to the vnode without going
%% through the proxy, so this is modelled as well.
%%
%% Properties -
%% 1) If the vnode message queue has cleared, the vnode proxy should not
%% be in overload.
%% 2) If the proxy has been sent more messages than the overload threshold
%% while the vnode is suspended, it should be in overload.
%% 3) If the proxy has been sent no more messages than the overload threshold,
%% it should not be in overload.
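%%
%% For example, with a threshold of 10: suspend the vnode, send 11 messages
%% via the proxy and the proxy must report overload (property 2); resume the
%% vnode, let the queues drain, and it must not (property 1).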
%%
%% Whenever the vnode is resumed, the test checks the message queues are
%% cleared and the model is reset. The main goal is to advance the proxy
%% into interesting new states.
%%
%% This test can be run outside of riak_test while working on it.
%% Symlink the source into a release build and run
%% c(proxy_overload_recovery).
%% proxy_overload_recovery:run(300). % Run for 5 mins
%%
%% On failure you can re-run counterexamples *and* print out the internal
%% model states during the run.
%% proxy_overload_recovery:check().
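%%
%% To re-run the stored counterexample exactly as it failed:
%% proxy_overload_recovery:recheck().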
%%
%% TODO/Questions:
%% 1) Is there a better way to do the initialization step?
%% 2) Remove the riak_kv dependency so it's a pure riak_core test.
-module(proxy_overload_recovery).
-behaviour(riak_test).
-ifdef(EQC).
-include_lib("eqc/include/eqc.hrl").
-include_lib("eqc/include/eqc_statem.hrl").
-include_lib("eunit/include/eunit.hrl").
-export([confirm/0]).
-compile(export_all).

confirm() ->
    %% disable AAE to prevent vnode start delay opening AAE leveldb
    Opts = [{riak_kv, [{anti_entropy, {off, []}}]}],
    Nodes = [Node1] = rt:deploy_nodes(1, Opts),
    case setup_eqc(Nodes) of
        ok ->
            %% Distribute this code to the remote node and add EQC
            rt:load_modules_on_nodes([?MODULE], Nodes),
            TestingTime = rt_config:get(eqc_testing_time, 120),
            lager:info("Running vnode proxy overload property for ~p seconds\n",
                       [TestingTime]),
            ?assertEqual(true, rpc:call(Node1, ?MODULE, rtrun, [TestingTime])),
            pass;
        _ ->
            lager:warning("EQC was unavailable on this machine - PASSing "
                          "the test, but it did not run. Wish we could skip.\n",
                          []),
            pass
    end.

-record(tstate, {
          rt,                    % Runtime information - either symbolic state or #rt{} record
          vnode_running = true,  % true if running, false if suspended
          proxy_msgs = 0,        % messages proxied while vnode suspended
          direct_msgs = 0,       % messages sent directly while vnode suspended
          threshold}).           % threshold for overload

-record(rt, {
          id,     % Partition id being used for the test
          pl,     % Preflist to send messages via the proxy
          ppid,   % Proxy pid
          vpid}). % Vnode pid

%% Riak test run - to be executed on the node
rtrun(Seconds) ->
    case run(Seconds) of
        true ->
            true;
        Res ->
            %% Hopefully rpc will set the group leader to r_t so we can see
            io:format("=== FAILED - running check to see if it was "
                      "transient and print model states ===\n", []),
            check(),
            Res
    end.

%%
%% Runners - on failure it may be helpful to connect to the node and run
%% check / recheck to verify a fix or try reshrinking.
%%
run() ->
    run(60).

run(Seconds) ->
    eqc:quickcheck(eqc:testing_time(Seconds, ?MODULE:prop_proxy_recovery())).

trap_run(Seconds) ->
    eqc:quickcheck(eqc:testing_time(Seconds,
                                    ?TRAPEXIT(?MODULE:prop_proxy_recovery()))).

check() ->
    eqc:check(eqc_statem:show_states(?MODULE:prop_proxy_recovery())).

recheck() ->
    eqc:recheck(?MODULE:prop_proxy_recovery()).

cover(Seconds) ->
    cover:start(),
    cover:compile(?MODULE),
    eqc:quickcheck(eqc:testing_time(Seconds, ?MODULE:prop_proxy_recovery())),
    cover:analyze_to_file(?MODULE),
    cover:stop().

%%
%% Quickcheck Property - set up and run the statem
%%
prop_proxy_recovery() ->
    ?FORALL(Cmds, commands(?MODULE),
            begin
                Start = os:timestamp(),
                {H, S, Res} = run_commands(?MODULE, Cmds),
                End = os:timestamp(),
                Msecs = timer:now_diff(End, Start) div 1000,
                ?WHENFAIL(
                   begin
                       RT = S#tstate.rt,
                       case RT of
                           undefined ->
                               ok;
                           #rt{ppid = PPid, vpid = VPid} ->
                               eqc:format("\n\nPPid state = ~w\n",
                                          [catch sys:get_status(PPid)]),
                               eqc:format("Vnode MsgQLen = ~p\n",
                                          [catch msgq_len(VPid)])
                       end
                   end,
                   measure(duration, Msecs,
                           aggregate(with_title("Commands"), command_names(Cmds),
                                     pretty_commands(?MODULE, Cmds, {H, S, Res},
                                                     Res == ok))))
            end).
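
%% Note: measure/3 reports the distribution of per-test durations and
%% aggregate/2 tallies command-name frequencies; both are standard EQC
%% reporting wrappers around the pass/fail result.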

%%
%% Generators
%%
pos_int() ->
    ?LET(X, int(), 1 + abs(X)).

%% Generate the threshold seed - see get_params/1
threshold_seed() ->
    frequency([%% test proxy disabled - 1%
               {1, undefined},
               %% test with all parameters given
               {4, {choose(1, 25), choose(1, 25), choose(1, 50)}},
               %% test with the interval and threshold
               {5, {choose(1, 50), choose(1, 50)}},
               %% just provide a threshold, defaults for others
               {90, choose(4, 100)}]).
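
%% For example, a two-element seed {5, 10} maps via get_params/1 to
%% {unset, 5, 15}: the request interval is left for the proxy to
%% auto-configure, the check interval is 5 and the overload threshold 15.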

%%
%% initial_state
%%
initial_state() ->
    #tstate{}.

%%
%% Run prepare once before any other calls and then never again.
%%
precondition_common(#tstate{rt = undefined}, {call, _M, F, _A}) ->
    F == prepare;
precondition_common(_, {call, _M, F, _A}) ->
    F /= prepare.

%% %% Make sure we're still running what we think we're running - uncomment
%% %% if having process death issues
%% invariant(#tstate{rt = undefined}) ->
%%     true;
%% invariant(#tstate{rt = #rt{id = Index, ppid = PPid, vpid = VPid}}) ->
%%     RegName = riak_core_vnode_proxy:reg_name(riak_kv_vnode, Index),
%%     PPid = whereis(RegName), % Check it is the process we think it is.
%%     true = is_process_alive(PPid),
%%     true = is_process_alive(VPid),
%%     true.

%%
%% Prepare runtime state, pick a vnode and find pids - precondition_common
%% ensures this runs first.
%%
prepare_args(_S) ->
    [threshold_seed()].

prepare(ThresholdSeed) ->
    {RequestInterval, Interval, Threshold} = get_params(ThresholdSeed),
    %% Mess with the same vnode proxy each time - could rotate if we wanted
    Index = 0,
    %% Set (or unset) app envs for the test
    prep_env(vnode_check_request_interval, RequestInterval),
    prep_env(vnode_check_interval, Interval),
    prep_env(vnode_overload_threshold, Threshold),
    prep_env(inactivity_timeout, infinity),
    %% Reset the vnode and the proxy to get back into a known state.
    %% Ask the supervisor to do it for us to avoid messy messages in the logs.
    Id = Index,
    {ok, VPid0} = riak_core_vnode_manager:get_vnode_pid(Id, riak_kv_vnode),
    sys:resume(VPid0),
    ok = supervisor:terminate_child(riak_core_vnode_sup, VPid0),
    %% Restart the proxy so it resets its state and picks up the new
    %% environment variables
    ok = supervisor:terminate_child(riak_core_vnode_proxy_sup,
                                    {riak_kv_vnode, Id}),
    RegName = riak_core_vnode_proxy:reg_name(riak_kv_vnode, Index),
    undefined = whereis(RegName),
    VPid1 = wait_for_vnode_change(VPid0, Index),
    {ok, PPid} = supervisor:restart_child(riak_core_vnode_proxy_sup,
                                          {riak_kv_vnode, Id}),
    %% Check the proxy is alive and registered as the pid the supervisor returned
    PPid = whereis(RegName),
    %% Send a message through the vnode proxy (the timeout message is
    %% ignored) and return the vnode Pid so we know we have the same Pid.
    {ok, VPid} = riak_core_vnode_proxy:command_return_vnode(
                   {riak_kv_vnode, Index, node()}, timeout),
    ?assertEqual(VPid, VPid1),
    true = is_process_alive(PPid),
    true = is_process_alive(VPid),
    #rt{id = Id, pl = {Id, node()}, ppid = PPid, vpid = VPid}.

prepare_next(S, RT, [ThresholdSeed]) ->
    {_RequestInterval, _Interval, Threshold} = get_params(ThresholdSeed),
    S#tstate{rt = RT, threshold = Threshold}.

%%
%% Suspend the vnode so it cannot process messages and the vnode
%% message queue builds up
%%
suspend_pre(#tstate{vnode_running = Running}) ->
    Running.

suspend_args(#tstate{rt = RT}) ->
    [RT].

suspend(#rt{vpid = VPid}) ->
    sys:suspend(VPid).

suspend_next(S, _V, _A) ->
    S#tstate{vnode_running = false}.

%%
%% Resume the vnode and wait until it has completed
%% processing all queued messages
%%
resume_pre(#tstate{vnode_running = Running}) ->
    not Running.

resume_args(#tstate{rt = RT}) ->
    [RT].

resume(#rt{ppid = PPid, vpid = VPid}) ->
    sys:resume(VPid),
    %% Use the sys:get_status call to force a synchronous call
    %% against the vnode & the proxy to ensure all messages sent by
    %% this process have been serviced and there are no pending
    %% 'ping's in the vnode before we continue.
    %% Then drain both processes to make sure any pending pongs have
    %% been sent and the proxy has processed them.
    _ = sys:get_status(PPid),
    _ = sys:get_status(VPid),
    ok = drain([VPid, PPid]).

resume_next(S, _V, _A) ->
    S#tstate{vnode_running = true, proxy_msgs = 0, direct_msgs = 0}.

%%
%% Send messages through the proxy process; do not wait for responses,
%% and use ignore as the sender so riak_core_vnode:reply will not
%% send any responses
%%
proxy_msg_args(#tstate{rt = RT}) ->
    [RT, pos_int()].

proxy_msg(#rt{pl = PL}, NumMsgs) ->
    [riak_kv_vnode:get(PL, {<<"b">>, <<"k">>}, ignore) ||
        _X <- lists:seq(1, NumMsgs)],
    ok.

proxy_msg_next(#tstate{vnode_running = false, proxy_msgs = ProxyMsgs} = S, _V,
               [_RT, NumMsgs]) ->
    S#tstate{proxy_msgs = ProxyMsgs + NumMsgs};
proxy_msg_next(S, _V, _A) ->
    S.

%%
%% Send messages directly to the core vnode process, simulating
%% the vnode manager or other processes sending additional messages
%% not accounted for by the proxy.
%%
direct_msg_args(#tstate{rt = RT}) ->
    [RT, pos_int()].

direct_msg(#rt{vpid = VPid}, NumMsgs) ->
    %% The zero timeout makes each call return (and be caught) immediately,
    %% while the request stays queued in the vnode mailbox.
    [catch gen_fsm:sync_send_event(VPid, {ohai, X}, 0) ||
        X <- lists:seq(1, NumMsgs)],
    ok.

direct_msg_next(#tstate{vnode_running = false, direct_msgs = DirectMsgs} = S,
                _V, [_RT, NumMsgs]) ->
    S#tstate{direct_msgs = DirectMsgs + NumMsgs};
direct_msg_next(S, _V, _A) ->
    S.

%%
%% Check the current state of the proxy so we can test properties
%%
overloaded_args(#tstate{vnode_running = Running, rt = RT}) ->
    [Running, RT].

overloaded(Running, #rt{ppid = PPid, vpid = VPid}) ->
    case Running of
        true ->
            ok = drain([PPid, VPid]);
        _ ->
            ok
    end,
    {messages, PMsgs} = process_info(PPid, messages),
    {messages, VMsgs} = process_info(VPid, messages),
    Overloaded = riak_core_vnode_proxy:overloaded(PPid),
    {Overloaded, {VMsgs, PMsgs}, sys:get_status(PPid)}.

overloaded_post(#tstate{threshold = undefined}, _A,
                {R, _Messages, _ProxyStatus}) ->
    %% If there are no thresholds there should never be an overload
    eq(R, false);
overloaded_post(#tstate{vnode_running = true}, _A,
                {R, _Messages, _ProxyStatus}) ->
    %% If the vnode is running, we have cleared the queues so we
    %% should not be in overload.
    eq(R, false);
overloaded_post(#tstate{vnode_running = false,
                        proxy_msgs = ProxyMsgs,
                        threshold = Threshold}, _A,
                {ResultOverload, _Messages, _ProxyStatus}) ->
    %% The proxy's mailbox estimate is either based purely on the messages
    %% it has proxied, or on a direct mailbox check plus an estimate. The
    %% test cannot tell which, as prior state may have left the proxy's
    %% counter at any value between 0 and Interval.
    if
        ProxyMsgs > Threshold -> % definitely overloaded
            eq(ResultOverload, true);
        true ->
            true % otherwise not checkable - vnode resume gets us back to a known state
    end.
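
%% For example, with Threshold = 10 and the vnode suspended: after 11 or
%% more proxied messages the proxy must report overload; at 10 or fewer,
%% direct messages and leftover counter state mean the proxy may
%% legitimately report either answer, so the postcondition asserts nothing.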

%%
%% Test helpers
%%

%% Convert the test seed to a request interval, check interval and threshold.
%% If unset, remove from the app env so the proxy init auto-configures.
get_params(undefined) ->
    {unset, unset, undefined};
get_params({Interval, ThresholdDelta}) ->
    {unset, % let the proxy allocate it
     Interval,
     Interval + ThresholdDelta};
get_params({RequestInterval, IntervalDelta, ThresholdDelta}) ->
    {RequestInterval,
     RequestInterval + IntervalDelta,
     RequestInterval + IntervalDelta + ThresholdDelta};
get_params(Threshold) when Threshold > 5000 ->
    {unset, unset, Threshold};
get_params(Threshold) when Threshold =< 5000 ->
    %% Avoid the annoying message about safety
    {unset, Threshold div 2, Threshold}.

%% Set (or unset) params
prep_env(Var, unset) ->
    application:unset_env(riak_core, Var, infinity);
prep_env(Var, Val) ->
    application:set_env(riak_core, Var, Val).

%%
%% Wait until all messages are drained by the Pids. No guarantees
%% about future messages being sent, or that responses for the
%% last message consumed have been transmitted.
%% NOTE: The "drain 3 times in a row" was determined empirically,
%% and may not be sufficient (2 was not). Given time constraints,
%% we are living with it for now. If this fails, we should really add
%% some tracing code around the send of messages to the vnode and proxy
%% to determine where the extra messages are coming from, rather than
%% just making this "try 4 times".
%%
drain(Pid) when is_pid(Pid) ->
    drain([Pid], {-1, -1});
drain(Pids) when is_list(Pids) ->
    drain(Pids, {-1, -1}).

drain(Pids, {PrevPrev, Prev}) ->
    _ = [sys:suspend(Pid) || Pid <- Pids],
    Len = lists:foldl(fun(Pid, Acc0) ->
                              {message_queue_len, L} =
                                  erlang:process_info(Pid, message_queue_len),
                              Acc0 + L
                      end, 0, Pids),
    _ = [sys:resume(Pid) || Pid <- Pids],
    case {PrevPrev, Prev, Len} of
        {0, 0, 0} ->
            ok;
        _ ->
            %% Attempt to ensure something else is scheduled before we try
            %% to drain again
            erlang:yield(),
            timer:sleep(1),
            drain(Pids, {Prev, Len})
    end.
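
%% drain/2 only returns once three consecutive samples of the combined
%% message queue length are zero ({PrevPrev, Prev, Len} all 0); the initial
%% {-1, -1} accumulator guarantees at least three samples are taken.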

%% Return the length of the message queue (or crash if the process is dead)
msgq_len(Pid) ->
    {message_queue_len, L} = process_info(Pid, message_queue_len),
    L.

%% Kill the process and wait for it to exit
kill_and_wait(Pid) ->
    Mref = erlang:monitor(process, Pid),
    exit(Pid, kill),
    receive
        {'DOWN', Mref, _, _, _} ->
            ok
    end.

%%
%% Check if EQC is available on the local node, then add it to the remote
%% nodes if needed. This could be refactored out if many tests need it.
%%
setup_eqc(Nodes) ->
    try
        _ = eqc:reserved_until(), % throws an exception if there are problems
        add_eqc_apps(Nodes)
    catch
        _:Err ->
            lager:info("EQC unavailable: ~p\n", [Err]),
            {error, unavailable}
    end.

%% Add the QuickCheck apps installed in the riak_test runner to the code
%% path on each of the Nodes
add_eqc_apps(Nodes) ->
    Apps = [eqc, pulse, pulse_otp, eqc_mcerlang],
    [case code:lib_dir(App, ebin) of
         {error, bad_name} -> % EQC component not installed locally
             ok;
         Path when is_list(Path) ->
             case rpc:call(Node, code, priv_dir, [App]) of
                 {error, bad_name} ->
                     true = rpc:call(Node, code, add_pathz, [Path], 60000);
                 _ ->
                     ok
             end
     end || App <- Apps, Node <- Nodes],
    ok.
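
%% Note: add_pathz is given the *local* lib_dir path, so this assumes the
%% EQC ebin directory is visible at the same path on the remote node
%% (e.g. devrel nodes on the same host or a shared filesystem).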

wait_for_vnode_change(VPid0, Index) ->
    {ok, VPid1} = riak_core_vnode_manager:get_vnode_pid(Index, riak_kv_vnode),
    case VPid1 of
        VPid0 ->
            timer:sleep(1),
            wait_for_vnode_change(VPid0, Index);
        _ ->
            VPid1
    end.

-else. %% no EQC

-export([confirm/0]).

confirm() ->
    lager:info("EQC not enabled, skipping test"),
    pass.

-endif.