%% -------------------------------------------------------------------
%%
%% Copyright (c) 2012 Basho Technologies, Inc.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc Verify that inputs are forwarded properly if a pipe worker
%% fails to restart.
%%
%% Important: this test loads this module and {@link rt_pipe} on each
%% Riak node, such that it can reference their functions in pipe
%% workers.
%%
%% IMPORTANT: this test must be run on a ONE-node cluster, because
%% riak_pipe_w_crash uses ETS to determine a "restart" situation, and
%% sets the fitting process as the heir of the table, so it survives
%% the worker's restart.
%%
%% These tests used to be a component of riak_pipe:exception_test_/0.
-module(pipe_verify_restart_input_forwarding).

-export([
         %% riak_test's entry
         confirm/0
        ]).

-include_lib("eunit/include/eunit.hrl").
%% local copy of riak_pipe.hrl
-include("rt_pipe.hrl").

%% must be 1 for verify_worker_restart_failure_input_forwarding
-define(NODE_COUNT, 1).

-define(ERR_LOG, [{log, sink}, {trace, [error]}]).
-define(ALL_LOG, [{log, sink}, {trace, all}]).

%% @doc riak_test callback
confirm() ->
    lager:info("Build ~b node cluster", [?NODE_COUNT]),
    Nodes = rt:build_cluster(?NODE_COUNT),
    [rt:wait_for_service(Node, riak_pipe) || Node <- Nodes],

    rt:load_modules_on_nodes([?MODULE, rt_pipe], Nodes),

    verify_worker_restart_failure_input_forwarding(Nodes),

    rt_pipe:assert_no_zombies(Nodes),

    lager:info("~s: PASS", [atom_to_list(?MODULE)]),
    pass.
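
%% The @doc above mentions that riak_pipe_w_crash keeps its "restart"
%% marker in an ETS table whose heir is the fitting process.  That is
%% the standard ETS ownership-transfer mechanism: a table created with
%% the {heir, Pid, Data} option is handed to Pid when its owner dies,
%% and Pid is notified with an 'ETS-TRANSFER' message.  A minimal
%% sketch of that mechanism follows; it is not part of this test, and
%% the names sketch_ets_heir/0, crash_state, and restart_marker are
%% illustrative only:
%%
%%   sketch_ets_heir() ->
%%       Heir = self(),
%%       Owner = spawn(fun() ->
%%                             ets:new(crash_state,
%%                                     [named_table, public,
%%                                      {heir, Heir, restart_marker}])
%%                     end),
%%       receive
%%           {'ETS-TRANSFER', Tab, Owner, restart_marker} ->
%%               %% the table outlives its original owner, just as
%%               %% riak_pipe_w_crash's table outlives a restarting
%%               %% worker once the fitting is named as heir
%%               {inherited, Tab}
%%       after 1000 ->
%%               timeout
%%       end.
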
verify_worker_restart_failure_input_forwarding([RN]) ->
    lager:info("Verify input forwarding after worker restart failure"),
    %% make a worker fail, and then also fail to restart, and check
    %% that the input that killed it generates a processing error,
    %% while the inputs that were queued for it get sent to another
    %% vnode
    Spec = [#fitting_spec{name=restarter,
                          module=riak_pipe_w_crash,
                          arg=init_restartfail,
                          %% use nval=2 to get some failover
                          nval=2}],
    Opts = [{sink, rt_pipe:self_sink()},
            {log, sink},
            {trace, [error, restart, restart_fail, queue]}],
    {ok, Pipe} = rpc:call(RN, riak_pipe, exec, [Spec, Opts]),

    Inputs1 = lists:seq(0, 127),
    Inputs2 = lists:seq(128, 255),
    Inputs3 = lists:seq(256, 383),

    %% send many inputs, send crash, send more inputs,
    %% sleep, send more inputs
    %%
    %% this should make one of the riak_pipe_w_crash workers die with
    %% unprocessed items in its queue, and then also deliver a few
    %% more inputs to that worker, which will be immediately
    %% redirected to an alternate vnode
    [ok = rpc:call(RN, riak_pipe, queue_work, [Pipe, N]) || N <- Inputs1],
    ok = rpc:call(RN, riak_pipe, queue_work, [Pipe, init_restartfail]),
    [ok = rpc:call(RN, riak_pipe, queue_work, [Pipe, N]) || N <- Inputs2],
    %% one worker should now have both the crashing input and a valid
    %% input following it waiting in its queue - the test is whether
    %% or not that valid input following the crash gets redirected
    %% correctly

    %% wait for the worker to crash, then send more input at it
    %% - the test is whether the new inputs are redirected correctly
    timer:sleep(2000),
    [ok = rpc:call(RN, riak_pipe, queue_work, [Pipe, N]) || N <- Inputs3],

    %% flush the pipe
    ok = riak_pipe:eoi(Pipe),
    {eoi, Results, Trace} = riak_pipe:collect_results(Pipe),

    %% all results should have completed correctly
    ?assertEqual(length(Inputs1 ++ Inputs2 ++ Inputs3), length(Results)),

    %% there should be exactly one trace error:
    %% - the processing error (worker crash)
    Errors = rt_pipe:extract_trace_errors(Trace),
    ?assertEqual(1, length(Errors)),
    ?assert(is_list(hd(Errors))),
    ?assertMatch(init_restartfail, proplists:get_value(input, hd(Errors))),
    Restarter = proplists:get_value(partition, hd(Errors)),

    %% ... and also exactly one message about the worker restart failure
    ?assertMatch([Restarter], rt_pipe:extract_restart_fail(Trace)),

    Queued = rt_pipe:extract_queued(Trace),

    %% find out who caught the restartfail
    Restarted = [ P || {P, init_restartfail} <- Queued ],
    ?assertMatch([Restarter], Restarted),

    %% which inputs arrived after the crashing input,
    %% but before the crash?
    {_PreCrashIn, PostCrashIn0} =
        lists:splitwith(fun is_integer/1,
                        [ I || {P, I} <- Queued, P == Restarter ]),
    %% drop the actual crash input
    PostCrashIn = tl(PostCrashIn0),
    %% make sure some input was actually enqueued before the crash
    %% (otherwise the test is bogus)
    ?assert(length(PostCrashIn) > 0),

    %% so where did the post-crash inputs end up?
    ReQueued = lists:map(
                 fun(I) ->
                         Re = [ P || {P, X} <- Queued,
                                     X == I, P /= Restarter ],
                         ?assertMatch([_Part], Re),
                         hd(Re)
                 end,
                 PostCrashIn),
    ?assertMatch([_Requeuer], lists:usort(ReQueued)),
    [Requeuer|_] = ReQueued,

    %% finally, did the inputs destined for the crashed worker that
    %% were sent *after* the worker crashed also get forwarded to the
    %% correct location?
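    %%
    %% riak_core_apl:get_apl/3 returns the active preflist for a
    %% hashed key as a list of {Partition, Node} pairs; asking for
    %% N=1 below names the single vnode that would have owned each
    %% input (the lookup assumes the pipe routes inputs on
    %% chash:key_of/1, which is why the same hash is used here).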
    Destined = lists:filter(
                 fun(I) ->
                         [{P, _}] =
                             rpc:call(RN, riak_core_apl, get_apl,
                                      [chash:key_of(I), 1, riak_pipe]),
                         P == Restarter
                 end,
                 Inputs3),
    Forwarded = lists:map(
                  fun(I) ->
                          [Part] = [ P || {P, X} <- Queued, X == I ],
                          Part
                  end,
                  Destined),
    ?assertMatch([_Forward], lists:usort(Forwarded)),
    [Forward|_] = Forwarded,
    %% consistent hashing means this should be the same partition
    ?assertEqual(Requeuer, Forward).
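
%% The final assertion leans on consistent hashing: with nval=2, every
%% input whose primary vnode is the failed Restarter partition shares
%% the same second preflist entry, so the partition that re-queued the
%% crashed worker's backlog should also be the one that received the
%% late-arriving Inputs3.  A hypothetical helper (not used by the test,
%% and assuming forwarding always picks the next preflist entry) that
%% would compute that expected fallback directly:
%%
%%   expected_fallback(Node, Input) ->
%%       [{_Primary, _}, {Fallback, _}] =
%%           rpc:call(Node, riak_core_apl, get_apl,
%%                    [chash:key_of(Input), 2, riak_pipe]),
%%       Fallback.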