riak_test/tests/verify_mr_prereduce_node_down.erl
Bryan Fink b6a6066d65 use rt:systest_write instead of listcomp of riakc_pb_socket:put
Yay for using builtin utilities instead of hand-rolled hacks.
2013-02-02 08:54:57 -05:00

94 lines
3.6 KiB
Erlang

%% -------------------------------------------------------------------
%%
%% Copyright (c) 2013 Basho Technologies, Inc.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%% @doc This test was designed to provoke a specific failure in
%% MapReduce when one node is down, and a prereduce phase is used. The
%% test simply counts items in a bucket, but it will occasionally get
%% a result of `[]' (the empty list) or `[0]' instead of `[Count]'.
%%
%% The bug was determined to be in the choice of static hash for the
%% final reduce phase. It did not take into account node liveness, and
%% therefore might assign the reduce worker to a vnode on a node that
%% was down.
%%
%% This test is based on one submitted by Alexander Gunin to the
%% riak-users mailing list as an issue reproducer.
%%
%% [http://lists.basho.com/pipermail/riak-users_lists.basho.com/2013-January/010896.html]
-module(verify_mr_prereduce_node_down).
-export([
%% riak_test's entry
confirm/0
]).
-include_lib("eunit/include/eunit.hrl").
%% @doc riak_test callback.
%%
%% Builds a four-node cluster, stops one of the nodes, writes
%% `ObjCount' objects via the first node, and then runs the counting
%% MapReduce query `TestCount' times over a single client connection.
%% The test passes (returns `pass') only if every run returned
%% `{ok, [{1, [ObjCount]}]}'; otherwise the eunit assertion raises and
%% reports the incorrect and errored results.
confirm() ->
    NodeCount = 4,
    lager:info("Build ~b-node cluster", [NodeCount]),
    [Primary, ToKill | _] = rt:build_cluster(NodeCount),

    %% We need one node down for this test
    rt:stop(ToKill),

    %% store our test data
    Bucket = <<"verify_mr_prereduce_node_down">>,
    ObjCount = 100,
    lager:info("Loading ~b objects of test data", [ObjCount]),
    %% NOTE(review): the empty list is presumably "no failed writes" --
    %% confirm against rt:systest_write/5
    [] = rt:systest_write(Primary, 1, ObjCount, Bucket, 3),

    %% run the query a bunch
    C = rt:pbc(Primary),
    TestCount = 100,
    lager:info("Running the MR query ~b times", [TestCount]),
    Runs = [run_query(C, Bucket) || _ <- lists:seq(1, TestCount)],

    %% All queries are done: release the client connection now, before
    %% the assertion below, so a failing assert does not skip the
    %% cleanup.
    riakc_pb_socket:stop(C),

    lager:info("Evaluating results"),
    %% Errors == failures that even Riak thinks were failures
    %% Correct == correct answers
    %% Incorrect == failures that Riak thought were correct
    SupposedCorrectFun = fun({ok, _}) -> true; (_) -> false end,
    %% Only ever applied to elements of Supposed, which are all
    %% `{ok, _}' tuples, so a single clause is sufficient here.
    ActualCorrectFun = fun({ok, V}) -> V =:= [{1, [ObjCount]}] end,
    {Supposed, Errors} = lists:partition(SupposedCorrectFun, Runs),
    {Correct, Incorrect} = lists:partition(ActualCorrectFun, Supposed),

    %% asserting that all queries gave the correct answer; asserting
    %% more than just Correct == TestCount, such that failures print
    %% out details about how they failed
    ?assertEqual({TestCount, [], []},
                 {length(Correct), Incorrect, Errors}),

    lager:info("~s: PASS", [atom_to_list(?MODULE)]),
    pass.
%% Runs one MapReduce job over `Bucket' using client connection `C'
%% and returns the result of riakc_pb_socket:mapred/3 unchanged. The
%% result should be a count of the objects in the bucket.
run_query(C, Bucket) ->
    %% this prereduce is key - with it, we'll get {ok, []} results in
    %% the broken case; without it, we'll get error tuples
    MapPhase = {map,
                {modfun, riak_kv_mapreduce, map_identity},
                [do_prereduce],
                false},
    %% counting inputs works because the inputs are riak_objects
    %% (not integers, which might confuse the counting)
    ReducePhase = {reduce,
                   {modfun, riak_kv_mapreduce, reduce_count_inputs},
                   none,
                   true},
    riakc_pb_socket:mapred(C, Bucket, [MapPhase, ReducePhase]).