%% -------------------------------------------------------------------
%%
%% Copyright (c) 2013 Basho Technologies, Inc.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.  You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied.  See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

%% @doc This test was designed to provoke a specific failure in
%% MapReduce when one node is down, and a prereduce phase is used. The
%% test simply counts items in a bucket, but it will occasionally get
%% a result of `[]' (the empty list) or `[0]' instead of `[Count]'.
%%
%% The bug was determined to be in the choice of static hash for the
%% final reduce phase. It did not take into account node liveness, and
%% therefore might assign the reduce worker to a vnode on a node that
%% was down.
%%
%% This test is based on one submitted by Alexander Gunin to the
%% riak-users mailing list as an issue reproducer.
%%
%% [http://lists.basho.com/pipermail/riak-users_lists.basho.com/2013-January/010896.html]
-module(verify_mr_prereduce_node_down).
-export([
         %% riak_test's entry
         confirm/0
        ]).
-include_lib("eunit/include/eunit.hrl").

%% @doc riak_test callback
confirm() ->
    NodeCount = 4,
    lager:info("Build ~b-node cluster", [NodeCount]),
    [Primary,ToKill|_] = rt:build_cluster(NodeCount),

    %% We need one node down for this test
    rt:stop(ToKill),

    %% store our test data
    Bucket = <<"verify_mr_prereduce_node_down">>,
    ObjCount = 100,
    lager:info("Loading ~b objects of test data", [ObjCount]),
    [] = rt:systest_write(Primary, 1, ObjCount, Bucket, 3),

    %% run the query a bunch
    C = rt:pbc(Primary),
    TestCount = 100,
    lager:info("Running the MR query ~b times", [TestCount]),
    Runs = [ run_query(C, Bucket) || _ <- lists:seq(1, TestCount) ],

    lager:info("Evaluating results"),
    %% Errors == failures that even Riak thinks were failures
    %% Correct == correct answers
    %% Incorrect == failures that Riak thought were correct
    SupposedCorrectFun = fun({ok, _}) -> true;
                            (_) -> false
                         end,
    ActualCorrectFun = fun({ok, V}) -> V == [{1, [ObjCount]}] end,
    {Supposed, Errors} = lists:partition(SupposedCorrectFun, Runs),
    {Correct, Incorrect} = lists:partition(ActualCorrectFun, Supposed),

    %% asserting that all queries gave the correct answer; asserting
    %% more than just Correct == TestCount, such that failures print
    %% out details about how they failed
    ?assertEqual({TestCount, [], []},
                 {length(Correct), Incorrect, Errors}),
    lager:info("~s: PASS", [atom_to_list(?MODULE)]),
    pass.

%% result should be a count of the objects in the bucket
run_query(C, Bucket) ->
    riakc_pb_socket:mapred(
      C, Bucket,
      %% this prereduce is key - with it, we'll get
      %% {ok, []} results in the broken case; without
      %% it, we'll get error tuples
      [{map, {modfun, riak_kv_mapreduce, map_identity},
        [do_prereduce], false},
       %% counting inputs works because the inputs are riak_objects
       %% (not integers, which might confuse the counting)
       {reduce, {modfun, riak_kv_mapreduce, reduce_count_inputs},
        none, true}]).
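
%% Example invocation (a sketch, not part of the original test): this
%% module is run through the riak_test harness, selecting the test by
%% module name. The config name `rtdev' below is an assumption -- use
%% whichever devrel configuration is defined in your ~/.riak_test.config.
%%
%%   ./riak_test -c rtdev -t verify_mr_prereduce_node_down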