riak_test/tests/mapred_basic_compat.erl

263 lines
10 KiB
Erlang
Raw Normal View History

%% -------------------------------------------------------------------
%%
%% Copyright (c) 2012 Basho Technologies, Inc.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%% @doc Verify some MapReduce internals.
%%
%% This test used to be in riak_kv's test/mapred_test.erl. It was
%% called `compat_basic1_test_'. It has been moved here to avoid the
%% fragile setup and teardown stages that frequently broke eunit
%% testing.
-module(mapred_basic_compat).
-behavior(riak_test).
-export([
%% riak_test api
confirm/0,
%% test helpers
inputs_gen_seq/3,
inputs_gen_bkeys_1/3
]).
-compile([export_all]). %% because we call ?MODULE:TestName
-include_lib("eunit/include/eunit.hrl").
-define(INTS_BUCKET, <<"foonum">>).
-define(LINK_BUCKET, <<"link bucket">>).
-define(BUCKET_TYPE, <<"mytype">>).
%% @doc riak_test entry point. Builds a three-node cluster, creates and
%% activates the ?BUCKET_TYPE bucket type, loads the fixture data, asks
%% rt to load this module on the nodes (the modfun input generators are
%% called there) and then runs every test case in order. Returns `pass';
%% a failing case crashes the run.
confirm() ->
    [Node1|_] = Nodes = rt:build_cluster(3),

    %% create a new type and wait for it to be usable everywhere
    rt:create_and_activate_bucket_type(Node1, ?BUCKET_TYPE, [{n_val, 3}]),
    rt:wait_until_bucket_type_status(?BUCKET_TYPE, active, Nodes),
    rt:wait_until_bucket_type_visible(Nodes, ?BUCKET_TYPE),

    load_test_data(Nodes),
    rt:load_modules_on_nodes([?MODULE], Nodes),

    TestNames = [empty_query,
                 reduce_zero_inputs,
                 keep_both,
                 keep_neither,
                 keep_first_only,
                 keep_second_only,
                 explicity_rereduce,
                 error_not_found_propagation,
                 basic_link,
                 link_not_found,
                 keydata,
                 key_filters,
                 map_output_with_btype,
                 modfun_generator1,
                 modfun_generator2],
    %% Each case is a function of this module taking the node list; it is
    %% dispatched by name, which is why the module exports everything.
    lists:foreach(fun(TestName) ->
                          lager:info("Running test ~p", [TestName]),
                          ?MODULE:TestName(Nodes)
                  end,
                  TestNames),
    pass.
%% @doc Load the fixture data the test cases rely on, using the first
%% node of the cluster:
%% <ul>
%%  <li>the integer objects in ?INTS_BUCKET, created on the node by
%%      `riak_kv_mrc_pipe:example_setup/0';</li>
%%  <li>one object in ?LINK_BUCKET carrying two links;</li>
%%  <li>ten objects `bar1'..`bar10' in {?BUCKET_TYPE, ?INTS_BUCKET} whose
%%      values are the textual integers 1..10.</li>
%% </ul>
%% Every write is asserted to return `ok', so a failed load crashes here
%% rather than surfacing later as a puzzling MapReduce result mismatch.
load_test_data([Node|_]) ->
    %% creates foonum/1..5 - this is what populates ?INTS_BUCKET
    lager:info("Filling INTS_BUCKET (~s)", [?INTS_BUCKET]),
    ok = rpc:call(Node, riak_kv_mrc_pipe, example_setup, []),

    lager:info("Adding Link object"),
    Obj = riakc_obj:new(?LINK_BUCKET,
                        <<"yo">>,
                        <<"link val">>,
                        "text/plain"),
    MD = riakc_obj:add_link(
           riakc_obj:get_update_metadata(Obj),
           [{<<"link 1">>, [{?LINK_BUCKET, <<"nokey-1">>}]},
            {<<"link 2">>, [{?LINK_BUCKET, <<"nokey-2">>}]}]),

    C = rt:pbc(Node),
    ok = riakc_pb_socket:put(C, riakc_obj:update_metadata(Obj, MD)),

    %% Some bucket type entries {mytype,foonum}/bar{1..10}
    lists:foreach(
      fun(N) ->
              K = list_to_binary("bar"++integer_to_list(N)),
              V = list_to_binary(integer_to_list(N)),
              O = riakc_obj:new({?BUCKET_TYPE, ?INTS_BUCKET}, K, V),
              %% Assert success, consistent with the link object put above.
              ok = riakc_pb_socket:put(C, O)
      end,
      lists:seq(1,10)),

    riakc_pb_socket:stop(C).
%% @doc Run a MapReduce job on `Node' by calling
%% `riak_kv_mrc_pipe:mapred(Inputs, Query)' over rpc, and return whatever
%% the rpc call returns.
rpcmr(Node, Inputs, Query) ->
    MapredArgs = [Inputs, Query],
    rpc:call(Node, riak_kv_mrc_pipe, mapred, MapredArgs).
%% @doc Whole-bucket input with an empty query. The job still traverses
%% ?INTS_BUCKET and, since there are no phases, the bucket/key pairs sent
%% down the pipe come back as the result: five of them are expected, the
%% smallest being `bar1'.
empty_query([Node|_]) ->
    {ok, Found} = rpcmr(Node, ?INTS_BUCKET, []),
    ?assertEqual(5, length(Found)),
    [Smallest|_] = lists:sort(Found),
    ?assertEqual({?INTS_BUCKET, <<"bar1">>}, Smallest).
%% @doc AZ 479: a reduce phase that receives no inputs must still be
%% called once, with the empty list. For `reduce_sum' that yields 0.
reduce_zero_inputs([Node|_]) ->
    SumPhase = {reduce, {modfun, riak_kv_mapreduce, reduce_sum}, none, true},
    Result = rpcmr(Node, [], [SumPhase]),
    ?assertEqual({ok, [0]}, Result).
%% @doc Basic compatibility: both the map and the reduce phase keep their
%% output, so the result is a list with one entry per phase.
keep_both([Node|_]) ->
    MapPhase = {map, {modfun, riak_kv_mapreduce, map_object_value}, none, true},
    SumPhase = {reduce, {modfun, riak_kv_mapreduce, reduce_sum}, none, true},
    {ok, [MapOut, ReduceOut]} = rpcmr(Node, ?INTS_BUCKET, [MapPhase, SumPhase]),
    %% one mapped value per object, and their sum
    ?assertEqual(5, length(MapOut)),
    ?assertEqual([15], ReduceOut).
%% @doc Basic compat: neither phase keeps its output, so the job
%% succeeds with an empty result.
keep_neither([Node|_]) ->
    MapPhase = {map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
    SumPhase = {reduce, {modfun, riak_kv_mapreduce, reduce_sum}, none, false},
    %% No keeper stage at all: nothing is returned, just the empty list.
    Result = rpcmr(Node, ?INTS_BUCKET, [MapPhase, SumPhase]),
    ?assertEqual({ok, []}, Result).
%% @doc Basic compat: only the first (map) phase keeps its output; the
%% result takes the 'crazy' flat form.
keep_first_only([Node|_]) ->
    MapPhase = {map, {modfun, riak_kv_mapreduce, map_object_value}, none, true},
    SumPhase = {reduce, {modfun, riak_kv_mapreduce, reduce_sum}, none, false},
    %% "Crazy" semantics: with exactly one keeper stage the result is
    %% List rather than [List].
    {ok, MapOut} = rpcmr(Node, ?INTS_BUCKET, [MapPhase, SumPhase]),
    ?assertEqual(5, length(MapOut)).
%% @doc Basic compat: only the second (reduce) phase keeps its output;
%% the result takes the 'crazy' flat form.
keep_second_only([Node|_]) ->
    MapPhase = {map, {modfun, riak_kv_mapreduce, map_object_value}, none, false},
    SumPhase = {reduce, {modfun, riak_kv_mapreduce, reduce_sum}, none, true},
    %% "Crazy" semantics: with exactly one keeper stage the result is
    %% List rather than [List].
    Result = rpcmr(Node, ?INTS_BUCKET, [MapPhase, SumPhase]),
    ?assertEqual({ok, [15]}, Result).
%% @doc Explicit rereduce: one map phase followed by five identical
%% `reduce_sum' phases, all kept. Each reduce phase must report the same
%% sum, since summing a single-element list again changes nothing.
explicity_rereduce([Node|_]) ->
    MapPhase = {map, {modfun, riak_kv_mapreduce, map_object_value}, none, true},
    SumPhase = {reduce, {modfun, riak_kv_mapreduce, reduce_sum}, none, true},
    Query = [MapPhase | lists:duplicate(5, SumPhase)],
    ?assertMatch({ok, [_, [15],[15],[15],[15],[15]]},
                 rpcmr(Node, ?INTS_BUCKET, Query)).
%% @doc Make certain that {error, not_found} goes down the pipe from a
%% map phase. Checked twice with the same query: once for a missing key
%% in an untyped bucket, and once for a missing key in a typed bucket
%% given in the `{{Bucket, Key}, KeyData}' input form.
error_not_found_propagation([Node|_]) ->
    Query = [{map, {modfun, riak_kv_mapreduce, map_object_value},
              {struct,[{<<"sub">>,[<<"0">>]}]}, false},
             {reduce, {modfun, riak_kv_mapreduce, reduce_string_to_integer},
              none,true}],

    UntypedInputs = [{<<"no-such-bucket">>, <<"no-such-key!">>}],
    ?assertEqual({ok, [0]}, rpcmr(Node, UntypedInputs, Query)),

    TypedBucket = {?BUCKET_TYPE, ?INTS_BUCKET},
    TypedInputs = [{{TypedBucket, <<"nokey">>}, undefined}],
    ?assertEqual({ok, [0]}, rpcmr(Node, TypedInputs, Query)).
%% @doc A map phase outputting a 4-element [Bucket, Key, KeyData, Type]
%% result can feed objects to another map phase. The first phase turns
%% each ?INTS_BUCKET input into the same bucket/key under the "mytype"
%% bucket type; the second phase reads those typed objects.
map_output_with_btype([Node|_]) ->
    %% Translates from regular bucket to bucket type one
    Retype = {map,
              {jsanon, <<"function(o){return[[o.bucket,o.key,null,\"mytype\"]];}">>},
              undefined, false},
    ReadValue = {map, {modfun, riak_kv_mapreduce, map_object_value},
                 undefined, false},
    ToInteger = {reduce, {modfun, riak_kv_mapreduce, reduce_string_to_integer},
                 undefined, false},
    Sort = {reduce, {modfun, riak_kv_mapreduce, reduce_sort}, undefined, true},

    Inputs = ?INTS_BUCKET,
    Spec = [Retype, ReadValue, ToInteger, Sort],
    %% Inputs and Spec are carried on both sides of the comparison so
    %% that a failure report shows the job that produced the result.
    Context = {Inputs, Spec},
    ?assertEqual({{ok, lists:seq(1,5)}, Context},
                 {rpcmr(Node, Inputs, Spec), Context}).
%% @doc Basic link phase: following only the <<"link 1">> tag from the
%% objects of ?LINK_BUCKET must yield exactly that one link.
basic_link([Node|_]) ->
    LinkPhase = {link, '_', <<"link 1">>, true},
    Expected = {ok, [ [?LINK_BUCKET, <<"nokey-1">>, <<"link 1">>] ]},
    ?assertEqual(Expected, rpcmr(Node, ?LINK_BUCKET, [LinkPhase])).
%% @doc Link phase + notfound: walking links from two keys in the
%% <<"no">> bucket is expected to succeed with no results.
link_not_found([Node|_]) ->
    MissingBKeys = [{<<"no">>, <<"no1">>},
                    {<<"no">>, <<"no2">>}],
    FollowAll = {link, '_', '_', true},
    ?assertEqual({ok, []}, rpcmr(Node, MissingBKeys, [FollowAll])).
%% @doc KeyData: inputs may be given as `{B, K}', `{{B, K}, KeyData}',
%% `[B, K]' or `[B, K, KeyData]'. The map fun echoes back the bucket/key
%% it was handed, together with the KeyData when there is some, so the
%% results must equal the inputs once both are put in tuple form.
keydata([Node|_]) ->
    %% Map fun: emit {B, K} when KeyData is undefined, else {{B, K}, KeyData}.
    EchoInput = fun(Obj, KeyData, _Arg) ->
                        BKey = {riak_object:bucket(Obj),
                                riak_object:key(Obj)},
                        case KeyData of
                            undefined -> [BKey];
                            _ -> [{BKey, KeyData}]
                        end
                end,
    %% Tuple inputs are already in result form; list inputs are converted.
    ToTupleForm = fun({_, _} = Pair) -> Pair;
                     ([B, K]) -> {B, K};
                     ([B, K, D]) -> {{B, K}, D}
                  end,

    Inputs = [{?INTS_BUCKET, <<"bar1">>},
              {{?INTS_BUCKET, <<"bar2">>}, <<"keydata works">>},
              [?INTS_BUCKET, <<"bar3">>],
              [?INTS_BUCKET, <<"bar4">>, <<"keydata still works">>]],
    Query = [{map, {qfun, EchoInput}, none, true}],
    {ok, Results} = rpcmr(Node, Inputs, Query),

    Expected = lists:sort(lists:map(ToTupleForm, Inputs)),
    ?assertEqual(Expected, lists:sort(Results)).
%% @doc Key Filters: a `{Bucket, Filters}' input restricts the bucket
%% listing to the keys accepted by the filters.
key_filters([Node|_]) ->
    %% filter should match only "bar4" key
    EndsWithR4 = [<<"ends_with">>, <<"r4">>],
    Inputs = {?INTS_BUCKET, [EndsWithR4]},
    MapPhase = {map, {modfun, riak_kv_mapreduce, map_object_value}, none, true},
    ?assertEqual({ok, [4]}, rpcmr(Node, Inputs, [MapPhase])).
%% @doc modfun for inputs generator: the inputs come from
%% `inputs_gen_seq/3' in this module, called with 6, which queues the
%% integers 1..6; their sum is 21.
modfun_generator1([Node|_]) ->
    Generator = {modfun, ?MODULE, inputs_gen_seq, 6},
    SumPhase = {reduce, {modfun, riak_kv_mapreduce, reduce_sum},none,true},
    ?assertEqual({ok, [21]}, rpcmr(Node, Generator, [SumPhase])).
%% @doc Input generator used by `modfun_generator1/1'; runs on riak node.
%% Queues each integer from 1 to `Max' on `Pipe' as a work item, then
%% signals end-of-inputs. The timeout argument is ignored.
inputs_gen_seq(Pipe, Max, _Timeout) ->
    lists:foreach(fun(Int) -> riak_pipe:queue_work(Pipe, Int) end,
                  lists:seq(1, Max)),
    riak_pipe:eoi(Pipe),
    ok.
%% @doc modfun for inputs generator: make BKeys for conventional phases.
%% `inputs_gen_bkeys_1/3' supplies keys bar1..bar5 of ?INTS_BUCKET; their
%% values are mapped out, converted to integers and summed.
modfun_generator2([Node|_]) ->
    Generator = {modfun, ?MODULE, inputs_gen_bkeys_1, {?INTS_BUCKET, 1, 5}},
    ReadValue = {map, {modfun, riak_kv_mapreduce, map_object_value},
                 none, false},
    ToInteger = {reduce, {modfun, riak_kv_mapreduce, reduce_string_to_integer},
                 none,false},
    Sum = {reduce, {modfun, riak_kv_mapreduce, reduce_sum},
           none,true},
    Result = rpcmr(Node, Generator, [ReadValue, ToInteger, Sum]),
    ?assertEqual({ok, [15]}, Result).
%% @doc Input generator used by `modfun_generator2/1'; runs on riak node.
%% Builds the bucket/key pairs `{Bucket, <<"barN">>}' for N from `Start'
%% to `End', queues each on `Pipe' as a work item, then signals
%% end-of-inputs. The timeout argument is ignored.
inputs_gen_bkeys_1(Pipe, {Bucket, Start, End}, _Timeout) ->
    ToBKey = fun(N) -> {Bucket, list_to_binary("bar"++integer_to_list(N))} end,
    BKeys = lists:map(ToBKey, lists:seq(Start, End)),
    lists:foreach(fun(BKey) -> riak_pipe:queue_work(Pipe, BKey) end, BKeys),
    riak_pipe:eoi(Pipe),
    ok.