2013-02-25 21:08:34 +00:00
|
|
|
%% -------------------------------------------------------------------
|
|
|
|
%%
|
|
|
|
%% Copyright (c) 2013 Basho Technologies, Inc.
|
|
|
|
%%
|
|
|
|
%% This file is provided to you under the Apache License,
|
|
|
|
%% Version 2.0 (the "License"); you may not use this file
|
|
|
|
%% except in compliance with the License. You may obtain
|
|
|
|
%% a copy of the License at
|
|
|
|
%%
|
|
|
|
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
%%
|
|
|
|
%% Unless required by applicable law or agreed to in writing,
|
|
|
|
%% software distributed under the License is distributed on an
|
|
|
|
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
|
|
%% KIND, either express or implied. See the License for the
|
|
|
|
%% specific language governing permissions and limitations
|
|
|
|
%% under the License.
|
|
|
|
%%
|
|
|
|
%% -------------------------------------------------------------------
|
|
|
|
-module(verify_handoff).
|
|
|
|
-behavior(riak_test).
|
|
|
|
-export([confirm/0]).
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
|
|
|
|
%% @doc riak_test entry point.
%%
%% Capability negotiation and related mechanisms have their own test, so this
%% one stays simple: for every handoff encoding type it runs one complete
%% handoff scenario (see run_test/4) and returns `pass' once all of them have
%% completed without raising.
confirm() ->
    %% How many test items to write/verify.
    NTestItems = 1000,
    %% How many nodes to spin up for each run.
    NTestNodes = 3,
    %% false selects the regular ("production") node settings; switch to true
    %% when the test is too slow.
    TestMode = false,
    %% Encodings to exercise; these rarely need to be changed.
    EncodingTypes = [default, encode_raw, encode_zlib],

    lists:foreach(
      fun(EncodingType) ->
              run_test(TestMode, NTestItems, NTestNodes, EncodingType)
      end,
      EncodingTypes),

    lager:info("Test verify_handoff passed."),
    pass.
|
|
|
|
|
2014-09-11 21:05:26 +00:00
|
|
|
%% Runs one handoff scenario for a single encoding type.
%%
%% TestMode   - passed to deploy_test_nodes/2 to pick the node configuration.
%% NTestItems - number of items written to the root node and verified later.
%% NTestNodes - number of nodes to deploy; the first one is the root node.
%% Encoding   - `default' or an encoding atom handed to set_handoff_encoding/2.
%%
%% Returns whatever the final rt:brutal_kill/1 call on the root node returns.
run_test(TestMode, NTestItems, NTestNodes, Encoding) ->
    lager:info("Testing handoff (items ~p, encoding: ~p)", [NTestItems, Encoding]),

    %% This resets nodes, cleans up stale directories, etc.:
    lager:info("Cleaning up..."),
    rt:setup_harness(dummy, dummy),

    lager:info("Spinning up test nodes"),
    Nodes = deploy_test_nodes(TestMode, NTestNodes),
    [RootNode | TestNodes] = Nodes,

    rt:wait_for_service(RootNode, riak_kv),

    set_handoff_encoding(Encoding, Nodes),

    %% Install a delaying intercept on handoff folding on every node, to
    %% test the efficacy of the handoff heartbeat addition.
    Intercept = {riak_core_handoff_sender,
                 [{{visit_item, 3}, delayed_visit_item_3}]},
    lists:foreach(fun(Node) -> rt_intercept:add(Node, Intercept) end, Nodes),

    lager:info("Populating root node."),
    rt:systest_write(RootNode, NTestItems),

    %% Also write one object into a bucket that has a bucket type; the type
    %% is created and activated first.
    TypedBucket = {<<"type">>, <<"bucket">>},
    rt:create_and_activate_bucket_type(RootNode, <<"type">>, []),
    rt:systest_write(RootNode, 1, 2, TypedBucket, 2),

    %% Test handoff on each node:
    lager:info("Testing handoff for cluster."),
    lists:foreach(
      fun(TestNode) -> test_handoff(RootNode, TestNode, NTestItems) end,
      TestNodes),

    %% Prepare for the next run. Nodes are killed rather than stopped
    %% politely because that is faster.
    lager:info("Bringing down test nodes."),
    lists:foreach(fun rt:brutal_kill/1, TestNodes),

    %% The "root" node can't leave() since it's the only node left:
    lager:info("Stopping root node."),
    rt:brutal_kill(RootNode).
|
2013-02-25 21:08:34 +00:00
|
|
|
|
2014-09-11 21:05:26 +00:00
|
|
|
%% Selects the handoff data encoding used by the given nodes.
%%
%% For `default' nothing is changed and `true' is returned. For any other
%% encoding every node gets the override from override_data/1 written into
%% its app config, and the result is the list of values returned by
%% assert_using/3, one per node, in node order.
set_handoff_encoding(default, _Nodes) ->
    lager:info("Using default encoding type."),
    true;
set_handoff_encoding(Encoding, Nodes) ->
    lager:info("Forcing encoding type to ~p.", [Encoding]),

    %% Update all nodes (capabilities are not re-negotiated):
    [force_handoff_encoding(Node, Encoding) || Node <- Nodes].

%% Writes the encoding override into one node's app config and then compares
%% the node's {riak_kv, handoff_data_encoding} capability with Encoding.
%% NOTE(review): the comparison result is only returned, never asserted here.
force_handoff_encoding(Node, Encoding) ->
    rt:update_app_config(Node, override_data(Encoding)),
    assert_using(Node, {riak_kv, handoff_data_encoding}, Encoding).
|
|
|
|
|
|
|
|
%% Builds the app-config fragment that overrides the handoff_data_encoding
%% capability: under riak_core's `override_capability' key, both `use' and
%% `prefer' are set to Encoding.
override_data(Encoding) ->
    EncodingChoice = [{use, Encoding}, {prefer, Encoding}],
    CapabilityOverrides = [{handoff_data_encoding, EncodingChoice}],
    [{riak_core, [{override_capability, CapabilityOverrides}]}].
|
|
|
|
|
2013-02-25 21:08:34 +00:00
|
|
|
%% Joins NewNode to the cluster rooted at RootNode and checks that the data
%% written to the root node earlier can still be read once the join has
%% settled.
%%
%% RootNode   - node the test data was written to.
%% NewNode    - node being joined to RootNode.
%% NTestItems - number of items passed to rt:systest_read/2.
%%
%% Raises an EUnit assertion error if the nodes do not become ready or if any
%% read reports non-matching items; otherwise returns the result of the final
%% lager:info/1 call.
test_handoff(RootNode, NewNode, NTestItems) ->

    lager:info("Waiting for service on new node."),
    rt:wait_for_service(NewNode, riak_kv),

    lager:info("Joining new node with cluster."),
    rt:join(NewNode, RootNode),
    ?assertEqual(ok, rt:wait_until_nodes_ready([RootNode, NewNode])),
    rt:wait_until_no_pending_changes([RootNode, NewNode]),

    %% See if we get the same data back from the joined node that we added to the root node.
    %% Note: systest_read() returns /non-matching/ items, so getting nothing back is good.
    %% Asserting against [] (rather than on the length) makes a failure report
    %% show the offending items instead of just a count.
    lager:info("Validating data after handoff:"),
    Results = rt:systest_read(NewNode, NTestItems),
    ?assertEqual([], Results),

    %% The single object in the typed bucket is checked as well.
    %% NOTE(review): this read goes through RootNode, not NewNode - confirm
    %% that this is intended.
    Results2 = rt:systest_read(RootNode, 1, 2, {<<"type">>, <<"bucket">>}, 2),
    ?assertEqual([], Results2),
    lager:info("Data looks ok.").
|
2013-02-25 21:08:34 +00:00
|
|
|
|
|
|
|
%% Checks whether a node's capability has the expected value.
%%
%% Node                                  - node to query via rt:capability/2.
%% {CapabilityCategory, CapabilityName}  - capability to look up.
%% ExpectedCapabilityName                - value the capability should have.
%%
%% Returns `true' when the value reported by the node is exactly equal to the
%% expected one and `false' otherwise. Despite the name this function does not
%% raise on a mismatch; it logs a warning containing the actual value so that
%% a mismatch is visible even when the caller ignores the return value.
assert_using(Node, {CapabilityCategory, CapabilityName}, ExpectedCapabilityName) ->
    lager:info("assert_using ~p =:= ~p", [ExpectedCapabilityName, CapabilityName]),
    Capability = {CapabilityCategory, CapabilityName},
    %% Matching against the already-bound ExpectedCapabilityName is an exact
    %% (=:=) comparison.
    case rt:capability(Node, Capability) of
        ExpectedCapabilityName ->
            true;
        Actual ->
            lager:warning("Capability ~p on ~p is ~p, expected ~p",
                          [Capability, Node, Actual, ExpectedCapabilityName]),
            false
    end.
|
2013-02-25 21:08:34 +00:00
|
|
|
|
|
|
|
%% Deploys N nodes with a riak_core configuration chosen by the first
%% argument and returns the result of rt:deploy_nodes/2.
%%
%% false - regular settings: ring size, handoff acksync threshold and handoff
%%         receive timeout only.
%% true  - "turbo" settings: the same three values plus additional handoff,
%%         vnode-inactivity and gossip settings. For some testing purposes,
%%         making these limits smaller is helpful.
deploy_test_nodes(false, N) ->
    CoreSettings = [{ring_creation_size, 8},
                    {handoff_acksync_threshold, 20},
                    {handoff_receive_timeout, 2000}],
    rt:deploy_nodes(N, [{riak_core, CoreSettings}]);
deploy_test_nodes(true, N) ->
    lager:info("WARNING: Using turbo settings for testing."),
    CoreSettings = [{forced_ownership_handoff, 8},
                    {ring_creation_size, 8},
                    {handoff_concurrency, 8},
                    {vnode_inactivity_timeout, 1000},
                    {handoff_acksync_threshold, 20},
                    {handoff_receive_timeout, 2000},
                    {gossip_limit, {10000000, 60000}}],
    rt:deploy_nodes(N, [{riak_core, CoreSettings}]).
|