2015-03-26 22:00:53 +00:00
|
|
|
%% -------------------------------------------------------------------
|
|
|
|
%%
|
2015-07-21 01:23:22 +00:00
|
|
|
%% Copyright (c) 2015 Basho Technologies, Inc.
|
2015-03-26 22:00:53 +00:00
|
|
|
%%
|
|
|
|
%% This file is provided to you under the Apache License,
|
|
|
|
%% Version 2.0 (the "License"); you may not use this file
|
|
|
|
%% except in compliance with the License. You may obtain
|
|
|
|
%% a copy of the License at
|
|
|
|
%%
|
|
|
|
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
%%
|
|
|
|
%% Unless required by applicable law or agreed to in writing,
|
|
|
|
%% software distributed under the License is distributed on an
|
|
|
|
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
|
|
%% KIND, either express or implied. See the License for the
|
|
|
|
%% specific language governing permissions and limitations
|
|
|
|
%% under the License.
|
|
|
|
%%
|
|
|
|
%% -------------------------------------------------------------------
|
2015-03-28 00:38:47 +00:00
|
|
|
-module(verify_handoff_write_once).
|
2015-03-26 22:00:53 +00:00
|
|
|
-behavior(riak_test).
|
|
|
|
-export([confirm/0]).
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
2015-03-28 00:38:47 +00:00
|
|
|
-define(BUCKET_TYPE, <<"write_once">>).
|
2015-07-07 20:45:05 +00:00
|
|
|
-define(BUCKET, {?BUCKET_TYPE, <<"write_once">>}).
|
2015-03-26 22:00:53 +00:00
|
|
|
|
|
|
|
|
2015-07-23 18:18:25 +00:00
|
|
|
%% @doc This test will run a handoff in the case of write_once buckets, verifying
|
|
|
|
%% that write-once entries are properly handed off as part of ownership handoff,
|
|
|
|
%% but more importantly, that riak_kv_vnode properly handles data being written into
|
|
|
|
%% riak while ownership handoff is taking place.
|
|
|
|
%%
|
|
|
|
%% This test will create two nodes each with a ring size of 8, and populate one node
|
|
|
|
%% with 100 entries. It will then join the two nodes to make a cluster of size 2, which
|
|
|
|
%% will result in ownership handoff of four of the vnodes (in each direction).
|
|
|
|
%%
|
|
|
|
%% We have intercepted the riak_kv_worker, which handles handoff for an individual vnode,
|
|
|
|
%% to ensure that we can send data through Riak while the cluster is in the handoff state,
|
|
|
|
%% thus ensuring that the riak_kv_vnode:handle_handoff_command callback is exercised in
|
|
|
|
%% the case of write_once buckets.
|
|
|
|
%%
|
|
|
|
%% We install intercepts at key points in the vnode to measure how many times various key
|
|
|
|
%% parts of the code are called.
|
|
|
|
%%
|
|
|
|
%% We run the above test twice, once in the case where we are doing asynchronous writes on the
|
|
|
|
%% back end, and once when we are using synchronous writes. Currently, this is toggled via
|
|
|
|
%% the use of a back end that can support async writes (currently, only leveldb)
|
|
|
|
%%
|
2015-07-21 01:23:22 +00:00
|
|
|
confirm() ->
    %% First pass: eleveldb backend, run with the async-writes expectation.
    AsyncCluster = run_test(create_config(riak_kv_eleveldb_backend), true),
    %% Clean up the first cluster before deploying the second one.
    rt:clean_cluster(AsyncCluster),
    %% Second pass: memory backend, run with the sync-writes expectation.
    _SyncCluster = run_test(create_config(riak_kv_memory_backend), false),
    pass.
|
2015-07-02 15:48:20 +00:00
|
|
|
|
2015-07-21 01:23:22 +00:00
|
|
|
%% Build the node configuration used by run_test/2.
%%
%% Backend is the riak_kv storage backend module to configure. The result is
%% a proplist with a riak_core section (bucket defaults, ring size of 8 and
%% handoff tuning) and a riak_kv section naming the backend.
create_config(Backend) ->
    DefaultBucketProps = [
        {n_val, 1},
        {allow_mult, true},
        {dvv_enabled, true}
    ],
    CoreSettings = [
        {default_bucket_props, DefaultBucketProps},
        {ring_creation_size, 8},
        {handoff_acksync_threshold, 20},
        {handoff_concurrency, 4},
        {handoff_receive_timeout, 2000},
        {vnode_management_timer, 100}
    ],
    KvSettings = [{storage_backend, Backend}],
    [{riak_core, CoreSettings}, {riak_kv, KvSettings}].
|
|
|
|
|
|
|
|
%% Run one handoff pass and return the deployed cluster.
%%
%% Config is a proplist as produced by create_config/1. AsyncWrites states
%% whether the write_once replies are expected on the asynchronous (true) or
%% the synchronous (false) counter.
run_test(Config, AsyncWrites) ->
    %%
    %% Deploy 2 nodes based on config and wait for K/V to start on each.
    %%
    lager:info("Deploying 2 nodes..."),
    Cluster = [RootNode, NewNode] = rt:deploy_nodes(2, Config),
    lists:foreach(fun(Node) -> rt:wait_for_service(Node, riak_kv) end, Cluster),

    %%
    %% Install the intercepts and the counter table on the root node.
    %%
    lager:info("Setting up intercepts..."),
    make_intercepts_tab(RootNode),
    %% This intercept tells the background process (started below) to send an
    %% event for each vnode that is being handed off (4 vnodes in this test).
    WorkerIntercepts = {riak_kv_worker, [{{handle_work, 3}, handle_work_intercept}]},
    rt_intercept:add(RootNode, WorkerIntercepts),
    VnodeIntercepts =
        {riak_kv_vnode, [
            %% Count every riak_kv_vnode:handle_handoff_command/3 call with a write_once message
            {{handle_handoff_command, 3}, count_handoff_w1c_puts},
            %% Count every riak_kv_vnode:handle_command/3 call with a write_once message
            {{handle_command, 3}, count_w1c_handle_command}
        ]},
    rt_intercept:add(RootNode, VnodeIntercepts),
    %% Start every counter at zero.
    lists:foreach(
        fun(Counter) ->
            true = rpc:call(RootNode, ets, insert, [intercepts_tab, {Counter, 0}])
        end,
        [w1c_async_replies, w1c_sync_replies, w1c_put_counter]
    ),

    %%
    %% Seed the root node with some data.
    %%
    lager:info("Populating root node..."),
    rt:create_and_activate_bucket_type(RootNode, ?BUCKET_TYPE, [{write_once, true}, {n_val, 1}]),
    NTestItems = 100,
    CoreConfig = proplists:get_value(riak_core, Config),
    RingSize = proplists:get_value(ring_creation_size, CoreConfig),
    [] = rt:systest_write(RootNode, 1, NTestItems, ?BUCKET, 1),

    %%
    %% Start the background process that writes into Riak during handoff,
    %% then join the nodes to trigger the handoff.
    %%
    lager:info("Joining new node with cluster..."),
    HandoffPuts = RingSize div 2,
    start_proc(RootNode, NTestItems, HandoffPuts),
    rt:join(NewNode, RootNode),
    TotalSent = wait_until_async_writes_complete(),
    ?assertMatch(ok, rt:wait_until_nodes_ready(Cluster)),
    rt:wait_until_bucket_type_visible(Cluster, ?BUCKET_TYPE),
    rt:wait_until_no_pending_changes(Cluster),
    rt:wait_until_transfers_complete(Cluster),

    %%
    %% Verify the results.
    %%
    lager:info("Validating data after handoff..."),
    Results2 = rt:systest_read(NewNode, 1, TotalSent, ?BUCKET, 1),
    ?assertMatch([], Results2),
    lager:info("Read ~p entries.", [TotalSent]),
    %% Fetch the current value of one counter from the root node's table.
    ReadCounter =
        fun(Name) ->
            [{_, Value}] = rpc:call(RootNode, ets, lookup, [intercepts_tab, Name]),
            Value
        end,
    Count = ReadCounter(w1c_put_counter),
    ?assertEqual(HandoffPuts, Count),
    lager:info("We handled ~p write_once puts during handoff.", [Count]),
    W1CAsyncReplies = ReadCounter(w1c_async_replies),
    W1CSyncReplies = ReadCounter(w1c_sync_replies),
    %% Every reply (seed writes plus handoff-time writes) must land on
    %% exactly one of the two counters, depending on the write mode.
    ExpectedReplies = NTestItems + HandoffPuts,
    {ExpectedAsync, ExpectedSync} =
        case AsyncWrites of
            true -> {ExpectedReplies, 0};
            false -> {0, ExpectedReplies}
        end,
    ?assertEqual(ExpectedAsync, W1CAsyncReplies),
    ?assertEqual(ExpectedSync, W1CSyncReplies),
    Cluster.
|
|
|
|
|
2015-03-26 22:00:53 +00:00
|
|
|
%% Create the public named ETS table `intercepts_tab` on Node.
%%
%% The table is created through rpc and given the remote sasl_safe_sup
%% process as heir -- presumably so that it outlives the process that
%% created it (TODO confirm). Crashes with badmatch unless the remote
%% ets:new/2 call returns the table name.
make_intercepts_tab(Node) ->
    HeirPid = rpc:call(Node, erlang, whereis, [sasl_safe_sup]),
    TableOpts = [named_table, public, set, {heir, HeirPid, {}}],
    intercepts_tab = rpc:call(Node, ets, new, [intercepts_tab, TableOpts]).
|
2015-07-07 20:45:05 +00:00
|
|
|
|
|
|
|
|
2015-07-23 18:18:25 +00:00
|
|
|
%%
|
|
|
|
%% Notes on the background process and corresponding intercepts.
|
|
|
|
%%
|
|
|
|
%% The code below is used to spawn a background process that is globally
|
|
|
|
%% registered with the name rt_ho_w1c_proc. This process will
|
|
|
|
%% wait for a message from the riak_kv_worker handle_work intercept,
|
|
|
|
%% telling this proc to write a message into Riak. The timing of the
|
|
|
|
%% intercept is such that the write is guaranteed to take place while
|
|
|
|
%% handoff is in progress, but before the vnode has been told to finish.
|
|
|
|
%% Sending this message will trigger this background process to do a
|
|
|
|
%% write into Riak, which in turn will force the vnode's
|
|
|
|
%% handle_handoff_command to be called.
|
|
|
|
%%
|
|
|
|
|
2015-07-07 20:45:05 +00:00
|
|
|
%% State carried through the background process loop.
-record(state, {
    node,          %% node that receives the writes issued during handoff
    sender,        %% pid that is sent `ok` on startup and the final key afterwards
    k,             %% highest key already written; new writes start at k + 1
    pids = [],     %% pids collected from {write, Pid} messages so far
    expected,      %% number of {write, Pid} messages to collect before writing
    init = true    %% true only on the first loop iteration
}).
|
|
|
|
|
2015-07-23 21:47:30 +00:00
|
|
|
%% Spawn the background writer process and register it globally as
%% rt_ho_w1c_proc.
%%
%% Node       - node the background process writes to
%% NTestItems - number of items already written; new keys start after it
%% Expected   - number of {write, Pid} messages the process waits for
%%
%% Returns ok once the spawned process has signalled that it is running.
%% Crashes with badmatch if the global name cannot be registered; otherwise
%% the process would be unreachable by name and the test would only fail
%% later with an unrelated timeout.
start_proc(Node, NTestItems, Expected) ->
    Self = self(),
    InitialState = #state{node=Node, sender=Self, k=NTestItems, expected=Expected},
    Pid = spawn_link(fun() -> loop(InitialState) end),
    yes = global:register_name(rt_ho_w1c_proc, Pid),
    %% The process is linked, so a crash before it sends ok takes this
    %% process down rather than leaving it blocked here.
    receive ok -> ok end.
|
2015-07-07 20:45:05 +00:00
|
|
|
|
2015-07-24 15:07:15 +00:00
|
|
|
%% Body of the background process.
%%
%% On the first iteration (init=true) the starter is sent `ok`. The process
%% then collects {write, Pid} messages until `expected` of them have arrived,
%% performs the handoff-time writes, releases the collected pids and reports
%% the last key written back to the starter.
loop(#state{sender=Sender, init=true} = State) ->
    Sender ! ok,
    collect_write_requests(State);
loop(#state{} = State) ->
    collect_write_requests(State).

%% Wait for one {write, Pid} message; either keep collecting or, once the
%% expected number of pids has been reached, do the writes.
collect_write_requests(#state{pids=Pids, expected=Expected} = State) ->
    receive
        {write, Pid} ->
            Waiting = [Pid | Pids],
            NumWaiting = length(Waiting),
            case NumWaiting =:= Expected of
                true ->
                    write_and_release(State, Waiting, NumWaiting);
                false ->
                    loop(State#state{pids=Waiting, init=false})
            end
    end.

%% The expected number of vnodes are now in the handoff state. Write the
%% entries, send ok back to each waiting pid so it can complete handoff, and
%% finally tell the starter the last key written so the test can proceed.
write_and_release(#state{node=Node, sender=Sender, k=K, expected=Expected}, Waiting, NumWaiting) ->
    FirstKey = K + 1,
    LastKey = K + Expected,
    [] = rt:systest_write(Node, FirstKey, LastKey, ?BUCKET, 1),
    lager:info(
        "Asynchronously wrote entries [~p..~p] during handoff. Sending ok's back to ~p waiting vnode(s)...",
        [FirstKey, LastKey, NumWaiting]
    ),
    lists:foreach(fun(WaitingPid) -> WaitingPid ! ok end, Waiting),
    Sender ! LastKey.
|
|
|
|
|
2015-07-24 15:07:15 +00:00
|
|
|
|
|
|
|
%% Block until the background process reports the last key it wrote.
%%
%% Returns that key, an integer. Only integer messages are accepted, so an
%% unrelated message already in the mailbox is not mistaken for the result;
%% such messages are left in the mailbox. Throws a string if no integer
%% arrives within 60 seconds.
wait_until_async_writes_complete() ->
    receive
        K when is_integer(K) -> K
    after 60000 ->
        throw("Timed out after 60s waiting for async writes to complete.")
    end.
|