mirror of https://github.com/valitydev/riak_test.git (synced 2024-11-06 08:35:22 +00:00)
8e2b99d6e8
There were some race conditions we found in this test where reads to an ensemble may fail for a brief period of time after recovery. By using the waitfor function, we ensure that the tests will still pass as long as things eventually recover and start working properly again.
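The retry helper referred to above is the pattern assert_valid_read/4 uses in the module below; a minimal sketch of the idea, assuming a connected PB client (PBC) and riak_test's rt:wait_until/1:

    %% Re-run the read until it returns the expected value (or rt:wait_until
    %% gives up), so a read that fails briefly right after recovery does not
    %% fail the whole test.
    ReadFun = fun() ->
                  Obj = rt:pbc_read(PBC, Bucket, Key),
                  Val =:= riakc_obj:get_value(Obj)
              end,
    ?assertEqual(ok, rt:wait_until(ReadFun))
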
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2013-2014 Basho Technologies, Inc.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------

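%% @doc Byzantine-failure test for strongly consistent (riak_ensemble)
%% buckets: build an 8-node cluster with an n_val of 5, write a key to a
%% consistent bucket type, and verify that reads still reach quorum (or
%% fail safely) after losing synctrees, partition data, and whole nodes in
%% various combinations, including a backup/restore that deliberately
%% desynchronizes data and trees.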
-module(ensemble_byzantine).
-export([confirm/0]).
-include_lib("eunit/include/eunit.hrl").

-define(HARNESS, (rt_config:get(rt_harness))).

-define(NUM_NODES, 8).
-define(NVAL, 5).

confirm() ->
    NumNodes = ?NUM_NODES,
    NVal = ?NVAL,
    _Quorum = NVal div 2 + 1,
    Config = config(),
    lager:info("Building cluster and waiting for ensemble to stabilize"),
    Nodes = ensemble_util:build_cluster(NumNodes, Config, NVal),
    vnode_util:load(Nodes),
    Node = hd(Nodes),
    ensemble_util:wait_until_stable(Node, NVal),

    create_strong_bucket_type(Node, NVal),

    Bucket = {<<"strong">>, <<"test">>},
    Key = <<"test-key">>,
    Val = <<"test-val">>,

    {ok, PL} = get_preflist(Node, Bucket, Key, NVal),
    ?assertEqual(NVal, length(PL)),
    lager:info("PREFERENCE LIST: ~n ~p", [PL]),

    PBC = rt:pbc(Node),

    normal_write_and_read(PBC, Bucket, Key, Val),
    test_lose_one_node_one_partition(PBC, Bucket, Key, Val, PL),
    test_lose_all_but_one_partition(PBC, Bucket, Key, Val, PL),
    test_lose_minority_synctrees(PBC, Bucket, Key, Val, PL),
    test_lose_majority_synctrees(PBC, Bucket, Key, Val, PL),
    test_lose_minority_synctrees_one_node_partitioned(PBC, Bucket, Key, Val,
                                                      PL, Nodes),
    test_lose_all_data_and_trees_except_one_node(PBC, Bucket, Key, Val, PL),
    {ok, _NewVal} = test_backup_restore_data_not_trees(Bucket, Key, Val, PL),
    test_lose_all_data(PBC, Bucket, Key, PL),

    pass.

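%% Cluster configuration for the test: enable_consensus turns on
%% riak_ensemble, and a target_n_val of 8 spreads preference lists across
%% distinct nodes in the 8-node cluster.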
config() ->
    [{riak_core, [{default_bucket_props, [{n_val, 5}]},
                  {vnode_management_timer, 1000},
                  {ring_creation_size, 16},
                  {enable_consensus, true},
                  {target_n_val, 8}]}].

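%% Losing synctrees on a minority, or even a majority, of the ensemble
%% peers must not prevent the ensemble from re-establishing quorum and
%% serving the previously written value.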
test_lose_minority_synctrees(PBC, Bucket, Key, Val, PL) ->
    Minority = minority_vnodes(PL),
    assert_lose_synctrees_and_recover(PBC, Bucket, Key, Val, PL, Minority).

test_lose_majority_synctrees(PBC, Bucket, Key, Val, PL) ->
    Majority = majority_vnodes(PL),
    assert_lose_synctrees_and_recover(PBC, Bucket, Key, Val, PL, Majority).

test_lose_minority_synctrees_one_node_partitioned(PBC, Bucket, Key, Val, PL,
                                                  Nodes) ->
    Minority = minority_vnodes(PL),
    {{Idx0, Node0}, primary} = hd(PL),
    Ensemble = {kv, Idx0, 5},
    Rest = [Node || {{_, Node}, _} <- lists:sublist(PL, ?NVAL-1)],

    %% Partition off the last node in the preference list
    {{_, PartitionedNode}, _} = lists:nth(?NVAL, PL),
    PartInfo = rt:partition([PartitionedNode], Nodes -- [PartitionedNode]),

    %% Wipe synctrees on a minority of the vnodes and kill the remaining peers
    [wipe_tree(Ensemble, Idx, Node) || {{Idx, Node}, _} <- Minority],
    kill_peers(Ensemble, Rest),

    %% With a majority of nodes down (a minority rebooting, one partitioned)
    %% we shouldn't be able to reach quorum: we now have a majority of
    %% untrusted synctrees, and not all nodes are online.
    timer:sleep(10000),
    {error, <<"timeout">>} = riakc_pb_socket:get(PBC, Bucket, Key, []),

    %% Heal the partition so that we can get quorum again
    rt:heal(PartInfo),
    ensemble_util:wait_until_quorum(Node0, Ensemble),
    assert_valid_read(PBC, Bucket, Key, Val).

test_lose_all_but_one_partition(PBC, Bucket, Key, Val, PL) ->
    Wiped = tl(PL),
    {{Idx0, Node0}, primary} = hd(PL),
    Ensemble = {kv, Idx0, 5},
    lager:info("Wiping Data on Following Vnodes: ~p", [Wiped]),
    wipe_partitions(Wiped),
    ensemble_util:wait_until_quorum(Node0, Ensemble),
    assert_valid_read(PBC, Bucket, Key, Val).

test_lose_one_node_one_partition(PBC, Bucket, Key, Val, PL) ->
    {{Idx0, Node0}, primary} = hd(PL),
    Ensemble = {kv, Idx0, 5},
    Leader = ensemble_util:get_leader_pid(Node0, Ensemble),
    LeaderNode = node(Leader),
    LeaderIdx = get_leader_idx(PL, LeaderNode),
    lager:info("Wiping Idx ~p data on LeaderNode ~p", [LeaderIdx, LeaderNode]),
    wipe_partition(LeaderIdx, LeaderNode),
    ensemble_util:wait_until_quorum(LeaderNode, Ensemble),
    assert_valid_read(PBC, Bucket, Key, Val).

test_lose_all_data_and_trees_except_one_node(PBC, Bucket, Key, Val, PL) ->
    Wiped = tl(PL),
    {{Idx0, Node0}, primary} = hd(PL),
    Ensemble = {kv, Idx0, 5},
    wipe_partitions(Wiped),
    wipe_trees(Ensemble, Wiped),
    ensemble_util:wait_until_quorum(Node0, Ensemble),
    assert_valid_read(PBC, Bucket, Key, Val).

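%% Restore a data backup that is older than the current synctrees: the
%% ensemble must fail the read rather than silently serve stale data.
%% Restoring the backup that matches the trees makes reads succeed again.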
test_backup_restore_data_not_trees(Bucket, Key, _Val, PL) ->
    {{Idx, Node}, primary} = hd(PL),
    Ensemble = {kv, Idx, 5},
    stop_nodes(PL),
    backup_data(1, PL),
    start_nodes(PL),
    PBC = rt:pbc(Node),
    ensemble_util:wait_until_quorum(Node, Ensemble),
    timer:sleep(10000),

    Obj0 = rt:pbc_read(PBC, Bucket, Key),
    NewVal = <<"test-val2">>,
    Obj = riakc_obj:update_value(Obj0, NewVal),
    riakc_pb_socket:put(PBC, Obj),
    assert_valid_read(PBC, Bucket, Key, NewVal),

    stop_nodes(PL),
    %% Back up the new data.
    backup_data(2, PL),
    %% Restore the old data.
    restore_data(1, PL),
    start_nodes(PL),
    PBC1 = rt:pbc(Node),
    ensemble_util:wait_until_quorum(Node, Ensemble),

    %% The read must fail: the synctrees now correspond to newer data than
    %% what was restored.
    assert_failed_read(PBC1, Bucket, Key),
    stop_nodes(PL),

    %% Restore the new data that matches the trees.
    restore_data(2, PL),
    start_nodes(PL),
    PBC2 = rt:pbc(Node),
    ensemble_util:wait_until_quorum(Node, Ensemble),

    assert_valid_read(PBC2, Bucket, Key, NewVal),
    {ok, NewVal}.

test_lose_all_data(PBC, Bucket, Key, PL) ->
    wipe_partitions(PL),
    {error, _} = E = riakc_pb_socket:get(PBC, Bucket, Key, []),
    lager:info("All data loss error = ~p", [E]).

assert_valid_read(PBC, Bucket, Key, Val) ->
    ReadFun = fun() ->
                  Obj = rt:pbc_read(PBC, Bucket, Key),
                  Val =:= riakc_obj:get_value(Obj)
              end,
    ?assertEqual(ok, rt:wait_until(ReadFun)).

assert_failed_read(PBC, Bucket, Key) ->
    ?assertMatch({error, _}, riakc_pb_socket:get(PBC, Bucket, Key, [])).

normal_write_and_read(PBC, Bucket, Key, Val) ->
    lager:info("Writing a consistent key"),
    ok = rt:pbc_write(PBC, Bucket, Key, Val),
    lager:info("Read key to verify it exists"),
    assert_valid_read(PBC, Bucket, Key, Val).

stop_nodes(PL) ->
    [rt:stop_and_wait(Node) || {{_, Node}, _} <- PL].

start_nodes(PL) ->
    [rt:start_and_wait(Node) || {{_, Node}, _} <- PL].

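%% Backups are plain directory copies of the backend data directory; backup
%% N for a node is stored alongside it as <backend>N.bak (e.g. data/bitcask1.bak).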
data_path(Node) ->
    ?HARNESS:node_path(Node) ++ "/data/" ++ backend_dir().

backup_path(Node, N) ->
    data_path(Node) ++ integer_to_list(N) ++ ".bak".

backup_data(N, PL) ->
    [backup_node(Node, N) || {{_, Node}, _} <- PL].

backup_node(Node, N) ->
    Path = data_path(Node),
    BackupPath = backup_path(Node, N),
    Cmd = "cp -R " ++ Path ++ " " ++ BackupPath,
    lager:info("~p", [os:cmd(Cmd)]).

restore_data(N, PL) ->
    [restore_node(Node, N) || {{_, Node}, _} <- PL].

restore_node(Node, N) ->
    Path = data_path(Node),
    BackupPath = backup_path(Node, N),
    rm_backend_dir(Node),
    Cmd = "mv " ++ BackupPath ++ " " ++ Path,
    ?assertEqual([], os:cmd(Cmd)).

assert_lose_synctrees_and_recover(PBC, Bucket, Key, Val, PL, ToLose) ->
    {{Idx0, Node0}, primary} = hd(PL),
    Ensemble = {kv, Idx0, 5},
    [wipe_tree(Ensemble, Idx, Node) || {{Idx, Node}, _} <- ToLose],
    ensemble_util:wait_until_quorum(Node0, Ensemble),
    assert_valid_read(PBC, Bucket, Key, Val).

majority_vnodes(PL) ->
    Num = ?NVAL div 2 + 1,
    {Majority, _} = lists:split(Num, PL),
    Majority.

minority_vnodes(PL) ->
    Num = ?NVAL div 2,
    {Minority, _} = lists:split(Num, PL),
    Minority.

get_leader_idx(PL, LeaderNode) ->
    [{LeaderIdx, _}] = [{Idx, N} || {{Idx, N}, _} <- PL, N =:= LeaderNode],
    LeaderIdx.

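%% Kill the riak_ensemble peer processes that the given nodes are running
%% for this ensemble.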
kill_peers(Ensemble, Nodes) ->
    Node = hd(Nodes),
    {_, [View | _]} = rpc:call(Node, riak_ensemble_manager, get_views, [Ensemble]),
    Peers = [P || P = {_Id, N} <- View, lists:member(N, Nodes)],
    lager:info("Killing Peers: ~p", [Peers]),
    Pids = [rpc:call(Node, riak_ensemble_manager, get_peer_pid,
                     [Ensemble, Peer]) || Peer <- Peers],
    [exit(Pid, kill) || Pid <- Pids, Pid =/= undefined].

wipe_partitions(PL) ->
    [wipe_partition(Idx, Node) || {{Idx, Node}, _} <- PL].

wipe_trees(Ensemble, PL) ->
    [wipe_tree(Ensemble, Idx, Node) || {{Idx, Node}, _} <- PL].

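%% Delete a vnode's on-disk ensemble synctree and kill its peer, forcing
%% the tree to be rebuilt (and treated as untrusted) when the peer restarts.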
wipe_tree(Ensemble, Idx, Node) ->
    rt:clean_data_dir([Node], "ensembles/trees/kv_" ++ integer_to_list(Idx)),
    {_, [View | _]} = rpc:call(Node, riak_ensemble_manager, get_views, [Ensemble]),
    [Peer] = [P || P = {_Id, N} <- View, Node =:= N],
    Pid = rpc:call(Node, riak_ensemble_manager, get_peer_pid, [Ensemble, Peer]),
    lager:info("Peer = ~p, Pid = ~p", [Peer, Pid]),
    exit(Pid, kill).

wipe_partition(Idx, Node) ->
    rm_partition_dir(Idx, Node),
    vnode_util:kill_vnode({Idx, Node}).

rm_backend_dir(Node) ->
    rt:clean_data_dir([Node], backend_dir()).

rm_partition_dir(Idx, Node) ->
    RelativePath = backend_dir() ++ "/" ++ integer_to_list(Idx),
    rt:clean_data_dir([Node], RelativePath).

backend_dir() ->
    TestMetaData = riak_test_runner:metadata(),
    KVBackend = proplists:get_value(backend, TestMetaData),
    backend_dir(KVBackend).

backend_dir(undefined) ->
    %% riak_test defaults to bitcask when undefined
    backend_dir(bitcask);
backend_dir(bitcask) ->
    "bitcask";
backend_dir(eleveldb) ->
    "leveldb".

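%% Compute the primary preference list for {Bucket, Key} via RPC on Node,
%% using the standard consistent-hashing keyfun.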
get_preflist(Node, Bucket, Key, NVal) ->
    DocIdx = rpc:call(Node, riak_core_util, chash_std_keyfun, [{Bucket, Key}]),
    PL = rpc:call(Node, riak_core_apl, get_primary_apl, [DocIdx, NVal, riak_kv]),
    {ok, PL}.

create_strong_bucket_type(Node, NVal) ->
    lager:info("Creating/activating 'strong' bucket type"),
    rt:create_and_activate_bucket_type(Node, <<"strong">>,
                                       [{consistent, true}, {n_val, NVal}]),
    ensemble_util:wait_until_stable(Node, NVal).