Merge remote branch 'origin/riak/2.1' into merge/bch/2.1.3-to-ts

Brett Hazen 2015-12-16 16:46:18 +00:00
commit f8e94b070a
20 changed files with 167 additions and 51 deletions

View File

@@ -721,7 +721,13 @@ wait_until_no_pending_changes(Nodes) ->
rpc:multicall(Nodes, riak_core_vnode_manager, force_handoffs, []),
{Rings, BadNodes} = rpc:multicall(Nodes, riak_core_ring_manager, get_raw_ring, []),
Changes = [ riak_core_ring:pending_changes(Ring) =:= [] || {ok, Ring} <- Rings ],
- BadNodes =:= [] andalso length(Changes) =:= length(Nodes) andalso lists:all(fun(T) -> T end, Changes)
+ case BadNodes =:= [] andalso length(Changes) =:= length(Nodes) andalso lists:all(fun(T) -> T end, Changes) of
+     true -> true;
+     false ->
+         NodesWithChanges = [Node || {Node, false} <- lists:zip(Nodes -- BadNodes, Changes)],
+         lager:info("Changes not yet complete, or bad nodes. BadNodes=~p, Nodes with Pending Changes=~p~n", [BadNodes, NodesWithChanges]),
+         false
+ end
end,
?assertEqual(ok, wait_until(F)),
ok.
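
On failure, the new code logs which nodes still report pending ring changes, so a hung wait can be diagnosed from the test log instead of timing out silently. For orientation, rt:wait_until/1 is essentially this polling loop (a sketch of the assumed shape; the retry count and delay here are hypothetical — the real helper in rt.erl reads them from the test config):

    wait_until(Fun) ->
        wait_until(Fun, 60, 1000).

    wait_until(Fun, Retries, Delay) ->
        %% Poll Fun until it returns true, or give up after Retries attempts.
        case Fun() of
            true -> ok;
            _ when Retries > 1 ->
                timer:sleep(Delay),
                wait_until(Fun, Retries - 1, Delay);
            _ -> {fail, max_retries}
        end.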

View File

@@ -10,7 +10,11 @@ confirm() ->
lager:info("Deploy some nodes"),
Nodes = rt:build_cluster(4, [], [
{riak_core, [{default_bucket_props,
- [{n_val, 2}]}]}]),
+ [
+  {n_val, 2},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]}]}]),
Node = hd(Nodes),
RMD = riak_test_runner:metadata(),
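
This default_bucket_props change recurs through most files in this commit: tests now enable siblings (allow_mult) and dotted version vectors (dvv_enabled) explicitly instead of relying on the node defaults. A quick way a test could confirm the props actually took effect — a sketch, with check_props as a hypothetical helper; riak_core_bucket:get_bucket/1 returns the effective properties for a bucket:

    check_props(Node) ->
        Props = rpc:call(Node, riak_core_bucket, get_bucket, [<<"any_bucket">>]),
        %% Both settings come from default_bucket_props above.
        true = proplists:get_value(allow_mult, Props),
        true = proplists:get_value(dvv_enabled, Props),
        ok.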

View File

@@ -56,7 +56,7 @@ confirm() ->
test_lose_minority_synctrees(PBC, Bucket, Key, Val, PL),
test_lose_majority_synctrees(PBC, Bucket, Key, Val, PL),
test_lose_minority_synctrees_one_node_partitioned(PBC, Bucket, Key, Val,
- PL, Nodes),
+ PL, Nodes),
test_lose_all_data_and_trees_except_one_node(PBC, Bucket, Key, Val, PL),
{ok, _NewVal} = test_backup_restore_data_not_trees(Bucket, Key, Val, PL),
test_lose_all_data(PBC, Bucket, Key, PL),
@@ -64,7 +64,12 @@ confirm() ->
pass.
config() ->
- [{riak_core, [{default_bucket_props, [{n_val, 5}]},
+ [{riak_core, [{default_bucket_props,
+ [
+  {n_val, 5},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]},
{vnode_management_timer, 1000},
{ring_creation_size, 16},
{enable_consensus, true},
@@ -79,7 +84,7 @@ test_lose_majority_synctrees(PBC, Bucket, Key, Val, PL) ->
assert_lose_synctrees_and_recover(PBC, Bucket, Key, Val, PL, Majority).
test_lose_minority_synctrees_one_node_partitioned(PBC, Bucket, Key, Val, PL,
- Nodes) ->
+ Nodes) ->
Minority = minority_vnodes(PL),
{{Idx0, Node0}, primary} = hd(PL),
Ensemble = {kv, Idx0, 5},
@@ -251,7 +256,7 @@ kill_peers(Ensemble, Nodes) ->
Peers = [P || P={_Id, N} <- View, lists:member(N, Nodes)],
lager:info("Killing Peers: ~p", [Peers]),
Pids = [rpc:call(Node, riak_ensemble_manager, get_peer_pid,
- [Ensemble, Peer]) || Peer <- Peers],
+ [Ensemble, Peer]) || Peer <- Peers],
[exit(Pid, kill) || Pid <- Pids, Pid =/= undefined].
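
kill_peers/2 counts on riak_ensemble's supervision to restart the killed peers. A test that needs to block until a peer is actually back could combine the same get_peer_pid call with the wait_until pattern above — a sketch, with wait_for_peer_restart as a hypothetical helper:

    wait_for_peer_restart(Node, Ensemble, Peer, OldPid) ->
        rt:wait_until(fun() ->
            case rpc:call(Node, riak_ensemble_manager, get_peer_pid, [Ensemble, Peer]) of
                undefined -> false;                 %% not restarted yet
                OldPid -> false;                    %% old process still registered
                NewPid when is_pid(NewPid) -> true;
                _ -> false                          %% e.g. {badrpc, _}
            end
        end).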
wipe_partitions(PL) ->

View File

@@ -27,7 +27,12 @@
-define(RING_SIZE, 16).
config() ->
- [{riak_core, [{default_bucket_props, [{n_val, 5}]},
+ [{riak_core, [{default_bucket_props,
+ [
+  {n_val, 5},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]},
{vnode_management_timer, 1000},
{ring_creation_size, ?RING_SIZE},
{enable_consensus, true},

View File

@@ -58,10 +58,15 @@ fast_config(Nval, EnableAAE) when is_boolean(EnableAAE) ->
fast_config(NVal, RingSize, EnableAAE) ->
[config_aae(EnableAAE),
- {riak_core, [{default_bucket_props, [{n_val, NVal}]},
- {vnode_management_timer, 1000},
- {ring_creation_size, RingSize},
- {enable_consensus, true}]}].
+ {riak_core, [{default_bucket_props,
+ [
+  {n_val, NVal},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]},
+ {vnode_management_timer, 1000},
+ {ring_creation_size, RingSize},
+ {enable_consensus, true}]}].
config_aae(true) ->
{riak_kv, [{anti_entropy_build_limit, {100, 1000}},

View File

@@ -13,7 +13,11 @@ confirm() ->
lager:info("Deploy some nodes"),
Nodes = rt:build_cluster(4, [], [
{riak_core, [{default_bucket_props,
- [{n_val, 2}]}]}]),
+ [
+  {n_val, 2},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]}]}]),
Node = hd(Nodes),
RMD = riak_test_runner:metadata(),

View File

@@ -30,7 +30,7 @@ confirm() ->
PrivDir = rt:priv_dir(),
Conf = [
{riak_core, [
- {default_bucket_props, [{allow_mult, true}]},
+ {default_bucket_props, [{allow_mult, true}, {dvv_enabled, true}]},
{ssl, [
{certfile, filename:join([CertDir,
"site3.basho.com/cert.pem"])},

View File

@@ -59,7 +59,12 @@ default_config(#config{
fsm_limit = FsmLimit
}) ->
[{riak_core, [{ring_creation_size, 8},
- {default_bucket_props, [{n_val, 5}]},
+ {default_bucket_props,
+ [
+  {n_val, 5},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]},
{vnode_management_timer, 1000},
{enable_health_checks, false},
{enable_consensus, true},

View File

@@ -53,7 +53,7 @@ confirm() ->
PrivDir = rt:priv_dir(),
Conf = [
{riak_core, [
- {default_bucket_props, [{allow_mult, true}]},
+ {default_bucket_props, [{allow_mult, true}, {dvv_enabled, true}]},
{ssl, [
{certfile, filename:join([CertDir,"site3.basho.com/cert.pem"])},
{keyfile, filename:join([CertDir, "site3.basho.com/key.pem"])},

View File

@@ -17,14 +17,14 @@
%% cleared and the model is reset. The main goal is to advance the proxy
%% into interesting new states.
%%
- %% This test can be run outside of riak_test while working on it.
+ %% This test can be run outside of riak_test while working on it.
%% Symlink the source into a release build and run
- %% c(proxy_overload_recovery).
+ %% c(proxy_overload_recovery).
%% proxy_overload_recovery:run(300). % Run for 5 mins
%%
%% On failure you can re-run counter examples *and* print out the internal
%% state with the run.
- %% proxy_overload_recovery:check().
+ %% proxy_overload_recovery:check().
%%
%%
%% TODO/Questions:
%% 1) Is there a better way to do the initialization step?
@@ -137,7 +137,7 @@ prop_proxy_recovery() ->
[catch msgq_len(VPid)])
end
end,
- measure(duration, Msecs,
+ measure(duration, Msecs,
aggregate(with_title("Commands"), command_names(Cmds),
pretty_commands(?MODULE, Cmds, {H, S, Res},
Res == ok))))
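
The whitespace fix above sits inside standard Quviq EQC instrumentation. Stripped of the riak-specific setup, the same aggregate/pretty_commands stack looks like this (a sketch; my_model stands in for any eqc_statem callback module):

    prop_example() ->
        ?FORALL(Cmds, commands(my_model),
                begin
                    {H, S, Res} = run_commands(my_model, Cmds),
                    aggregate(with_title("Commands"), command_names(Cmds),
                              pretty_commands(my_model, Cmds, {H, S, Res},
                                              Res == ok))
                end).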
@@ -173,13 +173,13 @@ precondition_common(#tstate{rt = undefined}, {call, _M, F, _A}) ->
precondition_common(_, {call, _M, F, _A}) ->
F /= prepare.
- %% %% Make sure we're still running what we think we're running - uncomment
+ %% %% Make sure we're still running what we think we're running - uncomment
%% %% if having process death issues
%% invariant(#tstate{rt = undefined}) ->
%% true;
%% invariant(#tstate{rt = #rt{id = Index, ppid = PPid, vpid = VPid}}) ->
%% RegName = riak_core_vnode_proxy:reg_name(riak_kv_vnode, Index),
- %% PPid = whereis(RegName), % Check process we think it is.
+ %% PPid = whereis(RegName), % Check process we think it is.
%% true = is_process_alive(PPid),
%% true = is_process_alive(VPid),
%% true.
%% true = is_process_alive(PPid),
%% true = is_process_alive(VPid),
%% true.
@@ -208,13 +208,14 @@ prepare(ThresholdSeed) ->
{ok, VPid0} = riak_core_vnode_manager:get_vnode_pid(Id, riak_kv_vnode),
sys:resume(VPid0),
ok = supervisor:terminate_child(riak_core_vnode_sup, VPid0),
false = is_process_alive(VPid0),
%% Reset the proxy pid to make sure it resets state and picks up the new
%% environment variables
ok = supervisor:terminate_child(riak_core_vnode_proxy_sup, {riak_kv_vnode, Id}),
RegName = riak_core_vnode_proxy:reg_name(riak_kv_vnode, Index),
undefined = whereis(RegName),
+ VPid1 = wait_for_vnode_change(VPid0, Index),
{ok, PPid} = supervisor:restart_child(riak_core_vnode_proxy_sup, {riak_kv_vnode, Id}),
%% Find the proxy pid and check it's alive and matches the supervisor
@@ -225,6 +226,7 @@ prepare(ThresholdSeed) ->
%% and return the Pid so we know we have the same Pid.
{ok, VPid} = riak_core_vnode_proxy:command_return_vnode(
{riak_kv_vnode,Index,node()}, timeout),
+ ?assertEqual(VPid, VPid1),
true = is_process_alive(PPid),
true = is_process_alive(VPid),
@@ -264,14 +266,14 @@ resume_args(#tstate{rt = RT}) ->
resume(#rt{ppid = PPid, vpid = VPid}) ->
sys:resume(VPid),
%% Use the sys:get_status call to force a synchronous call
- %% against the vnode proxy to ensure all messages sent by
+ %% against the vnode & the proxy to ensure all messages sent by
%% this process have been serviced and there are no pending
%% 'ping's in the vnode before we continue.
%% Then drain the vnode to make sure any pending pongs have
- %% been sent.
- ok = drain(VPid),
+ %% been sent, and ensure the proxy has
+ _ = sys:get_status(PPid),
_ = sys:get_status(VPid),
- _ = sys:get_status(PPid).
+ ok = drain([VPid, PPid]).
resume_next(S, _V, _A) ->
S#tstate{vnode_running = true, proxy_msgs = 0, direct_msgs = 0}.
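
The reordering works because sys:get_status/1 is a synchronous system call: Erlang guarantees message ordering between any pair of processes, so by the time it returns, every message this process sent to that pid earlier has already been queued ahead of it and consumed. The barrier idea in isolation (sync_barrier is a hypothetical name):

    sync_barrier(Pid) ->
        %% Our earlier sends to Pid are queued before this system message,
        %% so its reply implies they have all been serviced.
        _ = sys:get_status(Pid),
        ok.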
@@ -324,28 +326,28 @@ overloaded_args(#tstate{vnode_running = Running, rt = RT}) ->
overloaded(Running, #rt{ppid = PPid, vpid = VPid}) ->
case Running of
true ->
- ok = drain(PPid), % make sure all proxy msgs processed/dropped
- ok = drain(VPid); % make sure any pending ping/pongs are processed
+ ok = drain([PPid, VPid]);
_ ->
ok
end,
- {riak_core_vnode_proxy:overloaded(PPid),
- msgq_len(VPid), % just for debug so we can review in log output
- sys:get_status(PPid)}. % ditto
+ {messages, PMsgs} = process_info(PPid, messages),
+ {messages, VMsgs} = process_info(VPid, messages),
+ Overloaded = riak_core_vnode_proxy:overloaded(PPid),
+ {Overloaded, {VMsgs, PMsgs}, sys:get_status(PPid)}.
overloaded_post(#tstate{threshold = undefined}, _A,
- {R, _VnodeQ, _ProxyStatus}) ->
+ {R, _Messages, _ProxyStatus}) ->
%% If there are no thresholds there should never be an overload
eq(R, false);
overloaded_post(#tstate{vnode_running = true}, _A,
- {R, _VnodeQ = 0, _ProxyStatus}) ->
+ {R, _Messages, _ProxyStatus}) ->
%% If the vnode is running, we have cleared queues so
%% should not be in overload.
eq(R, false);
overloaded_post(#tstate{vnode_running = false,
proxy_msgs = ProxyMsgs,
threshold = Threshold}, _A,
- {ResultOverload, _VnodeQ, _ProxyStatus}) ->
+ {ResultOverload, _Messages, _ProxyStatus}) ->
%% Either
%% mailbox is completely an estimate based on proxy msgs
%% or mailbox is a check + estimate since
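
Replacing the bare msgq_len/1 count with the full mailbox contents makes failing counterexamples far easier to read, at the cost of noisier output. The underlying call in isolation (dump_mailbox is a hypothetical helper):

    dump_mailbox(Pid) ->
        %% 'messages' snapshots the entire mailbox, not just its length.
        {messages, Msgs} = erlang:process_info(Pid, messages),
        lager:info("~p mailbox (~b msgs): ~p", [Pid, length(Msgs), Msgs]),
        Msgs.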
@@ -392,16 +394,33 @@ prep_env(Var, Val) ->
%% Wait until all messages are drained by the Pid. No guarantees
%% about future messages being sent, or that responses for the
%% last message consumed have been transmitted.
+ %% NOTE: The "drain 3 times in a row" was determined empirically,
+ %% and may not be sufficient (2 was not). Given time constraints,
+ %% living with it for now. If this fails, we should really add some
+ %% tracing code around the send of messages to Vnode and Proxy to
+ %% determine where extra messages are coming from rather than just
+ %% make this "try 4 times"
+ %%
- drain(Pid) ->
- case erlang:process_info(Pid, message_queue_len) of
- {message_queue_len, 0} ->
+ drain(Pid) when is_pid(Pid) ->
+ drain([Pid], {-1, -1});
+ drain(Pids) when is_list(Pids) ->
+ drain(Pids, {-1, -1}).
+ drain(Pids, {PrevPrev, Prev}) ->
+ _ = [sys:suspend(Pid) || Pid <- Pids],
+ Len = lists:foldl(fun(Pid, Acc0) ->
+     {message_queue_len, Len} = erlang:process_info(Pid, message_queue_len),
+     Acc0 + Len
+ end, 0, Pids),
+ _ = [sys:resume(Pid) || Pid <- Pids],
+ case {PrevPrev, Prev, Len} of
+ {0, 0, 0} ->
ok;
- {message_queue_len, L} when L > 0 ->
- timer:sleep(1), % give it a millisecond to drain
- drain(Pid);
- ER ->
- ER
+ _ ->
+ %% Attempt to ensure something else is scheduled before we try to drain again
+ erlang:yield(),
+ timer:sleep(1),
+ drain(Pids, {Prev, Len})
end.
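
The "three zero-length samples in a row" rule lives in the {PrevPrev, Prev, Len} triple. If it ever does need to become "try 4 times", as the NOTE contemplates, a parameterized variant of the same suspend/sample/resume loop would be (a sketch, not part of this commit):

    drain_stable(Pids, Needed) ->
        drain_stable(Pids, Needed, 0).

    drain_stable(_Pids, Needed, Needed) ->
        ok;
    drain_stable(Pids, Needed, Zeros) ->
        _ = [sys:suspend(Pid) || Pid <- Pids],
        Len = lists:sum([element(2, erlang:process_info(P, message_queue_len))
                         || P <- Pids]),
        _ = [sys:resume(Pid) || Pid <- Pids],
        case Len of
            0 -> drain_stable(Pids, Needed, Zeros + 1);
            _ -> erlang:yield(),
                 timer:sleep(1),
                 drain_stable(Pids, Needed, 0)
        end.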
%% Return the length of the message queue (or crash if proc dead)
@@ -448,6 +467,17 @@ add_eqc_apps(Nodes) ->
end || App <- Apps, Node <- Nodes],
ok.
+ wait_for_vnode_change(VPid0, Index) ->
+     {ok, VPid1} = riak_core_vnode_manager:get_vnode_pid(Index, riak_kv_vnode),
+     case VPid1 of
+         VPid0 ->
+             timer:sleep(1),
+             wait_for_vnode_change(VPid0, Index);
+         _ ->
+             VPid1
+     end.
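
wait_for_vnode_change/2 spins until the manager hands back a different pid, so it hangs forever if the vnode never restarts. A bounded variant that fails loudly instead (hypothetical, not part of this commit):

    wait_for_vnode_change(_VPid0, Index, 0) ->
        error({vnode_never_changed, Index});
    wait_for_vnode_change(VPid0, Index, Retries) ->
        {ok, VPid1} = riak_core_vnode_manager:get_vnode_pid(Index, riak_kv_vnode),
        case VPid1 of
            VPid0 ->
                timer:sleep(1),
                wait_for_vnode_change(VPid0, Index, Retries - 1);
            _ ->
                VPid1
        end.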
-else. %% no EQC
-export([confirm/0]).

View File

@@ -16,7 +16,12 @@
{riak_core,
[
{ring_creation_size, 8},
- {default_bucket_props, [{n_val, 1}]}
+ {default_bucket_props,
+ [
+  {n_val, 1},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]}
]
},
{riak_kv,
@@ -46,6 +51,8 @@ confirm() ->
pass.
simple_test() ->
+ lager:info("Starting simple_test"),
%% Deploy 6 nodes.
Nodes = rt:deploy_nodes(6, ?CONF(5), [riak_kv, riak_repl]),
@@ -118,6 +125,8 @@ simple_test() ->
pass.
dual_test() ->
+ lager:info("Starting dual_test"),
%% Deploy 6 nodes.
Nodes = rt:deploy_nodes(6, ?CONF(infinity), [riak_kv, riak_repl]),
@@ -218,6 +227,8 @@ dual_test() ->
pass.
bidirectional_test() ->
+ lager:info("Starting bidirectional_test"),
%% Deploy 6 nodes.
Nodes = rt:deploy_nodes(6, ?CONF(5), [riak_kv, riak_repl]),
@@ -301,6 +312,8 @@ bidirectional_test() ->
pass.
difference_test() ->
+ lager:info("Starting difference_test"),
%% Deploy 6 nodes.
Nodes = rt:deploy_nodes(6, ?CONF(5), [riak_kv, riak_repl]),
@@ -393,6 +406,8 @@ difference_test() ->
pass.
deadlock_test() ->
+ lager:info("Starting deadlock_test"),
%% Deploy 6 nodes.
Nodes = rt:deploy_nodes(6, ?CONF(5), [riak_kv, riak_repl]),

View File

@@ -22,7 +22,12 @@ confirm() ->
{riak_core,
[
{ring_creation_size, 8},
- {default_bucket_props, [{n_val, 1}]}
+ {default_bucket_props,
+ [
+  {n_val, 1},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]}
]
},
{riak_kv,

View File

@@ -11,7 +11,12 @@
{riak_core,
[
{ring_creation_size, 8},
- {default_bucket_props, [{n_val, 1}]}
+ {default_bucket_props,
+ [
+  {n_val, 1},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]}
]
},
{riak_kv,

View File

@@ -13,7 +13,12 @@
{riak_core,
[
{ring_creation_size, 8},
- {default_bucket_props, [{n_val, 1}]}
+ {default_bucket_props,
+ [
+  {n_val, 1},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]}
]
},
{riak_kv,

View File

@@ -11,7 +11,12 @@
{riak_core,
[
{ring_creation_size, 8},
- {default_bucket_props, [{n_val, ?N}]}
+ {default_bucket_props,
+ [
+  {n_val, ?N},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]}
]
},
{riak_kv,

View File

@@ -62,7 +62,7 @@ confirm() ->
make_clusters() ->
Conf = [{riak_repl, [{fullsync_on_connect, false},
{fullsync_interval, disabled}]},
- {riak_core, [{default_bucket_props, [{allow_mult, true}]}]}],
+ {riak_core, [{default_bucket_props, [{allow_mult, true}, {dvv_enabled, true}]}]}],
Nodes = rt:deploy_nodes(6, Conf, [riak_kv, riak_repl]),
{ClusterA, ClusterB} = lists:split(3, Nodes),
A = make_cluster(ClusterA, "A"),

View File

@@ -60,7 +60,12 @@ confirm() ->
create_config(Backend) ->
[{riak_core, [
- {default_bucket_props, [{n_val, 1}]},
+ {default_bucket_props,
+ [
+  {n_val, 1},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]},
{ring_creation_size, 8},
{handoff_acksync_threshold, 20},
{handoff_concurrency, 4},

View File

@@ -304,7 +304,12 @@ config(RingSize, NVal) ->
config(RingSize, NVal, Backend) ->
[
{riak_core, [
- {default_bucket_props, [{n_val, NVal}]},
+ {default_bucket_props,
+ [
+  {n_val, NVal},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]},
{vnode_management_timer, 1000},
{ring_creation_size, RingSize}]
},

View File

@@ -38,7 +38,12 @@
[{riak_core,
[
{ring_creation_size, 16},
- {default_bucket_props, [{n_val, ?N}]},
+ {default_bucket_props,
+ [
+  {n_val, ?N},
+  {allow_mult, true},
+  {dvv_enabled, true}
+ ]},
{anti_entropy_build_limit, {100, 1000}},
{anti_entropy_concurrency, 8}
]},

View File

@@ -207,6 +207,8 @@ confirm() ->
%% Upgrade
yokozuna_rt:rolling_upgrade(Cluster, current),
[rt:wait_until_ready(ANode) || ANode <- Cluster],
+ [rt:assert_capability(ANode, ?YZ_CAP, true) || ANode <- Cluster],
+ [rt:assert_supported(rt:capability(ANode, all), ?YZ_CAP, [true, false]) ||
+ ANode <- Cluster],
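
Capability negotiation after a rolling upgrade is asynchronous, so asserting immediately can race the upgrade; the assert_capability calls above lean on the preceding wait_until_ready. A test could also block on the capability itself — a sketch, with wait_for_cap as a hypothetical helper built from the rt primitives used above:

    wait_for_cap(Node, Cap, Expected) ->
        rt:wait_until(fun() ->
            rt:capability(Node, Cap) =:= Expected
        end).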