Merge pull request #826 from basho/bugfix/jdm/cluster-does-not-converge-race

Bugfix/jdm/cluster does not converge race
This commit is contained in:
John Daily 2015-08-12 12:38:44 -04:00
commit ac42485583
2 changed files with 64 additions and 1 deletions

View File

@ -413,7 +413,13 @@ join(Node, PNode) ->
%% @doc Have `Node' send a staged join request to `PNode', retrying the
%% request until `riak_core:staged_join/1' answers `ok'.
%%
%% Logs the final outcome, asserts that it is `ok', and returns `ok'.
staged_join(Node, PNode) ->
    %% `riak_core:staged_join/1' can return an `{error,
    %% node_still_starting}' tuple which indicates retry. `wait_until'
    %% isn't smart enough to retry only on that tuple (the fun below
    %% reports `false' for every result other than `ok'), but it's good
    %% enough.
    %%
    %% The join is issued only from inside the retry fun. Binding `R' to
    %% a separate, unguarded rpc call first would make the match below
    %% fail with a badmatch whenever that first attempt was rejected,
    %% defeating the retry.
    R = wait_until(fun() ->
                       lager:info("Trying staged_join"),
                       rpc:call(Node, riak_core, staged_join, [PNode]) =:= ok
                   end),
    lager:info("[join] ~p to (~p): ~p", [Node, PNode, R]),
    ?assertEqual(ok, R),
    ok.

View File

@ -0,0 +1,57 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2012 Basho Technologies, Inc.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(verify_build_cluster_caps_race).
-behavior(riak_test).
-export([confirm/0]).
-include_lib("eunit/include/eunit.hrl").
-import(rt, [wait_until_nodes_ready/1,
wait_until_no_pending_changes/1]).
%% We have to define our own deploy_nodes to force a race condition
-define(HARNESS, (rt_config:get(rt_harness))).
%% Deploy one node per entry of `InitialConfig' through the configured
%% harness module (`?HARNESS'), pairing every entry with the atom
%% `current' first. Returns whatever the harness returns; this function
%% itself performs no waiting on the deployed nodes.
deploy_nodes(InitialConfig) ->
    Deployed = ?HARNESS:deploy_nodes([{current, Cfg} || Cfg <- InitialConfig]),
    lager:info("Start nodes ~p without waiting for services", [Deployed]),
    Deployed.
%% Make a single `riak_core:staged_join/1' call on `InitiatingNode',
%% targeting `DestinationNode', and hand back the raw rpc result
%% without retrying or asserting on it.
staged_join(InitiatingNode, DestinationNode) ->
    JoinArgs = [DestinationNode],
    rpc:call(InitiatingNode, riak_core, staged_join, JoinArgs).
%% riak_test entry point. Deploys two nodes, immediately asks node 2 to
%% stage a join to node 1, and passes when that join attempt comes back
%% as an `{error, _}' tuple.
confirm() ->
    lager:info("Deploying nodes"),
    %% Node 1 gets an empty riak_core config. Node 2 is given a
    %% `delayed_start' setting so that riak_core is slow to start
    %% there, which lets us verify that a join is disallowed while
    %% init is not yet complete.
    %% NOTE(review): units of the 20000 value are not visible here.
    PromptStart = [{riak_core, []}],
    SlowStart = [{riak_core, [{delayed_start, 20000}]}],
    [Node1, Node2] = deploy_nodes([PromptStart, SlowStart]),
    lager:info("joining Node 2 to the cluster..."),
    ?assertMatch({error, _}, staged_join(Node2, Node1)),
    pass.