From 0d341f72105ae82e1b4e8c91016f16d8bd623cdb Mon Sep 17 00:00:00 2001 From: Nick Marino Date: Wed, 14 Sep 2016 16:07:51 -0400 Subject: [PATCH] Always ensure nodes are ready before continuing The try_nodes_ready function would previously get called after the first execution of plan_and_commit returned, and if it failed it would call plan_and_commit again. However, once the second plan_and_commit call finishes, try_nodes_ready would not get called again. This commit changes this behavior so that try_nodes_ready is always called after plan_and_commit succeeds. --- src/rt.erl | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/rt.erl b/src/rt.erl index 661e890c..a35da5a5 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -467,38 +467,38 @@ staged_join(Node, PNode) -> ?assertEqual(ok, join_with_retry(Fun)), ok. -plan_and_commit(Node) -> +plan_and_commit(Node, AllNodes) -> timer:sleep(500), lager:info("planning cluster join"), case rpc:call(Node, riak_core_claimant, plan, []) of {error, ring_not_ready} -> lager:info("plan: ring not ready"), timer:sleep(100), - plan_and_commit(Node); + plan_and_commit(Node, AllNodes); {ok, _, _} -> lager:info("plan: done"), - do_commit(Node) + do_commit(Node, AllNodes) end. -do_commit(Node) -> +do_commit(Node, AllNodes) -> lager:info("planning cluster commit"), case rpc:call(Node, riak_core_claimant, commit, []) of {error, plan_changed} -> lager:info("commit: plan changed"), timer:sleep(100), maybe_wait_for_changes(Node), - plan_and_commit(Node); + plan_and_commit(Node, AllNodes); {error, ring_not_ready} -> lager:info("commit: ring not ready"), timer:sleep(100), maybe_wait_for_changes(Node), - do_commit(Node); + do_commit(Node, AllNodes); {error, nothing_planned} -> %% Assume plan actually committed somehow lager:info("commit: nothing planned"), ok; ok -> - ok + try_nodes_ready(AllNodes) end. 
maybe_wait_for_changes(Node) -> @@ -1187,8 +1187,7 @@ join_cluster(Nodes) -> %% ok do a staged join and then commit it, this eliminates the %% large amount of redundant handoff done in a sequential join [staged_join(Node, Node1) || Node <- OtherNodes], - plan_and_commit(Node1), - try_nodes_ready(Nodes) + ?assertEqual(ok, wait_until(fun() -> ok == plan_and_commit(Node1, Nodes) end)) end, ?assertEqual(ok, wait_until_nodes_ready(Nodes)), @@ -1216,9 +1215,9 @@ product(Node) -> try_nodes_ready(Nodes) -> try_nodes_ready(Nodes, 10, 500). -try_nodes_ready([Node1 | _Nodes], 0, _SleepMs) -> - lager:info("Nodes not ready after initial plan/commit, retrying"), - plan_and_commit(Node1); +try_nodes_ready(_Nodes, 0, _SleepMs) -> + lager:info("Nodes not ready after plan/commit, retrying"), + not_ready; try_nodes_ready(Nodes, N, SleepMs) -> ReadyNodes = [Node || Node <- Nodes, is_ready(Node) =:= true], case ReadyNodes of