From 5c321fbf126e83d8ac5843ece387c8b9a1181a9d Mon Sep 17 00:00:00 2001 From: Doug Rohrer Date: Wed, 1 Jun 2016 12:28:46 -0400 Subject: [PATCH] Update jmx_verify to actually verify supervisor behavior & retry settings. Previously would fail only if the retry + delay settings for riak_test were longer than the overall test_timeout, otherwise would pass even though it shouldn't. --- src/rt.erl | 14 +++++++++++--- tests/jmx_verify.erl | 36 ++++++++++++------------------------ 2 files changed, 23 insertions(+), 27 deletions(-) diff --git a/src/rt.erl b/src/rt.erl index 1b2c3e0c..1580733f 100644 --- a/src/rt.erl +++ b/src/rt.erl @@ -70,6 +70,7 @@ get_ip/1, get_node_logs/0, get_replica/5, + get_retry_settings/0, get_ring/1, get_version/0, get_version/1, @@ -658,10 +659,14 @@ is_ring_ready(Node) -> %% provided `rt_max_wait_time' and `rt_retry_delay' parameters in %% specified `riak_test' config file. wait_until(Fun) when is_function(Fun) -> + {Delay, Retry} = get_retry_settings(), + wait_until(Fun, Retry, Delay). + +get_retry_settings() -> MaxTime = rt_config:get(rt_max_wait_time), Delay = rt_config:get(rt_retry_delay), Retry = MaxTime div Delay, - wait_until(Fun, Retry, Delay). + {Delay, Retry}. %% @doc Convenience wrapper for wait_until for the myriad functions that %% take a node as single argument. @@ -1953,8 +1958,11 @@ setup_log_capture(Nodes) when is_list(Nodes) -> setup_log_capture(Node) when not is_list(Node) -> setup_log_capture([Node]). - expect_in_log(Node, Pattern) -> + {Delay, Retry} = get_retry_settings(), + expect_in_log(Node, Pattern, Retry, Delay). 
+ +expect_in_log(Node, Pattern, Retry, Delay) -> CheckLogFun = fun() -> Logs = rpc:call(Node, riak_test_lager_backend, get_logs, []), lager:info("looking for pattern ~s in logs for ~p", @@ -1968,7 +1976,7 @@ expect_in_log(Node, Pattern) -> false end end, - case rt:wait_until(CheckLogFun) of + case rt:wait_until(CheckLogFun, Retry, Delay) of ok -> true; _ -> diff --git a/tests/jmx_verify.erl b/tests/jmx_verify.erl index 73129562..b8b081be 100644 --- a/tests/jmx_verify.erl +++ b/tests/jmx_verify.erl @@ -118,7 +118,7 @@ confirm() -> pass. test_supervision() -> - JMXPort = 41111, + JMXPort = 22, Config = [{riak_jmx, [{enabled, true}, {port, JMXPort}]}], [Node|[]] = rt:deploy_nodes(1, Config), timer:sleep(20000), @@ -144,30 +144,18 @@ test_supervision() -> rpc:call(Node, riak_jmx, start, []), lager:info("It can fail, it can fail 10 times"), - - rt:wait_until(retry_check_fun(Node)), + %% NOTE: 10 times comes from riak_jmx_monitor.erl's MAX_RETRY macro (10). + %% Error logging is 0-based, so look for Retry #9 + {Delay, _Retry} = rt:get_retry_settings(), + TwoMinutes = 2*60*1000, + TwoMinutsOfRetry = TwoMinutes div Delay, + ?assertEqual(true, rt:expect_in_log(Node, "JMX server monitor .* exited with code .*\. Retry #9", + TwoMinutsOfRetry, Delay)), + ?assertEqual(true, rt:expect_in_log(Node, "JMX server monitor .* exited with code .*\. Reached maximum retries of 10", + TwoMinutsOfRetry, Delay)), rt:stop(Node), ok_ok. -retry_check_fun(Node) -> - fun() -> - Logs = rpc:call(Node, riak_test_lager_backend, get_logs, []), - 10 =:= lists:foldl(log_fold_fun(), 0, Logs) - end. - -log_fold_fun() -> - fun(Log, Sum) -> - try case re:run(Log, "JMX server monitor .* exited with code .*\. Retry #.*", []) of - {match, _} -> 1 + Sum; - _ -> Sum - end - catch - Err:Reason -> - lager:error("jmx supervision re:run failed w/ ~p: ~p", [Err, Reason]), - Sum - end - end. 
- test_application_stop() -> lager:info("Testing application:stop()"), JMXPort = 41111, @@ -178,7 +166,7 @@ test_application_stop() -> %% Let's make sure the java process is alive! lager:info("checking for riak_jmx.jar running."), - rt:wait_until(Node, fun(_N) -> + ?assertEqual(ok, rt:wait_until(Node, fun(_N) -> try case re:run(rpc:call(Node, os, cmd, ["ps -Af"]), "riak_jmx.jar", []) of nomatch -> false; _ -> true @@ -188,7 +176,7 @@ test_application_stop() -> lager:error("jmx stop re:run failed w/ ~p: ~p", [Err, Reason]), false end - end), + end)), rpc:call(Node, riak_jmx, stop, ["Stopping riak_jmx"]), timer:sleep(20000),