2013-01-22 19:26:17 +00:00
|
|
|
%% -------------------------------------------------------------------
|
|
|
|
%%
|
|
|
|
%% Copyright (c) 2012 Basho Technologies, Inc.
|
|
|
|
%%
|
|
|
|
%% This file is provided to you under the Apache License,
|
|
|
|
%% Version 2.0 (the "License"); you may not use this file
|
|
|
|
%% except in compliance with the License. You may obtain
|
|
|
|
%% a copy of the License at
|
|
|
|
%%
|
|
|
|
%% http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
%%
|
|
|
|
%% Unless required by applicable law or agreed to in writing,
|
|
|
|
%% software distributed under the License is distributed on an
|
|
|
|
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
|
|
%% KIND, either express or implied. See the License for the
|
|
|
|
%% specific language governing permissions and limitations
|
|
|
|
%% under the License.
|
|
|
|
%%
|
|
|
|
%% -------------------------------------------------------------------
|
2015-03-31 18:21:25 +00:00
|
|
|
|
|
|
|
%% @doc Tests if the java VM can be started and polled. A config option
|
|
|
|
%% 'jmx_verify_wait' (or JMX_VERIFY_WAIT if set via os env) delays the
|
|
|
|
%% initial check of the jvm by that many milliseconds. It defaults to
|
|
|
|
%% 3000, though most faster machines can set it to 0.
|
2013-01-22 19:26:17 +00:00
|
|
|
-module(jmx_verify).
|
|
|
|
-behavior(riak_test).
|
2013-05-07 15:52:59 +00:00
|
|
|
-export([confirm/0, test_supervision/0]).
|
2013-01-22 19:26:17 +00:00
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
|
|
|
|
-prereq("java").
|
|
|
|
|
|
|
|
%% You should have curl installed locally to do this.
|
|
|
|
confirm() ->
|
2013-05-01 14:09:04 +00:00
|
|
|
test_supervision(),
|
|
|
|
|
2013-05-09 16:41:51 +00:00
|
|
|
test_application_stop(),
|
|
|
|
|
2013-01-22 19:26:17 +00:00
|
|
|
JMXPort = 41111,
|
|
|
|
Config = [{riak_jmx, [{enabled, true}, {port, JMXPort}]}],
|
|
|
|
Nodes = rt:deploy_nodes(1, Config),
|
|
|
|
[Node1] = Nodes,
|
|
|
|
?assertEqual(ok, rt:wait_until_nodes_ready([Node1])),
|
|
|
|
|
2013-09-11 17:31:49 +00:00
|
|
|
[{http, {IP, _Port}}|_] = rt:connection_info(Node1),
|
2013-01-22 19:26:17 +00:00
|
|
|
|
|
|
|
JMXDumpCmd = jmx_dump_cmd(IP, JMXPort),
|
|
|
|
|
2015-03-31 18:21:25 +00:00
|
|
|
WaitTimeout = case rt_config:config_or_os_env(jmx_verify_wait, 3000) of
|
|
|
|
N when is_integer(N) ->
|
|
|
|
N;
|
|
|
|
N when is_list(N) ->
|
|
|
|
list_to_integer(N)
|
|
|
|
end,
|
|
|
|
timer:sleep(WaitTimeout),
|
2013-01-22 19:26:17 +00:00
|
|
|
JMX1 = jmx_dump(JMXDumpCmd),
|
|
|
|
|
|
|
|
%% make sure a set of stats have valid values
|
|
|
|
verify_nz(JMX1, [<<"cpu_nprocs">>,
|
|
|
|
<<"mem_total">>,
|
|
|
|
<<"mem_allocated">>,
|
|
|
|
<<"ring_creation_size">>]),
|
|
|
|
|
|
|
|
lager:info("perform 5 x PUT and a GET to increment the stats"),
|
|
|
|
lager:info("as the stat system only does calcs for > 5 readings"),
|
2014-06-11 17:26:45 +00:00
|
|
|
|
2013-01-22 19:26:17 +00:00
|
|
|
C = rt:httpc(Node1),
|
|
|
|
[rt:httpc_write(C, <<"systest">>, <<X>>, <<"12345">>) || X <- lists:seq(1, 5)],
|
|
|
|
[rt:httpc_read(C, <<"systest">>, <<X>>) || X <- lists:seq(1, 5)],
|
|
|
|
|
|
|
|
JMX2 = jmx_dump(JMXDumpCmd),
|
|
|
|
%% make sure the stats that were supposed to increment did
|
|
|
|
verify_inc(JMX1, JMX2, [{<<"node_gets">>, 10},
|
|
|
|
{<<"node_puts">>, 5},
|
|
|
|
{<<"node_gets_total">>, 10},
|
|
|
|
{<<"node_puts_total">>, 5},
|
|
|
|
{<<"vnode_gets">>, 30},
|
|
|
|
{<<"vnode_puts">>, 15},
|
|
|
|
{<<"vnode_gets_total">>, 30},
|
|
|
|
{<<"vnode_puts_total">>, 15}]),
|
|
|
|
|
|
|
|
%% verify that fsm times were tallied
|
|
|
|
verify_nz(JMX2, [<<"node_get_fsm_time_mean">>,
|
|
|
|
<<"node_get_fsm_time_median">>,
|
|
|
|
<<"node_get_fsm_time_95">>,
|
|
|
|
<<"node_get_fsm_time_99">>,
|
|
|
|
<<"node_get_fsm_time_100">>,
|
|
|
|
<<"node_put_fsm_time_mean">>,
|
|
|
|
<<"node_put_fsm_time_median">>,
|
|
|
|
<<"node_put_fsm_time_95">>,
|
|
|
|
<<"node_put_fsm_time_99">>,
|
|
|
|
<<"node_put_fsm_time_100">>]),
|
|
|
|
|
|
|
|
lager:info("Make PBC Connection"),
|
|
|
|
Pid = rt:pbc(Node1),
|
|
|
|
|
|
|
|
JMX3 = jmx_dump(JMXDumpCmd),
|
|
|
|
rt:systest_write(Node1, 1),
|
|
|
|
%% make sure the stats that were supposed to increment did
|
|
|
|
verify_inc(JMX2, JMX3, [{<<"pbc_connects_total">>, 1},
|
|
|
|
{<<"pbc_connects">>, 1},
|
|
|
|
{<<"pbc_active">>, 1}]),
|
|
|
|
|
|
|
|
lager:info("Force Read Repair"),
|
|
|
|
rt:pbc_write(Pid, <<"testbucket">>, <<"1">>, <<"blah!">>),
|
|
|
|
rt:pbc_set_bucket_prop(Pid, <<"testbucket">>, [{n_val, 4}]),
|
|
|
|
|
|
|
|
JMX4 = jmx_dump(JMXDumpCmd),
|
|
|
|
|
|
|
|
verify_inc(JMX3, JMX4, [{<<"read_repairs_total">>, 0},
|
|
|
|
{<<"read_repairs">>, 0}]),
|
|
|
|
|
|
|
|
_Value = rt:pbc_read(Pid, <<"testbucket">>, <<"1">>),
|
|
|
|
|
|
|
|
%%Stats5 = get_stats(Node1),
|
|
|
|
JMX5 = jmx_dump(JMXDumpCmd),
|
|
|
|
verify_inc(JMX3, JMX5, [{<<"read_repairs_total">>, 1},
|
|
|
|
{<<"read_repairs">>, 1}]),
|
|
|
|
pass.
|
|
|
|
|
2013-05-01 14:09:04 +00:00
|
|
|
test_supervision() ->
|
2016-06-01 20:38:25 +00:00
|
|
|
%% Specifically invalid port number to make JMX startup fail.
|
|
|
|
%% NOTE: using a priveleged port doesn't work in some test environments
|
|
|
|
%% as tests/riak are running as root. For this test, -1 is fine.
|
|
|
|
JMXPort = -1,
|
2013-05-01 14:09:04 +00:00
|
|
|
Config = [{riak_jmx, [{enabled, true}, {port, JMXPort}]}],
|
|
|
|
[Node|[]] = rt:deploy_nodes(1, Config),
|
|
|
|
timer:sleep(20000),
|
|
|
|
case net_adm:ping(Node) of
|
|
|
|
pang ->
|
|
|
|
lager:error("riak_jmx crash able to crash riak node"),
|
|
|
|
?assertEqual("riak_jmx crash able to crash riak node", true);
|
|
|
|
_ ->
|
2013-05-10 17:02:19 +00:00
|
|
|
yay
|
2013-05-10 14:48:35 +00:00
|
|
|
end,
|
2013-05-10 17:02:19 +00:00
|
|
|
|
|
|
|
%% Let's make sure the thing's restarting as planned
|
|
|
|
lager:info("calling riak_jmx:stop() to reset retry counters"),
|
|
|
|
rpc:call(Node, riak_jmx, stop, ["stopping for test purposes"]),
|
|
|
|
|
|
|
|
lager:info("loading lager backend on node"),
|
|
|
|
rt:load_modules_on_nodes([riak_test_lager_backend], [Node]),
|
|
|
|
ok = rpc:call(Node, gen_event, add_handler, [lager_event, riak_test_lager_backend, [info, false]]),
|
|
|
|
ok = rpc:call(Node, lager, set_loglevel, [riak_test_lager_backend, info]),
|
2014-06-11 17:26:45 +00:00
|
|
|
|
2013-05-10 17:02:19 +00:00
|
|
|
lager:info("Now we're capturing logs on the node, let's start jmx"),
|
|
|
|
lager:info("calling riak_jmx:start() to get these retries started"),
|
|
|
|
rpc:call(Node, riak_jmx, start, []),
|
|
|
|
|
|
|
|
lager:info("It can fail, it can fail 10 times"),
|
2016-06-01 16:28:46 +00:00
|
|
|
%% NOTE: 10 times comes from riak_jmx_monitor.erl's MAX_RETRY macro (10).
|
|
|
|
%% Error logging is 0-based, so look for Retry #9
|
|
|
|
{Delay, _Retry} = rt:get_retry_settings(),
|
|
|
|
TwoMinutes = 2*60*1000,
|
2016-06-01 20:38:25 +00:00
|
|
|
Retries = TwoMinutes div Delay,
|
2016-06-01 16:28:46 +00:00
|
|
|
?assertEqual(true, rt:expect_in_log(Node, "JMX server monitor .* exited with code .*\. Retry #9",
|
2016-06-01 20:38:25 +00:00
|
|
|
Retries, Delay)),
|
2016-06-01 16:28:46 +00:00
|
|
|
?assertEqual(true, rt:expect_in_log(Node, "JMX server monitor .* exited with code .*\. Reached maximum retries of 10",
|
2016-06-01 20:38:25 +00:00
|
|
|
Retries, Delay)),
|
2014-06-11 17:26:45 +00:00
|
|
|
rt:stop(Node),
|
|
|
|
ok_ok.
|
|
|
|
|
2013-05-09 16:41:51 +00:00
|
|
|
test_application_stop() ->
|
|
|
|
lager:info("Testing application:stop()"),
|
|
|
|
JMXPort = 41111,
|
|
|
|
Config = [{riak_jmx, [{enabled, true}, {port, JMXPort}]}],
|
|
|
|
Nodes = rt:deploy_nodes(1, Config),
|
|
|
|
[Node] = Nodes,
|
|
|
|
?assertEqual(ok, rt:wait_until_nodes_ready([Node])),
|
|
|
|
|
|
|
|
%% Let's make sure the java process is alive!
|
|
|
|
lager:info("checking for riak_jmx.jar running."),
|
2016-06-01 16:28:46 +00:00
|
|
|
?assertEqual(ok, rt:wait_until(Node, fun(_N) ->
|
2013-06-20 18:49:12 +00:00
|
|
|
try case re:run(rpc:call(Node, os, cmd, ["ps -Af"]), "riak_jmx.jar", []) of
|
|
|
|
nomatch -> false;
|
|
|
|
_ -> true
|
|
|
|
end
|
|
|
|
catch
|
|
|
|
Err:Reason ->
|
|
|
|
lager:error("jmx stop re:run failed w/ ~p: ~p", [Err, Reason]),
|
|
|
|
false
|
|
|
|
end
|
2016-06-01 16:28:46 +00:00
|
|
|
end)),
|
2014-06-11 17:26:45 +00:00
|
|
|
|
2013-05-09 16:41:51 +00:00
|
|
|
rpc:call(Node, riak_jmx, stop, ["Stopping riak_jmx"]),
|
|
|
|
timer:sleep(20000),
|
|
|
|
case net_adm:ping(Node) of
|
|
|
|
pang ->
|
|
|
|
lager:error("riak_jmx stop takes down riak node"),
|
|
|
|
?assertEqual("riak_jmx stop takes down riak node", true);
|
|
|
|
_ ->
|
|
|
|
yay
|
|
|
|
end,
|
|
|
|
|
|
|
|
%% Let's make sure the java process is dead!
|
|
|
|
lager:info("checking for riak_jmx.jar not running."),
|
2013-06-20 18:49:12 +00:00
|
|
|
|
2013-05-10 14:48:35 +00:00
|
|
|
?assertEqual(nomatch, re:run(rpc:call(Node, os, cmd, ["ps -Af"]), "riak_jmx.jar", [])),
|
2013-05-09 16:41:51 +00:00
|
|
|
|
|
|
|
rt:stop(Node).
|
|
|
|
|
2013-01-22 19:26:17 +00:00
|
|
|
verify_inc(Prev, Props, Keys) ->
|
|
|
|
[begin
|
|
|
|
Old = proplists:get_value(Key, Prev, 0),
|
|
|
|
New = proplists:get_value(Key, Props, 0),
|
|
|
|
lager:info("~s: ~p -> ~p (expected ~p)", [Key, Old, New, Old + Inc]),
|
|
|
|
?assertEqual({Key, New}, {Key, (Old + Inc)})
|
|
|
|
end || {Key, Inc} <- Keys].
|
|
|
|
|
|
|
|
verify_nz(Props, Keys) ->
|
|
|
|
[?assertNotEqual({Key, proplists:get_value(Key,Props,0)}, {Key, 0}) || Key <- Keys].
|
|
|
|
|
|
|
|
jmx_jar_path() ->
|
|
|
|
%% Find riak_jmx.jar
|
|
|
|
DepsPath = rt:get_deps(),
|
|
|
|
Deps = string:tokens(os:cmd("ls " ++ DepsPath), "\n"),
|
|
|
|
[RiakJMX] = lists:filter(fun(X) -> string:str(X, "riak_jmx") == 1 end, Deps),
|
|
|
|
filename:join([DepsPath, RiakJMX, "priv", "riak_jmx.jar"]).
|
|
|
|
|
|
|
|
jmx_dump_cmd(IP, Port) ->
|
2014-06-11 17:26:45 +00:00
|
|
|
io_lib:format("java -cp ~s com.basho.riak.jmx.Dump ~s ~p",
|
2013-01-22 19:26:17 +00:00
|
|
|
[jmx_jar_path(), IP, Port]).
|
|
|
|
|
|
|
|
jmx_dump(Cmd) ->
|
|
|
|
timer:sleep(40000), %% JMX only updates every 30seconds
|
2014-11-12 03:06:22 +00:00
|
|
|
lager:info("Dumping JMX stats using command ~s", [Cmd]),
|
2013-01-22 19:26:17 +00:00
|
|
|
Output = string:strip(os:cmd(Cmd), both, $\n),
|
|
|
|
JSONOutput = mochijson2:decode(Output),
|
2016-06-01 20:38:25 +00:00
|
|
|
[ {Key, Value} || {struct, [{Key, Value}]} <- JSONOutput].
|