update for single-run reporting, and many other things

This commit is contained in:
Evan Vigil-McClanahan 2014-04-28 11:39:57 -04:00
parent 183fa49954
commit 43b772f35a
10 changed files with 217 additions and 23 deletions

26
bcrunner-nocuttle.sh Executable file
View File

@ -0,0 +1,26 @@
#!/bin/bash
# Run the get_put performance test three times with the rtperf2 config,
# once per target load percentage (120, 70, 20), passing --cuttle false.
#
# Usage: bcrunner-nocuttle.sh <test_name> <bin_size> <version>
#
# Every run uses the same fixed options (--restart true, --prepop true,
# --run-time 120, --ram-size 48, --bin-type exponential); only --target-pct
# changes between runs.
test_name=$1
bin_size=$2
version=$3

# Each expansion is quoted: unquoted, an empty value disappears from the
# test expression, which makes the "is anything missing" check unreliable.
if [ -z "$version" ] || [ -z "$test_name" ] || [ -z "$bin_size" ]; then
    echo "usage: $0 <test_name> <bin_size> <version>" >&2
    exit 1
fi

# The runs are sequential and independent: a failing run does not stop the
# remaining ones, and the script exits with the status of the last run.
for target_pct in 120 70 20; do
    ./riak_test -c rtperf2 -t get_put -- --restart true --prepop true \
        --run-time 120 --target-pct "$target_pct" --ram-size 48 \
        --bin-size "$bin_size" --name "$test_name" --bin-type exponential \
        --version "$version" --cuttle false
done

26
bcrunner.sh Executable file
View File

@ -0,0 +1,26 @@
#!/bin/bash
# Run the get_put performance test three times with the rtperf2 config,
# once per target load percentage (120, 70, 20).
#
# Usage: bcrunner.sh <test_name> <bin_size> <version>
#
# Every run uses the same fixed options (--restart true, --prepop true,
# --run-time 120, --ram-size 48, --bin-type exponential); only --target-pct
# changes between runs.
test_name=$1
bin_size=$2
version=$3

# Each expansion is quoted: unquoted, an empty value disappears from the
# test expression, which makes the "is anything missing" check unreliable.
if [ -z "$version" ] || [ -z "$test_name" ] || [ -z "$bin_size" ]; then
    echo "usage: $0 <test_name> <bin_size> <version>" >&2
    exit 1
fi

# The runs are sequential and independent: a failing run does not stop the
# remaining ones, and the script exits with the status of the last run.
for target_pct in 120 70 20; do
    ./riak_test -c rtperf2 -t get_put -- --restart true --prepop true \
        --run-time 120 --target-pct "$target_pct" --ram-size 48 \
        --bin-size "$bin_size" --name "$test_name" --bin-type exponential \
        --version "$version"
done

1
compare.sh Symbolic link
View File

@ -0,0 +1 @@
priv/reporting/compare.sh

View File

@ -23,7 +23,7 @@ confirm() ->
max,
rt_config:get(perf_duration),
HostList,
{int_to_bin_bigendian, {uniform_int, SetSize}},
{int_to_bin_bigendian, {truncated_pareto_int, SetSize}},
rt_bench:valgen(rt_config:get(perf_bin_type), BinSize),
%% 4:1 get/put
[{get, 3}, {update, 1}]

15
priv/reporting/report.sh Executable file
View File

@ -0,0 +1,15 @@
#!/usr/bin/env bash
# Render the report graph for a single results directory.
#
# Usage: report.sh <results_dir> [true]
#   <results_dir>  existing directory; handed to gnuplot as dir1
#   true           optional; when given, the *-digest files in the directory
#                  are removed and ~/bin/riak-digest.escript is run there
#                  before plotting
#
# The image is written to "<basename of results_dir>-report.png" in the
# current working directory. The gnuplot script path below is relative, so
# run this from the directory that contains priv/reporting/.
set -e

if [ ! -d "$1/" ]; then
    echo "a directory must be specified" >&2
    exit 1
fi

if [ "x$2" == "xtrue" ]; then
    # The subshell keeps the cd from affecting the gnuplot call below.
    # The path is quoted so directories containing whitespace still work.
    (cd -- "$1" && rm -f -- *-digest && escript ~/bin/riak-digest.escript)
fi

D1=$(basename -- "$1")

# generate the report graph
gnuplot -e "dir1=\"$1\"; outfilename=\"${D1}-report.png\";" priv/reporting/summarize1.gpl

View File

@ -135,6 +135,10 @@ avg_items(L, Names) ->
(lists:sum(Vals)/length(Vals)) / 60;
vnode_puts ->
(lists:sum(Vals)/length(Vals)) / 60;
node_gets ->
(lists:sum(Vals)/length(Vals)) / 60;
node_puts ->
(lists:sum(Vals)/length(Vals)) / 60;
_ ->
lists:sum(Vals)/length(Vals)
end
@ -151,8 +155,8 @@ winnow(Data0) ->
strip_stats(Glob) ->
Filter = [
node_gets, node_puts,
vnode_gets, vnode_puts,
node_gets, node_puts,
vnode_gets, vnode_puts,
node_get_fsm_time_median,
node_get_fsm_time_95,
node_get_fsm_time_99,
@ -160,17 +164,17 @@ strip_stats(Glob) ->
node_put_fsm_time_95,
node_put_fsm_time_99,
message_queue_max,
cpu_utilization,
cpu_iowait,
memory_utilization,
memory_page_dirty,
memory_page_writeback,
dropped_vnode_requests_total,
node_get_fsm_objsize_median,
node_get_fsm_objsize_95,
node_get_fsm_objsize_99,
disk_utilization
],
cpu_utilization,
cpu_iowait,
memory_utilization,
memory_page_dirty,
memory_page_writeback,
dropped_vnode_requests_total,
node_get_fsm_objsize_median,
node_get_fsm_objsize_95,
node_get_fsm_objsize_99,
disk_utilization
],
[begin
{Name, Val}
end

View File

@ -0,0 +1,116 @@
# Single-run report: draws six panels onto one PNG from one digest file.
#
# Inputs that must be defined before this script is loaded (for example with
# gnuplot -e):
#   dir1        - directory containing the 'rstats-digest' data file
#   outfilename - path of the PNG to write
#
# Columns are selected by header name (using "..."), so the digest file is
# expected to carry a header row with these names.
#
# Layout: every panel is .5 x .315 of the canvas. Panels sit in two columns
# (x origin 0 and .48) and three rows (y origin 0, .315 and .63).
#
#clean up the environment for interactive use
unset multiplot
reset
# NOTE: the font is an absolute path, so it has to exist on the machine
# running gnuplot.
set terminal png font "/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf,9" size 850,1100
set output outfilename
#set term x11 size 850, 1100
set multiplot
set grid
#set key below
set tics out
set lmargin 12
set rmargin 10
# full path of the data file every plot below reads
rd = dir1."/".'rstats-digest'
## panel size shared by all six plots
set size .5, .315
## left column, middle row: 95th/99th percentile get/put FSM latencies
set origin 0, .315
#set xtics 10
#set yrange [500:150000]
#set y2range [1000:1100000]
#set y2tics nomirror
# NOTE(review): the "5 second" sample interval is asserted by the label only;
# confirm against whatever produces the digest.
set xlabel "5 second intervals"
set ylabel "usec"
unset y2label
plot rd using "node_put_fsm_time_95" with lines, \
rd using "node_put_fsm_time_99" with lines, \
rd using "node_get_fsm_time_95" with lines, \
rd using "node_get_fsm_time_99" with lines
unset y2tics
unset y2range
## left column, top row: vnode get/put counts
set origin 0, .63
#ymax = median(rd) + 1000
#set yrange [0:50000]
set ylabel "operations per node"
plot rd using "vnode_gets" with lines, \
rd using "vnode_puts" with lines
## right column, bottom row: dirty / writeback memory pages
## (writeback is drawn against the secondary y axis)
# NOTE(review): no ylabel is set for this panel, so it inherits
# "operations per node" from the panel above - probably unintended.
set origin .48, 0
set xlabel "5 second intervals"
set y2tics nomirror
plot rd using "memory_page_dirty" with lines, \
rd using "memory_page_writeback" with lines axis x1y2
unset y2tics
unset y2range
## right column, middle row: median get/put FSM latencies
set origin .48, .315
#set yrange [500:150000]
#set y2range [1000:1100000]
#set y2tics nomirror
#set xtics 10
set xlabel "5 second intervals"
set ylabel "usec"
unset y2label
plot rd using "node_get_fsm_time_median" with lines, \
rd using "node_put_fsm_time_median" with lines
unset y2tics
unset y2range
## right column, top row: max message queue length and dropped vnode requests
# NOTE(review): the ylabel still says "operations per node" although the
# series plotted here are queue and drop counters - confirm the intended label.
set origin .48, .63
#set yrange [0:50000]
set ylabel "operations per node"
#hack to set the title for the whole graph
# NOTE(review): the label is never unset, so it presumably gets drawn again
# by the final plot below (same position, so harmless if so).
set label dir1 at screen 0.5,0.97 center front
plot rd using "message_queue_max" with lines, \
rd using "dropped_vnode_requests_total" with lines
## left column, bottom row: cpu / iowait / disk / memory utilization,
## with the y axis pinned to 0-100
set origin 0, 0
#set xtics 60
set yrange [0:100]
set xlabel "5 second intervals"
set ylabel "percentage"
plot rd using "cpu_utilization" with lines, \
rd using "cpu_iowait" with lines, \
rd using "disk_utilization" with lines, \
rd using "memory_utilization" with lines
# leave a clean state behind for interactive sessions
unset yrange
unset y2tics
unset multiplot
reset

1
report.sh Symbolic link
View File

@ -0,0 +1 @@
priv/reporting/report.sh

View File

@ -63,14 +63,14 @@ watcher_loop(W=#watcher{probes=Probes,
W2 = install_probes(Missing, W),
Probes2 = W2#watcher.probes,
receive
{'DOWN', MRef, process, _, _} ->
{'DOWN', MRef, process, _, Reason} ->
case lists:keyfind(MRef, 2, Probes2) of
false ->
%% master died, exit
io:format("watcher exiting~n"),
ok;
{Node, MRef} ->
io:format("Probe exit: ~p/~p~n", [Node, MRef]),
io:format("Probe exit. ~p: ~p~n", [Node, Reason]),
Probes3 = lists:keyreplace(Node, 1, Probes2, {Node, undefined}),
W3 = W2#watcher{probes=Probes3},
?MODULE:watcher_loop(W3)
@ -129,7 +129,8 @@ start(Master, Rate, Collector, Nodes, Fun) ->
init(Master, Rate, {Host, Port, _Dir}, Nodes, Fun) ->
lager:info("In init: ~p ~p~n", [node(), Host]),
{ok, Sock} = gen_tcp:connect(Host, Port,
[binary, {packet, 2}]),
[binary, {packet, 2},
{send_timeout, 500}]),
case application:get_env(riak_kv, storage_backend) of
{ok, riak_kv_eleveldb_backend} ->
LRef = get_leveldb_ref();
@ -214,8 +215,13 @@ collect(H0) ->
Stats0 = L ++ Q ++ P ++ N ++ D ++ V ++ M ++ C ++ R,
Stats = term_to_binary(Stats0),
%% catch TCP errors here
ok = gen_tcp:send(H3#history.collector_sock, Stats),
%% catch print_down(Nodes),
case gen_tcp:send(H3#history.collector_sock, Stats) of
ok -> ok;
%% die on any error, we'll get restarted soon.
{error, _} ->
gen_tcp:close(H3#history.collector_sock),
error(splode)
end,
H3.
%% this portion is meant to be run inside a VM instance running riak
@ -312,7 +318,9 @@ report_memory(H) ->
{memory_page_dirty, Dirty},
{memory_page_writeback, Writeback}]}.
report_leveldb(H = #history{ lvlref = LRef}) ->
report_leveldb(H = #history{ lvlref = undefined }) ->
{H, []};
report_leveldb(H = #history{ lvlref = LRef }) ->
try case eleveldb:status(LRef, <<"leveldb.ThrottleGauge">>) of
{ok, Result} ->
Value = list_to_integer(Result),

View File

@ -32,8 +32,6 @@ harness_opts() ->
"version to test"},
{prepop, undefined, "prepop", {boolean, false},
"prepopulate cluster"},
{test_type, undefined, "type", {atom, uniform},
"uniform | pareto"},
{restart, undefined, "restart", {boolean, false},
"stop running riak cluster and start new"},
{force, undefined, "force", {boolean, false},
@ -342,7 +340,6 @@ test_name() ->
BinSize = rt_config:get(perf_bin_size),
rt_config:get(perf_test_name)++"-"++Vsn++"-"++
integer_to_list(rt_config:get(perf_target_pct))++"pct-"++
atom_to_list(rt_config:get(perf_test_type))++"-"++
atom_to_list(rt_config:get(perf_bin_type))++"-"++
integer_to_list(BinSize)++"b-"++date_string().