2013-02-23 03:10:25 +00:00
|
|
|
-module(pr_pw).
|
|
|
|
|
|
|
|
-behavior(riak_test).
|
|
|
|
-export([confirm/0]).
|
|
|
|
|
|
|
|
-include_lib("eunit/include/eunit.hrl").
|
|
|
|
|
|
|
|
%% @doc riak_test entry point. Builds a four-node cluster, stores one object
%% and then checks the results of gets/puts issued with the `pr' and `pw'
%% options (`all' and `quorum') while preflist members are made unresponsive
%% through vnode intercepts, stopped, and finally restarted.
%% Returns `pass' when every assertion holds; a failed assertion raises.
confirm() ->
    lager:info("Deploy some nodes"),
    Nodes = rt:deploy_nodes(4),
    repl_util:make_cluster(Nodes),
    timer:sleep(100),
    rt:wait_until_ring_converged(Nodes),

    Bucket = <<"foo">>,
    Key = <<"bar">>,
    FirstNode = hd(Nodes),

    %% work out the annotated preflist for Bucket/Key
    {ok, Ring} = rpc:call(FirstNode, riak_core_ring_manager, get_my_ring, []),
    UpNodes = rpc:call(FirstNode, riak_core_node_watcher, nodes, [riak_kv]),
    DocIdx = rpc:call(FirstNode, riak_core_util, chash_key, [{Bucket, Key}]),
    NVal = 3,
    Preflist = riak_core_apl:get_apl_ann(DocIdx, NVal, Ring, UpNodes),
    lager:info("Preflist is ~p", [Preflist]),
    PLNodes = [Owner || {{_Idx, Owner}, _Type} <- Preflist],
    lager:info("Nodes in preflist ~p", [PLNodes]),
    %% the match also asserts that exactly one node is outside the preflist
    [SafeNode] = Nodes -- PLNodes,
    lager:info("Node not involved in this preflist ~p", [SafeNode]),

    %% talk to the first preflist node: it is the one preflist member this
    %% test never disturbs, which avoids random put forwarding
    {ok, C} = riak:client_connect(hd(PLNodes)),
    Obj = riak_object:new(Bucket, Key, <<42:32/integer>>),
    ?assertEqual(ok, C:put(Obj, [{pw, all}])),
    ?assertMatch({ok, _}, C:get(Bucket, Key, [{pr, all}])),

    %% emulate a node that kernel panicked (or similar), where the
    %% node_watcher may need some time to notice that the node is gone:
    %% intercept the vnode of the last preflist entry
    {{Index, Node}, _} = lists:last(Preflist),
    drop_vnode_requests(Nodes, Node, Index),

    %% one vnode never answers, so requests needing all primaries time out
    ?assertEqual({error, timeout}, C:get(Bucket, Key, [{pr, all}])),
    ?assertEqual({error, timeout}, C:put(Obj, [{pw, all}])),

    %% quorum is still reachable
    ?assertEqual(ok, C:put(Obj, [{pw, quorum}])),
    ?assertMatch({ok, _}, C:get(Bucket, Key, [{pr, quorum}])),

    stop_and_wait(Node),

    %% a fallback has now taken a place in the preflist, so PR/PW = all
    %% can no longer be satisfied
    ?assertEqual({error, {pr_val_unsatisfied, 3, 2}},
                 C:get(Bucket, Key, [{pr, all}])),
    ?assertEqual({error, {pw_val_unsatisfied, 3, 2}},
                 C:put(Obj, [{pw, all}])),

    %% emulate a second failure, on the second preflist entry
    {{Index2, Node2}, _} = lists:nth(2, Preflist),
    drop_vnode_requests(Nodes, Node2, Index2),

    %% not even quorum can be met now
    ?assertEqual({error, timeout}, C:get(Bucket, Key, [{pr, quorum}])),
    ?assertEqual({error, timeout}, C:put(Obj, [{pw, quorum}])),

    %% bring the stopped node back
    start_and_wait(Node),

    %% quorum works again
    ?assertEqual(ok, C:put(Obj, [{pw, quorum}])),
    ?assertMatch({ok, _}, C:get(Bucket, Key, [{pr, quorum}])),

    %% the second node still has its intercepts, so `all' keeps timing out
    ?assertEqual({error, timeout}, C:get(Bucket, Key, [{pr, all}])),
    ?assertEqual({error, timeout}, C:put(Obj, [{pw, all}])),

    %% reboot the second node
    stop_and_wait(Node2),
    start_and_wait(Node2),

    %% everything is healthy again
    ?assertEqual(ok, C:put(Obj, [{pw, all}])),
    ?assertMatch({ok, _}, C:get(Bucket, Key, [{pr, all}])),
    pass.

%% Creates the intercepts table on Node for partition Index, installs the
%% drop_do_get/drop_do_put intercepts on riak_kv_vnode, logs the change
%% locally and on every node in Nodes, then pauses briefly.
%% NOTE(review): the table entries are presumably what the intercepts consult
%% to decide which partitions to drop requests for - confirm against the
%% intercept module.
drop_vnode_requests(Nodes, Node, Index) ->
    make_intercepts_tab(Node, Index),
    Intercepts = [{{do_get,4}, drop_do_get},
                  {{do_put, 7}, drop_do_put}],
    rt_intercept:add(Node, {riak_kv_vnode, Intercepts}),
    lager:info("disabling do_get for index ~p on ~p", [Index, Node]),
    rt:log_to_nodes(Nodes, "disabling do_get for index ~p on ~p", [Index, Node]),
    timer:sleep(100).

%% Stops Node and blocks until it no longer answers pings.
stop_and_wait(Node) ->
    rt:stop(Node),
    rt:wait_until_unpingable(Node).

%% Starts Node and blocks until it answers pings and riak_kv is available.
start_and_wait(Node) ->
    rt:start(Node),
    rt:wait_until_pingable(Node),
    rt:wait_for_service(Node, riak_kv).
|
2013-02-25 14:57:13 +00:00
|
|
|
|
|
|
|
%% Creates the public, named ETS table `intercepts_tab' on Node and stores
%% [Partition] under both the `drop_do_get_partitions' and the
%% `drop_do_put_partitions' keys. Every step is asserted by matching on the
%% rpc result, so a failed remote call crashes the caller with a badmatch.
%% Returns `true'.
make_intercepts_tab(Node, Partition) ->
    %% The process registered as sasl_safe_sup on Node is named as heir of
    %% the table - presumably so the table outlives the short-lived process
    %% that serves the rpc call; confirm if this is changed.
    Heir = rpc:call(Node, erlang, whereis, [sasl_safe_sup]),
    TabOpts = [named_table, public, set, {heir, Heir, {}}],
    intercepts_tab = rpc:call(Node, ets, new, [intercepts_tab, TabOpts]),
    GetEntry = {drop_do_get_partitions, [Partition]},
    PutEntry = {drop_do_put_partitions, [Partition]},
    true = rpc:call(Node, ets, insert, [intercepts_tab, GetEntry]),
    true = rpc:call(Node, ets, insert, [intercepts_tab, PutEntry]).
|