WIP Kv679 tests

- Dataloss at coordinator
- Dataloss at coordinator with an old clock repaired
- Vnode ids not unique
parent 87afd498e6
commit 8cbec21a5d

140  tests/kv679_dataloss.erl  Normal file

@@ -0,0 +1,140 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%%% @copyright (C) 2014, Basho Technologies
%%% @doc
%%% riak_test for the kv679 lost clock flavour.
%%%
%%% Issue kv679 is a possible dataloss issue; it's basically caused by
%%% the fact that per-key logical clocks can go backwards in time in
%%% certain situations. The situation under test here is as follows:
%%% Create value, write N times
%%% Fail to locally read value on coordinator (corruption, error, solar flare)
%%% write new value (new value, new clock!)
%%% replicate new value
%%% replicas drop the write as its clock is dominated
%%% read repair removes the value. Data loss.
%%% @end
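
%% For illustration, the clock regression in vclock terms -- a sketch
%% assuming riak_core's vclock API (fresh/0, increment/2, descends/2),
%% with counters shown in the [{actor, count}] shorthand:
%%
%%   Replica = lists:foldl(fun(_, C) -> vclock:increment(a, C) end,
%%                         vclock:fresh(), lists:seq(1, 3)),  %% ~ [{a, 3}]
%%   Coord = vclock:increment(a, vclock:fresh()),             %% ~ [{a, 1}] after the loss
%%   true = vclock:descends(Replica, Coord),                  %% replica clock dominates,
%%   false = vclock:descends(Coord, Replica),                 %% so the new write is dropped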

-module(kv679_dataloss).
-behavior(riak_test).
-compile([export_all]).
-export([confirm/0]).

-include_lib("eunit/include/eunit.hrl").

-define(BUCKET, <<"kv679">>).
-define(KEY, <<"test">>).

confirm() ->
    Conf = [
            {riak_kv, [{anti_entropy, {off, []}}]},
            {riak_core, [{default_bucket_props, [{allow_mult, true},
                                                 {dvv_enabled, true}]}]},
            {bitcask, [{sync_strategy, o_sync}, {io_mode, nif}]}],

    [Node] = rt:deploy_nodes(1, Conf),
    Client = rt:pbc(Node),
    riakc_pb_socket:set_options(Client, [queue_if_disconnected]),

    %% Get preflist for key,
    %% assuming that the head of the preflist on a single node cluster
    %% will always coordinate the writes
    PL = kv679_tombstone:get_preflist(Node),

    %% Write key a few times
    write_key(Client, <<"bob">>),

    lager:info("wrote value <<bob>>"),

    %% delete the local data for Key
    delete_datadir(hd(PL)),

    %% timer:sleep(000),

    write_key(Client, <<"jon">>),

    lager:info("wrote value <<jon>>"),

    %% At this point the two puts, both with empty contexts, should be
    %% siblings; with the kv679 bug, the data loss at the coordinator
    %% means we lose the second write
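
    %% (With allow_mult=true a put with no causal context is stored as
    %% concurrent with whatever is already there, so on a healthy
    %% cluster both values survive as siblings -- that is what the
    %% assert below checks for.)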

    Res = riakc_pb_socket:get(Client, ?BUCKET, ?KEY, []),

    ?assertMatch({ok, _}, Res),
    {ok, O} = Res,

    ?assertEqual([<<"bob">>, <<"jon">>], riakc_obj:get_values(O)),

    pass.

write_key(Client, Val) ->
    write_object(Client, riakc_obj:new(?BUCKET, ?KEY, Val)).

write_object(Client, Object) ->
    riakc_pb_socket:put(Client, Object).

delete_datadir({{Idx, Node}, Type}) ->
    lager:info("deleting backend data dir for ~p ~p on ~p",
               [Idx, Node, Type]),
    %% Get default backend
    Backend = rpc:call(Node, app_helper, get_env, [riak_kv, storage_backend]),
    %% get name from mod
    BackendName = backend_name_from_mod(Backend),
    %% get data root for type
    DataRoot = rpc:call(Node, app_helper, get_env, [BackendName, data_root]),
    %% get datadir from Idx
    Path = filename:join([rtdev:relpath(current),
                          "dev",
                          "dev" ++ integer_to_list(rtdev:node_id(Node)),
                          DataRoot,
                          integer_to_list(Idx)]),
    lager:info("Path ~p~n", [Path]),

    vnode_util:kill_vnode({Idx, Node}),
    del_dir(Path).

backend_name_from_mod(riak_kv_bitcask_backend) ->
    bitcask;
backend_name_from_mod(riak_kv_eleveldb_backend) ->
    eleveldb.

del_dir(Dir) ->
    lists:foreach(fun(D) ->
                          ok = file:del_dir(D)
                  end, del_all_files([Dir], [])).

del_all_files([], EmptyDirs) ->
    EmptyDirs;
del_all_files([Dir | T], EmptyDirs) ->
    {ok, FilesInDir} = file:list_dir(Dir),
    {Files, Dirs} = lists:foldl(fun(F, {Fs, Ds}) ->
                                        Path = Dir ++ "/" ++ F,
                                        case filelib:is_dir(Path) of
                                            true ->
                                                {Fs, [Path | Ds]};
                                            false ->
                                                {[Path | Fs], Ds}
                                        end
                                end, {[], []}, FilesInDir),
    lists:foreach(fun(F) ->
                          ok = file:delete(F)
                  end, Files),
    del_all_files(T ++ Dirs, [Dir | EmptyDirs]).

188  tests/kv679_dataloss_fb.erl  Normal file

@@ -0,0 +1,188 @@
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2014 Basho Technologies, Inc.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
%%% @copyright (C) 2014, Basho Technologies
%%% @doc
%%% riak_test for the kv679 lost clock flavour.
%%%
%%% Issue kv679 is a possible dataloss issue; it's basically caused by
%%% the fact that per-key logical clocks can go backwards in time in
%%% certain situations. The situation under test here is as follows:
%%%
%%% A coords a write to K [{a, 1}] and replicates to fallbacks D, E
%%% A coords a write to K [{a, 2}] and replicates to primaries B, C
%%% A coords a write to K [{a, 3}] and replicates to primaries B, C
%%% A loses its clock for K (so far this is like the lost clock case above)
%%% Read of A, D, E read repairs A with K=[{a, 1}]
%%% A coords a write, issues [{a, 2}] again
%%% Acked write is lost
%%%
%%% @end
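
%% For illustration, in the same [{actor, count}] shorthand as above --
%% a sketch of the final state, not an API call:
%%
%%   B and C (primaries) hold K at [{a, 3}], having seen all three writes.
%%   A is read repaired from the fallbacks back to [{a, 1}], so its next
%%   coordinated write issues [{a, 2}], an event id it has already used.
%%   [{a, 3}] descends [{a, 2}], so B and C drop the acked write on arrival.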

-module(kv679_dataloss_fb).
-behavior(riak_test).
-compile([export_all]).
-export([confirm/0]).

-include_lib("eunit/include/eunit.hrl").

-define(BUCKET, <<"kv679">>).
-define(KEY, <<"test">>).

confirm() ->
    Conf = [
            {riak_kv, [{anti_entropy, {off, []}}]},
            {riak_core, [{default_bucket_props, [{allow_mult, true},
                                                 {dvv_enabled, true},
                                                 {ring_creation_size, 8},
                                                 {vnode_management_timer, 1000},
                                                 {handoff_concurrency, 100},
                                                 {vnode_inactivity_timeout, 1000}]}]},
            {bitcask, [{sync_strategy, o_sync}, {io_mode, nif}]}],

    %% 6 nodes 'cos I want a perfect preflist when 2 primaries are down,
    %% i.e. I want to kill the fallbacks before they can handoff
    %% without affecting the primaries
    Nodes = rt:build_cluster(6, Conf),

    Clients = kv679_tombstone:create_pb_clients(Nodes),

    %% Get preflist for key
    PL = kv679_tombstone:get_preflist(hd(Nodes)),

    ?assert(kv679_tombstone2:perfect_preflist(PL)),

    lager:info("Got preflist"),

    {CoordNode, _} = CoordClient = kv679_tombstone:coordinating_client(Clients, PL),

    OtherPrimaries = [Node || {{_Idx, Node}, Type} <- PL,
                              Type == primary,
                              Node /= CoordNode],

    [rt:stop_and_wait(N) || N <- OtherPrimaries],

    lager:info("Killed 2 primaries"),

    rt:wait_until(fun() ->
                          NewPL = kv679_tombstone:get_preflist(CoordNode),
                          two_fallbacks_one_primary(NewPL) == {1, 2}
                  end),

    FBPL = kv679_tombstone:get_preflist(CoordNode),

    lager:info("Got a preflist with coord and 2 fbs ~p~n", [FBPL]),

    %% Write key twice at C1
    kv679_tombstone:write_key(CoordClient, [<<"bob">>, <<"jim">>]),

    kv679_tombstone2:dump_clock(CoordClient),

    lager:info("Clock at 2 fallbacks"),

    %% Kill the fallbacks before they can handoff
    Fallbacks = [Node || {{_Idx, Node}, Type} <- FBPL,
                         Type == fallback],

    [rt:brutal_kill(FB) || FB <- Fallbacks],

    %% Bring back the primaries and do some more writes
    [rt:start_and_wait(P) || P <- OtherPrimaries],

    lager:info("started primaries back up"),

    rt:wait_until(fun() ->
                          NewPL = kv679_tombstone:get_preflist(CoordNode),
                          NewPL == PL
                  end),

    kv679_tombstone:write_key(CoordClient, [<<"jon">>, <<"joe">>]),

    kv679_tombstone2:dump_clock(CoordClient),

    %% Kill those primaries with their frontier clocks
    [rt:brutal_kill(P) || P <- OtherPrimaries],

    lager:info("killed primaries again"),

    %% delete the local data at the coordinator for Key
    kv679_dataloss:delete_datadir(hd(PL)),

    %% Start up those fallbacks
    [rt:start_and_wait(F) || F <- Fallbacks],

    lager:info("restart fallbacks"),

    %% Wait for the fallback preflist
    rt:wait_until(fun() ->
                          NewPL = kv679_tombstone:get_preflist(CoordNode),
                          NewPL == FBPL
                  end),

    %% Read the key; read repair will mean that the data-deleted vnode
    %% will have an old clock (gone back in time!)

    kv679_tombstone2:dump_clock(CoordClient),

    %% write a new value, this _should_ be a sibling of what is on the
    %% crashed primaries
    kv679_tombstone:write_key(CoordClient, <<"anne">>),

    kv679_tombstone2:dump_clock(CoordClient),

    %% Time to start up those primaries, let handoff happen, and see
    %% what happens to that last write

    [rt:start_and_wait(P) || P <- OtherPrimaries],

    lager:info("restart primaries _again_"),

    rt:wait_until(fun() ->
                          NewPL = kv679_tombstone:get_preflist(CoordNode),
                          NewPL == PL
                  end),

    lager:info("wait for handoffs"),

    [begin
         rpc:call(FB, riak_core_vnode_manager, force_handoffs, []),
         rt:wait_until_transfers_complete([FB])
     end || FB <- Fallbacks],

    lager:info("final get"),

    Res = kv679_tombstone:read_key(CoordClient),

    ?assertMatch({ok, _}, Res),
    {ok, O} = Res,

    ?assertEqual([<<"anne">>, <<"joe">>], riakc_obj:get_values(O)),

    pass.

two_fallbacks_one_primary(PL) ->
    lists:foldl(fun({{_, _}, primary}, {P, F}) ->
                        {P+1, F};
                   ({{_, _}, fallback}, {P, F}) ->
                        {P, F+1}
                end,
                {0, 0},
                PL).

@@ -35,9 +35,7 @@

 confirm() ->
     [Node] = rt:deploy_nodes(1),

     %% Get preflist for key
-    PL = get_preflist(Node),
+    PL = kv679_tombstone:get_preflist(Node),

     %% Get vnodeids for each primary
     PartitionIdMap = get_vnodeids(PL, Node),
@@ -55,8 +53,3 @@ get_vnodeids(PLAnn, Node) ->
                     Type == primary],
     Statuses = rpc:call(Node, riak_kv_vnode, vnode_status, [PL]),
     [{Idx, proplists:get_value(vnodeid, Status)} || {Idx, Status} <- Statuses].
-
-get_preflist(Node) ->
-    Chash = rpc:call(Node, riak_core_util, chash_key, [{?BUCKET, ?KEY}]),
-    UpNodes = rpc:call(Node, riak_core_node_watcher, nodes, [riak_kv]),
-    rpc:call(Node, riak_core_apl, get_apl_ann, [Chash, 3, UpNodes]).