If we just wait for the old vnode to die, we are not guaranteed that the new one has already been started and registered with the vnode manager, so the subsequent test code may end up calling into the old, dead vnode. We saw a couple of test failures in giddyup recently that I believe were caused by this race condition. To fix it, we wait for the vnode manager to return a new pid instead of just waiting for the old pid to die.
%% -------------------------------------------------------------------
%%
%% Copyright (c) 2012 Basho Technologies, Inc.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%%   http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(partition_repair).
-compile(export_all).
-include_lib("eunit/include/eunit.hrl").

-behavior(riak_test).
-export([confirm/0]).

-define(FMT(S, L), lists:flatten(io_lib:format(S, L))).

%% @doc This test verifies that partition repair successfully repairs
%% all data after it has been wiped out by a simulated disk crash.
confirm() ->
    SpamDir = rt_config:config_or_os_env(spam_dir),
    RingSize = list_to_integer(rt_config:config_or_os_env(ring_size, "16")),
    NVal = rt_config:config_or_os_env(n_val, undefined),
    TestMetaData = riak_test_runner:metadata(),
    KVBackend = proplists:get_value(backend, TestMetaData),

    NumNodes = rt_config:config_or_os_env(num_nodes, 4),
    HOConcurrency = rt_config:config_or_os_env(ho_concurrency, 2),
    {_KVBackendMod, KVDataDir} = backend_mod_dir(KVBackend),
    Bucket = <<"scotts_spam">>,

    lager:info("Build a cluster"),
    lager:info("riak_search enabled: true"),
    lager:info("ring_creation_size: ~p", [RingSize]),
    lager:info("n_val: ~p", [NVal]),
    lager:info("num nodes: ~p", [NumNodes]),
    lager:info("riak_core handoff_concurrency: ~p", [HOConcurrency]),
    lager:info("riak_core vnode_management_timer 1000"),
    Conf = [
            {riak_core,
             [
              {ring_creation_size, RingSize},
              {handoff_manager_timeout, 1000},
              {vnode_management_timer, 1000},
              {handoff_concurrency, HOConcurrency}
             ]},
            {riak_search,
             [
              {enabled, true}
             ]},
            %% @TODO This is only to test whether the test failure happens
            %% without AAE. The AAE errors found in the logs could be unrelated
            {riak_kv,
             [
              {anti_entropy, {off, []}}
             ]
            }
            %% {lager,
            %%  [{handlers,
            %%    [{lager_file_backend,
            %%      [{"./log/console.log",debug,10485760,"$D0",5}]}]}]}
           ],

    Nodes = rt:build_cluster(NumNodes, Conf),

    case NVal of
        undefined ->
            ok;
        _ ->
            lager:info("Set n_val to ~p", [NVal]),
            set_search_schema_nval(Bucket, NVal)
    end,

    lager:info("Enable search hook"),
    rt:enable_search_hook(hd(Nodes), Bucket),

    lager:info("Insert Scott's spam emails"),
    Pbc = rt:pbc(hd(Nodes)),
    rt:pbc_put_dir(Pbc, Bucket, SpamDir),

    lager:info("Stash ITFs for each partition"),
    %% @todo Should riak_test guarantee that the scratch pad is clean instead?
    ?assertCmd("rm -rf " ++ base_stash_path()),
    %% need to load the module so riak can see the fold fun
    rt:load_modules_on_nodes([?MODULE], Nodes),
    Ring = rt:get_ring(hd(Nodes)),
    Owners = riak_core_ring:all_owners(Ring),
    [stash_data(riak_search, Owner) || Owner <- Owners],

    lager:info("Stash KV data for each partition"),
    [stash_data(riak_kv, Owner) || Owner <- Owners],

    lager:info("Emulate data loss for riak_search, repair, verify correct data"),
    [kill_repair_verify(Owner, "merge_index", riak_search) || Owner <- Owners],

    %% TODO: parameterize backend
    lager:info("Emulate data loss for riak_kv, repair, verify correct data"),
    [kill_repair_verify(Owner, KVDataDir, riak_kv) || Owner <- Owners],

    lager:info("TEST PASSED"),
    pass.

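%% @doc Simulate loss of one partition's data for `Service', repair the
%% partition from its adjacent partitions, and verify that both the repaired
%% partition and the adjacent source partitions contain the stashed data.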
kill_repair_verify({Partition, Node}, DataSuffix, Service) ->
    StashPath = stash_path(Service, Partition),
    {ok, [Stash]} = file:consult(StashPath),
    ExpectToVerify = dict:size(Stash),
    VNodeName = list_to_atom(atom_to_list(Service) ++ "_vnode"),

    %% kill the partition data
    Path = DataSuffix ++ "/" ++ integer_to_list(Partition),
    lager:info("Killing data for ~p on ~p at ~s", [Partition, Node, Path]),
    rt:clean_data_dir([Node], Path),

    %% force restart of vnode since some data is kept in memory
    lager:info("Restarting ~p vnode for ~p on ~p", [Service, Partition, Node]),
    {ok, Pid} = rpc:call(Node, riak_core_vnode_manager, get_vnode_pid,
                         [Partition, VNodeName]),
    ?assert(rpc:call(Node, erlang, exit, [Pid, kill_for_test])),

    %% We used to wait for the old pid to die here, but there is a delay between
    %% the vnode process dying and a new one being registered with the vnode
    %% manager. If we don't wait for the manager to return a new vnode pid, it's
    %% possible for the test to fail with a gen_server:call timeout.
    rt:wait_until(fun() -> {ok, Pid} =/=
                               rpc:call(Node, riak_core_vnode_manager, get_vnode_pid,
                                        [Partition, VNodeName])
                  end),

    lager:info("Verify data is missing"),
    ?assertEqual(0, count_data(Service, {Partition, Node})),

    %% repair the partition, ignore return for now
    lager:info("Invoking repair for ~p on ~p", [Partition, Node]),
    %% TODO: Don't ignore return, check version of Riak and if greater
    %% or equal to 1.x then expect OK.
    Return =
        case Service of
            riak_kv ->
                rpc:call(Node, riak_kv_vnode, repair, [Partition]);
            riak_search ->
                rpc:call(Node, riak_search_vnode, repair, [Partition])
        end,

    %% Kill sending vnode to verify HO sender is killed
    %% {ok, [{KPart, KNode}|_]} = Return,
    %% {ok, NewPid} = rpc:call(KNode, riak_core_vnode_manager, get_vnode_pid,
    %%                         [KPart, VNodeName]),
    %% lager:info("killing src pid: ~p/~p ~p", [KNode, KPart, NewPid]),
    %% KR = rpc:call(KNode, erlang, exit, [NewPid, kill]),
    %% lager:info("result of kill: ~p", [KR]),
    %% timer:sleep(1000),
    %% ?assertNot(rpc:call(KNode, erlang, is_process_alive, [NewPid])),


    lager:info("return value of repair_index ~p", [Return]),
    lager:info("Wait for repair to finish"),
    wait_for_repair(Service, {Partition, Node}, 30),

    lager:info("Verify ~p on ~p is fully repaired", [Partition, Node]),
    Data2 = get_data(Service, {Partition, Node}),
    {Verified, NotFound} = dict:fold(verify(Service, Data2), {0, []}, Stash),
    case NotFound of
        [] -> ok;
        _ ->
            NF = StashPath ++ ".notfound",
            lager:info("Some data not found, writing that to ~s", [NF]),
            ?assertEqual(ok, file:write_file(NF, io_lib:format("~p.", [NotFound])))
    end,
    %% NOTE: If the following assert fails then check the .notfound
    %% file written above...it contains all postings that were in the
    %% stash that weren't found after the repair.
    ?assertEqual({Service, ExpectToVerify}, {Service, Verified}),

    {ok, [{BeforeP, _BeforeOwner}=B, _, {AfterP, _AfterOwner}=A]} = Return,
    lager:info("Verify before src partition ~p still has data", [B]),
    StashPathB = stash_path(Service, BeforeP),
    {ok, [StashB]} = file:consult(StashPathB),
    ExpectToVerifyB = dict:size(StashB),
    BeforeData = get_data(Service, B),
    {VerifiedB, NotFoundB} = dict:fold(verify(Service, BeforeData), {0, []}, StashB),
    case NotFoundB of
        [] -> ok;
        _ ->
            NFB = StashPathB ++ ".notfound",
            ?assertEqual(ok, file:write_file(NFB, io_lib:format("~p.", [NotFoundB]))),
            throw({src_partition_missing_data, NFB})
    end,
    ?assertEqual(ExpectToVerifyB, VerifiedB),

    lager:info("Verify after src partition ~p still has data", [A]),
    StashPathA = stash_path(Service, AfterP),
    {ok, [StashA]} = file:consult(StashPathA),
    ExpectToVerifyA = dict:size(StashA),
    AfterData = get_data(Service, A),
    {VerifiedA, NotFoundA} = dict:fold(verify(Service, AfterData), {0, []}, StashA),
    case NotFoundA of
        [] -> ok;
        _ ->
            NFA = StashPathA ++ ".notfound",
            ?assertEqual(ok, file:write_file(NFA, io_lib:format("~p.", [NotFoundA]))),
            throw({src_partition_missing_data, NFA})
    end,
    ?assertEqual(ExpectToVerifyA, VerifiedA).

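%% @doc Return a fold fun that checks each stashed entry against the data read
%% back after repair, counting entries that verify and collecting any that are
%% missing or differ.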
verify(riak_kv, DataAfterRepair) ->
    fun(BKey, StashedValue, {Verified, NotFound}) ->
            StashedData={BKey, StashedValue},
            case dict:find(BKey, DataAfterRepair) of
                error -> {Verified, [StashedData|NotFound]};
                {ok, Value} ->
                    if Value == StashedValue -> {Verified+1, NotFound};
                       true -> {Verified, [StashedData|NotFound]}
                    end
            end
    end;

verify(riak_search, PostingsAfterRepair) ->
    fun(IFT, StashedPostings, {Verified, NotFound}) ->
            StashedPosting={IFT, StashedPostings},
            case dict:find(IFT, PostingsAfterRepair) of
                error -> {Verified, [StashedPosting|NotFound]};
                {ok, RepairedPostings} ->
                    case lists:all(fun is_true/1,
                                   [lists:member(P, RepairedPostings)
                                    || P <- StashedPostings]) of
                        true -> {Verified+1, NotFound};
                        false -> {Verified, [StashedPosting|NotFound]}
                    end
            end
    end.

is_true(X) ->
    X == true.

count_data(Service, {Partition, Node}) ->
    dict:size(get_data(Service, {Partition, Node})).

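%% @doc Fold over the given vnode via its vnode master and return its entire
%% contents as a dict keyed the same way as the stash files.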
get_data(Service, {Partition, Node}) ->
    VMaster =
        case Service of
            riak_kv -> riak_kv_vnode_master;
            riak_search -> riak_search_vnode_master
        end,
    %% TODO: add compile time support for riak_test
    Req =
        case Service of
            riak_kv ->
                {riak_core_fold_req_v1, fun stash_kv/3, dict:new()};
            riak_search ->
                {riak_core_fold_req_v1, fun stash_search/3, dict:new()}
        end,
    Data = riak_core_vnode_master:sync_command({Partition, Node},
                                               Req,
                                               VMaster,
                                               rt_config:get(rt_max_wait_time)),
    Data.

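%% @doc Read all data for a partition and write it to a stash file on disk so
%% it can be compared against the partition's contents after repair.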
stash_data(Service, {Partition, Node}) ->
    File = stash_path(Service, Partition),
    ?assertEqual(ok, filelib:ensure_dir(File)),
    lager:info("Stashing ~p/~p at ~p to ~p", [Service, Partition, Node, File]),
    Postings = get_data(Service, {Partition, Node}),
    ?assertEqual(ok, file:write_file(File, io_lib:format("~p.", [Postings]))).

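%% Fold funs used by get_data/2 to accumulate vnode contents into a dict.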
stash_kv(Key, Value, Stash) ->
    dict:store(Key, Value, Stash).

stash_search({_I,{_F,_T}}=K, _Postings=V, Stash) ->
    dict:append_list(K, V, Stash).

base_stash_path() ->
    rt_config:get(rt_scratch_dir) ++ "/dev/data_stash/".

stash_path(Service, Partition) ->
    base_stash_path() ++ atom_to_list(Service) ++ "/" ++ integer_to_list(Partition) ++ ".stash".

file_list(Dir) ->
    filelib:wildcard(Dir ++ "/*").

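%% @doc Poll the service's repair status for the partition once a second,
%% returning when the repair is no longer in progress or throwing after the
%% given number of tries.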
wait_for_repair(_, _, 0) ->
    throw(wait_for_repair_max_tries);
wait_for_repair(Service, {Partition, Node}, Tries) ->
    Reply =
        case Service of
            riak_kv ->
                rpc:call(Node, riak_kv_vnode, repair_status, [Partition]);
            riak_search ->
                rpc:call(Node, riak_search_vnode, repair_status, [Partition])
        end,
    case Reply of
        not_found -> ok;
        in_progress ->
            timer:sleep(timer:seconds(1)),
            wait_for_repair(Service, {Partition, Node}, Tries - 1)
    end.

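%% @doc Build the path to a partition's backend data directory for a node in
%% the current rtdev devrel.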
data_path(Node, Suffix, Partition) ->
    [Name, _] = string:tokens(atom_to_list(Node), "@"),
    Base = rt_config:get('rtdev_path.current') ++ "/dev/" ++ Name ++ "/data",
    Base ++ "/" ++ Suffix ++ "/" ++ integer_to_list(Partition).

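%% @doc Map the configured KV backend to its backend module and the name of
%% its data directory.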
backend_mod_dir(undefined) ->
    %% riak_test defaults to bitcask when undefined
    backend_mod_dir(bitcask);
backend_mod_dir(bitcask) ->
    {riak_kv_bitcask_backend, "bitcask"};
backend_mod_dir(eleveldb) ->
    {riak_kv_eleveldb_backend, "leveldb"}.


-spec set_search_schema_nval(binary(), pos_integer()) -> ok.
set_search_schema_nval(Bucket, NVal) ->
    %% TODO: Search currently offers no easy way to programmatically
    %% change a schema and save it. This is because the external and
    %% internal formats of the schema are different. The parser reads
    %% the external format and an internal representation is created
    %% which is then stored/accessed via `riak_search_config'. Rather
    %% than allowing the internal format to be modified and set, you
    %% must send the update in the external format.
    BucketStr = binary_to_list(Bucket),
    SearchCmd = ?FMT("~s/dev/dev1/bin/search-cmd", [rt_config:get('rtdev_path.current')]),
    GetSchema = ?FMT("~s show-schema ~s > current-schema",
                     [SearchCmd, BucketStr]),
    ModifyNVal = ?FMT("sed -E 's/n_val, [0-9]+/n_val, ~s/' "
                      "current-schema > new-schema",
                      [NVal]),
    SetSchema = ?FMT("~s set-schema ~s new-schema", [SearchCmd, BucketStr]),
    ClearCache = ?FMT("~s clear-schema-cache", [SearchCmd]),
    ?assertCmd(GetSchema),
    ?assertCmd(ModifyNVal),
    ?assertCmd(SetSchema),
    ?assertCmd(ClearCache).