%% -------------------------------------------------------------------
%%
%% Copyright (c) 2012 Basho Technologies, Inc.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(partition_repair).
-behavior(riak_test).

-export([confirm/0]).
-compile(export_all).

-include_lib("eunit/include/eunit.hrl").

-define(FMT(S, L), lists:flatten(io_lib:format(S, L))).

%% @doc This test verifies that partition repair successfully repairs
%% all data after it has been wiped out by a simulated disk crash.
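%%
%% Several settings can be overridden via the riak_test config or the
%% OS environment (see the rt_config:config_or_os_env/1,2 calls below):
%% `spam_dir' is required and must point at a directory of sample
%% objects to insert; `ring_size', `n_val', `num_nodes', and
%% `ho_concurrency' are optional.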
confirm() ->
    SpamDir = rt_config:config_or_os_env(spam_dir),
    RingSize = list_to_integer(rt_config:config_or_os_env(ring_size, "16")),
    NVal = rt_config:config_or_os_env(n_val, undefined),
    TestMetaData = riak_test_runner:metadata(),
    KVBackend = proplists:get_value(backend, TestMetaData),
    NumNodes = rt_config:config_or_os_env(num_nodes, 4),
    HOConcurrency = rt_config:config_or_os_env(ho_concurrency, 2),
    {_KVBackendMod, KVDataDir} = backend_mod_dir(KVBackend),
    Bucket = <<"scotts_spam">>,

    lager:info("Build a cluster"),
    lager:info("riak_search enabled: true"),
    lager:info("ring_creation_size: ~p", [RingSize]),
    lager:info("n_val: ~p", [NVal]),
    lager:info("num nodes: ~p", [NumNodes]),
    lager:info("riak_core handoff_concurrency: ~p", [HOConcurrency]),
    lager:info("riak_core vnode_management_timer 1000"),
    Conf = [
            {riak_core,
             [
              {ring_creation_size, RingSize},
              {handoff_manager_timeout, 1000},
              {vnode_management_timer, 1000},
              {handoff_concurrency, HOConcurrency}
             ]},
            {riak_search,
             [
              {enabled, true}
             ]},
            %% @TODO This is only to test whether the test failure
            %% happens without AAE. The AAE errors found in the logs
            %% could be unrelated.
            {riak_kv,
             [
              {anti_entropy, {off, []}}
             ]}
            %% {lager,
            %%  [{handlers,
            %%    [{lager_file_backend,
            %%      [{"./log/console.log",debug,10485760,"$D0",5}]}]}]}
           ],
    Nodes = rt:build_cluster(NumNodes, Conf),

    case NVal of
        undefined ->
            ok;
        _ ->
            lager:info("Set n_val to ~p", [NVal]),
            set_search_schema_nval(Bucket, NVal)
    end,

    lager:info("Enable search hook"),
    rt:enable_search_hook(hd(Nodes), Bucket),

    lager:info("Insert Scott's spam emails"),
    Pbc = rt:pbc(hd(Nodes)),
    rt:pbc_put_dir(Pbc, Bucket, SpamDir),

    lager:info("Stash ITFs for each partition"),
    %% @todo Should riak_test guarantee that the scratch pad is clean instead?
    ?assertCmd("rm -rf " ++ base_stash_path()),
    %% Need to load the module so riak can see the fold fun.
    rt:load_modules_on_nodes([?MODULE], Nodes),
    Ring = rt:get_ring(hd(Nodes)),
    Owners = riak_core_ring:all_owners(Ring),
    [stash_data(riak_search, Owner) || Owner <- Owners],

    lager:info("Stash KV data for each partition"),
    [stash_data(riak_kv, Owner) || Owner <- Owners],

    lager:info("Emulate data loss for riak_search, repair, verify correct data"),
    [kill_repair_verify(Owner, "merge_index", riak_search) || Owner <- Owners],

    %% TODO: parameterize backend
    lager:info("Emulate data loss for riak_kv, repair, verify correct data"),
    [kill_repair_verify(Owner, KVDataDir, riak_kv) || Owner <- Owners],

    lager:info("TEST PASSED"),
    pass.
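
%% @doc Wipe the on-disk data for `Partition' on `Node', restart the
%% vnode, confirm the data is gone, invoke repair, and then verify that
%% the repaired partition (and the two source partitions feeding the
%% repair) match the previously stashed data.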
kill_repair_verify({Partition, Node}, DataSuffix, Service) ->
    StashPath = stash_path(Service, Partition),
    {ok, [Stash]} = file:consult(StashPath),
    ExpectToVerify = dict:size(Stash),
    VNodeName = list_to_atom(atom_to_list(Service) ++ "_vnode"),

    %% Kill the partition data.
    Path = DataSuffix ++ "/" ++ integer_to_list(Partition),
    lager:info("Killing data for ~p on ~p at ~s", [Partition, Node, Path]),
    rt:clean_data_dir([Node], Path),

    %% Force restart of the vnode since some data is kept in memory.
    lager:info("Restarting ~p vnode for ~p on ~p", [Service, Partition, Node]),
    {ok, Pid} = rpc:call(Node, riak_core_vnode_manager, get_vnode_pid,
                         [Partition, VNodeName]),
    ?assert(rpc:call(Node, erlang, exit, [Pid, kill_for_test])),
    rt:wait_until(Node, fun(N) -> not(rpc:call(N, erlang, is_process_alive, [Pid])) end),

    lager:info("Verify data is missing"),
    ?assertEqual(0, count_data(Service, {Partition, Node})),

    %% Repair the partition, ignore return for now.
    lager:info("Invoking repair for ~p on ~p", [Partition, Node]),
    %% TODO: Don't ignore return, check version of Riak and if greater
    %% or equal to 1.x then expect OK.
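    %% As used further down, a successful repair returns {ok, Pairs}
    %% where Pairs is a list of {Partition, Owner} pairs taking part in
    %% the repair: the predecessor source, the target itself, and the
    %% successor source. The pattern match on `Return' below relies on
    %% that shape.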
    Return =
        case Service of
            riak_kv ->
                rpc:call(Node, riak_kv_vnode, repair, [Partition]);
            riak_search ->
                rpc:call(Node, riak_search_vnode, repair, [Partition])
        end,

    %% Kill sending vnode to verify HO sender is killed
    %% {ok, [{KPart, KNode}|_]} = Return,
    %% {ok, NewPid} = rpc:call(KNode, riak_core_vnode_manager, get_vnode_pid,
    %%                         [KPart, VNodeName]),
    %% lager:info("killing src pid: ~p/~p ~p", [KNode, KPart, NewPid]),
    %% KR = rpc:call(KNode, erlang, exit, [NewPid, kill]),
    %% lager:info("result of kill: ~p", [KR]),
    %% timer:sleep(1000),
    %% ?assertNot(rpc:call(KNode, erlang, is_process_alive, [NewPid])),

    lager:info("return value of repair: ~p", [Return]),
    lager:info("Wait for repair to finish"),
    wait_for_repair(Service, {Partition, Node}, 30),

    lager:info("Verify ~p on ~p is fully repaired", [Partition, Node]),
    Data2 = get_data(Service, {Partition, Node}),
    {Verified, NotFound} = dict:fold(verify(Service, Data2), {0, []}, Stash),
    case NotFound of
        [] -> ok;
        _ ->
            NF = StashPath ++ ".notfound",
            lager:info("Some data not found, writing that to ~s", [NF]),
            ?assertEqual(ok, file:write_file(NF, io_lib:format("~p.", [NotFound])))
    end,
    %% NOTE: If the following assert fails then check the .notfound
    %% file written above...it contains all postings that were in the
    %% stash that weren't found after the repair.
    ?assertEqual({Service, ExpectToVerify}, {Service, Verified}),

    {ok, [{BeforeP, _BeforeOwner}=B, _, {AfterP, _AfterOwner}=A]} = Return,

    lager:info("Verify before src partition ~p still has data", [B]),
    StashPathB = stash_path(Service, BeforeP),
    {ok, [StashB]} = file:consult(StashPathB),
    ExpectToVerifyB = dict:size(StashB),
    BeforeData = get_data(Service, B),
    {VerifiedB, NotFoundB} = dict:fold(verify(Service, BeforeData), {0, []}, StashB),
    case NotFoundB of
        [] -> ok;
        _ ->
            NFB = StashPathB ++ ".notfound",
            ?assertEqual(ok, file:write_file(NFB, io_lib:format("~p.", [NotFoundB]))),
            throw({src_partition_missing_data, NFB})
    end,
    ?assertEqual(ExpectToVerifyB, VerifiedB),

    lager:info("Verify after src partition ~p still has data", [A]),
    StashPathA = stash_path(Service, AfterP),
    {ok, [StashA]} = file:consult(StashPathA),
    ExpectToVerifyA = dict:size(StashA),
    AfterData = get_data(Service, A),
    {VerifiedA, NotFoundA} = dict:fold(verify(Service, AfterData), {0, []}, StashA),
    case NotFoundA of
        [] -> ok;
        _ ->
            NFA = StashPathA ++ ".notfound",
            ?assertEqual(ok, file:write_file(NFA, io_lib:format("~p.", [NotFoundA]))),
            throw({src_partition_missing_data, NFA})
    end,
    ?assertEqual(ExpectToVerifyA, VerifiedA).
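
%% @doc Return a fold fun for use over a stash dict. The fun counts
%% entries whose stashed value is present in the post-repair data and
%% accumulates any entries that are missing or differ.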
verify(riak_kv, DataAfterRepair) ->
    fun(BKey, StashedValue, {Verified, NotFound}) ->
            StashedData = {BKey, StashedValue},
            case dict:find(BKey, DataAfterRepair) of
                error -> {Verified, [StashedData|NotFound]};
                {ok, Value} ->
                    if Value == StashedValue -> {Verified+1, NotFound};
                       true -> {Verified, [StashedData|NotFound]}
                    end
            end
    end;
verify(riak_search, PostingsAfterRepair) ->
    fun(IFT, StashedPostings, {Verified, NotFound}) ->
            StashedPosting = {IFT, StashedPostings},
            case dict:find(IFT, PostingsAfterRepair) of
                error -> {Verified, [StashedPosting|NotFound]};
                {ok, RepairedPostings} ->
                    case lists:all(fun is_true/1,
                                   [lists:member(P, RepairedPostings)
                                    || P <- StashedPostings]) of
                        true -> {Verified+1, NotFound};
                        false -> {Verified, [StashedPosting|NotFound]}
                    end
            end
    end.

is_true(X) ->
    X == true.

count_data(Service, {Partition, Node}) ->
    dict:size(get_data(Service, {Partition, Node})).
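
%% @doc Fold over every object in the given vnode via a synchronous
%% vnode-master command and collect the results into a dict.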
get_data(Service, {Partition, Node}) ->
    VMaster =
        case Service of
            riak_kv -> riak_kv_vnode_master;
            riak_search -> riak_search_vnode_master
        end,
    %% TODO: add compile time support for riak_test
    Req =
        case Service of
            riak_kv ->
                {riak_core_fold_req_v1, fun stash_kv/3, dict:new()};
            riak_search ->
                {riak_core_fold_req_v1, fun stash_search/3, dict:new()}
        end,
    riak_core_vnode_master:sync_command({Partition, Node},
                                        Req,
                                        VMaster,
                                        rt_config:get(rt_max_wait_time)).
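
%% @doc Dump the contents of the given vnode to a stash file as a
%% single Erlang term, so it can be read back later with file:consult/1.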
stash_data(Service, {Partition, Node}) ->
    File = stash_path(Service, Partition),
    ?assertEqual(ok, filelib:ensure_dir(File)),
    lager:info("Stashing ~p/~p at ~p to ~p", [Service, Partition, Node, File]),
    Postings = get_data(Service, {Partition, Node}),
    ?assertEqual(ok, file:write_file(File, io_lib:format("~p.", [Postings]))).
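
%% Fold callbacks handed to the vnodes via riak_core_fold_req_v1. KV
%% values are stored as-is; search postings are appended per IFT key so
%% repeated keys accumulate all of their postings.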
stash_kv(Key, Value, Stash) ->
    dict:store(Key, Value, Stash).

stash_search({_I,{_F,_T}}=K, _Postings=V, Stash) ->
    dict:append_list(K, V, Stash).

base_stash_path() ->
    rt_config:get(rt_scratch_dir) ++ "/dev/data_stash/".

stash_path(Service, Partition) ->
    base_stash_path() ++ atom_to_list(Service) ++ "/"
        ++ integer_to_list(Partition) ++ ".stash".

file_list(Dir) ->
    filelib:wildcard(Dir ++ "/*").
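
%% @doc Poll repair_status/1 once per second until the repair is no
%% longer in progress, giving up after `Tries' attempts.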
wait_for_repair(_, _, 0) ->
    throw(wait_for_repair_max_tries);
wait_for_repair(Service, {Partition, Node}, Tries) ->
    Reply =
        case Service of
            riak_kv ->
                rpc:call(Node, riak_kv_vnode, repair_status, [Partition]);
            riak_search ->
                rpc:call(Node, riak_search_vnode, repair_status, [Partition])
        end,
    case Reply of
        not_found -> ok;
        in_progress ->
            timer:sleep(timer:seconds(1)),
            wait_for_repair(Service, {Partition, Node}, Tries - 1)
    end.
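
%% @doc Build the path to a partition's data directory under the rtdev
%% devrel layout, e.g. <rtdev_path>/dev/dev1/data/<Suffix>/<Partition>.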
data_path(Node, Suffix, Partition) ->
    [Name, _] = string:tokens(atom_to_list(Node), "@"),
    Base = rt_config:get('rtdev_path.current') ++ "/dev/" ++ Name ++ "/data",
    Base ++ "/" ++ Suffix ++ "/" ++ integer_to_list(Partition).
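
%% @doc Map a KV backend name to its backend module and on-disk data
%% directory name.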
backend_mod_dir(undefined) ->
    %% riak_test defaults to bitcask when undefined
    backend_mod_dir(bitcask);
backend_mod_dir(bitcask) ->
    {riak_kv_bitcask_backend, "bitcask"};
backend_mod_dir(eleveldb) ->
    {riak_kv_eleveldb_backend, "leveldb"}.
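
%% @doc Set the search schema n_val for `Bucket' by shelling out to the
%% search-cmd script on dev1 (see the TODO below for why this goes
%% through the external schema format rather than an API call).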
-spec set_search_schema_nval(binary(), pos_integer()) -> ok.
set_search_schema_nval(Bucket, NVal) ->
    %% TODO: Search currently offers no easy way to programmatically
    %% change a schema and save it. This is because the external and
    %% internal formats of the schema are different. The parser reads
    %% the external format and an internal representation is created
    %% which is then stored/accessed via `riak_search_config'. Rather
    %% than allowing the internal format to be modified and set you
    %% must send the update in the external format.
    BucketStr = binary_to_list(Bucket),
    SearchCmd = ?FMT("~s/dev/dev1/bin/search-cmd", [rt_config:get('rtdev_path.current')]),
    GetSchema = ?FMT("~s show-schema ~s > current-schema",
                     [SearchCmd, BucketStr]),
    %% NVal is a pos_integer() per the spec, so format it with ~b.
    ModifyNVal = ?FMT("sed -E 's/n_val, [0-9]+/n_val, ~b/' "
                      "current-schema > new-schema",
                      [NVal]),
    SetSchema = ?FMT("~s set-schema ~s new-schema", [SearchCmd, BucketStr]),
    ClearCache = ?FMT("~s clear-schema-cache", [SearchCmd]),
    ?assertCmd(GetSchema),
    ?assertCmd(ModifyNVal),
    ?assertCmd(SetSchema),
    ?assertCmd(ClearCache).