riak_test/tests/ts_simple_aggregation.erl

200 lines
7.4 KiB
Erlang

%% -------------------------------------------------------------------
%%
%% Copyright (c) 2016 Basho Technologies, Inc.
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License. You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%
%% -------------------------------------------------------------------
-module(ts_simple_aggregation).
-behavior(riak_test).
-export([
confirm/0,
verify_aggregation/1
]).
-include_lib("eunit/include/eunit.hrl").
-define(TEMPERATURE_COL_INDEX, 4).
-define(PRESSURE_COL_INDEX, 5).
-define(PRECIPITATION_COL_INDEX, 6).
%% riak_test entry point: run the basic aggregation function checks with
%% the `single' cluster type and report `pass' once they have completed.
%% The cluster returned by verify_aggregation/1 is not needed here.
confirm() ->
    _Cluster = verify_aggregation(single),
    pass.
%% Build a fold function for lists:foldl/3 that adds the squared distance
%% between each element and `Avg' to the accumulator, i.e. it accumulates
%% the sum of squared deviations from the given average.
stddev_fun_builder(Avg) ->
    fun(Value, SumOfSquares) ->
        Deviation = Avg - Value,
        SumOfSquares + Deviation * Deviation
    end.
%% Build the flat-string label used to identify one check in the results.
%% Both the cluster type (converted to a string) and `Name' are rendered
%% with ~p, so each part appears in double quotes, separated by a colon.
test_name(ClusterType, Name) ->
    TypeString = atom_to_list(ClusterType),
    Label = io_lib:format("~p:~p", [TypeString, Name]),
    lists:flatten(Label).
%% Run the aggregation query checks against a newly started cluster and
%% return that cluster.
%%
%% `ClusterType' picks the cluster shape: `multiple', `one_down' and
%% `delayed_one_down' start three nodes, every other value (including
%% `single') starts one. For `one_down' and `delayed_one_down' the second
%% node is stopped after the bucket type has been created and activated.
%%
%% The rows from ts_data:get_valid_aggregation_data(10) are written to the
%% "WeatherData" table, then COUNT, SUM, MIN, MAX, AVG/MEAN and the STDDEV
%% variants are queried and compared with values computed locally from the
%% same rows. All outcomes are passed to ts_data:results/1 and the client
%% connection is stopped before returning.
verify_aggregation(ClusterType) ->
    NodeCount = cluster_size(ClusterType),
    TableDef = ts_data:get_ddl(aggregation),
    lager:info("DDL is ~p", [TableDef]),
    Nodes = ts_setup:start_cluster(NodeCount),
    Client = ts_setup:conn(Nodes),
    RowCount = 10,
    Rows = ts_data:get_valid_aggregation_data(RowCount),
    lager:info("Data is ~p", [Rows]),
    Temps = column_values(?TEMPERATURE_COL_INDEX, Rows),
    Pressures = column_values(?PRESSURE_COL_INDEX, Rows),
    Precips = column_values(?PRECIPITATION_COL_INDEX, Rows),
    Table = "WeatherData",
    Filter = " WHERE myfamily = 'family1' and myseries = 'seriesX' and time >= 1 and time <= 10",
    %% Every query ends with the same table name and WHERE clause.
    FromClause = Table ++ Filter,
    ts_setup:create_bucket_type(Nodes, TableDef, Table),
    ts_setup:activate_bucket_type(Nodes, Table),
    %% Degraded clusters need to have DDL applied BEFORE taking down a node
    stop_second_node_if_degraded(ClusterType, Nodes),
    ok = riakc_ts:put(Client, Table, Rows),
    Run = fun(Sql) -> ts_ops:query(Nodes, Sql) end,
    Title = fun(Text) -> test_name(ClusterType, Text) end,

    %% 1: COUNT of the series column.
    CountSeriesGot = Run("SELECT COUNT(myseries) FROM " ++ FromClause),
    CountSeriesWant = {ok, {[<<"COUNT(myseries)">>], [{RowCount}]}},
    CountSeriesRes = ts_data:assert(Title("Count Strings"), CountSeriesWant, CountSeriesGot),

    %% 2: COUNT of the time column.
    CountTimeGot = Run("SELECT COUNT(time) FROM " ++ FromClause),
    CountTimeWant = {ok, {[<<"COUNT(time)">>], [{RowCount}]}},
    CountTimeRes = ts_data:assert(Title("Count Timestamps"), CountTimeWant, CountTimeGot),

    %% 3: several COUNTs in one query, with mixed-case function names.
    CountManyGot = Run("SELECT COUNT(pressure), count(temperature), cOuNt(precipitation) FROM " ++ FromClause),
    CountManyHeader = [<<"COUNT(pressure)">>,
                       <<"COUNT(temperature)">>,
                       <<"COUNT(precipitation)">>],
    CountManyRow = {count_non_nulls(Pressures),
                    count_non_nulls(Temps),
                    count_non_nulls(Precips)},
    CountManyWant = {ok, {CountManyHeader, [CountManyRow]}},
    CountManyRes = ts_data:assert(Title("Count Multiple Floats"), CountManyWant, CountManyGot),

    %% 4: SUM of a single column; only numeric cells contribute locally.
    SumOneGot = Run("SELECT SUM(temperature) FROM " ++ FromClause),
    TempNums = numeric_values(Temps),
    TempSum = lists:sum(TempNums),
    SumOneWant = {ok, {[<<"SUM(temperature)">>], [{TempSum}]}},
    SumOneRes = ts_data:assert(Title("Single Float Sum"), SumOneWant, SumOneGot),

    %% 5: SUM of three columns in one query.
    SumManyGot = Run("SELECT SUM(temperature), sum(pressure), sUM(precipitation) FROM " ++ FromClause),
    PressNums = numeric_values(Pressures),
    PressSum = lists:sum(PressNums),
    PrecipSum = lists:sum(numeric_values(Precips)),
    SumManyWant = {ok, {[<<"SUM(temperature)">>, <<"SUM(pressure)">>, <<"SUM(precipitation)">>],
                        [{TempSum, PressSum, PrecipSum}]}},
    SumManyRes = ts_data:assert(Title("Multiple Float Sums"), SumManyWant, SumManyGot),

    %% 6: MIN.
    MinGot = Run("SELECT MIN(temperature), MIN(pressure) FROM " ++ FromClause),
    TempMin = lists:min(TempNums),
    PressMin = lists:min(PressNums),
    MinWant = {ok, {[<<"MIN(temperature)">>, <<"MIN(pressure)">>],
                    [{TempMin, PressMin}]}},
    MinRes = ts_data:assert(Title("Min Floats"), MinWant, MinGot),

    %% 7: MAX.
    MaxGot = Run("SELECT MAX(temperature), MAX(pressure) FROM " ++ FromClause),
    TempMax = lists:max(TempNums),
    PressMax = lists:max(PressNums),
    MaxWant = {ok, {[<<"MAX(temperature)">>, <<"MAX(pressure)">>],
                    [{TempMax, PressMax}]}},
    MaxRes = ts_data:assert(Title("Max Floats"), MaxWant, MaxGot),

    %% 8: AVG and its MEAN alias; averages are taken over numeric cells only.
    TempCount = length(TempNums),
    PressCount = length(PressNums),
    TempAvg = TempSum / TempCount,
    PressAvg = PressSum / PressCount,
    AvgGot = Run("SELECT AVG(temperature), MEAN(pressure) FROM " ++ FromClause),
    AvgWant = {ok, {[<<"AVG(temperature)">>, <<"MEAN(pressure)">>],
                    [{TempAvg, PressAvg}]}},
    AvgRes = ts_data:assert(Title("Avg and Mean"), AvgWant, AvgGot),

    %% 9: standard deviations. The population form divides by N and the
    %% sample form by N-1; plain STDDEV is expected to equal the sample form.
    TempSqDev = lists:foldl(stddev_fun_builder(TempAvg), 0, TempNums),
    PressSqDev = lists:foldl(stddev_fun_builder(PressAvg), 0, PressNums),
    TempPopStd = math:sqrt(TempSqDev / TempCount),
    PressPopStd = math:sqrt(PressSqDev / PressCount),
    TempSampStd = math:sqrt(TempSqDev / (TempCount - 1)),
    PressSampStd = math:sqrt(PressSqDev / (PressCount - 1)),
    TimePopStd = 2.8722813232690143, %%this is the std of 1-10, calculated using numpy
    StdGot = Run("SELECT STDDEV_POP(temperature), STDDEV_POP(pressure)," ++
                 " STDDEV(temperature), STDDEV(pressure), " ++
                 " STDDEV_SAMP(temperature), STDDEV_SAMP(pressure)," ++
                 " STDDEV_POP(time) FROM " ++ FromClause),
    StdHeader = [<<"STDDEV_POP(temperature)">>, <<"STDDEV_POP(pressure)">>,
                 <<"STDDEV(temperature)">>, <<"STDDEV(pressure)">>,
                 <<"STDDEV_SAMP(temperature)">>, <<"STDDEV_SAMP(pressure)">>,
                 <<"STDDEV_POP(time)">>],
    StdRow = {TempPopStd, PressPopStd,
              TempSampStd, PressSampStd,
              TempSampStd, PressSampStd,
              TimePopStd},
    StdWant = {ok, {StdHeader, [StdRow]}},
    StdRes = ts_data:assert_float(Title("Standard Deviation"), StdWant, StdGot),

    %% 10: different aggregate functions mixed in a single query.
    MixedGot = Run("SELECT SUM(temperature), MIN(pressure), AVG(pressure) FROM " ++ FromClause),
    MixedWant = {ok, {[<<"SUM(temperature)">>, <<"MIN(pressure)">>, <<"AVG(pressure)">>],
                      [{TempSum, PressMin, PressAvg}]}},
    MixedRes = ts_data:assert(Title("Mixter Maxter"), MixedWant, MixedGot),

    ts_data:results([CountSeriesRes, CountTimeRes, CountManyRes,
                     SumOneRes, SumManyRes,
                     MinRes, MaxRes, AvgRes,
                     StdRes, MixedRes]),
    riakc_pb_socket:stop(Client),
    Nodes.

%% Number of nodes to start for a cluster type; unknown types get one node.
cluster_size(Type) when Type =:= multiple;
                        Type =:= one_down;
                        Type =:= delayed_one_down ->
    3;
cluster_size(_Type) ->
    1.

%% Stop the second node of the cluster for the degraded cluster types.
stop_second_node_if_degraded(Type, Nodes) when Type =:= delayed_one_down;
                                               Type =:= one_down ->
    rt:stop(hd(tl(Nodes)));
stop_second_node_if_degraded(_Type, _Nodes) ->
    ok.

%% Extract the element at position `Index' from every row tuple.
column_values(Index, Rows) ->
    [element(Index, Row) || Row <- Rows].

%% Keep only the numeric cells of a column.
numeric_values(Values) ->
    lists:filter(fun erlang:is_number/1, Values).
%% Count the cells in `Col' that are not the empty list. The empty list is
%% the value this module treats as a null cell when computing the expected
%% COUNT results.
count_non_nulls(Col) ->
    CountCell = fun([], Total) -> Total;
                   (_Cell, Total) -> Total + 1
                end,
    lists:foldl(CountCell, 0, Col).