TD-788: Adds metrics collectors for hackney and ranch stats (#30)

* Adds ranch active_connections metric
* Adds collectors test checks
* Adds explicit 'return' clause to label helper func
* Rename collector modules for consistency
* Adds event handler option for RPC events severity mapping
* Adds events severity mapping description
This commit is contained in:
Aleksey Kashapov 2023-12-19 15:17:08 +03:00 committed by GitHub
parent 5d46291a6b
commit 2cbe19998c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 573 additions and 83 deletions

107
README.md
View File

@ -168,6 +168,80 @@ check_loan_limits(Limits, Context, N) ->
### Woody Event Handler
Интерфейс для получения и логирования событий RPC библиотеки. Также содержит вспомогательные функции для удобного форматирования событий. Пример реализации _event handler_'а - [woody_event_handler_default.erl](src/woody_event_handler_default.erl).
Через опции обработчика можно сообщить параметры соответствия событий RPC для уровня логирования:
``` erlang
woody_event_handler_default:handle_event(Event, RpcId, Meta, #{
formatter_opts => ...,
events_severity => #{
['call service'] => debug,
...
}
}).
```
Где эти параметры имеют значения по умолчанию в следующем виде:
``` erlang
#{
events_severity => #{
%% Пограничные события работы клиента
['client begin'] => debug,
['client end'] => debug,
%% Начало вызова сервиса, перед формированием запроса
['call service'] => info,
%% Результат вызова сервиса на клиенте
['service result'] => info,
['service result', error] => error,
%% Событие состоявшегося вызова с возвращённой ошибкой в качестве
%% результата
['service result', warning] => warning,
%% Клиентские события, включая обнаружения хоста
['client send'] => debug,
['client resolve begin'] => debug,
['client resolve result'] => debug,
['client receive'] => debug,
['client receive', error] => warning,
%% Непосредственные события обслуживания запроса сервером
['server receive'] => debug,
['server send'] => debug,
['server send', error] => warning,
%% Начало обслуживания вызова функции сервиса
['invoke service handler'] => info,
%% Завершение обслуживание вызова с разным итогом
['service handler result'] => info,
['service handler result', error, business] => info,
['service handler result', error, system] => error,
%% Обслуживание вызова завершилось поправимой ошибкой;
%% по крайней мере она не в рамках бизнес-логики, но и не системное
%% исключение
['service handler result', warning] => warning,
%% События кеширующей обертки клиента
['client cache begin'] => debug,
['client cache end'] => debug,
['client cache hit'] => info,
['client cache miss'] => debug,
['client cache update'] => debug,
['client cache result'] => debug,
%% Внутренние ошибки с разным контекстом/происхождением
['internal error', system] => error,
['internal error', business] => warning,
%% Событие трассировки на уровне woody, см. далее
['trace event'] => debug
}
}.
```
### Tracing
Можно динамически включать и выключать трассировку http запросов и ответов.
@ -176,3 +250,36 @@ check_loan_limits(Limits, Context, N) ->
application:set_env(woody, trace_http_server, true).
application:unset_env(woody, trace_http_server).
```
### Prometheus metrics
Чтобы осуществлять экспорт метрик следует добавить [соответствующий хэндлер](https://github.com/deadtrickster/prometheus-cowboy#exporting-metrics-with-handlers) для cowboy-сервера.
``` erlang
{deps, [
...
{prometheus_cowboy, "0.1.8"}
]}
```
Для сбора серверных метрик необходимо на старте приложения объявить их
``` erlang
ok = woody_ranch_prometheus_collector:setup()
```
Если дополнительно интересуют все метрики ковбоя то можно добавить [реализацию обсервера из библиотеки](https://github.com/deadtrickster/prometheus-cowboy?tab=readme-ov-file#exporting-cowboy2-metrics).
Для сбора клиентских метрик необходимо на старте приложения объявить их
``` erlang
ok = woody_hackney_prometheus_collector:setup()
```
Это будет публиковать целочисленные значения в шкале 'woody_hackney_pool_usage' с метками `pool` в качестве названия пула и `status` в качестве параметра соответствующего значения:
- `in_use_count` -- используемые соединения в пуле;
- `free_count` -- свободное количество в пуле;
- `queue_count` -- очередь за свободными соединениями
**TODO** Возможно стоит рассмотреть публикацию метрик по количеству исполняемых запросов в общем, с разбивкой по хосту и количества новых и переиспользуемых соединений в каждом из пулов. [Хакни это предоставляет](https://github.com/benoitc/hackney/tree/1.18.0#metrics).

View File

@ -33,6 +33,7 @@
{thrift, {git, "https://github.com/valitydev/thrift_erlang.git", {branch, "master"}}},
{snowflake, {git, "https://github.com/valitydev/snowflake.git", {branch, "master"}}},
{genlib, {git, "https://github.com/valitydev/genlib.git", {branch, "master"}}},
{prometheus, "4.8.1"},
{opentelemetry_api, "1.2.1"}
]}.
@ -81,9 +82,6 @@
{deps, [
{cth_readable, "1.4.9"},
{proper, "1.4.0"},
{woody_api_hay,
{git, "https://github.com/valitydev/woody_api_hay.git",
{ref, "4c39134cddaa9bf6fb8db18e7030ae64f1efb3a9"}}},
{damsel,
{git, "https://github.com/valitydev/damsel.git", {ref, "3fa6f31db54b2ae781b27898ab4daf56bb36eb36"}}},
{mg_proto,
@ -92,7 +90,7 @@
{opentelemetry, "1.3.0"}
]},
{dialyzer, [
{plt_extra_apps, [how_are_you, eunit, proper, common_test, cth_readable, opentelemetry]}
{plt_extra_apps, [prometheus, eunit, proper, common_test, cth_readable, opentelemetry]}
]}
]}
]}.

View File

@ -17,6 +17,8 @@
{pkg,<<"opentelemetry_semantic_conventions">>,<<"0.2.0">>},
1},
{<<"parse_trans">>,{pkg,<<"parse_trans">>,<<"3.3.1">>},1},
{<<"prometheus">>,{pkg,<<"prometheus">>,<<"4.8.1">>},0},
{<<"quantile_estimator">>,{pkg,<<"quantile_estimator">>,<<"0.2.1">>},1},
{<<"ranch">>,{pkg,<<"ranch">>,<<"1.8.0">>},1},
{<<"snowflake">>,
{git,"https://github.com/valitydev/snowflake.git",
@ -42,6 +44,8 @@
{<<"opentelemetry_api">>, <<"7B69ED4F40025C005DE0B74FCE8C0549625D59CB4DF12D15C32FE6DC5076FF42">>},
{<<"opentelemetry_semantic_conventions">>, <<"B67FE459C2938FCAB341CB0951C44860C62347C005ACE1B50F8402576F241435">>},
{<<"parse_trans">>, <<"16328AB840CC09919BD10DAB29E431DA3AF9E9E7E7E6F0089DD5A2D2820011D8">>},
{<<"prometheus">>, <<"FA76B152555273739C14B06F09F485CF6D5D301FE4E9D31B7FF803D26025D7A0">>},
{<<"quantile_estimator">>, <<"EF50A361F11B5F26B5F16D0696E46A9E4661756492C981F7B2229EF42FF1CD15">>},
{<<"ranch">>, <<"8C7A100A139FD57F17327B6413E4167AC559FBC04CA7448E9BE9057311597A1D">>},
{<<"ssl_verify_fun">>, <<"CF344F5692C82D2CD7554F5EC8FD961548D4FD09E7D22F5B62482E5AEAEBD4B0">>},
{<<"unicode_util_compat">>, <<"BC84380C9AB48177092F43AC89E4DFA2C6D62B40B8BD132B1059ECC7232F9A78">>}]},
@ -58,6 +62,8 @@
{<<"opentelemetry_api">>, <<"6D7A27B7CAD2AD69A09CABF6670514CAFCEC717C8441BEB5C96322BAC3D05350">>},
{<<"opentelemetry_semantic_conventions">>, <<"D61FA1F5639EE8668D74B527E6806E0503EFC55A42DB7B5F39939D84C07D6895">>},
{<<"parse_trans">>, <<"07CD9577885F56362D414E8C4C4E6BDF10D43A8767ABB92D24CBE8B24C54888B">>},
{<<"prometheus">>, <<"6EDFBE928D271C7F657A6F2C46258738086584BD6CAE4A000B8B9A6009BA23A5">>},
{<<"quantile_estimator">>, <<"282A8A323CA2A845C9E6F787D166348F776C1D4A41EDE63046D72D422E3DA946">>},
{<<"ranch">>, <<"49FBCFD3682FAB1F5D109351B61257676DA1A2FDBE295904176D5E521A2DDFE5">>},
{<<"ssl_verify_fun">>, <<"BDB0D2471F453C88FF3908E7686F86F9BE327D065CC1EC16FA4540197EA04680">>},
{<<"unicode_util_compat">>, <<"25EEE6D67DF61960CF6A794239566599B09E17E668D3700247BC498638152521">>}]}

View File

@ -4,6 +4,7 @@
-export([handle_event/3, handle_event/4]).
-export([get_event_severity/2]).
-export([get_event_severity/3]).
-export([format_event/3, format_event/4]).
-export([format_meta/2, format_meta/3]).
-export([format_rpc_id/1]).
@ -221,7 +222,8 @@
-export_type([meta/0]).
-type options() :: #{
formatter_opts => woody_event_formatter:opts()
formatter_opts => woody_event_formatter:opts(),
events_severity => #{nonempty_list(atom()) => severity()}
}.
-export_type([options/0]).
@ -379,68 +381,77 @@ format_event(UnknownEventType, Meta, _Opts) ->
{" unknown woody event type '~s' with meta ~p", [UnknownEventType, Meta]}.
-spec get_event_severity(event(), event_meta()) -> severity().
get_event_severity(?EV_CLIENT_BEGIN, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_END, _Meta) ->
debug;
get_event_severity(?EV_CALL_SERVICE, _Meta) ->
info;
get_event_severity(?EV_SERVICE_RESULT, #{status := ok}) ->
info;
get_event_severity(?EV_SERVICE_RESULT, #{status := error, stack := _Stack}) ->
error;
get_event_severity(?EV_SERVICE_RESULT, #{status := error, result := _Error}) ->
warning;
get_event_severity(?EV_CLIENT_SEND, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_RESOLVE_RESULT, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_RECEIVE, #{status := ok}) ->
debug;
get_event_severity(?EV_CLIENT_RECEIVE, #{status := error}) ->
warning;
get_event_severity(?EV_SERVER_RECEIVE, _Meta) ->
debug;
get_event_severity(?EV_SERVER_SEND, #{status := ok}) ->
debug;
get_event_severity(?EV_SERVER_SEND, #{status := error}) ->
warning;
get_event_severity(?EV_INVOKE_SERVICE_HANDLER, _Meta) ->
info;
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := ok}) ->
info;
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := business}) ->
info;
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := system, stack := _Stack}) ->
error;
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := system, result := _Error}) ->
warning;
get_event_severity(?EV_CLIENT_CACHE_BEGIN, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_CACHE_END, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_CACHE_HIT, _Meta) ->
info;
get_event_severity(?EV_CLIENT_CACHE_MISS, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_CACHE_UPDATE, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_CACHE_RESULT, _Meta) ->
debug;
get_event_severity(?EV_INTERNAL_ERROR, #{error := _, class := _Class, stack := _Stack}) ->
error;
get_event_severity(?EV_INTERNAL_ERROR, #{error := _, reason := _Reason}) ->
warning;
get_event_severity(?EV_TRACE, _Meta) ->
debug;
get_event_severity(_UnknownEventType, _Meta) ->
warning.
%% @doc Resolves the severity of an RPC event using the built-in default
%% mapping only; equivalent to get_event_severity(Event, Meta, #{}).
get_event_severity(Event, Meta) ->
    get_event_severity(Event, Meta, #{}).
%% @doc Resolves the severity of an RPC event. Each clause maps an event (and,
%% where relevant, its status/class meta) to an event code — a list of atoms —
%% which is looked up in the 'events_severity' option map; the third argument
%% to map_severity/3 is the built-in default used when no override is present.
%% The codes and defaults here mirror the table documented in the README.
-spec get_event_severity(event(), event_meta(), options()) -> severity().
get_event_severity(?EV_CLIENT_BEGIN = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_END = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CALL_SERVICE = Event, _Meta, Opts) ->
    map_severity([Event], Opts, info);
get_event_severity(?EV_SERVICE_RESULT = Event, #{status := ok}, Opts) ->
    map_severity([Event], Opts, info);
%% An error with a stack is an unexpected failure; an error with only a
%% result is a returned (anticipated) error, hence the softer default.
get_event_severity(?EV_SERVICE_RESULT = Event, #{status := error, stack := _Stack}, Opts) ->
    map_severity([Event, error], Opts, error);
get_event_severity(?EV_SERVICE_RESULT = Event, #{status := error, result := _Error}, Opts) ->
    map_severity([Event, warning], Opts, warning);
get_event_severity(?EV_CLIENT_SEND = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_RESOLVE_BEGIN = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_RESOLVE_RESULT = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_RECEIVE = Event, #{status := ok}, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_RECEIVE = Event, #{status := error}, Opts) ->
    map_severity([Event, error], Opts, warning);
get_event_severity(?EV_SERVER_RECEIVE = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_SERVER_SEND = Event, #{status := ok}, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_SERVER_SEND = Event, #{status := error}, Opts) ->
    map_severity([Event, error], Opts, warning);
get_event_severity(?EV_INVOKE_SERVICE_HANDLER = Event, _Meta, Opts) ->
    map_severity([Event], Opts, info);
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := ok}, Opts) ->
    map_severity([Event], Opts, info);
%% Business errors are part of normal operation; system errors with a stack
%% are genuine failures; system errors with only a result are recoverable.
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := business}, Opts) ->
    map_severity([Event, error, business], Opts, info);
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := system, stack := _Stack}, Opts) ->
    map_severity([Event, error, system], Opts, error);
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := system, result := _Error}, Opts) ->
    map_severity([Event, warning], Opts, warning);
get_event_severity(?EV_CLIENT_CACHE_BEGIN = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_CACHE_END = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_CACHE_HIT = Event, _Meta, Opts) ->
    map_severity([Event], Opts, info);
get_event_severity(?EV_CLIENT_CACHE_MISS = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_CACHE_UPDATE = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_CACHE_RESULT = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
%% Internal errors carrying a stack are treated as system-level failures;
%% those carrying only a reason are treated as business-level warnings.
get_event_severity(?EV_INTERNAL_ERROR = Event, #{error := _, class := _Class, stack := _Stack}, Opts) ->
    map_severity([Event, system], Opts, error);
get_event_severity(?EV_INTERNAL_ERROR = Event, #{error := _, reason := _Reason}, Opts) ->
    map_severity([Event, business], Opts, warning);
get_event_severity(?EV_TRACE = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
%% Unknown event types are still mappable via options but default to warning.
get_event_severity(UnknownEventType, _Meta, Opts) ->
    map_severity([UnknownEventType], Opts, warning).
%%
%% Internal functions
%%
%% Looks up the severity configured for the given event code in the handler
%% options, falling back to Default when the 'events_severity' map is absent
%% or has no entry for this code.
-spec map_severity(nonempty_list(atom()), options(), severity()) -> severity().
map_severity(EventCode, Opts, Default) ->
    maps:get(EventCode, maps:get(events_severity, Opts, #{}), Default).
-spec format_service_request(map(), options()) -> msg().
format_service_request(#{service_schema := {Module, Service}, function := Function, args := Args}, Opts) ->
woody_event_formatter:format_call(Module, Service, Function, Args, get_formatter_opts(Opts)).
@ -1582,4 +1593,157 @@ exception_test_() ->
)
].
%% Verifies that passing an explicit severity mapping identical to the
%% documented defaults yields exactly the same severities as the built-in
%% per-clause fallbacks, for every known event/meta combination.
-spec event_severity_defaults_test_() -> _.
event_severity_defaults_test_() ->
    Opts = #{events_severity => default_severity_map()},
    Meta = #{role => server},
    [
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_BEGIN, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_END, Meta, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_CALL_SERVICE, Meta, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => ok}, Opts)),
        ?_assertEqual(error, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, stack => []}, Opts)),
        ?_assertEqual(warning, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, result => {}}, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_SEND, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RESOLVE_RESULT, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => ok}, Opts)),
        ?_assertEqual(warning, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => error}, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_SERVER_RECEIVE, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_SERVER_SEND, Meta#{status => ok}, Opts)),
        ?_assertEqual(warning, get_event_severity(?EV_SERVER_SEND, Meta#{status => error}, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_INVOKE_SERVICE_HANDLER, Meta, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => ok}, Opts)),
        ?_assertEqual(
            info, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => business}, Opts)
        ),
        ?_assertEqual(
            error,
            get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, stack => []}, Opts)
        ),
        ?_assertEqual(
            warning,
            get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, result => {}}, Opts)
        ),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_BEGIN, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_END, Meta, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_CLIENT_CACHE_HIT, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_MISS, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_UPDATE, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_RESULT, Meta, Opts)),
        ?_assertEqual(
            error, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, class => throw, stack => []}, Opts)
        ),
        ?_assertEqual(warning, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, reason => test}, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_TRACE, Meta, Opts))
    ].
%% Verifies that a user-supplied mapping overrides every default: with all
%% entries forced to 'info', every event/meta combination must resolve to
%% 'info' regardless of its built-in fallback severity.
-spec event_severity_all_info_test_() -> _.
event_severity_all_info_test_() ->
    ExpectAll = info,
    Opts = #{events_severity => replace_all_with(ExpectAll, default_severity_map())},
    Meta = #{role => server},
    [
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_BEGIN, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_END, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CALL_SERVICE, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => ok}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, stack => []}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, result => {}}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_SEND, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RESOLVE_RESULT, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => ok}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => error}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_RECEIVE, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_SEND, Meta#{status => ok}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_SEND, Meta#{status => error}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_INVOKE_SERVICE_HANDLER, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => ok}, Opts)),
        ?_assertEqual(
            ExpectAll, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => business}, Opts)
        ),
        ?_assertEqual(
            ExpectAll,
            get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, stack => []}, Opts)
        ),
        ?_assertEqual(
            ExpectAll,
            get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, result => {}}, Opts)
        ),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_BEGIN, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_END, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_HIT, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_MISS, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_UPDATE, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_RESULT, Meta, Opts)),
        ?_assertEqual(
            ExpectAll, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, class => throw, stack => []}, Opts)
        ),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, reason => test}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_TRACE, Meta, Opts))
    ].
%% Returns a copy of SeverityMap in which every value is replaced by
%% NewSeverity (the key set is left untouched).
replace_all_with(NewSeverity, SeverityMap) ->
    maps:from_list([{EventCode, NewSeverity} || EventCode <- maps:keys(SeverityMap)]).
%% Copy of the default severity mapping documented in the README; keys are
%% event codes, values are severities. Used by the tests above to check that
%% explicitly passed defaults reproduce the built-in fallback behaviour.
default_severity_map() ->
    #{
        %% Client lifecycle boundary events
        ['client begin'] => debug,
        ['client end'] => debug,
        %% Service call start (before the request is built)
        ['call service'] => info,
        %% Service call result on the client; errors with a stack are severe,
        %% returned errors are only warnings
        ['service result'] => info,
        ['service result', error] => error,
        ['service result', warning] => warning,
        %% Client transport events, including host resolution
        ['client send'] => debug,
        ['client resolve begin'] => debug,
        ['client resolve result'] => debug,
        ['client receive'] => debug,
        ['client receive', error] => warning,
        %% Server-side request handling events
        ['server receive'] => debug,
        ['server send'] => debug,
        ['server send', error] => warning,
        %% Service handler invocation and its possible outcomes
        ['invoke service handler'] => info,
        ['service handler result'] => info,
        ['service handler result', error, business] => info,
        ['service handler result', error, system] => error,
        ['service handler result', warning] => warning,
        %% Client caching wrapper events
        ['client cache begin'] => debug,
        ['client cache end'] => debug,
        ['client cache hit'] => info,
        ['client cache miss'] => debug,
        ['client cache update'] => debug,
        ['client cache result'] => debug,
        %% Internal errors by origin/context
        ['internal error', system] => error,
        ['internal error', business] => warning,
        %% Woody-level trace events
        ['trace event'] => debug
    }.
-endif.

View File

@ -22,7 +22,8 @@
handle_event(Event, RpcId, Meta, Opts) ->
EHOpts = get_event_handler_opts(Opts),
{Format, Msg} = woody_event_handler:format_event(Event, Meta, RpcId, EHOpts),
Function = get_logger_function(woody_event_handler:get_event_severity(Event, Meta)),
Severity = woody_event_handler:get_event_severity(Event, Meta, Opts),
Function = get_logger_function(Severity),
_ = error_logger:Function(Format, Msg),
ok.

View File

@ -0,0 +1,83 @@
-module(woody_hackney_prometheus_collector).

-export([setup/0]).

%%

-behaviour(prometheus_collector).

-export([collect_mf/2]).
-export([collect_metrics/2]).
-export([deregister_cleanup/1]).

-define(POOL_USAGE, woody_hackney_pool_usage).

%% Installation

%% @doc Registers this module as a custom prometheus collector that exposes
%% hackney's connection pool usage as a gauge metric.
-spec setup() -> ok.
setup() ->
    prometheus_registry:register_collector(registry(), ?MODULE).

%% Collector API

-type data() :: [data_item()].
-type data_item() :: {data_labels(), non_neg_integer()}.
-type data_labels() :: [{atom(), atom() | nonempty_string() | binary() | iolist()}].
-type pool_stats() :: [{atom(), any()}].

%% Builds one gauge metric family from the stats of every known hackney pool
%% and hands it to the registry-supplied callback.
-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok.
collect_mf(_Registry, Callback) ->
    Data = lists:flatten([make_pool_data(Pool, get_pool_stats(Pool)) || {Pool, _Pid} <- get_hackney_pools()]),
    Callback(create_gauge(Data)).

%% Renders the accumulated label/value pairs as prometheus gauge metrics.
-spec collect_metrics(prometheus_metric:name(), data()) ->
    prometheus_model:'Metric'() | [prometheus_model:'Metric'()].
collect_metrics(_Name, Data) ->
    lists:map(fun({Labels, Value}) -> prometheus_model_helpers:gauge_metric(Labels, Value) end, Data).

-spec deregister_cleanup(prometheus_registry:registry()) -> ok.
deregister_cleanup(_Registry) ->
    %% Nothing to clean up
    ok.

%% Private

registry() ->
    default.

get_pool_stats(PoolName) ->
    %% The 'hackney_pool' table can occasionally reference pools that are
    %% already dead; asking such a pool for stats exits with 'noproc', in
    %% which case we simply report no stats for it.
    try
        hackney_pool:get_stats(PoolName)
    catch
        exit:{noproc, _Reason} ->
            []
    end.

-spec create_gauge(data()) -> prometheus_model:'MetricFamily'().
create_gauge(Data) ->
    Help = "Connection pool status by used, free and queued connections count",
    prometheus_model_helpers:create_mf(?POOL_USAGE, Help, gauge, ?MODULE, Data).

%% Keeps only the three counters of interest from the pool's stats proplist
%% and turns each into a labelled data item.
-spec make_pool_data(atom(), pool_stats()) -> data().
make_pool_data(Pool, PoolStats) ->
    Relevant = maps:with([in_use_count, free_count, queue_count], maps:from_list(PoolStats)),
    lists:foldl(
        fun({Status, Value}, Acc) -> [make_data_item(Pool, Status, Value) | Acc] end,
        [],
        maps:to_list(Relevant)
    ).

make_data_item(Pool, Status, Value) ->
    {[{pool, Pool}, {status, Status}], Value}.

get_hackney_pools() ->
    %% Shamelessly pasted from
    %% https://github.com/soundtrackyourbrand/prometheus-hackney-collector
    %%
    %% Basically, we rely on not publicly exposed table containing started pools
    %% under hackney's hood.
    ets:tab2list(hackney_pool).

View File

@ -0,0 +1,83 @@
-module(woody_ranch_prometheus_collector).

-export([setup/0]).

%%

-behaviour(prometheus_collector).

-export([collect_mf/2]).
-export([collect_metrics/2]).
-export([deregister_cleanup/1]).

-define(CONNECTIONS, woody_ranch_listener_connections).

%% Installation

%% @doc Registers this module as a custom prometheus collector that exposes
%% the number of active connections per ranch listener.
-spec setup() -> ok.
setup() ->
    prometheus_registry:register_collector(registry(), ?MODULE).

%% Collector API

-type data() :: [data_item()].
-type data_item() :: {data_labels(), non_neg_integer()}.
-type data_labels() :: [{atom(), atom() | nonempty_string() | binary() | iolist()}].
-type maybe_woody_server_ref() :: {module(), ID :: atom()} | ranch:ref().
-type ranch_info() ::
    [{maybe_woody_server_ref(), [{atom(), any()}]}]
    | #{maybe_woody_server_ref() => #{atom() => any()}}.

%% Builds one gauge metric family from every listener that reports an
%% 'active_connections' count and hands it to the registry-supplied callback.
-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok.
collect_mf(_Registry, Callback) ->
    Data = lists:flatten([make_listener_data(Ref, Info) || {Ref, Info} <- get_listeners_info()]),
    Callback(create_gauge(Data)).

%% Renders the accumulated label/value pairs as prometheus gauge metrics.
-spec collect_metrics(prometheus_metric:name(), data()) ->
    prometheus_model:'Metric'() | [prometheus_model:'Metric'()].
collect_metrics(_Name, Data) ->
    lists:map(fun({Labels, Value}) -> prometheus_model_helpers:gauge_metric(Labels, Value) end, Data).

-spec deregister_cleanup(prometheus_registry:registry()) -> ok.
deregister_cleanup(_Registry) ->
    %% Nothing to clean up
    ok.

%% Private

registry() ->
    default.

-spec create_gauge(data()) -> prometheus_model:'MetricFamily'().
create_gauge(Data) ->
    Help = "Number of active connections",
    prometheus_model_helpers:create_mf(?CONNECTIONS, Help, gauge, ?MODULE, Data).

%% Listeners without an 'active_connections' entry yield no data items.
-spec make_listener_data(maybe_woody_server_ref(), #{atom() => any()}) -> data().
make_listener_data(Ref, #{active_connections := Count}) ->
    [{[{listener, Ref}], Count}];
make_listener_data(_Ref, _Info) ->
    [].

get_listeners_info() ->
    %% See https://ninenines.eu/docs/en/ranch/1.8/guide/listeners/#_obtaining_information_about_listeners
    %% The generator pattern below doubles as a filter: listeners whose info
    %% lacks 'active_connections' are skipped.
    [Listener || {_Ref, #{active_connections := _}} = Listener <- normalize_listeners_info(ranch:info())].

-dialyzer({no_match, normalize_listeners_info/1}).
-spec normalize_listeners_info(ranch_info()) -> [{maybe_woody_server_ref(), #{atom() => any()}}].
%% Ranch v2 reports a map of listener refs to info maps...
normalize_listeners_info(#{} = Info) ->
    maps:to_list(Info);
%% ...whereas ranch v1 reports a proplist with proplist values.
normalize_listeners_info(Info) ->
    [{Ref, maps:from_list(ListenerInfo)} || {Ref, ListenerInfo} <- Info].

View File

@ -5,6 +5,7 @@
-include_lib("hackney/include/hackney_lib.hrl").
-include_lib("opentelemetry_api/include/opentelemetry.hrl").
-include_lib("opentelemetry/include/otel_span.hrl").
-include_lib("prometheus/include/prometheus_model.hrl").
-include("woody_test_thrift.hrl").
-include("woody_defs.hrl").
@ -280,6 +281,8 @@ cross_test_groups() ->
}
}.
-define(RANCH_REF, woody_ct).
%%
%% starting/stopping
%%
@ -287,21 +290,12 @@ init_per_suite(C) ->
% dbg:tracer(), dbg:p(all, c),
% dbg:tpl({woody_joint_workers, '_', '_'}, x),
%%Apps = genlib_app:start_application_with(woody, [{trace_http_server, true}]),
application:set_env(hackney, mod_metrics, woody_client_metrics),
application:set_env(woody, woody_client_metrics_options, #{
metric_key_mapping => #{
[hackney, nb_requests] => [hackney, requests_in_process]
}
}),
application:set_env(how_are_you, metrics_handlers, [
{woody_api_hay, #{
interval => 1000
}}
]),
{ok, Apps} = application:ensure_all_started(woody),
{ok, HayApps} = application:ensure_all_started(how_are_you),
{ok, MetricsApps} = application:ensure_all_started(prometheus),
ok = woody_ranch_prometheus_collector:setup(),
ok = woody_hackney_prometheus_collector:setup(),
{ok, OtelApps} = setup_opentelemetry(),
[{apps, OtelApps ++ HayApps ++ Apps} | C].
[{apps, OtelApps ++ MetricsApps ++ Apps} | C].
end_per_suite(C) ->
% unset so it won't report metrics next suite
@ -334,21 +328,21 @@ init_per_testcase(find_multiple_pools_test, C) ->
{ok, Sup} = start_tc_sup(),
Pool1 = {swords, 15000, 100},
Pool2 = {shields, undefined, 50},
ok = start_woody_server_with_pools(woody_ct, Sup, ['Weapons', 'Powerups'], [Pool1, Pool2], C),
ok = start_woody_server_with_pools(?RANCH_REF, Sup, ['Weapons', 'Powerups'], [Pool1, Pool2], C),
[{sup, Sup} | C];
init_per_testcase(calls_with_cache, C) ->
{ok, Sup} = start_tc_sup(),
{ok, _} = start_caching_client(caching_client_ct, Sup),
{ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], C),
{ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], C),
[{sup, Sup} | C];
init_per_testcase(server_handled_client_timeout_test, C) ->
{ok, Sup} = start_tc_sup(),
{ok, _} = supervisor:start_child(Sup, server_timeout_event_handler:child_spec()),
{ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], server_timeout_event_handler, C),
{ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], server_timeout_event_handler, C),
[{sup, Sup} | C];
init_per_testcase(_, C) ->
{ok, Sup} = start_tc_sup(),
{ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], C),
{ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], C),
[{sup, Sup} | C].
init_per_group(woody_resolver, Config) ->
@ -381,7 +375,7 @@ start_error_server(TC, Sup) ->
Code = get_fail_code(TC),
Dispatch = cowboy_router:compile([{'_', [{?PATH_WEAPONS, ?MODULE, Code}]}]),
Server = ranch:child_spec(
woody_ct,
?RANCH_REF,
ranch_tcp,
[{ip, ?SERVER_IP}, {port, ?SERVER_PORT}],
cowboy_clear,
@ -826,7 +820,18 @@ call_fail_w_no_headers(Id, Class, _Code) ->
find_multiple_pools_test(_) ->
true = is_pid(hackney_pool:find_pool(swords)),
true = is_pid(hackney_pool:find_pool(shields)).
true = is_pid(hackney_pool:find_pool(shields)),
MF = smuggle_mf_return_value(fun(F) -> woody_hackney_prometheus_collector:collect_mf(default, F) end),
%% We can't know order and values from hackney's pool info, but we can
%% expect that values for those pools must be provided. However exact number
%% of values can vary based on order of testcase execution and other
%% side-effects.
ValuesCount = length([swords, shields]) * length([queue_count, in_use_count, free_count]),
?assertMatch(
#'MetricFamily'{type = 'GAUGE', name = <<"woody_hackney_pool_usage">>, metric = Values} when
length(Values) >= ValuesCount,
MF
).
call_thrift_multiplexed_test(_) ->
Client = make_thrift_multiplexed_client(
@ -1229,6 +1234,7 @@ gun_test_basic(Id, Gun, Expect, C) ->
catch
Class:Reason -> ok
end,
ok = assert_connections_metrics(),
check_msg(Gun, Context).
get_except({ok, _}) ->
@ -1350,3 +1356,45 @@ mk_otel_attributes(Attributes) ->
SpanAttributeCountLimit = otel_span_limits:attribute_count_limit(),
SpanAttributeValueLengthLimit = otel_span_limits:attribute_value_length_limit(),
otel_attributes:new(Attributes, SpanAttributeCountLimit, SpanAttributeValueLengthLimit).
%% Collects the ranch listener metric family and asserts that it reports at
%% least one active connection for a single listener.
assert_connections_metrics() ->
    MF = smuggle_mf_return_value(fun(F) -> woody_ranch_prometheus_collector:collect_mf(default, F) end),
    %% Sadly we can't match on listener ref since it's an iodata representation of tuple.
    ?assertMatch(
        #'MetricFamily'{
            name = <<"woody_ranch_listener_connections">>,
            type = 'GAUGE',
            metric = [
                #'Metric'{
                    label = [#'LabelPair'{name = <<"listener">>, value = _Ref}],
                    gauge = #'Gauge'{value = V}
                }
            ]
        } when V >= 1,
        MF
    ),
    %% NOTE See `prometheus_model_helpers:ensure_binary_or_string/1`
    %% The listener ref label is the printed form of the server ref tuple;
    %% both handler module names are accepted to stay implementation-agnostic.
    RefImpl1 = io_lib:format("~p", [{woody_server_thrift_http_handler, ?RANCH_REF}]),
    RefImpl2 = io_lib:format("~p", [{woody_server_thrift_v2, ?RANCH_REF}]),
    ?assertMatch(
        Expected when Expected =:= RefImpl1 orelse Expected =:= RefImpl2,
        get_metric_ref(hd(MF#'MetricFamily'.metric))
    ).
%% Extracts the value of the first <<"listener">> label from a metric's
%% label list, or 'undefined' when no such label is present.
get_metric_ref(#'Metric'{label = Labels}) ->
    case lists:keyfind(<<"listener">>, #'LabelPair'.name, Labels) of
        #'LabelPair'{value = Ref} ->
            Ref;
        false ->
            undefined
    end.
%% The prometheus collector callback contract requires collect_mf/2 to return
%% 'ok', so the built MetricFamily cannot be returned directly (and dialyzer
%% enforces that return type). Instead, stash it in the process dictionary
%% from inside the callback and read it back afterwards.
smuggle_mf_return_value(Fun) ->
    _ = Fun(fun(MF) ->
        _ = erlang:put(smuggle, MF),
        ok
    end),
    erlang:get(smuggle).