mirror of
https://github.com/valitydev/woody_erlang.git
synced 2024-11-06 02:15:19 +00:00
TD-788: Adds metrics collectors for hackney
and ranch
stats (#30)
* Adds ranch active_connections metric * Adds collectors test checks * Adds explicit 'return' clause to label helper func * Rename collector modules for consistency * Adds event handler option for RPC events severity mapping * Adds events severity mapping description
This commit is contained in:
parent
5d46291a6b
commit
2cbe19998c
107
README.md
107
README.md
@ -168,6 +168,80 @@ check_loan_limits(Limits, Context, N) ->
|
||||
### Woody Event Handler
|
||||
Интерфейс для получения и логирования событий RPC библиотеки. Также содержит вспомогательные функции для удобного форматирования событий. Пример реализации _event handler_'а - [woody_event_handler_default.erl](src/woody_event_handler_default.erl).
|
||||
|
||||
Через опции обработчика можно задать соответствие событий RPC уровням логирования:
|
||||
|
||||
``` erlang
|
||||
woody_event_handler_default:handle_event(Event, RpcId, Meta, #{
|
||||
formatter_opts => ...,
|
||||
events_severity => #{
|
||||
['call service'] => debug,
|
||||
...
|
||||
}
|
||||
}).
|
||||
```
|
||||
|
||||
Где эти параметры имеют значения по умолчанию в следующем виде:
|
||||
|
||||
``` erlang
|
||||
#{
|
||||
events_severity => #{
|
||||
%% Пограничные события работы клиента
|
||||
['client begin'] => debug,
|
||||
['client end'] => debug,
|
||||
|
||||
%% Начало вызова сервиса, перед формированием запроса
|
||||
['call service'] => info,
|
||||
|
||||
%% Результат вызова сервиса на клиенте
|
||||
['service result'] => info,
|
||||
['service result', error] => error,
|
||||
%% Событие состоявшегося вызова с возвращённой ошибкой в качестве
|
||||
%% результата
|
||||
['service result', warning] => warning,
|
||||
|
||||
%% Клиентские события, включая обнаружения хоста
|
||||
['client send'] => debug,
|
||||
['client resolve begin'] => debug,
|
||||
['client resolve result'] => debug,
|
||||
['client receive'] => debug,
|
||||
['client receive', error] => warning,
|
||||
|
||||
%% Непосредственные события обслуживания запроса сервером
|
||||
['server receive'] => debug,
|
||||
['server send'] => debug,
|
||||
['server send', error] => warning,
|
||||
|
||||
%% Начало обслуживания вызова функции сервиса
|
||||
['invoke service handler'] => info,
|
||||
|
||||
%% Завершение обслуживание вызова с разным итогом
|
||||
['service handler result'] => info,
|
||||
['service handler result', error, business] => info,
|
||||
['service handler result', error, system] => error,
|
||||
%% Обслуживание вызова завершилось поправимой ошибкой;
|
||||
%% по крайней мере она не в рамках бизнес-логики но и не системное
|
||||
%% исключение
|
||||
['service handler result', warning] => warning,
|
||||
|
||||
%% События кеширующей обертки клиента
|
||||
['client cache begin'] => debug,
|
||||
['client cache end'] => debug,
|
||||
['client cache hit'] => info,
|
||||
['client cache miss'] => debug,
|
||||
['client cache update'] => debug,
|
||||
['client cache result'] => debug,
|
||||
|
||||
%% Внутренние ошибки с разным контекстом/происхождением
|
||||
['internal error', system] => error,
|
||||
['internal error', business] => warning,
|
||||
%% Событие трассировки на уровне woody, см. далее
|
||||
|
||||
['trace event'] => debug
|
||||
}
|
||||
}.
|
||||
```
|
||||
|
||||
|
||||
### Tracing
|
||||
Можно динамически включать и выключать трассировку http запросов и ответов.
|
||||
|
||||
@ -176,3 +250,36 @@ check_loan_limits(Limits, Context, N) ->
|
||||
application:set_env(woody, trace_http_server, true).
|
||||
application:unset_env(woody, trace_http_server).
|
||||
```
|
||||
|
||||
### Prometheus metrics
|
||||
|
||||
Чтобы осуществлять экспорт метрик следует добавить [соответствующий хэндлер](https://github.com/deadtrickster/prometheus-cowboy#exporting-metrics-with-handlers) для cowboy-сервера.
|
||||
|
||||
``` erlang
|
||||
{deps, [
|
||||
...
|
||||
{prometheus_cowboy, "0.1.8"}
|
||||
]}
|
||||
```
|
||||
|
||||
Для сбора серверных метрик необходимо на старте приложения объявить их
|
||||
|
||||
``` erlang
|
||||
ok = woody_ranch_prometheus_collector:setup()
|
||||
```
|
||||
|
||||
Если дополнительно интересуют все метрики ковбоя то можно добавить [реализацию обсервера из библиотеки](https://github.com/deadtrickster/prometheus-cowboy?tab=readme-ov-file#exporting-cowboy2-metrics).
|
||||
|
||||
Для сбора клиентских метрик необходимо на старте приложения объявить их
|
||||
|
||||
``` erlang
|
||||
ok = woody_hackney_prometheus_collector:setup()
|
||||
```
|
||||
|
||||
Это будет публиковать целочисленные значения шкалы (gauge) `woody_hackney_pool_usage` с метками: `pool` — название пула, `status` — тип соответствующего значения:
|
||||
|
||||
- `in_use_count` -- используемые соединения в пуле;
|
||||
- `free_count` -- свободное количество в пуле;
|
||||
- `queue_count` -- очередь за свободными соединениями
|
||||
|
||||
**TODO** Возможно, стоит рассмотреть публикацию метрик по количеству исполняемых запросов в общем, с разбивкой по хосту, и по количеству новых и переиспользуемых соединений в каждом из пулов. [Хакни это предоставляет](https://github.com/benoitc/hackney/tree/1.18.0#metrics).
|
||||
|
@ -33,6 +33,7 @@
|
||||
{thrift, {git, "https://github.com/valitydev/thrift_erlang.git", {branch, "master"}}},
|
||||
{snowflake, {git, "https://github.com/valitydev/snowflake.git", {branch, "master"}}},
|
||||
{genlib, {git, "https://github.com/valitydev/genlib.git", {branch, "master"}}},
|
||||
{prometheus, "4.8.1"},
|
||||
{opentelemetry_api, "1.2.1"}
|
||||
]}.
|
||||
|
||||
@ -81,9 +82,6 @@
|
||||
{deps, [
|
||||
{cth_readable, "1.4.9"},
|
||||
{proper, "1.4.0"},
|
||||
{woody_api_hay,
|
||||
{git, "https://github.com/valitydev/woody_api_hay.git",
|
||||
{ref, "4c39134cddaa9bf6fb8db18e7030ae64f1efb3a9"}}},
|
||||
{damsel,
|
||||
{git, "https://github.com/valitydev/damsel.git", {ref, "3fa6f31db54b2ae781b27898ab4daf56bb36eb36"}}},
|
||||
{mg_proto,
|
||||
@ -92,7 +90,7 @@
|
||||
{opentelemetry, "1.3.0"}
|
||||
]},
|
||||
{dialyzer, [
|
||||
{plt_extra_apps, [how_are_you, eunit, proper, common_test, cth_readable, opentelemetry]}
|
||||
{plt_extra_apps, [prometheus, eunit, proper, common_test, cth_readable, opentelemetry]}
|
||||
]}
|
||||
]}
|
||||
]}.
|
||||
|
@ -17,6 +17,8 @@
|
||||
{pkg,<<"opentelemetry_semantic_conventions">>,<<"0.2.0">>},
|
||||
1},
|
||||
{<<"parse_trans">>,{pkg,<<"parse_trans">>,<<"3.3.1">>},1},
|
||||
{<<"prometheus">>,{pkg,<<"prometheus">>,<<"4.8.1">>},0},
|
||||
{<<"quantile_estimator">>,{pkg,<<"quantile_estimator">>,<<"0.2.1">>},1},
|
||||
{<<"ranch">>,{pkg,<<"ranch">>,<<"1.8.0">>},1},
|
||||
{<<"snowflake">>,
|
||||
{git,"https://github.com/valitydev/snowflake.git",
|
||||
@ -42,6 +44,8 @@
|
||||
{<<"opentelemetry_api">>, <<"7B69ED4F40025C005DE0B74FCE8C0549625D59CB4DF12D15C32FE6DC5076FF42">>},
|
||||
{<<"opentelemetry_semantic_conventions">>, <<"B67FE459C2938FCAB341CB0951C44860C62347C005ACE1B50F8402576F241435">>},
|
||||
{<<"parse_trans">>, <<"16328AB840CC09919BD10DAB29E431DA3AF9E9E7E7E6F0089DD5A2D2820011D8">>},
|
||||
{<<"prometheus">>, <<"FA76B152555273739C14B06F09F485CF6D5D301FE4E9D31B7FF803D26025D7A0">>},
|
||||
{<<"quantile_estimator">>, <<"EF50A361F11B5F26B5F16D0696E46A9E4661756492C981F7B2229EF42FF1CD15">>},
|
||||
{<<"ranch">>, <<"8C7A100A139FD57F17327B6413E4167AC559FBC04CA7448E9BE9057311597A1D">>},
|
||||
{<<"ssl_verify_fun">>, <<"CF344F5692C82D2CD7554F5EC8FD961548D4FD09E7D22F5B62482E5AEAEBD4B0">>},
|
||||
{<<"unicode_util_compat">>, <<"BC84380C9AB48177092F43AC89E4DFA2C6D62B40B8BD132B1059ECC7232F9A78">>}]},
|
||||
@ -58,6 +62,8 @@
|
||||
{<<"opentelemetry_api">>, <<"6D7A27B7CAD2AD69A09CABF6670514CAFCEC717C8441BEB5C96322BAC3D05350">>},
|
||||
{<<"opentelemetry_semantic_conventions">>, <<"D61FA1F5639EE8668D74B527E6806E0503EFC55A42DB7B5F39939D84C07D6895">>},
|
||||
{<<"parse_trans">>, <<"07CD9577885F56362D414E8C4C4E6BDF10D43A8767ABB92D24CBE8B24C54888B">>},
|
||||
{<<"prometheus">>, <<"6EDFBE928D271C7F657A6F2C46258738086584BD6CAE4A000B8B9A6009BA23A5">>},
|
||||
{<<"quantile_estimator">>, <<"282A8A323CA2A845C9E6F787D166348F776C1D4A41EDE63046D72D422E3DA946">>},
|
||||
{<<"ranch">>, <<"49FBCFD3682FAB1F5D109351B61257676DA1A2FDBE295904176D5E521A2DDFE5">>},
|
||||
{<<"ssl_verify_fun">>, <<"BDB0D2471F453C88FF3908E7686F86F9BE327D065CC1EC16FA4540197EA04680">>},
|
||||
{<<"unicode_util_compat">>, <<"25EEE6D67DF61960CF6A794239566599B09E17E668D3700247BC498638152521">>}]}
|
||||
|
@ -4,6 +4,7 @@
|
||||
-export([handle_event/3, handle_event/4]).
|
||||
|
||||
-export([get_event_severity/2]).
|
||||
-export([get_event_severity/3]).
|
||||
-export([format_event/3, format_event/4]).
|
||||
-export([format_meta/2, format_meta/3]).
|
||||
-export([format_rpc_id/1]).
|
||||
@ -221,7 +222,8 @@
|
||||
-export_type([meta/0]).
|
||||
|
||||
-type options() :: #{
|
||||
formatter_opts => woody_event_formatter:opts()
|
||||
formatter_opts => woody_event_formatter:opts(),
|
||||
events_severity => #{nonempty_list(atom()) => severity()}
|
||||
}.
|
||||
|
||||
-export_type([options/0]).
|
||||
@ -379,68 +381,77 @@ format_event(UnknownEventType, Meta, _Opts) ->
|
||||
{" unknown woody event type '~s' with meta ~p", [UnknownEventType, Meta]}.
|
||||
|
||||
-spec get_event_severity(event(), event_meta()) -> severity().
|
||||
get_event_severity(?EV_CLIENT_BEGIN, _Meta) ->
|
||||
debug;
|
||||
get_event_severity(?EV_CLIENT_END, _Meta) ->
|
||||
debug;
|
||||
get_event_severity(?EV_CALL_SERVICE, _Meta) ->
|
||||
info;
|
||||
get_event_severity(?EV_SERVICE_RESULT, #{status := ok}) ->
|
||||
info;
|
||||
get_event_severity(?EV_SERVICE_RESULT, #{status := error, stack := _Stack}) ->
|
||||
error;
|
||||
get_event_severity(?EV_SERVICE_RESULT, #{status := error, result := _Error}) ->
|
||||
warning;
|
||||
get_event_severity(?EV_CLIENT_SEND, _Meta) ->
|
||||
debug;
|
||||
get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, _Meta) ->
|
||||
debug;
|
||||
get_event_severity(?EV_CLIENT_RESOLVE_RESULT, _Meta) ->
|
||||
debug;
|
||||
get_event_severity(?EV_CLIENT_RECEIVE, #{status := ok}) ->
|
||||
debug;
|
||||
get_event_severity(?EV_CLIENT_RECEIVE, #{status := error}) ->
|
||||
warning;
|
||||
get_event_severity(?EV_SERVER_RECEIVE, _Meta) ->
|
||||
debug;
|
||||
get_event_severity(?EV_SERVER_SEND, #{status := ok}) ->
|
||||
debug;
|
||||
get_event_severity(?EV_SERVER_SEND, #{status := error}) ->
|
||||
warning;
|
||||
get_event_severity(?EV_INVOKE_SERVICE_HANDLER, _Meta) ->
|
||||
info;
|
||||
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := ok}) ->
|
||||
info;
|
||||
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := business}) ->
|
||||
info;
|
||||
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := system, stack := _Stack}) ->
|
||||
error;
|
||||
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := system, result := _Error}) ->
|
||||
warning;
|
||||
get_event_severity(?EV_CLIENT_CACHE_BEGIN, _Meta) ->
|
||||
debug;
|
||||
get_event_severity(?EV_CLIENT_CACHE_END, _Meta) ->
|
||||
debug;
|
||||
get_event_severity(?EV_CLIENT_CACHE_HIT, _Meta) ->
|
||||
info;
|
||||
get_event_severity(?EV_CLIENT_CACHE_MISS, _Meta) ->
|
||||
debug;
|
||||
get_event_severity(?EV_CLIENT_CACHE_UPDATE, _Meta) ->
|
||||
debug;
|
||||
get_event_severity(?EV_CLIENT_CACHE_RESULT, _Meta) ->
|
||||
debug;
|
||||
get_event_severity(?EV_INTERNAL_ERROR, #{error := _, class := _Class, stack := _Stack}) ->
|
||||
error;
|
||||
get_event_severity(?EV_INTERNAL_ERROR, #{error := _, reason := _Reason}) ->
|
||||
warning;
|
||||
get_event_severity(?EV_TRACE, _Meta) ->
|
||||
debug;
|
||||
get_event_severity(_UnknownEventType, _Meta) ->
|
||||
warning.
|
||||
%% @doc Severity lookup without handler options: equivalent to calling
%% get_event_severity/3 with empty options, so the built-in defaults apply.
get_event_severity(Event, EventMeta) ->
    get_event_severity(Event, EventMeta, #{}).
|
||||
|
||||
-spec get_event_severity(event(), event_meta(), options()) -> severity().
|
||||
get_event_severity(?EV_CLIENT_BEGIN = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_CLIENT_END = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_CALL_SERVICE = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, info);
|
||||
get_event_severity(?EV_SERVICE_RESULT = Event, #{status := ok}, Opts) ->
|
||||
map_severity([Event], Opts, info);
|
||||
get_event_severity(?EV_SERVICE_RESULT = Event, #{status := error, stack := _Stack}, Opts) ->
|
||||
map_severity([Event, error], Opts, error);
|
||||
get_event_severity(?EV_SERVICE_RESULT = Event, #{status := error, result := _Error}, Opts) ->
|
||||
map_severity([Event, warning], Opts, warning);
|
||||
get_event_severity(?EV_CLIENT_SEND = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_CLIENT_RESOLVE_BEGIN = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_CLIENT_RESOLVE_RESULT = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_CLIENT_RECEIVE = Event, #{status := ok}, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_CLIENT_RECEIVE = Event, #{status := error}, Opts) ->
|
||||
map_severity([Event, error], Opts, warning);
|
||||
get_event_severity(?EV_SERVER_RECEIVE = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_SERVER_SEND = Event, #{status := ok}, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_SERVER_SEND = Event, #{status := error}, Opts) ->
|
||||
map_severity([Event, error], Opts, warning);
|
||||
get_event_severity(?EV_INVOKE_SERVICE_HANDLER = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, info);
|
||||
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := ok}, Opts) ->
|
||||
map_severity([Event], Opts, info);
|
||||
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := business}, Opts) ->
|
||||
map_severity([Event, error, business], Opts, info);
|
||||
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := system, stack := _Stack}, Opts) ->
|
||||
map_severity([Event, error, system], Opts, error);
|
||||
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := system, result := _Error}, Opts) ->
|
||||
map_severity([Event, warning], Opts, warning);
|
||||
get_event_severity(?EV_CLIENT_CACHE_BEGIN = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_CLIENT_CACHE_END = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_CLIENT_CACHE_HIT = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, info);
|
||||
get_event_severity(?EV_CLIENT_CACHE_MISS = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_CLIENT_CACHE_UPDATE = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_CLIENT_CACHE_RESULT = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(?EV_INTERNAL_ERROR = Event, #{error := _, class := _Class, stack := _Stack}, Opts) ->
|
||||
map_severity([Event, system], Opts, error);
|
||||
get_event_severity(?EV_INTERNAL_ERROR = Event, #{error := _, reason := _Reason}, Opts) ->
|
||||
map_severity([Event, business], Opts, warning);
|
||||
get_event_severity(?EV_TRACE = Event, _Meta, Opts) ->
|
||||
map_severity([Event], Opts, debug);
|
||||
get_event_severity(UnknownEventType, _Meta, Opts) ->
|
||||
map_severity([UnknownEventType], Opts, warning).
|
||||
|
||||
%%
|
||||
%% Internal functions
|
||||
%%
|
||||
%% Resolves the severity for the given event code: handler options may
%% override the built-in default via their 'events_severity' map; any
%% code absent from the mapping falls back to Default.
-spec map_severity(nonempty_list(atom()), options(), severity()) -> severity().
map_severity(Code, Opts, Default) ->
    maps:get(Code, maps:get(events_severity, Opts, #{}), Default).
|
||||
|
||||
-spec format_service_request(map(), options()) -> msg().
|
||||
format_service_request(#{service_schema := {Module, Service}, function := Function, args := Args}, Opts) ->
|
||||
woody_event_formatter:format_call(Module, Service, Function, Args, get_formatter_opts(Opts)).
|
||||
@ -1582,4 +1593,157 @@ exception_test_() ->
|
||||
)
|
||||
].
|
||||
|
||||
-spec event_severity_defaults_test_() -> _.
|
||||
event_severity_defaults_test_() ->
|
||||
Opts = #{events_severity => default_severity_map()},
|
||||
Meta = #{role => server},
|
||||
[
|
||||
?_assertEqual(debug, get_event_severity(?EV_CLIENT_BEGIN, Meta, Opts)),
|
||||
?_assertEqual(debug, get_event_severity(?EV_CLIENT_END, Meta, Opts)),
|
||||
|
||||
?_assertEqual(info, get_event_severity(?EV_CALL_SERVICE, Meta, Opts)),
|
||||
|
||||
?_assertEqual(info, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => ok}, Opts)),
|
||||
?_assertEqual(error, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, stack => []}, Opts)),
|
||||
?_assertEqual(warning, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, result => {}}, Opts)),
|
||||
|
||||
?_assertEqual(debug, get_event_severity(?EV_CLIENT_SEND, Meta, Opts)),
|
||||
?_assertEqual(debug, get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, Meta, Opts)),
|
||||
?_assertEqual(debug, get_event_severity(?EV_CLIENT_RESOLVE_RESULT, Meta, Opts)),
|
||||
?_assertEqual(debug, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => ok}, Opts)),
|
||||
?_assertEqual(warning, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => error}, Opts)),
|
||||
|
||||
?_assertEqual(debug, get_event_severity(?EV_SERVER_RECEIVE, Meta, Opts)),
|
||||
?_assertEqual(debug, get_event_severity(?EV_SERVER_SEND, Meta#{status => ok}, Opts)),
|
||||
?_assertEqual(warning, get_event_severity(?EV_SERVER_SEND, Meta#{status => error}, Opts)),
|
||||
|
||||
?_assertEqual(info, get_event_severity(?EV_INVOKE_SERVICE_HANDLER, Meta, Opts)),
|
||||
|
||||
?_assertEqual(info, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => ok}, Opts)),
|
||||
?_assertEqual(
|
||||
info, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => business}, Opts)
|
||||
),
|
||||
?_assertEqual(
|
||||
error,
|
||||
get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, stack => []}, Opts)
|
||||
),
|
||||
?_assertEqual(
|
||||
warning,
|
||||
get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, result => {}}, Opts)
|
||||
),
|
||||
|
||||
?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_BEGIN, Meta, Opts)),
|
||||
?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_END, Meta, Opts)),
|
||||
?_assertEqual(info, get_event_severity(?EV_CLIENT_CACHE_HIT, Meta, Opts)),
|
||||
?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_MISS, Meta, Opts)),
|
||||
?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_UPDATE, Meta, Opts)),
|
||||
?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_RESULT, Meta, Opts)),
|
||||
|
||||
?_assertEqual(
|
||||
error, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, class => throw, stack => []}, Opts)
|
||||
),
|
||||
?_assertEqual(warning, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, reason => test}, Opts)),
|
||||
|
||||
?_assertEqual(debug, get_event_severity(?EV_TRACE, Meta, Opts))
|
||||
].
|
||||
|
||||
-spec event_severity_all_info_test_() -> _.
|
||||
event_severity_all_info_test_() ->
|
||||
ExpectAll = info,
|
||||
Opts = #{events_severity => replace_all_with(ExpectAll, default_severity_map())},
|
||||
Meta = #{role => server},
|
||||
[
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_BEGIN, Meta, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_END, Meta, Opts)),
|
||||
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CALL_SERVICE, Meta, Opts)),
|
||||
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => ok}, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, stack => []}, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, result => {}}, Opts)),
|
||||
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_SEND, Meta, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, Meta, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RESOLVE_RESULT, Meta, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => ok}, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => error}, Opts)),
|
||||
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_RECEIVE, Meta, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_SEND, Meta#{status => ok}, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_SEND, Meta#{status => error}, Opts)),
|
||||
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_INVOKE_SERVICE_HANDLER, Meta, Opts)),
|
||||
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => ok}, Opts)),
|
||||
?_assertEqual(
|
||||
ExpectAll, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => business}, Opts)
|
||||
),
|
||||
?_assertEqual(
|
||||
ExpectAll,
|
||||
get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, stack => []}, Opts)
|
||||
),
|
||||
?_assertEqual(
|
||||
ExpectAll,
|
||||
get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, result => {}}, Opts)
|
||||
),
|
||||
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_BEGIN, Meta, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_END, Meta, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_HIT, Meta, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_MISS, Meta, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_UPDATE, Meta, Opts)),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_RESULT, Meta, Opts)),
|
||||
|
||||
?_assertEqual(
|
||||
ExpectAll, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, class => throw, stack => []}, Opts)
|
||||
),
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, reason => test}, Opts)),
|
||||
|
||||
?_assertEqual(ExpectAll, get_event_severity(?EV_TRACE, Meta, Opts))
|
||||
].
|
||||
|
||||
%% Test helper: returns a copy of SeverityMap in which every event code
%% maps to NewSeverity.
replace_all_with(NewSeverity, SeverityMap) ->
    maps:from_list([{Code, NewSeverity} || Code <- maps:keys(SeverityMap)]).
|
||||
|
||||
%% Test fixture: the default event-code-to-severity mapping, mirroring
%% the built-in defaults of get_event_severity/3. Keys are event codes:
%% the event name atom optionally followed by qualifiers.
default_severity_map() ->
    #{
        %% Client lifecycle boundaries
        ['client begin'] => debug,
        ['client end'] => debug,

        %% Service call initiation on the client
        ['call service'] => info,

        %% Service call result on the client
        ['service result'] => info,
        ['service result', error] => error,
        ['service result', warning] => warning,

        %% Client-side transport events, including host resolution
        ['client send'] => debug,
        ['client resolve begin'] => debug,
        ['client resolve result'] => debug,
        ['client receive'] => debug,
        ['client receive', error] => warning,

        %% Server-side request handling
        ['server receive'] => debug,
        ['server send'] => debug,
        ['server send', error] => warning,

        %% Service handler invocation and its outcomes
        ['invoke service handler'] => info,
        ['service handler result'] => info,
        ['service handler result', error, business] => info,
        ['service handler result', error, system] => error,
        ['service handler result', warning] => warning,

        %% Caching client wrapper events
        ['client cache begin'] => debug,
        ['client cache end'] => debug,
        ['client cache hit'] => info,
        ['client cache miss'] => debug,
        ['client cache update'] => debug,
        ['client cache result'] => debug,

        %% Internal errors by origin
        ['internal error', system] => error,
        ['internal error', business] => warning,

        %% Woody-level trace events
        ['trace event'] => debug
    }.
|
||||
|
||||
-endif.
|
||||
|
@ -22,7 +22,8 @@
|
||||
handle_event(Event, RpcId, Meta, Opts) ->
|
||||
EHOpts = get_event_handler_opts(Opts),
|
||||
{Format, Msg} = woody_event_handler:format_event(Event, Meta, RpcId, EHOpts),
|
||||
Function = get_logger_function(woody_event_handler:get_event_severity(Event, Meta)),
|
||||
Severity = woody_event_handler:get_event_severity(Event, Meta, Opts),
|
||||
Function = get_logger_function(Severity),
|
||||
_ = error_logger:Function(Format, Msg),
|
||||
ok.
|
||||
|
||||
|
83
src/woody_hackney_prometheus_collector.erl
Normal file
83
src/woody_hackney_prometheus_collector.erl
Normal file
@ -0,0 +1,83 @@
|
||||
-module(woody_hackney_prometheus_collector).

%% Custom prometheus collector publishing hackney connection pool usage
%% (in-use, free and queued connection counts) as a single gauge metric
%% family, labeled by pool name and counter status.

-export([setup/0]).

%%

-behaviour(prometheus_collector).

-export([collect_mf/2]).
-export([collect_metrics/2]).
-export([deregister_cleanup/1]).

-define(POOL_USAGE, woody_hackney_pool_usage).

%% Installation

%% @doc Registers this collector for hackney's pool metrics with the
%% default prometheus registry.
-spec setup() -> ok.
setup() ->
    prometheus_registry:register_collector(registry(), ?MODULE).

%% Collector API

-type data() :: [data_item()].
-type data_item() :: {data_labels(), non_neg_integer()}.
-type data_labels() :: [{atom(), atom() | nonempty_string() | binary() | iolist()}].
-type pool_stats() :: [{atom(), any()}].

%% @doc Gathers usage data for every known hackney pool and hands the
%% resulting gauge metric family to the registry callback.
-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok.
collect_mf(_Registry, Callback) ->
    Data = lists:flatten([
        make_pool_data(Pool, get_pool_stats(Pool))
     || {Pool, _Pid} <- get_hackney_pools()
    ]),
    Callback(create_gauge(Data)).

-spec collect_metrics(prometheus_metric:name(), data()) ->
    prometheus_model:'Metric'() | [prometheus_model:'Metric'()].
collect_metrics(_Name, Data) ->
    [prometheus_model_helpers:gauge_metric(Labels, Value) || {Labels, Value} <- Data].

-spec deregister_cleanup(prometheus_registry:registry()) -> ok.
deregister_cleanup(_Registry) ->
    %% This collector holds no state, so there is nothing to clean up.
    ok.

%% Private

registry() ->
    default.

get_pool_stats(PoolName) ->
    %% NOTE The 'hackney_pool' table may still reference pools whose
    %% processes have already died; treat those as having no stats.
    try
        hackney_pool:get_stats(PoolName)
    catch
        exit:{noproc, _Reason} -> []
    end.

-spec create_gauge(data()) -> prometheus_model:'MetricFamily'().
create_gauge(Data) ->
    Help = "Connection pool status by used, free and queued connections count",
    prometheus_model_helpers:create_mf(?POOL_USAGE, Help, gauge, ?MODULE, Data).

%% Keep only the three counters we publish; anything else hackney
%% happens to report is ignored.
-spec make_pool_data(atom(), pool_stats()) -> data().
make_pool_data(PoolName, RawStats) ->
    Interesting = maps:with([in_use_count, free_count, queue_count], maps:from_list(RawStats)),
    maps:fold(
        fun(Status, Value, Acc) -> [make_data_item(PoolName, Status, Value) | Acc] end,
        [],
        Interesting
    ).

make_data_item(PoolName, Status, Value) ->
    {[{pool, PoolName}, {status, Status}], Value}.

get_hackney_pools() ->
    %% Adapted from
    %% https://github.com/soundtrackyourbrand/prometheus-hackney-collector
    %%
    %% Basically, we rely on a not publicly exposed ETS table containing
    %% the pools started under hackney's hood.
    ets:tab2list(hackney_pool).
83
src/woody_ranch_prometheus_collector.erl
Normal file
83
src/woody_ranch_prometheus_collector.erl
Normal file
@ -0,0 +1,83 @@
|
||||
-module(woody_ranch_prometheus_collector).

%% Custom prometheus collector publishing the number of active
%% connections per ranch listener as a gauge metric family.

-export([setup/0]).

%%

-behaviour(prometheus_collector).

-export([collect_mf/2]).
-export([collect_metrics/2]).
-export([deregister_cleanup/1]).

-define(CONNECTIONS, woody_ranch_listener_connections).

%% Installation

%% @doc Registers this collector with the default prometheus registry.
-spec setup() -> ok.
setup() ->
    prometheus_registry:register_collector(registry(), ?MODULE).

%% Collector API

-type data() :: [data_item()].
-type data_item() :: {data_labels(), non_neg_integer()}.
-type data_labels() :: [{atom(), atom() | nonempty_string() | binary() | iolist()}].
-type maybe_woody_server_ref() :: {module(), ID :: atom()} | ranch:ref().
-type ranch_info() ::
    [{maybe_woody_server_ref(), [{atom(), any()}]}]
    | #{maybe_woody_server_ref() => #{atom() => any()}}.

%% @doc Builds one gauge sample per listener and hands the resulting
%% metric family to the registry callback.
-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok.
collect_mf(_Registry, Callback) ->
    Data = lists:flatten([
        make_listener_data(Ref, Info)
     || {Ref, Info} <- get_listeners_info()
    ]),
    Callback(create_gauge(Data)).

-spec collect_metrics(prometheus_metric:name(), data()) ->
    prometheus_model:'Metric'() | [prometheus_model:'Metric'()].
collect_metrics(_Name, Data) ->
    [prometheus_model_helpers:gauge_metric(Labels, Value) || {Labels, Value} <- Data].

-spec deregister_cleanup(prometheus_registry:registry()) -> ok.
deregister_cleanup(_Registry) ->
    %% This collector holds no state, so there is nothing to clean up.
    ok.

%% Private

registry() ->
    default.

-spec create_gauge(data()) -> prometheus_model:'MetricFamily'().
create_gauge(Data) ->
    Help = "Number of active connections",
    prometheus_model_helpers:create_mf(?CONNECTIONS, Help, gauge, ?MODULE, Data).

-spec make_listener_data(maybe_woody_server_ref(), #{atom() => any()}) -> data().
make_listener_data(Ref, #{active_connections := Count}) ->
    [{[{listener, Ref}], Count}];
make_listener_data(_Ref, _Info) ->
    [].

get_listeners_info() ->
    %% See https://ninenines.eu/docs/en/ranch/1.8/guide/listeners/#_obtaining_information_about_listeners
    HasConnCount = fun
        ({_Ref, #{active_connections := _}}) -> true;
        (_Else) -> false
    end,
    lists:filter(HasConnCount, normalize_listeners_info(ranch:info())).

-dialyzer({no_match, normalize_listeners_info/1}).
-spec normalize_listeners_info(ranch_info()) -> [{maybe_woody_server_ref(), #{atom() => any()}}].
%% Ranch v2 returns a map of listener refs to info maps.
normalize_listeners_info(#{} = Info) ->
    maps:to_list(Info);
%% Ranch v1 returns a proplist with proplist values.
normalize_listeners_info(Info) ->
    [{Ref, maps:from_list(ListenerInfo)} || {Ref, ListenerInfo} <- Info].
|
@ -5,6 +5,7 @@
|
||||
-include_lib("hackney/include/hackney_lib.hrl").
|
||||
-include_lib("opentelemetry_api/include/opentelemetry.hrl").
|
||||
-include_lib("opentelemetry/include/otel_span.hrl").
|
||||
-include_lib("prometheus/include/prometheus_model.hrl").
|
||||
|
||||
-include("woody_test_thrift.hrl").
|
||||
-include("woody_defs.hrl").
|
||||
@ -280,6 +281,8 @@ cross_test_groups() ->
|
||||
}
|
||||
}.
|
||||
|
||||
-define(RANCH_REF, woody_ct).
|
||||
|
||||
%%
|
||||
%% starting/stopping
|
||||
%%
|
||||
@ -287,21 +290,12 @@ init_per_suite(C) ->
|
||||
% dbg:tracer(), dbg:p(all, c),
|
||||
% dbg:tpl({woody_joint_workers, '_', '_'}, x),
|
||||
%%Apps = genlib_app:start_application_with(woody, [{trace_http_server, true}]),
|
||||
application:set_env(hackney, mod_metrics, woody_client_metrics),
|
||||
application:set_env(woody, woody_client_metrics_options, #{
|
||||
metric_key_mapping => #{
|
||||
[hackney, nb_requests] => [hackney, requests_in_process]
|
||||
}
|
||||
}),
|
||||
application:set_env(how_are_you, metrics_handlers, [
|
||||
{woody_api_hay, #{
|
||||
interval => 1000
|
||||
}}
|
||||
]),
|
||||
{ok, Apps} = application:ensure_all_started(woody),
|
||||
{ok, HayApps} = application:ensure_all_started(how_are_you),
|
||||
{ok, MetricsApps} = application:ensure_all_started(prometheus),
|
||||
ok = woody_ranch_prometheus_collector:setup(),
|
||||
ok = woody_hackney_prometheus_collector:setup(),
|
||||
{ok, OtelApps} = setup_opentelemetry(),
|
||||
[{apps, OtelApps ++ HayApps ++ Apps} | C].
|
||||
[{apps, OtelApps ++ MetricsApps ++ Apps} | C].
|
||||
|
||||
end_per_suite(C) ->
|
||||
% unset so it won't report metrics next suite
|
||||
@ -334,21 +328,21 @@ init_per_testcase(find_multiple_pools_test, C) ->
|
||||
{ok, Sup} = start_tc_sup(),
|
||||
Pool1 = {swords, 15000, 100},
|
||||
Pool2 = {shields, undefined, 50},
|
||||
ok = start_woody_server_with_pools(woody_ct, Sup, ['Weapons', 'Powerups'], [Pool1, Pool2], C),
|
||||
ok = start_woody_server_with_pools(?RANCH_REF, Sup, ['Weapons', 'Powerups'], [Pool1, Pool2], C),
|
||||
[{sup, Sup} | C];
|
||||
init_per_testcase(calls_with_cache, C) ->
|
||||
{ok, Sup} = start_tc_sup(),
|
||||
{ok, _} = start_caching_client(caching_client_ct, Sup),
|
||||
{ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], C),
|
||||
{ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], C),
|
||||
[{sup, Sup} | C];
|
||||
init_per_testcase(server_handled_client_timeout_test, C) ->
|
||||
{ok, Sup} = start_tc_sup(),
|
||||
{ok, _} = supervisor:start_child(Sup, server_timeout_event_handler:child_spec()),
|
||||
{ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], server_timeout_event_handler, C),
|
||||
{ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], server_timeout_event_handler, C),
|
||||
[{sup, Sup} | C];
|
||||
init_per_testcase(_, C) ->
|
||||
{ok, Sup} = start_tc_sup(),
|
||||
{ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], C),
|
||||
{ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], C),
|
||||
[{sup, Sup} | C].
|
||||
|
||||
init_per_group(woody_resolver, Config) ->
|
||||
@ -381,7 +375,7 @@ start_error_server(TC, Sup) ->
|
||||
Code = get_fail_code(TC),
|
||||
Dispatch = cowboy_router:compile([{'_', [{?PATH_WEAPONS, ?MODULE, Code}]}]),
|
||||
Server = ranch:child_spec(
|
||||
woody_ct,
|
||||
?RANCH_REF,
|
||||
ranch_tcp,
|
||||
[{ip, ?SERVER_IP}, {port, ?SERVER_PORT}],
|
||||
cowboy_clear,
|
||||
@ -826,7 +820,18 @@ call_fail_w_no_headers(Id, Class, _Code) ->
|
||||
|
||||
%% Checks that both configured hackney pools are discoverable and that the
%% hackney collector reports gauge values for them.
find_multiple_pools_test(_) ->
    true = is_pid(hackney_pool:find_pool(swords)),
    true = is_pid(hackney_pool:find_pool(shields)),
    PoolUsageMF = smuggle_mf_return_value(fun(Callback) ->
        woody_hackney_prometheus_collector:collect_mf(default, Callback)
    end),
    %% We can't know order and values from hackney's pool info, but we can
    %% expect that values for those pools must be provided. However exact number
    %% of values can vary based on order of testcase execution and other
    %% side-effects.
    ExpectedAtLeast = length([swords, shields]) * length([queue_count, in_use_count, free_count]),
    ?assertMatch(
        #'MetricFamily'{type = 'GAUGE', name = <<"woody_hackney_pool_usage">>, metric = Values} when
            length(Values) >= ExpectedAtLeast,
        PoolUsageMF
    ).
|
||||
|
||||
call_thrift_multiplexed_test(_) ->
|
||||
Client = make_thrift_multiplexed_client(
|
||||
@ -1229,6 +1234,7 @@ gun_test_basic(Id, Gun, Expect, C) ->
|
||||
catch
|
||||
Class:Reason -> ok
|
||||
end,
|
||||
ok = assert_connections_metrics(),
|
||||
check_msg(Gun, Context).
|
||||
|
||||
get_except({ok, _}) ->
|
||||
@ -1350,3 +1356,45 @@ mk_otel_attributes(Attributes) ->
|
||||
SpanAttributeCountLimit = otel_span_limits:attribute_count_limit(),
|
||||
SpanAttributeValueLengthLimit = otel_span_limits:attribute_value_length_limit(),
|
||||
otel_attributes:new(Attributes, SpanAttributeCountLimit, SpanAttributeValueLengthLimit).
|
||||
|
||||
%% Asserts that the ranch collector reports at least one active connection
%% for the test listener, and that the listener label carries the expected ref.
assert_connections_metrics() ->
    ConnectionsMF = smuggle_mf_return_value(fun(Callback) ->
        woody_ranch_prometheus_collector:collect_mf(default, Callback)
    end),
    %% Sadly we can't match on listener ref since its an iodata representation of tuple.
    ?assertMatch(
        #'MetricFamily'{
            name = <<"woody_ranch_listener_connections">>,
            type = 'GAUGE',
            metric = [
                #'Metric'{
                    label = [#'LabelPair'{name = <<"listener">>, value = _Ref}],
                    gauge = #'Gauge'{value = ConnCount}
                }
            ]
        } when ConnCount >= 1,
        ConnectionsMF
    ),
    %% NOTE See `prometheus_model_helpers:ensure_binary_or_string/1`
    LegacyRef = io_lib:format("~p", [{woody_server_thrift_http_handler, ?RANCH_REF}]),
    CurrentRef = io_lib:format("~p", [{woody_server_thrift_v2, ?RANCH_REF}]),
    ?assertMatch(
        Actual when Actual =:= LegacyRef orelse Actual =:= CurrentRef,
        get_metric_ref(hd(ConnectionsMF#'MetricFamily'.metric))
    ).
|
||||
|
||||
%% Extracts the value of the <<"listener">> label from a prometheus metric,
%% or 'undefined' when no such label is present.
get_metric_ref(#'Metric'{label = LabelPairs}) ->
    get_metric_ref_(LabelPairs).
|
||||
|
||||
%% Walks a list of label pairs, returning the first <<"listener">> label value.
%% Clause order differs from textbook base-case-first style, but the patterns
%% are disjoint, so the dispatch is identical.
get_metric_ref_([#'LabelPair'{name = <<"listener">>, value = ListenerRef} | _Rest]) ->
    ListenerRef;
get_metric_ref_([_OtherLabel | Rest]) ->
    get_metric_ref_(Rest);
get_metric_ref_([]) ->
    undefined.
|
||||
|
||||
%% The collector callback contract requires an 'ok' return, which dialyzer
%% enforces; we therefore cannot return the metric family directly from the
%% callback. Stash it in the process dictionary instead and read it back.
smuggle_mf_return_value(Collect) ->
    Stash = fun(MetricFamily) ->
        _ = erlang:put(smuggle, MetricFamily),
        ok
    end,
    _ = Collect(Stash),
    erlang:get(smuggle).
|
||||
|
Loading…
Reference in New Issue
Block a user