diff --git a/README.md b/README.md index 318a622..2c7f8b4 100644 --- a/README.md +++ b/README.md @@ -168,6 +168,80 @@ check_loan_limits(Limits, Context, N) -> ### Woody Event Handler Интерфейс для получения и логирования событий RPC библиотеки. Также содержит вспомогательные функции для удобного форматирования событий. Пример реализации _event handler_'а - [woody_event_handler_default.erl](src/woody_event_handler_default.erl). +Через опции обработчика можно сообщить параметры соответствия событий RPC для уровня логирования: + +``` erlang +woody_event_handler_default:handle_event(Event, RpcId, Meta, #{ +    formatter_opts => ..., +    events_severity => #{ +        ['call service'] => debug, +        ... +    } +}). +``` + +Где эти параметры имеют значения по умолчанию в следующем виде: + +``` erlang +#{ +    events_severity => #{ +        %% Пограничные события работы клиента +        ['client begin'] => debug, +        ['client end'] => debug, + +        %% Начало вызова сервиса, перед формированием запроса +        ['call service'] => info, + +        %% Результат вызова сервиса на клиенте +        ['service result'] => info, +        ['service result', error] => error, +        %% Событие состоявшегося вызова с возвращённой ошибкой в качестве +        %% результата +        ['service result', warning] => warning, + +        %% Клиентские события, включая обнаружение хоста +        ['client send'] => debug, +        ['client resolve begin'] => debug, +        ['client resolve result'] => debug, +        ['client receive'] => debug, +        ['client receive', error] => warning, + +        %% Непосредственные события обслуживания запроса сервером +        ['server receive'] => debug, +        ['server send'] => debug, +        ['server send', error] => warning, + +        %% Начало обслуживания вызова функции сервиса +        ['invoke service handler'] => info, + +        %% Завершение обслуживания вызова с разным итогом +        ['service handler result'] => info, +        ['service handler result', error, business] => info, +        ['service handler result', error, system] => error, +        %% Обслуживание вызова завершилось поправимой ошибкой; +        %% по крайней мере она не в
рамках бизнес-логики, но и не системное +        %% исключение +        ['service handler result', warning] => warning, + +        %% События кеширующей обертки клиента +        ['client cache begin'] => debug, +        ['client cache end'] => debug, +        ['client cache hit'] => info, +        ['client cache miss'] => debug, +        ['client cache update'] => debug, +        ['client cache result'] => debug, + +        %% Внутренние ошибки с разным контекстом/происхождением +        ['internal error', system] => error, +        ['internal error', business] => warning, +        %% Событие трассировки на уровне woody, см. далее + +        ['trace event'] => debug +    } +}. +``` + + ### Tracing Можно динамически включать и выключать трассировку http запросов и ответов. @@ -176,3 +250,36 @@ check_loan_limits(Limits, Context, N) -> application:set_env(woody, trace_http_server, true). application:unset_env(woody, trace_http_server). ``` + +### Prometheus metrics + +Чтобы осуществлять экспорт метрик, следует добавить [соответствующий хэндлер](https://github.com/deadtrickster/prometheus-cowboy#exporting-metrics-with-handlers) для cowboy-сервера. + +``` erlang +{deps, [ +    ... +    {prometheus_cowboy, "0.1.8"} +]} +``` + +Для сбора серверных метрик необходимо на старте приложения объявить их + +``` erlang +ok = woody_ranch_prometheus_collector:setup() +``` + +Если дополнительно интересуют все метрики ковбоя, то можно добавить [реализацию обсервера из библиотеки](https://github.com/deadtrickster/prometheus-cowboy?tab=readme-ov-file#exporting-cowboy2-metrics).
+ +Для сбора клиентских метрик необходимо на старте приложения объявить их + +``` erlang +ok = woody_hackney_prometheus_collector:setup() +``` + +Это будет публиковать целочисленные значения в шкале 'woody_hackney_pool_usage' с метками `pool` в качестве названия пула и `status` в качестве параметра соответствующего значения: + +- `in_use_count` -- используемые соединения в пуле; +- `free_count` -- свободное количество в пуле; +- `queue_count` -- очередь за свободными соединениями + +**TODO** Возможно, стоит рассмотреть публикацию метрик по количеству исполняемых запросов в общем, с разбивкой по хосту и количеству новых и переиспользуемых соединений в каждом из пулов. [Хакни это предоставляет](https://github.com/benoitc/hackney/tree/1.18.0#metrics). diff --git a/rebar.config b/rebar.config index 749cd4b..5ae0b31 100644 --- a/rebar.config +++ b/rebar.config @@ -33,6 +33,7 @@ {thrift, {git, "https://github.com/valitydev/thrift_erlang.git", {branch, "master"}}}, {snowflake, {git, "https://github.com/valitydev/snowflake.git", {branch, "master"}}}, {genlib, {git, "https://github.com/valitydev/genlib.git", {branch, "master"}}}, + {prometheus, "4.8.1"}, {opentelemetry_api, "1.2.1"} ]}. @@ -81,9 +82,6 @@ {deps, [ {cth_readable, "1.4.9"}, {proper, "1.4.0"}, - {woody_api_hay, - {git, "https://github.com/valitydev/woody_api_hay.git", - {ref, "4c39134cddaa9bf6fb8db18e7030ae64f1efb3a9"}}}, {damsel, {git, "https://github.com/valitydev/damsel.git", {ref, "3fa6f31db54b2ae781b27898ab4daf56bb36eb36"}}}, {mg_proto, @@ -92,7 +90,7 @@ {opentelemetry, "1.3.0"} ]}, {dialyzer, [ - {plt_extra_apps, [how_are_you, eunit, proper, common_test, cth_readable, opentelemetry]} + {plt_extra_apps, [prometheus, eunit, proper, common_test, cth_readable, opentelemetry]} ]} ]} ]}.
diff --git a/rebar.lock b/rebar.lock index 103c490..191d79b 100644 --- a/rebar.lock +++ b/rebar.lock @@ -17,6 +17,8 @@ {pkg,<<"opentelemetry_semantic_conventions">>,<<"0.2.0">>}, 1}, {<<"parse_trans">>,{pkg,<<"parse_trans">>,<<"3.3.1">>},1}, + {<<"prometheus">>,{pkg,<<"prometheus">>,<<"4.8.1">>},0}, + {<<"quantile_estimator">>,{pkg,<<"quantile_estimator">>,<<"0.2.1">>},1}, {<<"ranch">>,{pkg,<<"ranch">>,<<"1.8.0">>},1}, {<<"snowflake">>, {git,"https://github.com/valitydev/snowflake.git", @@ -42,6 +44,8 @@ {<<"opentelemetry_api">>, <<"7B69ED4F40025C005DE0B74FCE8C0549625D59CB4DF12D15C32FE6DC5076FF42">>}, {<<"opentelemetry_semantic_conventions">>, <<"B67FE459C2938FCAB341CB0951C44860C62347C005ACE1B50F8402576F241435">>}, {<<"parse_trans">>, <<"16328AB840CC09919BD10DAB29E431DA3AF9E9E7E7E6F0089DD5A2D2820011D8">>}, + {<<"prometheus">>, <<"FA76B152555273739C14B06F09F485CF6D5D301FE4E9D31B7FF803D26025D7A0">>}, + {<<"quantile_estimator">>, <<"EF50A361F11B5F26B5F16D0696E46A9E4661756492C981F7B2229EF42FF1CD15">>}, {<<"ranch">>, <<"8C7A100A139FD57F17327B6413E4167AC559FBC04CA7448E9BE9057311597A1D">>}, {<<"ssl_verify_fun">>, <<"CF344F5692C82D2CD7554F5EC8FD961548D4FD09E7D22F5B62482E5AEAEBD4B0">>}, {<<"unicode_util_compat">>, <<"BC84380C9AB48177092F43AC89E4DFA2C6D62B40B8BD132B1059ECC7232F9A78">>}]}, @@ -58,6 +62,8 @@ {<<"opentelemetry_api">>, <<"6D7A27B7CAD2AD69A09CABF6670514CAFCEC717C8441BEB5C96322BAC3D05350">>}, {<<"opentelemetry_semantic_conventions">>, <<"D61FA1F5639EE8668D74B527E6806E0503EFC55A42DB7B5F39939D84C07D6895">>}, {<<"parse_trans">>, <<"07CD9577885F56362D414E8C4C4E6BDF10D43A8767ABB92D24CBE8B24C54888B">>}, + {<<"prometheus">>, <<"6EDFBE928D271C7F657A6F2C46258738086584BD6CAE4A000B8B9A6009BA23A5">>}, + {<<"quantile_estimator">>, <<"282A8A323CA2A845C9E6F787D166348F776C1D4A41EDE63046D72D422E3DA946">>}, {<<"ranch">>, <<"49FBCFD3682FAB1F5D109351B61257676DA1A2FDBE295904176D5E521A2DDFE5">>}, {<<"ssl_verify_fun">>, 
<<"BDB0D2471F453C88FF3908E7686F86F9BE327D065CC1EC16FA4540197EA04680">>}, {<<"unicode_util_compat">>, <<"25EEE6D67DF61960CF6A794239566599B09E17E668D3700247BC498638152521">>}]} diff --git a/src/woody_event_handler.erl b/src/woody_event_handler.erl index f819d41..493eea4 100644 --- a/src/woody_event_handler.erl +++ b/src/woody_event_handler.erl @@ -4,6 +4,7 @@ -export([handle_event/3, handle_event/4]). -export([get_event_severity/2]). +-export([get_event_severity/3]). -export([format_event/3, format_event/4]). -export([format_meta/2, format_meta/3]). -export([format_rpc_id/1]). @@ -221,7 +222,8 @@ -export_type([meta/0]). -type options() :: #{ - formatter_opts => woody_event_formatter:opts() + formatter_opts => woody_event_formatter:opts(), + events_severity => #{nonempty_list(atom()) => severity()} }. -export_type([options/0]). @@ -379,68 +381,77 @@ format_event(UnknownEventType, Meta, _Opts) -> {" unknown woody event type '~s' with meta ~p", [UnknownEventType, Meta]}. -spec get_event_severity(event(), event_meta()) -> severity(). 
-get_event_severity(?EV_CLIENT_BEGIN, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_END, _Meta) -> - debug; -get_event_severity(?EV_CALL_SERVICE, _Meta) -> - info; -get_event_severity(?EV_SERVICE_RESULT, #{status := ok}) -> - info; -get_event_severity(?EV_SERVICE_RESULT, #{status := error, stack := _Stack}) -> - error; -get_event_severity(?EV_SERVICE_RESULT, #{status := error, result := _Error}) -> - warning; -get_event_severity(?EV_CLIENT_SEND, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_RESOLVE_RESULT, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_RECEIVE, #{status := ok}) -> - debug; -get_event_severity(?EV_CLIENT_RECEIVE, #{status := error}) -> - warning; -get_event_severity(?EV_SERVER_RECEIVE, _Meta) -> - debug; -get_event_severity(?EV_SERVER_SEND, #{status := ok}) -> - debug; -get_event_severity(?EV_SERVER_SEND, #{status := error}) -> - warning; -get_event_severity(?EV_INVOKE_SERVICE_HANDLER, _Meta) -> - info; -get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := ok}) -> - info; -get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := business}) -> - info; -get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := system, stack := _Stack}) -> - error; -get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := system, result := _Error}) -> - warning; -get_event_severity(?EV_CLIENT_CACHE_BEGIN, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_CACHE_END, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_CACHE_HIT, _Meta) -> - info; -get_event_severity(?EV_CLIENT_CACHE_MISS, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_CACHE_UPDATE, _Meta) -> - debug; -get_event_severity(?EV_CLIENT_CACHE_RESULT, _Meta) -> - debug; -get_event_severity(?EV_INTERNAL_ERROR, #{error := _, class := _Class, stack := _Stack}) -> - error; -get_event_severity(?EV_INTERNAL_ERROR, #{error := _, reason := _Reason}) -> - warning; 
-get_event_severity(?EV_TRACE, _Meta) -> - debug; -get_event_severity(_UnknownEventType, _Meta) -> - warning. +get_event_severity(Event, Meta) -> + get_event_severity(Event, Meta, #{}). + +-spec get_event_severity(event(), event_meta(), options()) -> severity(). +get_event_severity(?EV_CLIENT_BEGIN = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_END = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CALL_SERVICE = Event, _Meta, Opts) -> + map_severity([Event], Opts, info); +get_event_severity(?EV_SERVICE_RESULT = Event, #{status := ok}, Opts) -> + map_severity([Event], Opts, info); +get_event_severity(?EV_SERVICE_RESULT = Event, #{status := error, stack := _Stack}, Opts) -> + map_severity([Event, error], Opts, error); +get_event_severity(?EV_SERVICE_RESULT = Event, #{status := error, result := _Error}, Opts) -> + map_severity([Event, warning], Opts, warning); +get_event_severity(?EV_CLIENT_SEND = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_RESOLVE_BEGIN = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_RESOLVE_RESULT = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_RECEIVE = Event, #{status := ok}, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_RECEIVE = Event, #{status := error}, Opts) -> + map_severity([Event, error], Opts, warning); +get_event_severity(?EV_SERVER_RECEIVE = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_SERVER_SEND = Event, #{status := ok}, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_SERVER_SEND = Event, #{status := error}, Opts) -> + map_severity([Event, error], Opts, warning); +get_event_severity(?EV_INVOKE_SERVICE_HANDLER = Event, _Meta, Opts) -> + map_severity([Event], Opts, info); +get_event_severity(?EV_SERVICE_HANDLER_RESULT = 
Event, #{status := ok}, Opts) -> + map_severity([Event], Opts, info); +get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := business}, Opts) -> + map_severity([Event, error, business], Opts, info); +get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := system, stack := _Stack}, Opts) -> + map_severity([Event, error, system], Opts, error); +get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := system, result := _Error}, Opts) -> + map_severity([Event, warning], Opts, warning); +get_event_severity(?EV_CLIENT_CACHE_BEGIN = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_CACHE_END = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_CACHE_HIT = Event, _Meta, Opts) -> + map_severity([Event], Opts, info); +get_event_severity(?EV_CLIENT_CACHE_MISS = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_CACHE_UPDATE = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_CLIENT_CACHE_RESULT = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(?EV_INTERNAL_ERROR = Event, #{error := _, class := _Class, stack := _Stack}, Opts) -> + map_severity([Event, system], Opts, error); +get_event_severity(?EV_INTERNAL_ERROR = Event, #{error := _, reason := _Reason}, Opts) -> + map_severity([Event, business], Opts, warning); +get_event_severity(?EV_TRACE = Event, _Meta, Opts) -> + map_severity([Event], Opts, debug); +get_event_severity(UnknownEventType, _Meta, Opts) -> + map_severity([UnknownEventType], Opts, warning). %% %% Internal functions %% +-spec map_severity(nonempty_list(atom()), options(), severity()) -> severity(). +map_severity(Code, Opts, Default) -> + EventsSeverity = maps:get(events_severity, Opts, #{}), + maps:get(Code, EventsSeverity, Default). + -spec format_service_request(map(), options()) -> msg(). 
format_service_request(#{service_schema := {Module, Service}, function := Function, args := Args}, Opts) -> woody_event_formatter:format_call(Module, Service, Function, Args, get_formatter_opts(Opts)). @@ -1582,4 +1593,157 @@ exception_test_() -> ) ]. +-spec event_severity_defaults_test_() -> _. +event_severity_defaults_test_() -> + Opts = #{events_severity => default_severity_map()}, + Meta = #{role => server}, + [ + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_BEGIN, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_END, Meta, Opts)), + + ?_assertEqual(info, get_event_severity(?EV_CALL_SERVICE, Meta, Opts)), + + ?_assertEqual(info, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => ok}, Opts)), + ?_assertEqual(error, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, stack => []}, Opts)), + ?_assertEqual(warning, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, result => {}}, Opts)), + + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_SEND, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RESOLVE_RESULT, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => ok}, Opts)), + ?_assertEqual(warning, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => error}, Opts)), + + ?_assertEqual(debug, get_event_severity(?EV_SERVER_RECEIVE, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_SERVER_SEND, Meta#{status => ok}, Opts)), + ?_assertEqual(warning, get_event_severity(?EV_SERVER_SEND, Meta#{status => error}, Opts)), + + ?_assertEqual(info, get_event_severity(?EV_INVOKE_SERVICE_HANDLER, Meta, Opts)), + + ?_assertEqual(info, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => ok}, Opts)), + ?_assertEqual( + info, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => business}, Opts) + ), + ?_assertEqual( + error, + 
get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, stack => []}, Opts) + ), + ?_assertEqual( + warning, + get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, result => {}}, Opts) + ), + + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_BEGIN, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_END, Meta, Opts)), + ?_assertEqual(info, get_event_severity(?EV_CLIENT_CACHE_HIT, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_MISS, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_UPDATE, Meta, Opts)), + ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_RESULT, Meta, Opts)), + + ?_assertEqual( + error, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, class => throw, stack => []}, Opts) + ), + ?_assertEqual(warning, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, reason => test}, Opts)), + + ?_assertEqual(debug, get_event_severity(?EV_TRACE, Meta, Opts)) + ]. + +-spec event_severity_all_info_test_() -> _. 
+event_severity_all_info_test_() -> + ExpectAll = info, + Opts = #{events_severity => replace_all_with(ExpectAll, default_severity_map())}, + Meta = #{role => server}, + [ + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_BEGIN, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_END, Meta, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_CALL_SERVICE, Meta, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => ok}, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, stack => []}, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, result => {}}, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_SEND, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RESOLVE_RESULT, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => ok}, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => error}, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_RECEIVE, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_SEND, Meta#{status => ok}, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_SEND, Meta#{status => error}, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_INVOKE_SERVICE_HANDLER, Meta, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => ok}, Opts)), + ?_assertEqual( + ExpectAll, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => business}, Opts) + ), + ?_assertEqual( + ExpectAll, + get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, stack => []}, Opts) + ), + ?_assertEqual( + ExpectAll, + get_event_severity(?EV_SERVICE_HANDLER_RESULT, 
Meta#{status => error, class => system, result => {}}, Opts) + ), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_BEGIN, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_END, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_HIT, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_MISS, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_UPDATE, Meta, Opts)), + ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_RESULT, Meta, Opts)), + + ?_assertEqual( + ExpectAll, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, class => throw, stack => []}, Opts) + ), + ?_assertEqual(ExpectAll, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, reason => test}, Opts)), + + ?_assertEqual(ExpectAll, get_event_severity(?EV_TRACE, Meta, Opts)) + ]. + +replace_all_with(NewSeverity, SeverityMap) -> + maps:map(fun(_K, _V) -> NewSeverity end, SeverityMap). + +default_severity_map() -> + #{ + ['client begin'] => debug, + ['client end'] => debug, + + ['call service'] => info, + + ['service result'] => info, + ['service result', error] => error, + ['service result', warning] => warning, + + ['client send'] => debug, + ['client resolve begin'] => debug, + ['client resolve result'] => debug, + ['client receive'] => debug, + ['client receive', error] => warning, + + ['server receive'] => debug, + ['server send'] => debug, + ['server send', error] => warning, + + ['invoke service handler'] => info, + + ['service handler result'] => info, + ['service handler result', error, business] => info, + ['service handler result', error, system] => error, + ['service handler result', warning] => warning, + + ['client cache begin'] => debug, + ['client cache end'] => debug, + ['client cache hit'] => info, + ['client cache miss'] => debug, + ['client cache update'] => debug, + ['client cache result'] => debug, + + ['internal error', system] => error, + 
['internal error', business] => warning, + + ['trace event'] => debug + }. + -endif. diff --git a/src/woody_event_handler_default.erl b/src/woody_event_handler_default.erl index e7efdfc..9dcb6dd 100644 --- a/src/woody_event_handler_default.erl +++ b/src/woody_event_handler_default.erl @@ -22,7 +22,8 @@ handle_event(Event, RpcId, Meta, Opts) -> EHOpts = get_event_handler_opts(Opts), {Format, Msg} = woody_event_handler:format_event(Event, Meta, RpcId, EHOpts), - Function = get_logger_function(woody_event_handler:get_event_severity(Event, Meta)), + Severity = woody_event_handler:get_event_severity(Event, Meta, Opts), + Function = get_logger_function(Severity), _ = error_logger:Function(Format, Msg), ok. diff --git a/src/woody_hackney_prometheus_collector.erl b/src/woody_hackney_prometheus_collector.erl new file mode 100644 index 0000000..a2b26a0 --- /dev/null +++ b/src/woody_hackney_prometheus_collector.erl @@ -0,0 +1,83 @@ +-module(woody_hackney_prometheus_collector). + +-export([setup/0]). + +%% + +-behaviour(prometheus_collector). + +-export([collect_mf/2]). +-export([collect_metrics/2]). +-export([deregister_cleanup/1]). + +-define(POOL_USAGE, woody_hackney_pool_usage). + +%% Installation + +%% @doc Installs custom collector for hackney's pool metrics +-spec setup() -> ok. +setup() -> + prometheus_registry:register_collector(registry(), ?MODULE). + +%% Collector API + +-type data() :: [data_item()]. +-type data_item() :: {data_labels(), non_neg_integer()}. +-type data_labels() :: [{atom(), atom() | nonempty_string() | binary() | iolist()}]. +-type pool_stats() :: [{atom(), any()}]. + +-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok. +collect_mf(_Registry, Callback) -> + F = fun({Pool, _Pid}) -> + make_pool_data(Pool, get_pool_stats(Pool)) + end, + Data = lists:flatten(lists:map(F, get_hackney_pools())), + Callback(create_gauge(Data)). 
+ +-spec collect_metrics(prometheus_metric:name(), data()) -> + prometheus_model:'Metric'() | [prometheus_model:'Metric'()]. +collect_metrics(_Name, Data) -> + [prometheus_model_helpers:gauge_metric(Labels, Value) || {Labels, Value} <- Data]. + +-spec deregister_cleanup(prometheus_registry:registry()) -> ok. +deregister_cleanup(_Registry) -> + %% Nothing to clean up + ok. + +%% Private + +registry() -> + default. + +get_pool_stats(Pool) -> + %% NOTE It looks like 'hackney_pool' table data can occasionally contain + %% dead pools + try + hackney_pool:get_stats(Pool) + catch + %% "Time to make the chimi-fuckin'-changas." + exit:{noproc, _Reason} -> + [] + end. + +-spec create_gauge(data()) -> prometheus_model:'MetricFamily'(). +create_gauge(Data) -> + Help = "Connection pool status by used, free and queued connections count", + prometheus_model_helpers:create_mf(?POOL_USAGE, Help, gauge, ?MODULE, Data). + +-spec make_pool_data(atom(), pool_stats()) -> data(). +make_pool_data(Pool, Stats0) -> + Stats1 = maps:with([in_use_count, free_count, queue_count], maps:from_list(Stats0)), + lists:foldl(fun({S, V}, Data) -> [make_data_item(Pool, S, V) | Data] end, [], maps:to_list(Stats1)). + +make_data_item(Pool, Status, Value) -> + Labels = [{pool, Pool}, {status, Status}], + {Labels, Value}. + +get_hackney_pools() -> + %% Shamelessly pasted from + %% https://github.com/soundtrackyourbrand/prometheus-hackney-collector + %% + %% Basically, we rely on not publicly exposed table containing started pools + %% under hackney's hood. + ets:tab2list(hackney_pool). diff --git a/src/woody_ranch_prometheus_collector.erl b/src/woody_ranch_prometheus_collector.erl new file mode 100644 index 0000000..38e7733 --- /dev/null +++ b/src/woody_ranch_prometheus_collector.erl @@ -0,0 +1,83 @@ +-module(woody_ranch_prometheus_collector). + +-export([setup/0]). + +%% + +-behaviour(prometheus_collector). + +-export([collect_mf/2]). +-export([collect_metrics/2]). +-export([deregister_cleanup/1]). 
+ +-define(CONNECTIONS, woody_ranch_listener_connections). + +%% Installation + +-spec setup() -> ok. +setup() -> + prometheus_registry:register_collector(registry(), ?MODULE). + +%% Collector API + +-type data() :: [data_item()]. +-type data_item() :: {data_labels(), non_neg_integer()}. +-type data_labels() :: [{atom(), atom() | nonempty_string() | binary() | iolist()}]. +-type maybe_woody_server_ref() :: {module(), ID :: atom()} | ranch:ref(). +-type ranch_info() :: + [{maybe_woody_server_ref(), [{atom(), any()}]}] + | #{maybe_woody_server_ref() => #{atom() => any()}}. + +-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok. +collect_mf(_Registry, Callback) -> + F = fun({ListenerRef, ListenerInfo}) -> + make_listener_data(ListenerRef, ListenerInfo) + end, + Data = lists:flatten(lists:map(F, get_listeners_info())), + Callback(create_gauge(Data)). + +-spec collect_metrics(prometheus_metric:name(), data()) -> + prometheus_model:'Metric'() | [prometheus_model:'Metric'()]. +collect_metrics(_Name, Data) -> + [prometheus_model_helpers:gauge_metric(Labels, Value) || {Labels, Value} <- Data]. + +-spec deregister_cleanup(prometheus_registry:registry()) -> ok. +deregister_cleanup(_Registry) -> + %% Nothing to clean up + ok. + +%% Private + +registry() -> + default. + +-spec create_gauge(data()) -> prometheus_model:'MetricFamily'(). +create_gauge(Data) -> + Help = "Number of active connections", + prometheus_model_helpers:create_mf(?CONNECTIONS, Help, gauge, ?MODULE, Data). + +-spec make_listener_data(maybe_woody_server_ref(), #{atom() => any()}) -> data(). +make_listener_data(Ref, #{active_connections := V}) -> + Labels = [{listener, Ref}], + [{Labels, V}]; +make_listener_data(_Ref, _Info) -> + []. 
+ +get_listeners_info() -> + lists:filter( + fun + ({_Ref, #{active_connections := _}}) -> true; + (_Else) -> false + end, + %% See https://ninenines.eu/docs/en/ranch/1.8/guide/listeners/#_obtaining_information_about_listeners + normalize_listeners_info(ranch:info()) + ). + +-dialyzer({no_match, normalize_listeners_info/1}). +-spec normalize_listeners_info(ranch_info()) -> [{maybe_woody_server_ref(), #{atom() => any()}}]. +%% Ranch v2 +normalize_listeners_info(#{} = Info) -> + maps:to_list(Info); +%% Ranch v1 +normalize_listeners_info(Info) -> + lists:map(fun({Ref, ListenerInfo}) -> {Ref, maps:from_list(ListenerInfo)} end, Info). diff --git a/test/woody_tests_SUITE.erl b/test/woody_tests_SUITE.erl index 6ffd9a7..79d1a5b 100644 --- a/test/woody_tests_SUITE.erl +++ b/test/woody_tests_SUITE.erl @@ -5,6 +5,7 @@ -include_lib("hackney/include/hackney_lib.hrl"). -include_lib("opentelemetry_api/include/opentelemetry.hrl"). -include_lib("opentelemetry/include/otel_span.hrl"). +-include_lib("prometheus/include/prometheus_model.hrl"). -include("woody_test_thrift.hrl"). -include("woody_defs.hrl"). @@ -280,6 +281,8 @@ cross_test_groups() -> } }. +-define(RANCH_REF, woody_ct). 
+ %% %% starting/stopping %% @@ -287,21 +290,12 @@ init_per_suite(C) -> % dbg:tracer(), dbg:p(all, c), % dbg:tpl({woody_joint_workers, '_', '_'}, x), %%Apps = genlib_app:start_application_with(woody, [{trace_http_server, true}]), - application:set_env(hackney, mod_metrics, woody_client_metrics), - application:set_env(woody, woody_client_metrics_options, #{ - metric_key_mapping => #{ - [hackney, nb_requests] => [hackney, requests_in_process] - } - }), - application:set_env(how_are_you, metrics_handlers, [ - {woody_api_hay, #{ - interval => 1000 - }} - ]), {ok, Apps} = application:ensure_all_started(woody), - {ok, HayApps} = application:ensure_all_started(how_are_you), + {ok, MetricsApps} = application:ensure_all_started(prometheus), + ok = woody_ranch_prometheus_collector:setup(), + ok = woody_hackney_prometheus_collector:setup(), {ok, OtelApps} = setup_opentelemetry(), - [{apps, OtelApps ++ HayApps ++ Apps} | C]. + [{apps, OtelApps ++ MetricsApps ++ Apps} | C]. end_per_suite(C) -> % unset so it won't report metrics next suite @@ -334,21 +328,21 @@ init_per_testcase(find_multiple_pools_test, C) -> {ok, Sup} = start_tc_sup(), Pool1 = {swords, 15000, 100}, Pool2 = {shields, undefined, 50}, - ok = start_woody_server_with_pools(woody_ct, Sup, ['Weapons', 'Powerups'], [Pool1, Pool2], C), + ok = start_woody_server_with_pools(?RANCH_REF, Sup, ['Weapons', 'Powerups'], [Pool1, Pool2], C), [{sup, Sup} | C]; init_per_testcase(calls_with_cache, C) -> {ok, Sup} = start_tc_sup(), {ok, _} = start_caching_client(caching_client_ct, Sup), - {ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], C), + {ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], C), [{sup, Sup} | C]; init_per_testcase(server_handled_client_timeout_test, C) -> {ok, Sup} = start_tc_sup(), {ok, _} = supervisor:start_child(Sup, server_timeout_event_handler:child_spec()), - {ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], server_timeout_event_handler, C), + {ok, 
_} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], server_timeout_event_handler, C), [{sup, Sup} | C]; init_per_testcase(_, C) -> {ok, Sup} = start_tc_sup(), - {ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], C), + {ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], C), [{sup, Sup} | C]. init_per_group(woody_resolver, Config) -> @@ -381,7 +375,7 @@ start_error_server(TC, Sup) -> Code = get_fail_code(TC), Dispatch = cowboy_router:compile([{'_', [{?PATH_WEAPONS, ?MODULE, Code}]}]), Server = ranch:child_spec( - woody_ct, + ?RANCH_REF, ranch_tcp, [{ip, ?SERVER_IP}, {port, ?SERVER_PORT}], cowboy_clear, @@ -826,7 +820,18 @@ call_fail_w_no_headers(Id, Class, _Code) -> find_multiple_pools_test(_) -> true = is_pid(hackney_pool:find_pool(swords)), - true = is_pid(hackney_pool:find_pool(shields)). + true = is_pid(hackney_pool:find_pool(shields)), + MF = smuggle_mf_return_value(fun(F) -> woody_hackney_prometheus_collector:collect_mf(default, F) end), + %% We can't know order and values from hackney's pool info, but we can + %% expect that values for those pools must be provided. However exact number + %% of values can vary based on order of testcase execution and other + %% side-effects. + ValuesCount = length([swords, shields]) * length([queue_count, in_use_count, free_count]), + ?assertMatch( + #'MetricFamily'{type = 'GAUGE', name = <<"woody_hackney_pool_usage">>, metric = Values} when + length(Values) >= ValuesCount, + MF + ). call_thrift_multiplexed_test(_) -> Client = make_thrift_multiplexed_client( @@ -1229,6 +1234,7 @@ gun_test_basic(Id, Gun, Expect, C) -> catch Class:Reason -> ok end, + ok = assert_connections_metrics(), check_msg(Gun, Context). 
get_except({ok, _}) -> @@ -1350,3 +1356,45 @@ mk_otel_attributes(Attributes) -> SpanAttributeCountLimit = otel_span_limits:attribute_count_limit(), SpanAttributeValueLengthLimit = otel_span_limits:attribute_value_length_limit(), otel_attributes:new(Attributes, SpanAttributeCountLimit, SpanAttributeValueLengthLimit). + +assert_connections_metrics() -> + MF = smuggle_mf_return_value(fun(F) -> woody_ranch_prometheus_collector:collect_mf(default, F) end), + %% Sadly we can't match on listener ref since its an iodata representation of tuple. + ?assertMatch( + #'MetricFamily'{ + name = <<"woody_ranch_listener_connections">>, + type = 'GAUGE', + metric = [ + #'Metric'{ + label = [#'LabelPair'{name = <<"listener">>, value = _Ref}], + gauge = #'Gauge'{value = V} + } + ] + } when V >= 1, + MF + ), + %% NOTE See `prometheus_model_helpers:ensure_binary_or_string/1` + RefImpl1 = io_lib:format("~p", [{woody_server_thrift_http_handler, ?RANCH_REF}]), + RefImpl2 = io_lib:format("~p", [{woody_server_thrift_v2, ?RANCH_REF}]), + ?assertMatch( + Expected when Expected =:= RefImpl1 orelse Expected =:= RefImpl2, + get_metric_ref(hd(MF#'MetricFamily'.metric)) + ). + +get_metric_ref(#'Metric'{label = Labels}) -> + get_metric_ref_(Labels). + +get_metric_ref_([]) -> + undefined; +get_metric_ref_([#'LabelPair'{name = <<"listener">>, value = Ref} | _]) -> + Ref; +get_metric_ref_([_Label | Labels]) -> + get_metric_ref_(Labels). + +%% Collector module expects 'ok' return. Because of that dialyzer won't quit bitching. +smuggle_mf_return_value(Fun) -> + _ = Fun(fun(MF) -> + _ = erlang:put(smuggle, MF), + ok + end), + erlang:get(smuggle).