Merge branch 'master' into compat/woody_ex

This commit is contained in:
Aleksey Kashapov 2024-05-17 13:45:54 +03:00
commit c4f6b9c62c
No known key found for this signature in database
GPG Key ID: E7A24E92707DA3D0
21 changed files with 1098 additions and 94 deletions

46
.github/settings.yml vendored Normal file
View File

@ -0,0 +1,46 @@
# These settings are synced to GitHub by https://probot.github.io/apps/settings/
_extends: .github
teams:
- name: devs
permission: push
- name: bots
permission: push
- name: maintainers
permission: push
- name: erlang
permission: maintain
branches:
- name: "master"
# https://developer.github.com/v3/repos/branches/#update-branch-protection
# Branch Protection settings. Set to null to disable
protection:
required_pull_request_reviews:
required_approving_review_count: 1
dismiss_stale_reviews: false
require_code_owner_reviews: true
dismissal_restrictions: {}
required_status_checks:
strict: true
checks:
- context: "Load .env"
app_id: 15368
- context: "Run checks / Build"
app_id: 15368
- context: "Run checks / Check"
app_id: 15368
- context: "Run checks / Dialyze"
app_id: 15368
- context: "Run checks / Test"
app_id: 15368
- context: "codecov/patch"
app_id: 254
- context: "codecov/project"
app_id: 254
enforce_admins: false
required_conversation_resolution: false
required_linear_history: true
restrictions: null
allow_force_pushes: false
allow_deletions: false

10
.github/workflows/basic-linters.yml vendored Normal file
View File

@ -0,0 +1,10 @@
name: Vality basic linters
on:
pull_request:
branches:
- "*"
jobs:
lint:
uses: valitydev/base-workflows/.github/workflows/basic-linters.yml@v1

View File

@ -18,7 +18,7 @@ jobs:
thrift-version: ${{ steps.thrift-version.outputs.version }}
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v3
- run: grep -v '^#' .env >> $GITHUB_ENV
- id: otp-version
run: echo "::set-output name=version::$OTP_VERSION"
@ -30,9 +30,10 @@ jobs:
run:
name: Run checks
needs: setup
uses: valitydev/erlang-workflows/.github/workflows/erlang-parallel-build.yml@v1.0.9
uses: valitydev/erlang-workflows/.github/workflows/erlang-parallel-build.yml@v1.0.10
with:
otp-version: ${{ needs.setup.outputs.otp-version }}
rebar-version: ${{ needs.setup.outputs.rebar-version }}
use-thrift: true
thrift-version: ${{ needs.setup.outputs.thrift-version }}
upload-coverage: false

107
README.md
View File

@ -168,6 +168,80 @@ check_loan_limits(Limits, Context, N) ->
### Woody Event Handler
Интерфейс для получения и логирования событий RPC библиотеки. Также содержит вспомогательные функции для удобного форматирования событий. Пример реализации _event handler_'а - [woody_event_handler_default.erl](src/woody_event_handler_default.erl).
Через опции обработчика можно задать соответствие событий RPC уровням логирования:
``` erlang
woody_event_handler_default:handle_event(Event, RpcId, Meta, #{
formatter_opts => ...,
events_severity => #{
['call service'] => debug,
...
}
}).
```
Где эти параметры имеют значения по умолчанию в следующем виде:
``` erlang
#{
events_severity => #{
%% Пограничные события работы клиента
['client begin'] => debug,
['client end'] => debug,
%% Начало вызова сервиса, перед формированием запроса
['call service'] => info,
%% Результат вызова сервиса на клиенте
['service result'] => info,
['service result', error] => error,
%% Событие состоявшегося вызова с возвращённой ошибкой в качестве
%% результата
['service result', warning] => warning,
%% Клиентские события, включая обнаружения хоста
['client send'] => debug,
['client resolve begin'] => debug,
['client resolve result'] => debug,
['client receive'] => debug,
['client receive', error] => warning,
%% Непосредственные события обслуживания запроса сервером
['server receive'] => debug,
['server send'] => debug,
['server send', error] => warning,
%% Начало обслуживания вызова функции сервиса
['invoke service handler'] => info,
%% Завершение обслуживание вызова с разным итогом
['service handler result'] => info,
['service handler result', error, business] => info,
['service handler result', error, system] => error,
%% Обслуживание вызова завершилось поправимой ошибкой;
%% по крайней мере она не в рамках бизнес-логики, но и не системное
%% исключение
['service handler result', warning] => warning,
%% События кеширующей обертки клиента
['client cache begin'] => debug,
['client cache end'] => debug,
['client cache hit'] => info,
['client cache miss'] => debug,
['client cache update'] => debug,
['client cache result'] => debug,
%% Внутренние ошибки с разным контекстом/происхождением
['internal error', system] => error,
['internal error', business] => warning,
%% Событие трассировки на уровне woody, см. далее
['trace event'] => debug
}
}.
```
### Tracing
Можно динамически включать и выключать трассировку http запросов и ответов.
@ -176,3 +250,36 @@ check_loan_limits(Limits, Context, N) ->
application:set_env(woody, trace_http_server, true).
application:unset_env(woody, trace_http_server).
```
### Prometheus metrics
Чтобы осуществлять экспорт метрик следует добавить [соответствующий хэндлер](https://github.com/deadtrickster/prometheus-cowboy#exporting-metrics-with-handlers) для cowboy-сервера.
``` erlang
{deps, [
...
{prometheus_cowboy, "0.1.8"}
]}
```
Для сбора серверных метрик необходимо на старте приложения объявить их
``` erlang
ok = woody_ranch_prometheus_collector:setup()
```
Если дополнительно интересуют все метрики ковбоя то можно добавить [реализацию обсервера из библиотеки](https://github.com/deadtrickster/prometheus-cowboy?tab=readme-ov-file#exporting-cowboy2-metrics).
Для сбора клиентских метрик необходимо на старте приложения объявить их
``` erlang
ok = woody_hackney_prometheus_collector:setup()
```
Это будет публиковать целочисленные значения в метрике 'woody_hackney_pool_usage' с метками `pool` в качестве названия пула и `status` в качестве типа соответствующего значения:
- `in_use_count` -- используемые соединения в пуле;
- `free_count` -- свободное количество в пуле;
- `queue_count` -- очередь за свободными соединениями
**TODO** Возможно стоит рассмотреть публикацию метрик по количеству исполняемых запросов в общем, с разбивкой по хосту и количеству новых и переиспользуемых соединений в каждом из пулов. [Хакни это предоставляет](https://github.com/benoitc/hackney/tree/1.18.0#metrics).

View File

@ -31,7 +31,9 @@
{gproc, "0.9.0"},
{cache, "2.3.3"},
{snowflake, {git, "https://github.com/valitydev/snowflake.git", {branch, "master"}}},
{genlib, {git, "https://github.com/valitydev/genlib.git", {branch, "master"}}}
{genlib, {git, "https://github.com/valitydev/genlib.git", {branch, "master"}}},
{prometheus, "4.8.1"},
{opentelemetry_api, "1.2.1"}
]}.
%% XRef checks
@ -102,17 +104,15 @@
{thrift,
{git, "https://github.com/valitydev/thrift_erlang.git",
{ref, "3f3e11246d90aefa8f58b35e4f2eab14c0c28bd2"}}},
{woody_api_hay,
{git, "https://github.com/valitydev/woody_api_hay.git",
{ref, "4c39134cddaa9bf6fb8db18e7030ae64f1efb3a9"}}},
{damsel,
{git, "https://github.com/valitydev/damsel.git", {ref, "3fa6f31db54b2ae781b27898ab4daf56bb36eb36"}}},
{mg_proto,
{git, "https://github.com/valitydev/machinegun-proto.git",
{ref, "ebae56fe2b3e79e4eb34afc8cb55c9012ae989f8"}}}
{ref, "ebae56fe2b3e79e4eb34afc8cb55c9012ae989f8"}}},
{opentelemetry, "1.3.0"}
]},
{dialyzer, [
{plt_extra_apps, [thrift, how_are_you, eunit, proper, common_test, cth_readable]}
{plt_extra_apps, [thrift, prometheus, eunit, proper, common_test, cth_readable, opentelemetry]}
]}
]}
]}.

View File

@ -12,7 +12,13 @@
{<<"idna">>,{pkg,<<"idna">>,<<"6.1.1">>},1},
{<<"metrics">>,{pkg,<<"metrics">>,<<"1.0.1">>},1},
{<<"mimerl">>,{pkg,<<"mimerl">>,<<"1.2.0">>},1},
{<<"opentelemetry_api">>,{pkg,<<"opentelemetry_api">>,<<"1.2.1">>},0},
{<<"opentelemetry_semantic_conventions">>,
{pkg,<<"opentelemetry_semantic_conventions">>,<<"0.2.0">>},
1},
{<<"parse_trans">>,{pkg,<<"parse_trans">>,<<"3.3.1">>},1},
{<<"prometheus">>,{pkg,<<"prometheus">>,<<"4.8.1">>},0},
{<<"quantile_estimator">>,{pkg,<<"quantile_estimator">>,<<"0.2.1">>},1},
{<<"ranch">>,{pkg,<<"ranch">>,<<"1.8.0">>},1},
{<<"snowflake">>,
{git,"https://github.com/valitydev/snowflake.git",
@ -31,7 +37,11 @@
{<<"idna">>, <<"8A63070E9F7D0C62EB9D9FCB360A7DE382448200FBBD1B106CC96D3D8099DF8D">>},
{<<"metrics">>, <<"25F094DEA2CDA98213CECC3AEFF09E940299D950904393B2A29D191C346A8486">>},
{<<"mimerl">>, <<"67E2D3F571088D5CFD3E550C383094B47159F3EEE8FFA08E64106CDF5E981BE3">>},
{<<"opentelemetry_api">>, <<"7B69ED4F40025C005DE0B74FCE8C0549625D59CB4DF12D15C32FE6DC5076FF42">>},
{<<"opentelemetry_semantic_conventions">>, <<"B67FE459C2938FCAB341CB0951C44860C62347C005ACE1B50F8402576F241435">>},
{<<"parse_trans">>, <<"16328AB840CC09919BD10DAB29E431DA3AF9E9E7E7E6F0089DD5A2D2820011D8">>},
{<<"prometheus">>, <<"FA76B152555273739C14B06F09F485CF6D5D301FE4E9D31B7FF803D26025D7A0">>},
{<<"quantile_estimator">>, <<"EF50A361F11B5F26B5F16D0696E46A9E4661756492C981F7B2229EF42FF1CD15">>},
{<<"ranch">>, <<"8C7A100A139FD57F17327B6413E4167AC559FBC04CA7448E9BE9057311597A1D">>},
{<<"ssl_verify_fun">>, <<"CF344F5692C82D2CD7554F5EC8FD961548D4FD09E7D22F5B62482E5AEAEBD4B0">>},
{<<"unicode_util_compat">>, <<"BC84380C9AB48177092F43AC89E4DFA2C6D62B40B8BD132B1059ECC7232F9A78">>}]},
@ -45,7 +55,11 @@
{<<"idna">>, <<"92376EB7894412ED19AC475E4A86F7B413C1B9FBB5BD16DCCD57934157944CEA">>},
{<<"metrics">>, <<"69B09ADDDC4F74A40716AE54D140F93BEB0FB8978D8636EADED0C31B6F099F16">>},
{<<"mimerl">>, <<"F278585650AA581986264638EBF698F8BB19DF297F66AD91B18910DFC6E19323">>},
{<<"opentelemetry_api">>, <<"6D7A27B7CAD2AD69A09CABF6670514CAFCEC717C8441BEB5C96322BAC3D05350">>},
{<<"opentelemetry_semantic_conventions">>, <<"D61FA1F5639EE8668D74B527E6806E0503EFC55A42DB7B5F39939D84C07D6895">>},
{<<"parse_trans">>, <<"07CD9577885F56362D414E8C4C4E6BDF10D43A8767ABB92D24CBE8B24C54888B">>},
{<<"prometheus">>, <<"6EDFBE928D271C7F657A6F2C46258738086584BD6CAE4A000B8B9A6009BA23A5">>},
{<<"quantile_estimator">>, <<"282A8A323CA2A845C9E6F787D166348F776C1D4A41EDE63046D72D422E3DA946">>},
{<<"ranch">>, <<"49FBCFD3682FAB1F5D109351B61257676DA1A2FDBE295904176D5E521A2DDFE5">>},
{<<"ssl_verify_fun">>, <<"BDB0D2471F453C88FF3908E7686F86F9BE327D065CC1EC16FA4540197EA04680">>},
{<<"unicode_util_compat">>, <<"25EEE6D67DF61960CF6A794239566599B09E17E668D3700247BC498638152521">>}]}

View File

@ -10,7 +10,8 @@
cowboy,
hackney,
gproc,
cache
cache,
opentelemetry_api
]},
{optional_applications, [
thrift

View File

@ -128,7 +128,7 @@ send(Url, Body, Options, ResOpts, WoodyState) ->
Headers = add_host_header(OldUrl, make_woody_headers(Context)),
Options1 = set_defaults(Options),
Options2 = set_timeouts(Options1, Context),
HeaderList = maps:to_list(Headers),
HeaderList = otel_propagator_text_map:inject(maps:to_list(Headers)),
Result = hackney:request(post, NewUrl, HeaderList, Body, maps:to_list(Options2)),
transform_request_results(Result);
{error, Reason} ->

View File

@ -155,7 +155,8 @@ send_call(Buffer, #{url := Url} = Opts, WoodyState) ->
% reusing keep-alive connections to dead hosts
case woody_resolver:resolve_url(Url, WoodyState, ResolverOpts) of
{ok, {OldUrl, NewUrl}, ConnectOpts} ->
Headers = add_host_header(OldUrl, make_woody_headers(Context)),
Headers0 = add_host_header(OldUrl, make_woody_headers(Context)),
Headers1 = otel_propagator_text_map:inject(Headers0),
TransportOpts1 = set_defaults(TransportOpts),
TransportOpts2 = set_timeouts(TransportOpts1, Context),
% NOTE
@ -163,7 +164,7 @@ send_call(Buffer, #{url := Url} = Opts, WoodyState) ->
% `set_tls_overrides/2`.
TransportOpts3 = append_connect_opts(TransportOpts2, ConnectOpts),
TransportOpts4 = set_tls_overrides(TransportOpts3, OldUrl),
Result = hackney:request(post, NewUrl, Headers, Buffer, maps:to_list(TransportOpts4)),
Result = hackney:request(post, NewUrl, Headers1, Buffer, maps:to_list(TransportOpts4)),
handle_response(Result, WoodyState);
{error, Reason} ->
Error = {error, {resolve_failed, Reason}},

View File

@ -4,6 +4,7 @@
-export([handle_event/3, handle_event/4]).
-export([get_event_severity/2]).
-export([get_event_severity/3]).
-export([format_event/3, format_event/4]).
-export([format_meta/2, format_meta/3]).
-export([format_rpc_id/1]).
@ -221,7 +222,8 @@
-export_type([meta/0]).
-type options() :: #{
formatter_opts => woody_event_formatter:opts()
formatter_opts => woody_event_formatter:opts(),
events_severity => #{nonempty_list(atom()) => severity()}
}.
-export_type([options/0]).
@ -379,68 +381,77 @@ format_event(UnknownEventType, Meta, _Opts) ->
{" unknown woody event type '~s' with meta ~p", [UnknownEventType, Meta]}.
-spec get_event_severity(event(), event_meta()) -> severity().
get_event_severity(?EV_CLIENT_BEGIN, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_END, _Meta) ->
debug;
get_event_severity(?EV_CALL_SERVICE, _Meta) ->
info;
get_event_severity(?EV_SERVICE_RESULT, #{status := ok}) ->
info;
get_event_severity(?EV_SERVICE_RESULT, #{status := error, stack := _Stack}) ->
error;
get_event_severity(?EV_SERVICE_RESULT, #{status := error, result := _Error}) ->
warning;
get_event_severity(?EV_CLIENT_SEND, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_RESOLVE_RESULT, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_RECEIVE, #{status := ok}) ->
debug;
get_event_severity(?EV_CLIENT_RECEIVE, #{status := error}) ->
warning;
get_event_severity(?EV_SERVER_RECEIVE, _Meta) ->
debug;
get_event_severity(?EV_SERVER_SEND, #{status := ok}) ->
debug;
get_event_severity(?EV_SERVER_SEND, #{status := error}) ->
warning;
get_event_severity(?EV_INVOKE_SERVICE_HANDLER, _Meta) ->
info;
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := ok}) ->
info;
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := business}) ->
info;
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := system, stack := _Stack}) ->
error;
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := system, result := _Error}) ->
warning;
get_event_severity(?EV_CLIENT_CACHE_BEGIN, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_CACHE_END, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_CACHE_HIT, _Meta) ->
info;
get_event_severity(?EV_CLIENT_CACHE_MISS, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_CACHE_UPDATE, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_CACHE_RESULT, _Meta) ->
debug;
get_event_severity(?EV_INTERNAL_ERROR, #{error := _, class := _Class, stack := _Stack}) ->
error;
get_event_severity(?EV_INTERNAL_ERROR, #{error := _, reason := _Reason}) ->
warning;
get_event_severity(?EV_TRACE, _Meta) ->
debug;
get_event_severity(_UnknownEventType, _Meta) ->
warning.
%% @doc Same as get_event_severity/3 with no severity overrides: every event
%% resolves to its built-in default severity.
get_event_severity(Event, Meta) ->
    get_event_severity(Event, Meta, #{}).

-spec get_event_severity(event(), event_meta(), options()) -> severity().
%% @doc Resolves the log severity for an event. Each clause forms an event
%% "code" — the nonempty atom list used as a key into the `events_severity`
%% option map — and supplies the built-in default severity used when no
%% override is configured (the defaults mirror the table in the README).
%% Client lifecycle boundaries.
get_event_severity(?EV_CLIENT_BEGIN = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_END = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
%% Service call started on the client, before the request is formed.
get_event_severity(?EV_CALL_SERVICE = Event, _Meta, Opts) ->
    map_severity([Event], Opts, info);
%% Client-side call result: presence of a stacktrace marks a hard error;
%% an error carrying only a result term is a returned-error outcome.
get_event_severity(?EV_SERVICE_RESULT = Event, #{status := ok}, Opts) ->
    map_severity([Event], Opts, info);
get_event_severity(?EV_SERVICE_RESULT = Event, #{status := error, stack := _Stack}, Opts) ->
    map_severity([Event, error], Opts, error);
get_event_severity(?EV_SERVICE_RESULT = Event, #{status := error, result := _Error}, Opts) ->
    map_severity([Event, warning], Opts, warning);
%% Client transport events, including host resolution.
get_event_severity(?EV_CLIENT_SEND = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_RESOLVE_BEGIN = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_RESOLVE_RESULT = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_RECEIVE = Event, #{status := ok}, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_RECEIVE = Event, #{status := error}, Opts) ->
    map_severity([Event, error], Opts, warning);
%% Server-side request handling.
get_event_severity(?EV_SERVER_RECEIVE = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_SERVER_SEND = Event, #{status := ok}, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_SERVER_SEND = Event, #{status := error}, Opts) ->
    map_severity([Event, error], Opts, warning);
%% Service handler invocation and its outcomes: business errors default to
%% info, system errors with a stacktrace to error, remaining system errors
%% to warning.
get_event_severity(?EV_INVOKE_SERVICE_HANDLER = Event, _Meta, Opts) ->
    map_severity([Event], Opts, info);
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := ok}, Opts) ->
    map_severity([Event], Opts, info);
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := business}, Opts) ->
    map_severity([Event, error, business], Opts, info);
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := system, stack := _Stack}, Opts) ->
    map_severity([Event, error, system], Opts, error);
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := system, result := _Error}, Opts) ->
    map_severity([Event, warning], Opts, warning);
%% Client cache wrapper events.
get_event_severity(?EV_CLIENT_CACHE_BEGIN = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_CACHE_END = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_CACHE_HIT = Event, _Meta, Opts) ->
    map_severity([Event], Opts, info);
get_event_severity(?EV_CLIENT_CACHE_MISS = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_CACHE_UPDATE = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_CACHE_RESULT = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
%% Internal errors: with a stacktrace they are system-level, with only a
%% reason they are business-level.
get_event_severity(?EV_INTERNAL_ERROR = Event, #{error := _, class := _Class, stack := _Stack}, Opts) ->
    map_severity([Event, system], Opts, error);
get_event_severity(?EV_INTERNAL_ERROR = Event, #{error := _, reason := _Reason}, Opts) ->
    map_severity([Event, business], Opts, warning);
%% Low-level woody trace events.
get_event_severity(?EV_TRACE = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
%% Unknown events surface as warnings so they don't go unnoticed.
get_event_severity(UnknownEventType, _Meta, Opts) ->
    map_severity([UnknownEventType], Opts, warning).
%%
%% Internal functions
%%
-spec map_severity(nonempty_list(atom()), options(), severity()) -> severity().
%% Looks up a severity override for the given event code in the
%% `events_severity` option map, yielding the supplied default when either
%% the map or the specific code is absent.
map_severity(EventCode, Options, DefaultSeverity) ->
    case Options of
        #{events_severity := Overrides} ->
            maps:get(EventCode, Overrides, DefaultSeverity);
        #{} ->
            DefaultSeverity
    end.
-spec format_service_request(map(), options()) -> msg().
format_service_request(#{service_schema := {Module, Service}, function := Function, args := Args}, Opts) ->
woody_event_formatter:format_call(Module, Service, Function, Args, get_formatter_opts(Opts)).
@ -1582,4 +1593,157 @@ exception_test_() ->
)
].
-spec event_severity_defaults_test_() -> _.
%% Checks that overrides spelling out the documented defaults resolve to the
%% same severities as the hardcoded fallbacks — i.e. such overrides are a
%% no-op.
event_severity_defaults_test_() ->
    Opts = #{events_severity => default_severity_map()},
    Meta = #{role => server},
    [
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_BEGIN, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_END, Meta, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_CALL_SERVICE, Meta, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => ok}, Opts)),
        ?_assertEqual(error, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, stack => []}, Opts)),
        ?_assertEqual(warning, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, result => {}}, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_SEND, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RESOLVE_RESULT, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => ok}, Opts)),
        ?_assertEqual(warning, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => error}, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_SERVER_RECEIVE, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_SERVER_SEND, Meta#{status => ok}, Opts)),
        ?_assertEqual(warning, get_event_severity(?EV_SERVER_SEND, Meta#{status => error}, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_INVOKE_SERVICE_HANDLER, Meta, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => ok}, Opts)),
        ?_assertEqual(
            info, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => business}, Opts)
        ),
        ?_assertEqual(
            error,
            get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, stack => []}, Opts)
        ),
        ?_assertEqual(
            warning,
            get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, result => {}}, Opts)
        ),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_BEGIN, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_END, Meta, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_CLIENT_CACHE_HIT, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_MISS, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_UPDATE, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_RESULT, Meta, Opts)),
        ?_assertEqual(
            error, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, class => throw, stack => []}, Opts)
        ),
        ?_assertEqual(warning, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, reason => test}, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_TRACE, Meta, Opts))
    ].
-spec event_severity_all_info_test_() -> _.
%% Checks that overrides fully replace the defaults: with every code mapped
%% to `info`, every event — regardless of its built-in default — resolves
%% to `info`.
event_severity_all_info_test_() ->
    ExpectAll = info,
    Opts = #{events_severity => replace_all_with(ExpectAll, default_severity_map())},
    Meta = #{role => server},
    [
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_BEGIN, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_END, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CALL_SERVICE, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => ok}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, stack => []}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, result => {}}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_SEND, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RESOLVE_RESULT, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => ok}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => error}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_RECEIVE, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_SEND, Meta#{status => ok}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_SEND, Meta#{status => error}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_INVOKE_SERVICE_HANDLER, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => ok}, Opts)),
        ?_assertEqual(
            ExpectAll, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => business}, Opts)
        ),
        ?_assertEqual(
            ExpectAll,
            get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, stack => []}, Opts)
        ),
        ?_assertEqual(
            ExpectAll,
            get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, result => {}}, Opts)
        ),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_BEGIN, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_END, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_HIT, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_MISS, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_UPDATE, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_RESULT, Meta, Opts)),
        ?_assertEqual(
            ExpectAll, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, class => throw, stack => []}, Opts)
        ),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, reason => test}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_TRACE, Meta, Opts))
    ].
%% Produces a copy of the severity map in which every event code maps to the
%% same given severity.
replace_all_with(NewSeverity, SeverityMap) ->
    maps:from_keys(maps:keys(SeverityMap), NewSeverity).
%% The built-in event-to-severity mapping as documented in the README; used
%% by the tests as a no-op override set and as a template for replacements.
default_severity_map() ->
    #{
        ['client begin'] => debug,
        ['client end'] => debug,
        ['call service'] => info,
        ['service result'] => info,
        ['service result', error] => error,
        ['service result', warning] => warning,
        ['client send'] => debug,
        ['client resolve begin'] => debug,
        ['client resolve result'] => debug,
        ['client receive'] => debug,
        ['client receive', error] => warning,
        ['server receive'] => debug,
        ['server send'] => debug,
        ['server send', error] => warning,
        ['invoke service handler'] => info,
        ['service handler result'] => info,
        ['service handler result', error, business] => info,
        ['service handler result', error, system] => error,
        ['service handler result', warning] => warning,
        ['client cache begin'] => debug,
        ['client cache end'] => debug,
        ['client cache hit'] => info,
        ['client cache miss'] => debug,
        ['client cache update'] => debug,
        ['client cache result'] => debug,
        ['internal error', system] => error,
        ['internal error', business] => warning,
        ['trace event'] => debug
    }.
-endif.

View File

@ -22,7 +22,8 @@
handle_event(Event, RpcId, Meta, Opts) ->
EHOpts = get_event_handler_opts(Opts),
{Format, Msg} = woody_event_handler:format_event(Event, Meta, RpcId, EHOpts),
Function = get_logger_function(woody_event_handler:get_event_severity(Event, Meta)),
Severity = woody_event_handler:get_event_severity(Event, Meta, Opts),
Function = get_logger_function(Severity),
_ = error_logger:Function(Format, Msg),
ok.

View File

@ -0,0 +1,100 @@
%% @doc Woody event handler that mirrors RPC events onto OpenTelemetry
%% spans: client calls and server handler invocations open spans, their
%% results end those spans, client transport/cache events are attached as
%% span events, and internal errors are recorded as span exceptions.
-module(woody_event_handler_otel).

-include_lib("opentelemetry_api/include/opentelemetry.hrl").

-include("woody_defs.hrl").

-behaviour(woody_event_handler).

-export([handle_event/4]).

-spec handle_event(Event, RpcId, Meta, Opts) -> ok when
    Event :: woody_event_handler:event(),
    RpcId :: woody:rpc_id() | undefined,
    Meta :: woody_event_handler:event_meta(),
    Opts :: woody:options().

%% Events that open a new span: client call / server handler invocation.
-define(IS_SPAN_START(Event), Event =:= ?EV_CALL_SERVICE orelse Event =:= ?EV_INVOKE_SERVICE_HANDLER).
%% Events that close the span opened by the matching start event above.
-define(IS_SPAN_END(Event), Event =:= ?EV_SERVICE_RESULT orelse Event =:= ?EV_SERVICE_HANDLER_RESULT).
%% Client-side transport and cache events recorded onto an open span.
-define(IS_CLIENT_INTERNAL(Event),
    Event =:= ?EV_CLIENT_CACHE_HIT orelse
        Event =:= ?EV_CLIENT_CACHE_MISS orelse
        Event =:= ?EV_CLIENT_CACHE_UPDATE orelse
        Event =:= ?EV_CLIENT_SEND orelse
        Event =:= ?EV_CLIENT_RECEIVE
).

%% Client events: attach the event (with its URL) to this RPC's span.
handle_event(Event, RpcID, _Meta = #{url := Url}, _Opts) when ?IS_CLIENT_INTERNAL(Event) ->
    with_span(otel_ctx:get_current(), mk_ref(RpcID), fun(SpanCtx) ->
        _ = otel_span:add_event(SpanCtx, atom_to_binary(Event), #{url => Url})
    end);
%% Internal error handling: record the error as a span exception; when the
%% error is marked final, also end the span and drop the otel context.
handle_event(?EV_INTERNAL_ERROR, RpcID, Meta = #{error := Error, class := Class, reason := Reason}, _Opts) ->
    %% A stacktrace is not always present in the meta; default to [].
    Stacktrace = maps:get(stack, Meta, []),
    Details = io_lib:format("~ts: ~ts", [Error, Reason]),
    with_span(otel_ctx:get_current(), mk_ref(RpcID), fun(SpanCtx) ->
        _ = otel_span:record_exception(SpanCtx, genlib:define(Class, error), Details, Stacktrace, #{}),
        otel_maybe_cleanup(Meta, SpanCtx)
    end);
%% Registers span starts/ends for woody client calls and woody server function invocations.
handle_event(Event, RpcID, Meta, _Opts) when ?IS_SPAN_START(Event) ->
    Tracer = opentelemetry:get_application_tracer(?MODULE),
    span_start(Tracer, otel_ctx:get_current(), mk_ref(RpcID), mk_name(Meta), mk_opts(Event));
handle_event(Event, RpcID, Meta, _Opts) when ?IS_SPAN_END(Event) ->
    span_end(otel_ctx:get_current(), mk_ref(RpcID), fun(SpanCtx) ->
        otel_maybe_erroneous_result(SpanCtx, Meta)
    end);
%% All other events are of no interest to tracing.
handle_event(_Event, _RpcID, _Meta, _Opts) ->
    ok.

%%

%% Starts a span, remembers it in the woody span stack under Key, makes it
%% the current span and attaches the resulting context.
span_start(Tracer, Ctx, Key, SpanName, Opts) ->
    SpanCtx = otel_tracer:start_span(Ctx, Tracer, SpanName, Opts),
    Ctx1 = woody_util:span_stack_put(Key, SpanCtx, Ctx),
    Ctx2 = otel_tracer:set_current_span(Ctx1, SpanCtx),
    _ = otel_ctx:attach(Ctx2),
    ok.

%% Pops the span stored under Key (no-op when absent), lets OnBeforeEnd
%% amend it, ends it and restores its parent as the current span.
span_end(Ctx, Key, OnBeforeEnd) ->
    case woody_util:span_stack_pop(Key, Ctx) of
        {error, notfound} ->
            ok;
        {ok, SpanCtx, ParentSpanCtx, Ctx1} ->
            SpanCtx1 = OnBeforeEnd(SpanCtx),
            _ = otel_span:end_span(SpanCtx1, undefined),
            Ctx2 = otel_tracer:set_current_span(Ctx1, ParentSpanCtx),
            _ = otel_ctx:attach(Ctx2),
            ok
    end.

%% Runs F against the span stored under Key, falling back to the context's
%% current span; F's result is discarded.
with_span(Ctx, Key, F) ->
    SpanCtx = woody_util:span_stack_get(Key, Ctx, otel_tracer:current_span_ctx(Ctx)),
    _ = F(SpanCtx),
    ok.

%% On a final internal error the span is force-ended and the otel context is
%% cleared, since no regular span-end event will follow.
otel_maybe_cleanup(#{final := true}, SpanCtx) ->
    _ = otel_span:end_span(SpanCtx, undefined),
    otel_ctx:clear(),
    ok;
otel_maybe_cleanup(_Meta, _SpanCtx) ->
    ok.

%% Records an erroneous RPC result as a span exception; non-error results
%% leave the span untouched.
otel_maybe_erroneous_result(SpanCtx, Meta = #{status := error, result := Reason}) ->
    Class = maps:get(except_class, Meta, error),
    Stacktrace = maps:get(stack, Meta, []),
    _ = otel_span:record_exception(SpanCtx, Class, Reason, Stacktrace, #{}),
    SpanCtx;
otel_maybe_erroneous_result(SpanCtx, _Meta) ->
    SpanCtx.

%% Client calls open CLIENT spans, server invocations open SERVER spans.
mk_opts(?EV_CALL_SERVICE) ->
    #{kind => ?SPAN_KIND_CLIENT};
mk_opts(?EV_INVOKE_SERVICE_HANDLER) ->
    #{kind => ?SPAN_KIND_SERVER}.

%% The woody span id doubles as the key into the span stack.
mk_ref(#{span_id := WoodySpanId}) ->
    WoodySpanId.

%% Span name built from role, service and function, e.g. "client Svc:Fun".
mk_name(#{role := Role, service := Service, function := Function}) ->
    woody_util:to_binary([Role, " ", Service, ":", Function]).

View File

@ -0,0 +1,134 @@
%% Translates hackney metrics for prometheus.
%% See
%% https://github.com/benoitc/hackney/tree/1.18.0?tab=readme-ov-file#metrics
%%
%% For pool metrics see dedicated collector module
%% `woody_hackney_prometheus_collector`.
-module(woody_hackney_prometheus).

%% Callbacks expected by hackney's metrics engine.
-export([
    new/2,
    delete/1,
    increment_counter/1,
    increment_counter/2,
    decrement_counter/1,
    decrement_counter/2,
    update_histogram/2,
    update_gauge/2,
    update_meter/2
]).

%% Metric name as supplied by hackney, e.g. [hackney, nb_requests].
-type name() :: any().
-type metric() :: counter | histogram | gauge | meter.

%% API

%% Prometheus series backing the hackney counters.
-define(NB_REQUESTS, hackney_nb_requests).
-define(TOTAL_REQUESTS, hackney_total_requests).
-define(HOST_NB_REQUESTS, hackney_host_nb_requests).
-define(HOST_CONNECT_TIMEOUT, hackney_host_connect_timeout).
-define(HOST_CONNECT_ERROR, hackney_host_connect_error).
-define(HOST_NEW_CONNECTION, hackney_host_new_connection).
-define(HOST_REUSE_CONNECTION, hackney_host_reuse_connection).

%% Ignore unsupported metric
-spec new(metric(), name()) -> ok.
%% Total counters
new(counter, [hackney, nb_requests]) ->
    %% Declared as a prometheus gauge: hackney both increments and
    %% decrements this "running requests" counter (see decrement_counter/2).
    true = prometheus_gauge:declare([
        {name, ?NB_REQUESTS},
        {registry, registry()},
        {labels, []},
        {help, "Number of running requests."}
    ]),
    %% Per host counters, see
    %% https://github.com/benoitc/hackney/tree/1.18.0?tab=readme-ov-file#metrics-per-hosts
    %% NOTE Hackney won't call `metrics:new/3` for those counters
    true = prometheus_gauge:declare([
        {name, ?HOST_NB_REQUESTS},
        {registry, registry()},
        {labels, [host]},
        {help, "Number of running requests."}
    ]),
    [
        true = prometheus_counter:declare([
            {name, Name},
            {registry, registry()},
            {labels, [host]},
            {help, Help}
        ])
     || {Name, Help} <- [
            {?HOST_CONNECT_TIMEOUT, "Number of connect timeout."},
            {?HOST_CONNECT_ERROR, "Number of timeout errors."},
            {?HOST_NEW_CONNECTION, "Number of new pool connections per host."},
            {?HOST_REUSE_CONNECTION, "Number of reused pool connections per host."}
        ]
    ],
    ok;
new(counter, [hackney, total_requests]) ->
    true = prometheus_counter:declare([
        {name, ?TOTAL_REQUESTS},
        {registry, registry()},
        {labels, []},
        {help, "Total number of requests."}
    ]),
    ok;
%% Any other metric hackney may announce is deliberately not exported.
new(_Type, _Name) ->
    ok.

%% Metrics are never deleted.
-spec delete(name()) -> ok.
delete(_Name) ->
    ok.

-spec increment_counter(name()) -> ok.
increment_counter(Name) ->
    increment_counter(Name, 1).

%% Routes hackney counter increments to the matching prometheus series; the
%% per-host series carry the host as their single label value. Unknown
%% counters are silently ignored.
-spec increment_counter(name(), pos_integer()) -> ok.
increment_counter([hackney, nb_requests], Value) ->
    prometheus_gauge:inc(registry(), ?NB_REQUESTS, [], Value);
increment_counter([hackney, total_requests], Value) ->
    prometheus_counter:inc(registry(), ?TOTAL_REQUESTS, [], Value);
increment_counter([hackney, Host, nb_requests], Value) ->
    prometheus_gauge:inc(registry(), ?HOST_NB_REQUESTS, [Host], Value);
increment_counter([hackney, Host, connect_timeout], Value) ->
    prometheus_counter:inc(registry(), ?HOST_CONNECT_TIMEOUT, [Host], Value);
increment_counter([hackney, Host, connect_error], Value) ->
    prometheus_counter:inc(registry(), ?HOST_CONNECT_ERROR, [Host], Value);
increment_counter([hackney_pool, Host, new_connection], Value) ->
    prometheus_counter:inc(registry(), ?HOST_NEW_CONNECTION, [Host], Value);
increment_counter([hackney_pool, Host, reuse_connection], Value) ->
    prometheus_counter:inc(registry(), ?HOST_REUSE_CONNECTION, [Host], Value);
increment_counter(_Name, _Value) ->
    ok.

-spec decrement_counter(name()) -> ok.
decrement_counter(Name) ->
    decrement_counter(Name, 1).

%% Only the "running requests" gauges can go down.
-spec decrement_counter(name(), pos_integer()) -> ok.
decrement_counter([hackney, nb_requests], Value) ->
    prometheus_gauge:dec(registry(), ?NB_REQUESTS, [], Value);
decrement_counter([hackney, Host, nb_requests], Value) ->
    prometheus_gauge:dec(registry(), ?HOST_NB_REQUESTS, [Host], Value);
decrement_counter(_Name, _Value) ->
    ok.

%% Histograms are not exported. A passed fun is still executed so its
%% effects take place; the measurement itself is discarded.
-spec update_histogram(name(), fun(() -> ok) | number()) -> ok.
update_histogram(_Name, Fun) when is_function(Fun, 0) ->
    Fun();
update_histogram(_Name, _Value) ->
    ok.

%% Gauge and meter updates are not exported.
-spec update_gauge(name(), number()) -> ok.
update_gauge(_Name, _Value) ->
    ok.

-spec update_meter(name(), number()) -> ok.
update_meter(_Name, _Value) ->
    ok.

%%

%% All series live in the default prometheus registry.
registry() ->
    default.

View File

@ -0,0 +1,84 @@
%% @doc Custom prometheus collector exposing hackney's connection pool usage
%% (in-use, free and queued connection counts) as one gauge metric family
%% labeled by pool name and counter kind.
-module(woody_hackney_prometheus_collector).

-export([setup/0]).

%%

-behaviour(prometheus_collector).

-export([collect_mf/2]).
-export([collect_metrics/2]).
-export([deregister_cleanup/1]).

-define(POOL_USAGE, woody_hackney_pool_usage).

%% Installation

%% @doc Installs custom collector for hackney's pool metrics
-spec setup() -> ok.
setup() ->
    prometheus_registry:register_collector(registry(), ?MODULE).

%% Collector API

-type data() :: [data_item()].
-type data_item() :: {data_labels(), non_neg_integer()}.
-type data_labels() :: [{atom(), atom() | nonempty_string() | binary() | iolist()}].
-type pool_stats() :: [{atom(), any()}].

-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok.
collect_mf(_Registry, Callback) ->
    %% Gather usage stats for every known pool and publish them as one gauge.
    CollectPool = fun({PoolName, _PoolPid}) ->
        make_pool_data(PoolName, get_pool_stats(PoolName))
    end,
    PoolData = lists:flatten(lists:map(CollectPool, get_hackney_pools())),
    _ = Callback(create_gauge(PoolData)),
    ok.

-spec collect_metrics(prometheus_metric:name(), data()) ->
    prometheus_model:'Metric'() | [prometheus_model:'Metric'()].
collect_metrics(_Name, Data) ->
    [prometheus_model_helpers:gauge_metric(Labels, Value) || {Labels, Value} <- Data].

-spec deregister_cleanup(prometheus_registry:registry()) -> ok.
deregister_cleanup(_Registry) ->
    %% Nothing to clean up
    ok.

%% Private

registry() ->
    default.

get_pool_stats(PoolName) ->
    %% NOTE The 'hackney_pool' table data can occasionally reference pools
    %% whose process has already died; treat those as having no stats.
    try
        hackney_pool:get_stats(PoolName)
    catch
        exit:{noproc, _Reason} ->
            []
    end.

-spec create_gauge(data()) -> prometheus_model:'MetricFamily'().
create_gauge(Data) ->
    Help = "Connection pool status by used, free and queued connections count",
    prometheus_model_helpers:create_mf(?POOL_USAGE, Help, gauge, ?MODULE, Data).

-spec make_pool_data(atom(), pool_stats()) -> data().
make_pool_data(PoolName, RawStats) ->
    %% Keep only the three usage counters; anything else hackney reports is dropped.
    Wanted = maps:with([in_use_count, free_count, queue_count], maps:from_list(RawStats)),
    lists:reverse([make_data_item(PoolName, Stat, Value) || {Stat, Value} <- maps:to_list(Wanted)]).

make_data_item(PoolName, Status, Value) ->
    {[{pool, PoolName}, {status, Status}], Value}.

get_hackney_pools() ->
    %% Shamelessly pasted from
    %% https://github.com/soundtrackyourbrand/prometheus-hackney-collector
    %%
    %% Basically, we rely on not publicly exposed table containing started pools
    %% under hackney's hood.
    ets:tab2list(hackney_pool).

View File

@ -0,0 +1,84 @@
%% @doc Custom prometheus collector publishing the number of active
%% connections per ranch listener as a gauge metric family.
-module(woody_ranch_prometheus_collector).

-export([setup/0]).

%%

-behaviour(prometheus_collector).

-export([collect_mf/2]).
-export([collect_metrics/2]).
-export([deregister_cleanup/1]).

-define(CONNECTIONS, woody_ranch_listener_connections).

%% Installation

-spec setup() -> ok.
setup() ->
    prometheus_registry:register_collector(registry(), ?MODULE).

%% Collector API

-type data() :: [data_item()].
-type data_item() :: {data_labels(), non_neg_integer()}.
-type data_labels() :: [{atom(), atom() | nonempty_string() | binary() | iolist()}].
-type maybe_woody_server_ref() :: {module(), ID :: atom()} | ranch:ref().
-type ranch_info() ::
    [{maybe_woody_server_ref(), [{atom(), any()}]}]
    | #{maybe_woody_server_ref() => #{atom() => any()}}.

-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok.
collect_mf(_Registry, Callback) ->
    ListenerToData = fun({ListenerRef, ListenerInfo}) ->
        make_listener_data(ListenerRef, ListenerInfo)
    end,
    Data = lists:flatten(lists:map(ListenerToData, get_listeners_info())),
    _ = Callback(create_gauge(Data)),
    ok.

-spec collect_metrics(prometheus_metric:name(), data()) ->
    prometheus_model:'Metric'() | [prometheus_model:'Metric'()].
collect_metrics(_Name, Data) ->
    [prometheus_model_helpers:gauge_metric(Labels, Value) || {Labels, Value} <- Data].

-spec deregister_cleanup(prometheus_registry:registry()) -> ok.
deregister_cleanup(_Registry) ->
    %% Nothing to clean up
    ok.

%% Private

registry() ->
    default.

-spec create_gauge(data()) -> prometheus_model:'MetricFamily'().
create_gauge(Data) ->
    Help = "Number of active connections",
    prometheus_model_helpers:create_mf(?CONNECTIONS, Help, gauge, ?MODULE, Data).

-spec make_listener_data(maybe_woody_server_ref(), #{atom() => any()}) -> data().
make_listener_data(Ref, #{active_connections := Count}) ->
    [{[{listener, Ref}], Count}];
make_listener_data(_Ref, _Info) ->
    [].

get_listeners_info() ->
    %% See https://ninenines.eu/docs/en/ranch/1.8/guide/listeners/#_obtaining_information_about_listeners
    HasConnectionCount = fun
        ({_Ref, #{active_connections := _}}) -> true;
        (_Else) -> false
    end,
    lists:filter(HasConnectionCount, normalize_listeners_info(ranch:info())).

-dialyzer({no_match, normalize_listeners_info/1}).
-spec normalize_listeners_info(ranch_info()) -> [{maybe_woody_server_ref(), #{atom() => any()}}].
%% Ranch v2 reports a map of listener infos; v1 reports a proplist per listener.
normalize_listeners_info(Info) when is_map(Info) ->
    maps:to_list(Info);
normalize_listeners_info(Info) ->
    [{Ref, maps:from_list(ListenerInfo)} || {Ref, ListenerInfo} <- Info].

View File

@ -393,6 +393,7 @@ check_deadline(Deadline, Req, State = #{url := Url, woody_state := WoodyState})
%% Pulls distributed-tracing context out of incoming HTTP headers and stores
%% any woody metadata headers (matched by the precompiled regexp) into the
%% woody state carried by the handler state.
-spec check_metadata_headers(woody:http_headers(), cowboy_req:req(), state()) -> check_result().
check_metadata_headers(Headers, Req, State = #{woody_state := WoodyState, regexp_meta := ReMeta}) ->
    %% NOTE(review): the return value is intentionally discarded — the extract
    %% call presumably works via its side effect on the process-local otel
    %% context; confirm against otel_propagator_text_map docs.
    _OtelCtx = otel_propagator_text_map:extract(maps:to_list(Headers)),
    WoodyState1 = set_metadata(find_metadata(Headers, ReMeta), WoodyState),
    {ok, Req, update_woody_state(State, WoodyState1, Req)}.

View File

@ -407,6 +407,7 @@ check_deadline(Deadline, Req, State = #{url := Url, woody_state := WoodyState})
%% Pulls distributed-tracing context out of incoming HTTP headers and stores
%% any woody metadata headers (matched by the precompiled regexp) into the
%% woody state carried by the handler state.
-spec check_metadata_headers(woody:http_headers(), cowboy_req:req(), state()) -> check_result().
check_metadata_headers(Headers, Req, State = #{woody_state := WoodyState, regexp_meta := ReMeta}) ->
    %% NOTE(review): the return value is intentionally discarded — the extract
    %% call presumably works via its side effect on the process-local otel
    %% context; confirm against otel_propagator_text_map docs.
    _OtelCtx = otel_propagator_text_map:extract(maps:to_list(Headers)),
    WoodyState1 = set_metadata(find_metadata(Headers, ReMeta), WoodyState),
    {ok, Req, update_woody_state(State, WoodyState1, Req)}.

View File

@ -11,6 +11,12 @@
-define(DEFAULT_HANDLER_OPTS, undefined).
%%
-export([span_stack_put/3]).
-export([span_stack_get/3]).
-export([span_stack_pop/2]).
%%
%% Internal API
%%
@ -51,3 +57,41 @@ get_rpc_type({Module, Service}, Function) ->
%% @doc Maps a thrift reply type onto the woody RPC kind: oneway calls expect
%% no reply (cast); everything else is a regular call.
-spec get_rpc_reply_type(_ThriftReplyType) -> woody:rpc_type().
get_rpc_reply_type(ReplyType) ->
    case ReplyType of
        oneway_void -> cast;
        _ -> call
    end.
%% OTEL context span helpers
%% NOTE Those helpers are designed specifically to manage stacking spans during
%% woody client (or server) calls _inside_ one single process context.
%% Thus, use of process dictionary via `otel_ctx'.
-define(OTEL_SPANS_STACK, 'spans_ctx_stack').
-type span_key() :: atom() | binary() | string().
-type maybe_span_ctx() :: opentelemetry:span_ctx() | undefined.
%% @doc Pushes SpanCtx onto the per-process span stack kept inside the otel
%% context, remembering which span was current at push time so it can be
%% restored when the entry is popped.
-spec span_stack_put(span_key(), opentelemetry:span_ctx(), otel_ctx:t()) -> otel_ctx:t().
span_stack_put(Key, SpanCtx, Context) ->
    ParentSpanCtx = otel_tracer:current_span_ctx(Context),
    PriorStack = otel_ctx:get_value(Context, ?OTEL_SPANS_STACK, []),
    otel_ctx:set_value(Context, ?OTEL_SPANS_STACK, [{Key, SpanCtx, ParentSpanCtx} | PriorStack]).
%% @doc Looks up the span pushed under Key without removing it; returns
%% Default when no such entry is on the stack.
-spec span_stack_get(span_key(), otel_ctx:t(), maybe_span_ctx()) -> maybe_span_ctx().
span_stack_get(Key, Context, Default) ->
    case lists:keyfind(Key, 1, otel_ctx:get_value(Context, ?OTEL_SPANS_STACK, [])) of
        {_Key, SpanCtx, _ParentSpanCtx} ->
            SpanCtx;
        false ->
            Default
    end.
%% @doc Removes the entry pushed under Key and returns both the popped span
%% and the span that was current when it was pushed, together with the
%% context holding the shrunk stack.
-spec span_stack_pop(span_key(), otel_ctx:t()) ->
    {ok, opentelemetry:span_ctx(), maybe_span_ctx(), otel_ctx:t()} | {error, notfound}.
span_stack_pop(Key, Context) ->
    Stack = otel_ctx:get_value(Context, ?OTEL_SPANS_STACK, []),
    case lists:keytake(Key, 1, Stack) of
        {value, {_Key, SpanCtx, ParentSpanCtx}, RemainingStack} ->
            UpdatedContext = otel_ctx:set_value(Context, ?OTEL_SPANS_STACK, RemainingStack),
            {ok, SpanCtx, ParentSpanCtx, UpdatedContext};
        false ->
            {error, notfound}
    end.

View File

@ -0,0 +1,97 @@
%% @doc A gen_server based OTEL span collector for common tests: receives
%% exported spans (via `otel_exporter_pid') as `{span, Span}' messages and
%% lets tests query fully assembled traces by trace id.
-module(woody_ct_otel_collector).

-behaviour(gen_server).

-export([
    start_link/0,
    get_trace/1,
    get_traces/0
]).

-export([
    init/1,
    handle_call/3,
    handle_cast/2,
    handle_info/2
]).

-include_lib("opentelemetry/include/otel_span.hrl").

-type span() :: #span{}.
-type span_node() :: #{span := span(), children := [span_node()]}.
-type trace() :: #{
    id := opentelemetry:trace_id(),
    node := span_node()
}.

%

-spec start_link() -> {ok, pid()}.
start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).

%% @doc Returns the trace with the given id as a tree of span nodes.
-spec get_trace(opentelemetry:trace_id()) -> {ok, trace()} | {error, notfound}.
get_trace(TraceId) ->
    gen_server:call(?MODULE, {trace, TraceId}).

%% @doc Returns every collected trace.
-spec get_traces() -> {ok, [trace()]}.
get_traces() ->
    gen_server:call(?MODULE, traces).

%% State maps a trace id to the (unordered) list of spans seen for it so far.
-spec init(_) -> {ok, _}.
init(_Opts) ->
    {ok, #{}}.

-spec handle_info(_, T) -> {noreply, T}.
handle_info({span, Span}, State0) ->
    %% Exported spans arrive as plain messages; bucket them per trace id.
    State1 = maps:update_with(Span#span.trace_id, fun(V) -> [Span | V] end, [Span], State0),
    {noreply, State1};
handle_info(_Msg, State) ->
    {noreply, State}.

%% NOTE Spec fixed: the call clauses below reply, they do not only 'noreply'.
-spec handle_call(_, _, T) -> {reply, _, T} | {noreply, T}.
handle_call(traces, _From, State) ->
    Result = maps:map(fun(TraceId, Spans) -> build_trace(TraceId, Spans) end, State),
    %% Wrapped in an ok-tuple to honor the declared get_traces/0 contract,
    %% mirroring the shape returned for the single-trace query.
    {reply, {ok, maps:values(Result)}, State};
handle_call({trace, TraceId}, _From, State) ->
    Result =
        case maps:get(TraceId, State, undefined) of
            undefined -> {error, notfound};
            Spans -> {ok, build_trace(TraceId, Spans)}
        end,
    {reply, Result, State};
handle_call(_Msg, _From, State) ->
    %% Unknown calls are deliberately left unanswered (caller times out).
    {noreply, State}.

-spec handle_cast(_, T) -> {noreply, T}.
handle_cast(_Msg, State) ->
    {noreply, State}.

%

%% Builds the span tree for one trace: the earliest span without a parent is
%% taken as the root; every span is then attached under the node whose span
%% id matches its parent span id.
build_trace(TraceId, Spans0) ->
    Spans1 = lists:sort(fun(#span{start_time = A}, #span{start_time = B}) -> A < B end, Spans0),
    [RootSpan | _] = lists:filter(
        fun
            (#span{parent_span_id = undefined}) -> true;
            (_) -> false
        end,
        Spans1
    ),
    #{
        id => TraceId,
        node => lists:foldl(fun(Span, RootNode) -> update_node(Span, RootNode) end, new_span_node(RootSpan), Spans1)
    }.

%% Attaches Span as a child of the node whose span id equals Span's parent
%% id; otherwise recurses into the node's children.
update_node(
    Span = #span{parent_span_id = ParentId},
    SpanNode = #{span := #span{span_id = ParentId}, children := Children}
) ->
    SpanNode#{children => [new_span_node(Span) | Children]};
update_node(Span, SpanNode = #{children := Children}) ->
    SpanNode#{children => lists:map(fun(Child) -> update_node(Span, Child) end, Children)}.

new_span_node(Span) ->
    #{span => Span, children => []}.

View File

@ -3,6 +3,9 @@
-include_lib("common_test/include/ct.hrl").
-include_lib("stdlib/include/assert.hrl").
-include_lib("hackney/include/hackney_lib.hrl").
-include_lib("opentelemetry_api/include/opentelemetry.hrl").
-include_lib("opentelemetry/include/otel_span.hrl").
-include_lib("prometheus/include/prometheus_model.hrl").
-include("woody_test_thrift.hrl").
-include("woody_defs.hrl").
@ -111,6 +114,11 @@
-define(WEAPON_STACK_OVERFLOW, pos_out_of_boundaries).
-define(BAD_POWERUP_REPLY, powerup_unknown).
-define(OTEL_SPAN(Name, Children), #{span := #span{name = Name}, children := Children}).
-define(OTEL_SPAN(Name, SpanAttributes, Children), #{
span := #span{name = Name, attributes = SpanAttributes}, children := Children
}).
-type config() :: [{atom(), any()}].
-type case_name() :: atom().
-type group_name() :: atom().
@ -273,6 +281,8 @@ cross_test_groups() ->
}
}.
-define(RANCH_REF, woody_ct).
%%
%% starting/stopping
%%
@ -280,20 +290,13 @@ init_per_suite(C) ->
% dbg:tracer(), dbg:p(all, c),
% dbg:tpl({woody_joint_workers, '_', '_'}, x),
%%Apps = genlib_app:start_application_with(woody, [{trace_http_server, true}]),
application:set_env(hackney, mod_metrics, woody_client_metrics),
application:set_env(woody, woody_client_metrics_options, #{
metric_key_mapping => #{
[hackney, nb_requests] => [hackney, requests_in_process]
}
}),
application:set_env(how_are_you, metrics_handlers, [
{woody_api_hay, #{
interval => 1000
}}
]),
ok = application:set_env(hackney, mod_metrics, woody_hackney_prometheus),
{ok, MetricsApps} = application:ensure_all_started(prometheus),
{ok, Apps} = application:ensure_all_started(woody),
{ok, HayApps} = application:ensure_all_started(how_are_you),
[{apps, HayApps ++ Apps} | C].
ok = woody_ranch_prometheus_collector:setup(),
ok = woody_hackney_prometheus_collector:setup(),
{ok, OtelApps} = setup_opentelemetry(),
[{apps, OtelApps ++ MetricsApps ++ Apps} | C].
end_per_suite(C) ->
% unset so it won't report metrics next suite
@ -326,21 +329,21 @@ init_per_testcase(find_multiple_pools_test, C) ->
{ok, Sup} = start_tc_sup(),
Pool1 = {swords, 15000, 100},
Pool2 = {shields, undefined, 50},
ok = start_woody_server_with_pools(woody_ct, Sup, ['Weapons', 'Powerups'], [Pool1, Pool2], C),
ok = start_woody_server_with_pools(?RANCH_REF, Sup, ['Weapons', 'Powerups'], [Pool1, Pool2], C),
[{sup, Sup} | C];
init_per_testcase(calls_with_cache, C) ->
{ok, Sup} = start_tc_sup(),
{ok, _} = start_caching_client(caching_client_ct, Sup),
{ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], C),
{ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], C),
[{sup, Sup} | C];
init_per_testcase(server_handled_client_timeout_test, C) ->
{ok, Sup} = start_tc_sup(),
{ok, _} = supervisor:start_child(Sup, server_timeout_event_handler:child_spec()),
{ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], server_timeout_event_handler, C),
{ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], server_timeout_event_handler, C),
[{sup, Sup} | C];
init_per_testcase(_, C) ->
{ok, Sup} = start_tc_sup(),
{ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], C),
{ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], C),
[{sup, Sup} | C].
init_per_group(woody_resolver, Config) ->
@ -352,6 +355,20 @@ init_per_group(Name, Config) ->
end_per_group(_Name, _Config) ->
ok.
%% Configures and boots the opentelemetry SDK so that every span is exported
%% synchronously to the in-suite pid collector. Returns the list of apps the
%% suite must stop at teardown.
setup_opentelemetry() ->
    OtelEnv = [
        {text_map_propagators, [
            baggage,
            trace_context
        ]},
        {span_processor, simple},
        {traces_exporter, {otel_exporter_pid, woody_ct_otel_collector}}
    ],
    ok = application:set_env([{opentelemetry, OtelEnv}]),
    ok = application:start(opentelemetry),
    {ok, [opentelemetry]}.
%% Starts the per-testcase supervisor, registered under the suite module
%% name, with this module providing the supervisor callbacks.
start_tc_sup() ->
    RegName = {local, ?MODULE},
    supervisor:start_link(RegName, ?MODULE, []).
@ -359,7 +376,7 @@ start_error_server(TC, Sup) ->
Code = get_fail_code(TC),
Dispatch = cowboy_router:compile([{'_', [{?PATH_WEAPONS, ?MODULE, Code}]}]),
Server = ranch:child_spec(
woody_ct,
?RANCH_REF,
ranch_tcp,
[{ip, ?SERVER_IP}, {port, ?SERVER_PORT}],
cowboy_clear,
@ -673,8 +690,37 @@ call_pass_thru_ok_test(C) ->
Armor = <<"AntiGrav Boots">>,
Context = make_context(<<"call_pass_thru_ok">>),
Expect = {ok, genlib_map:get(Armor, powerups())},
Baggage = #{<<"service">> => <<"Powerups">>},
SpanName = <<"test Powerups:proxy_get_powerup">>,
%% NOTE: `otel_baggage` uses `otel_ctx` that relies on __process dictionary__
ok = otel_baggage:set(Baggage),
Tracer = opentelemetry:get_application_tracer(?MODULE),
%% NOTE: Starts span and sets its context as current span in __process dictionary__
SpanCtx = otel_tracer:set_current_span(otel_tracer:start_span(Tracer, SpanName, #{})),
Expect = call(Context, 'Powerups', proxy_get_powerup, {Armor, self_to_bin()}, C),
{ok, _} = receive_msg(Armor, Context).
%% NOTE: Ends span and puts context into __process dictionary__
_ = otel_tracer:set_current_span(otel_span:end_span(SpanCtx, undefined)),
{ok, _} = receive_msg(Armor, Context),
{ok, #{node := Root}} = woody_ct_otel_collector:get_trace(SpanCtx#span_ctx.trace_id),
%% Prepare otel attributes to match against
ProxyAttrs = mk_otel_attributes(Baggage),
Attrs = mk_otel_attributes(Baggage#{<<"proxied">> => <<"true">>}),
?assertMatch(
?OTEL_SPAN(SpanName, [
?OTEL_SPAN(<<"client Powerups:proxy_get_powerup">>, [
%% Upon invocation event baggage is expected to be put in span attributes
?OTEL_SPAN(<<"server Powerups:proxy_get_powerup">>, ProxyAttrs, [
%% New client starts call here
?OTEL_SPAN(<<"client Powerups:get_powerup">>, [
?OTEL_SPAN(<<"server Powerups:get_powerup">>, Attrs, [
%% Expect no child spans uphere
])
])
])
])
]),
Root
).
call_pass_thru_except_test(C) ->
Armor = <<"Shield Belt">>,
@ -775,7 +821,18 @@ call_fail_w_no_headers(Id, Class, _Code) ->
find_multiple_pools_test(_) ->
true = is_pid(hackney_pool:find_pool(swords)),
true = is_pid(hackney_pool:find_pool(shields)).
true = is_pid(hackney_pool:find_pool(shields)),
MF = smuggle_mf_return_value(fun(F) -> woody_hackney_prometheus_collector:collect_mf(default, F) end),
%% We can't know order and values from hackney's pool info, but we can
%% expect that values for those pools must be provided. However exact number
%% of values can vary based on order of testcase execution and other
%% side-effects.
ValuesCount = length([swords, shields]) * length([queue_count, in_use_count, free_count]),
?assertMatch(
#'MetricFamily'{type = 'GAUGE', name = <<"woody_hackney_pool_usage">>, metric = Values} when
length(Values) >= ValuesCount,
MF
).
call_thrift_multiplexed_test(_) ->
Client = make_thrift_multiplexed_client(
@ -985,7 +1042,7 @@ init(_) ->
{ok,
{
{one_for_one, 1, 1},
[]
[#{id => woody_ct_otel_collector, start => {woody_ct_otel_collector, start_link, []}}]
}}.
%%
@ -1015,6 +1072,8 @@ handle_function(ProxyGetPowerup, {Name, To}, Context, _Opts) when
ProxyGetPowerup =:= proxy_get_powerup orelse
ProxyGetPowerup =:= bad_proxy_get_powerup
->
%% NOTE: Merges baggage, requires #{binary() => binary()}
ok = otel_baggage:set(#{<<"proxied">> => <<"true">>}),
% NOTE
% Client may return `{exception, _}` tuple with some business level exception
% here, yet handler expects us to `throw/1` them. This is expected here it
@ -1054,6 +1113,12 @@ handle_event(
->
_ = handle_proxy_event(Event, Code, TraceId, ParentId),
log_event(Event, RpcId, Meta);
%% Handle invocation
handle_event(Event = ?EV_INVOKE_SERVICE_HANDLER, RpcId, Meta, _) ->
log_event(Event, RpcId, Meta),
SpanCtx = otel_tracer:current_span_ctx(),
_ = otel_span:set_attributes(SpanCtx, maps:map(fun(_Key, {Value, _Metadata}) -> Value end, otel_baggage:get_all())),
ok;
handle_event(Event, RpcId, Meta, _) ->
log_event(Event, RpcId, Meta).
@ -1084,6 +1149,7 @@ handle_proxy_event(Event, Code, Descr) ->
erlang:error(badarg, [Event, Code, Descr]).
%% Fans each woody event out to the OTEL event handler first (must succeed),
%% then to the regular CT event handler.
log_event(Event, RpcId, EventMeta) ->
    ok = woody_event_handler_otel:handle_event(Event, RpcId, EventMeta, []),
    woody_ct_event_h:handle_event(Event, RpcId, EventMeta, []).
%%
@ -1169,6 +1235,7 @@ gun_test_basic(Id, Gun, Expect, C) ->
catch
Class:Reason -> ok
end,
ok = assert_connections_metrics(),
check_msg(Gun, Context).
get_except({ok, _}) ->
@ -1285,3 +1352,50 @@ handle_sleep(Context) ->
BinTimer ->
timer:sleep(binary_to_integer(BinTimer))
end.
%% Wraps a plain map into an `otel_attributes' structure honoring the span
%% attribute limits currently configured in the SDK.
mk_otel_attributes(Attributes) ->
    CountLimit = otel_span_limits:attribute_count_limit(),
    ValueLengthLimit = otel_span_limits:attribute_value_length_limit(),
    otel_attributes:new(Attributes, CountLimit, ValueLengthLimit).
%% Asserts that the ranch collector reports at least one active connection
%% for our listener and that the listener label is the expected server ref.
assert_connections_metrics() ->
    %% Capture the metric family produced by the ranch collector.
    MetricFamily = smuggle_mf_return_value(fun(Callback) ->
        woody_ranch_prometheus_collector:collect_mf(default, Callback)
    end),
    %% Sadly we can't match on listener ref since its an iodata representation of tuple.
    ?assertMatch(
        #'MetricFamily'{
            name = <<"woody_ranch_listener_connections">>,
            type = 'GAUGE',
            metric = [
                #'Metric'{
                    label = [#'LabelPair'{name = <<"listener">>, value = _Ref}],
                    gauge = #'Gauge'{value = V}
                }
            ]
        } when V >= 1,
        MetricFamily
    ),
    %% NOTE See `prometheus_model_helpers:ensure_binary_or_string/1`
    RefImpl1 = io_lib:format("~p", [{woody_server_thrift_http_handler, ?RANCH_REF}]),
    RefImpl2 = io_lib:format("~p", [{woody_server_thrift_v2, ?RANCH_REF}]),
    ?assertMatch(
        Expected when Expected =:= RefImpl1 orelse Expected =:= RefImpl2,
        get_metric_ref(hd(MetricFamily#'MetricFamily'.metric))
    ).
%% Extracts the value of the <<"listener">> label from a metric, or
%% 'undefined' when the metric carries no such label.
get_metric_ref(#'Metric'{label = Labels}) ->
    get_metric_ref_(Labels).

get_metric_ref_(Labels) ->
    case [Value || #'LabelPair'{name = <<"listener">>, value = Value} <- Labels] of
        [Value | _] -> Value;
        [] -> undefined
    end.
%% The collector API forces collect_mf/2 callbacks to return 'ok', so the
%% metric family is smuggled out through the process dictionary instead of
%% the return value, which also keeps dialyzer satisfied.
smuggle_mf_return_value(CollectFun) ->
    _ = CollectFun(fun(MetricFamily) ->
        _ = erlang:put(smuggle, MetricFamily),
        ok
    end),
    erlang:get(smuggle).