Merge branch 'master' into compat/woody_ex

This commit is contained in:
Aleksey Kashapov 2024-05-17 13:45:54 +03:00
commit c4f6b9c62c
No known key found for this signature in database
GPG Key ID: E7A24E92707DA3D0
21 changed files with 1098 additions and 94 deletions

46
.github/settings.yml vendored Normal file
View File

@ -0,0 +1,46 @@
# These settings are synced to GitHub by https://probot.github.io/apps/settings/
_extends: .github
teams:
- name: devs
permission: push
- name: bots
permission: push
- name: maintainers
permission: push
- name: erlang
permission: maintain
branches:
- name: "master"
# https://developer.github.com/v3/repos/branches/#update-branch-protection
# Branch Protection settings. Set to null to disable
protection:
required_pull_request_reviews:
required_approving_review_count: 1
dismiss_stale_reviews: false
require_code_owner_reviews: true
dismissal_restrictions: {}
required_status_checks:
strict: true
checks:
- context: "Load .env"
app_id: 15368
- context: "Run checks / Build"
app_id: 15368
- context: "Run checks / Check"
app_id: 15368
- context: "Run checks / Dialyze"
app_id: 15368
- context: "Run checks / Test"
app_id: 15368
- context: "codecov/patch"
app_id: 254
- context: "codecov/project"
app_id: 254
enforce_admins: false
required_conversation_resolution: false
required_linear_history: true
restrictions: null
allow_force_pushes: false
allow_deletions: false

10
.github/workflows/basic-linters.yml vendored Normal file
View File

@ -0,0 +1,10 @@
name: Vality basic linters
on:
pull_request:
branches:
- "*"
jobs:
lint:
uses: valitydev/base-workflows/.github/workflows/basic-linters.yml@v1

View File

@ -18,7 +18,7 @@ jobs:
thrift-version: ${{ steps.thrift-version.outputs.version }}
steps:
- name: Checkout repository
uses: actions/checkout@v2
uses: actions/checkout@v3
- run: grep -v '^#' .env >> $GITHUB_ENV
- id: otp-version
run: echo "::set-output name=version::$OTP_VERSION"
@ -30,9 +30,10 @@ jobs:
run:
name: Run checks
needs: setup
uses: valitydev/erlang-workflows/.github/workflows/erlang-parallel-build.yml@v1.0.9
uses: valitydev/erlang-workflows/.github/workflows/erlang-parallel-build.yml@v1.0.10
with:
otp-version: ${{ needs.setup.outputs.otp-version }}
rebar-version: ${{ needs.setup.outputs.rebar-version }}
use-thrift: true
thrift-version: ${{ needs.setup.outputs.thrift-version }}
upload-coverage: false

View File

@ -173,4 +173,4 @@
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
END OF TERMS AND CONDITIONS

107
README.md
View File

@ -168,6 +168,80 @@ check_loan_limits(Limits, Context, N) ->
### Woody Event Handler
Интерфейс для получения и логирования событий RPC библиотеки. Также содержит вспомогательные функции для удобного форматирования событий. Пример реализации _event handler_'а - [woody_event_handler_default.erl](src/woody_event_handler_default.erl).
Через опции обработчика можно сообщить параметры соответствия событий RPC для уровня логирования:
``` erlang
woody_event_handler_default:handle_event(Event, RpcId, Meta, #{
formatter_opts => ...,
events_severity => #{
['call service'] => debug,
...
}
}).
```
Где эти параметры имеют значения по умолчанию в следующем виде:
``` erlang
#{
events_severity => #{
%% Пограничные события работы клиента
['client begin'] => debug,
['client end'] => debug,
%% Начало вызова сервиса, перед формированием запроса
['call service'] => info,
%% Результат вызова сервиса на клиенте
['service result'] => info,
['service result', error] => error,
%% Событие состоявшегося вызова с возвращённой ошибкой в качестве
%% результата
['service result', warning] => warning,
%% Клиентские события, включая обнаружения хоста
['client send'] => debug,
['client resolve begin'] => debug,
['client resolve result'] => debug,
['client receive'] => debug,
['client receive', error] => warning,
%% Непосредственные события обслуживания запроса сервером
['server receive'] => debug,
['server send'] => debug,
['server send', error] => warning,
%% Начало обслуживания вызова функции сервиса
['invoke service handler'] => info,
%% Завершение обслуживание вызова с разным итогом
['service handler result'] => info,
['service handler result', error, business] => info,
['service handler result', error, system] => error,
%% Обслуживание вызова завершилось поправимой ошибкой;
%% по крайней мере она не в рамках бизнес-логики но и не системное
%% исключение
['service handler result', warning] => warning,
%% События кеширующей обертки клиента
['client cache begin'] => debug,
['client cache end'] => debug,
['client cache hit'] => info,
['client cache miss'] => debug,
['client cache update'] => debug,
['client cache result'] => debug,
%% Внутренние ошибки с разным контекстом/происхождением
['internal error', system] => error,
['internal error', business] => warning,
%% Событие трассировки на уровне woody, см. далее
['trace event'] => debug
}
}.
```
### Tracing
Можно динамически включать и выключать трассировку http запросов и ответов.
@ -176,3 +250,36 @@ check_loan_limits(Limits, Context, N) ->
application:set_env(woody, trace_http_server, true).
application:unset_env(woody, trace_http_server).
```
### Prometheus metrics
Чтобы осуществлять экспорт метрик следует добавить [соответствующий хэндлер](https://github.com/deadtrickster/prometheus-cowboy#exporting-metrics-with-handlers) для cowboy-сервера.
``` erlang
{deps, [
...
{prometheus_cowboy, "0.1.8"}
]}
```
Для сбора серверных метрик необходимо на старте приложения объявить их
``` erlang
ok = woody_ranch_prometheus_collector:setup()
```
Если дополнительно интересуют все метрики ковбоя то можно добавить [реализацию обсервера из библиотеки](https://github.com/deadtrickster/prometheus-cowboy?tab=readme-ov-file#exporting-cowboy2-metrics).
Для сбора клиентских метрик необходимо на старте приложения объявить их
``` erlang
ok = woody_hackney_prometheus_collector:setup()
```
Это будет публиковать целочисленные значения в шкале 'woody_hackney_pool_usage' с метками `pool` в качестве названия пула и `status` в качестве параметра соответствующего значения:
- `in_use_count` -- используемые соединения в пуле;
- `free_count` -- свободное количество в пуле;
- `queue_count` -- очередь за свободными соединениями.
**TODO** Возможно, стоит рассмотреть публикацию метрик по количеству исполняемых запросов в общем, с разбивкой по хосту, и количеству новых и переиспользуемых соединений в каждом из пулов. [Хакни это предоставляет](https://github.com/benoitc/hackney/tree/1.18.0#metrics).

View File

@ -31,7 +31,9 @@
{gproc, "0.9.0"},
{cache, "2.3.3"},
{snowflake, {git, "https://github.com/valitydev/snowflake.git", {branch, "master"}}},
{genlib, {git, "https://github.com/valitydev/genlib.git", {branch, "master"}}}
{genlib, {git, "https://github.com/valitydev/genlib.git", {branch, "master"}}},
{prometheus, "4.8.1"},
{opentelemetry_api, "1.2.1"}
]}.
%% XRef checks
@ -102,17 +104,15 @@
{thrift,
{git, "https://github.com/valitydev/thrift_erlang.git",
{ref, "3f3e11246d90aefa8f58b35e4f2eab14c0c28bd2"}}},
{woody_api_hay,
{git, "https://github.com/valitydev/woody_api_hay.git",
{ref, "4c39134cddaa9bf6fb8db18e7030ae64f1efb3a9"}}},
{damsel,
{git, "https://github.com/valitydev/damsel.git", {ref, "3fa6f31db54b2ae781b27898ab4daf56bb36eb36"}}},
{mg_proto,
{git, "https://github.com/valitydev/machinegun-proto.git",
{ref, "ebae56fe2b3e79e4eb34afc8cb55c9012ae989f8"}}}
{ref, "ebae56fe2b3e79e4eb34afc8cb55c9012ae989f8"}}},
{opentelemetry, "1.3.0"}
]},
{dialyzer, [
{plt_extra_apps, [thrift, how_are_you, eunit, proper, common_test, cth_readable]}
{plt_extra_apps, [thrift, prometheus, eunit, proper, common_test, cth_readable, opentelemetry]}
]}
]}
]}.

View File

@ -12,7 +12,13 @@
{<<"idna">>,{pkg,<<"idna">>,<<"6.1.1">>},1},
{<<"metrics">>,{pkg,<<"metrics">>,<<"1.0.1">>},1},
{<<"mimerl">>,{pkg,<<"mimerl">>,<<"1.2.0">>},1},
{<<"opentelemetry_api">>,{pkg,<<"opentelemetry_api">>,<<"1.2.1">>},0},
{<<"opentelemetry_semantic_conventions">>,
{pkg,<<"opentelemetry_semantic_conventions">>,<<"0.2.0">>},
1},
{<<"parse_trans">>,{pkg,<<"parse_trans">>,<<"3.3.1">>},1},
{<<"prometheus">>,{pkg,<<"prometheus">>,<<"4.8.1">>},0},
{<<"quantile_estimator">>,{pkg,<<"quantile_estimator">>,<<"0.2.1">>},1},
{<<"ranch">>,{pkg,<<"ranch">>,<<"1.8.0">>},1},
{<<"snowflake">>,
{git,"https://github.com/valitydev/snowflake.git",
@ -31,7 +37,11 @@
{<<"idna">>, <<"8A63070E9F7D0C62EB9D9FCB360A7DE382448200FBBD1B106CC96D3D8099DF8D">>},
{<<"metrics">>, <<"25F094DEA2CDA98213CECC3AEFF09E940299D950904393B2A29D191C346A8486">>},
{<<"mimerl">>, <<"67E2D3F571088D5CFD3E550C383094B47159F3EEE8FFA08E64106CDF5E981BE3">>},
{<<"opentelemetry_api">>, <<"7B69ED4F40025C005DE0B74FCE8C0549625D59CB4DF12D15C32FE6DC5076FF42">>},
{<<"opentelemetry_semantic_conventions">>, <<"B67FE459C2938FCAB341CB0951C44860C62347C005ACE1B50F8402576F241435">>},
{<<"parse_trans">>, <<"16328AB840CC09919BD10DAB29E431DA3AF9E9E7E7E6F0089DD5A2D2820011D8">>},
{<<"prometheus">>, <<"FA76B152555273739C14B06F09F485CF6D5D301FE4E9D31B7FF803D26025D7A0">>},
{<<"quantile_estimator">>, <<"EF50A361F11B5F26B5F16D0696E46A9E4661756492C981F7B2229EF42FF1CD15">>},
{<<"ranch">>, <<"8C7A100A139FD57F17327B6413E4167AC559FBC04CA7448E9BE9057311597A1D">>},
{<<"ssl_verify_fun">>, <<"CF344F5692C82D2CD7554F5EC8FD961548D4FD09E7D22F5B62482E5AEAEBD4B0">>},
{<<"unicode_util_compat">>, <<"BC84380C9AB48177092F43AC89E4DFA2C6D62B40B8BD132B1059ECC7232F9A78">>}]},
@ -45,7 +55,11 @@
{<<"idna">>, <<"92376EB7894412ED19AC475E4A86F7B413C1B9FBB5BD16DCCD57934157944CEA">>},
{<<"metrics">>, <<"69B09ADDDC4F74A40716AE54D140F93BEB0FB8978D8636EADED0C31B6F099F16">>},
{<<"mimerl">>, <<"F278585650AA581986264638EBF698F8BB19DF297F66AD91B18910DFC6E19323">>},
{<<"opentelemetry_api">>, <<"6D7A27B7CAD2AD69A09CABF6670514CAFCEC717C8441BEB5C96322BAC3D05350">>},
{<<"opentelemetry_semantic_conventions">>, <<"D61FA1F5639EE8668D74B527E6806E0503EFC55A42DB7B5F39939D84C07D6895">>},
{<<"parse_trans">>, <<"07CD9577885F56362D414E8C4C4E6BDF10D43A8767ABB92D24CBE8B24C54888B">>},
{<<"prometheus">>, <<"6EDFBE928D271C7F657A6F2C46258738086584BD6CAE4A000B8B9A6009BA23A5">>},
{<<"quantile_estimator">>, <<"282A8A323CA2A845C9E6F787D166348F776C1D4A41EDE63046D72D422E3DA946">>},
{<<"ranch">>, <<"49FBCFD3682FAB1F5D109351B61257676DA1A2FDBE295904176D5E521A2DDFE5">>},
{<<"ssl_verify_fun">>, <<"BDB0D2471F453C88FF3908E7686F86F9BE327D065CC1EC16FA4540197EA04680">>},
{<<"unicode_util_compat">>, <<"25EEE6D67DF61960CF6A794239566599B09E17E668D3700247BC498638152521">>}]}

View File

@ -10,7 +10,8 @@
cowboy,
hackney,
gproc,
cache
cache,
opentelemetry_api
]},
{optional_applications, [
thrift

View File

@ -128,7 +128,7 @@ send(Url, Body, Options, ResOpts, WoodyState) ->
Headers = add_host_header(OldUrl, make_woody_headers(Context)),
Options1 = set_defaults(Options),
Options2 = set_timeouts(Options1, Context),
HeaderList = maps:to_list(Headers),
HeaderList = otel_propagator_text_map:inject(maps:to_list(Headers)),
Result = hackney:request(post, NewUrl, HeaderList, Body, maps:to_list(Options2)),
transform_request_results(Result);
{error, Reason} ->

View File

@ -155,7 +155,8 @@ send_call(Buffer, #{url := Url} = Opts, WoodyState) ->
% reusing keep-alive connections to dead hosts
case woody_resolver:resolve_url(Url, WoodyState, ResolverOpts) of
{ok, {OldUrl, NewUrl}, ConnectOpts} ->
Headers = add_host_header(OldUrl, make_woody_headers(Context)),
Headers0 = add_host_header(OldUrl, make_woody_headers(Context)),
Headers1 = otel_propagator_text_map:inject(Headers0),
TransportOpts1 = set_defaults(TransportOpts),
TransportOpts2 = set_timeouts(TransportOpts1, Context),
% NOTE
@ -163,7 +164,7 @@ send_call(Buffer, #{url := Url} = Opts, WoodyState) ->
% `set_tls_overrides/2`.
TransportOpts3 = append_connect_opts(TransportOpts2, ConnectOpts),
TransportOpts4 = set_tls_overrides(TransportOpts3, OldUrl),
Result = hackney:request(post, NewUrl, Headers, Buffer, maps:to_list(TransportOpts4)),
Result = hackney:request(post, NewUrl, Headers1, Buffer, maps:to_list(TransportOpts4)),
handle_response(Result, WoodyState);
{error, Reason} ->
Error = {error, {resolve_failed, Reason}},

View File

@ -4,6 +4,7 @@
-export([handle_event/3, handle_event/4]).
-export([get_event_severity/2]).
-export([get_event_severity/3]).
-export([format_event/3, format_event/4]).
-export([format_meta/2, format_meta/3]).
-export([format_rpc_id/1]).
@ -221,7 +222,8 @@
-export_type([meta/0]).
-type options() :: #{
formatter_opts => woody_event_formatter:opts()
formatter_opts => woody_event_formatter:opts(),
events_severity => #{nonempty_list(atom()) => severity()}
}.
-export_type([options/0]).
@ -379,68 +381,77 @@ format_event(UnknownEventType, Meta, _Opts) ->
{" unknown woody event type '~s' with meta ~p", [UnknownEventType, Meta]}.
-spec get_event_severity(event(), event_meta()) -> severity().
get_event_severity(?EV_CLIENT_BEGIN, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_END, _Meta) ->
debug;
get_event_severity(?EV_CALL_SERVICE, _Meta) ->
info;
get_event_severity(?EV_SERVICE_RESULT, #{status := ok}) ->
info;
get_event_severity(?EV_SERVICE_RESULT, #{status := error, stack := _Stack}) ->
error;
get_event_severity(?EV_SERVICE_RESULT, #{status := error, result := _Error}) ->
warning;
get_event_severity(?EV_CLIENT_SEND, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_RESOLVE_RESULT, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_RECEIVE, #{status := ok}) ->
debug;
get_event_severity(?EV_CLIENT_RECEIVE, #{status := error}) ->
warning;
get_event_severity(?EV_SERVER_RECEIVE, _Meta) ->
debug;
get_event_severity(?EV_SERVER_SEND, #{status := ok}) ->
debug;
get_event_severity(?EV_SERVER_SEND, #{status := error}) ->
warning;
get_event_severity(?EV_INVOKE_SERVICE_HANDLER, _Meta) ->
info;
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := ok}) ->
info;
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := business}) ->
info;
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := system, stack := _Stack}) ->
error;
get_event_severity(?EV_SERVICE_HANDLER_RESULT, #{status := error, class := system, result := _Error}) ->
warning;
get_event_severity(?EV_CLIENT_CACHE_BEGIN, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_CACHE_END, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_CACHE_HIT, _Meta) ->
info;
get_event_severity(?EV_CLIENT_CACHE_MISS, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_CACHE_UPDATE, _Meta) ->
debug;
get_event_severity(?EV_CLIENT_CACHE_RESULT, _Meta) ->
debug;
get_event_severity(?EV_INTERNAL_ERROR, #{error := _, class := _Class, stack := _Stack}) ->
error;
get_event_severity(?EV_INTERNAL_ERROR, #{error := _, reason := _Reason}) ->
warning;
get_event_severity(?EV_TRACE, _Meta) ->
debug;
get_event_severity(_UnknownEventType, _Meta) ->
warning.
%% @doc Resolve the log severity for an event using default handler options
%% (no 'events_severity' overrides) — delegates to get_event_severity/3.
get_event_severity(Event, Meta) ->
    get_event_severity(Event, Meta, #{}).
%% @doc Resolve the log severity for an event. Each clause builds an "event
%% code" — the event name plus optional qualifiers (error/warning, class) —
%% which may be overridden via the 'events_severity' key of the handler
%% options; the third argument to map_severity/3 is the built-in default.
-spec get_event_severity(event(), event_meta(), options()) -> severity().
get_event_severity(?EV_CLIENT_BEGIN = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_END = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CALL_SERVICE = Event, _Meta, Opts) ->
    map_severity([Event], Opts, info);
get_event_severity(?EV_SERVICE_RESULT = Event, #{status := ok}, Opts) ->
    map_severity([Event], Opts, info);
%% A stacktrace in the meta marks a system failure — defaults to 'error'.
get_event_severity(?EV_SERVICE_RESULT = Event, #{status := error, stack := _Stack}, Opts) ->
    map_severity([Event, error], Opts, error);
%% An error returned as a plain result is recoverable — defaults to 'warning'.
get_event_severity(?EV_SERVICE_RESULT = Event, #{status := error, result := _Error}, Opts) ->
    map_severity([Event, warning], Opts, warning);
get_event_severity(?EV_CLIENT_SEND = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_RESOLVE_BEGIN = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_RESOLVE_RESULT = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_RECEIVE = Event, #{status := ok}, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_RECEIVE = Event, #{status := error}, Opts) ->
    map_severity([Event, error], Opts, warning);
get_event_severity(?EV_SERVER_RECEIVE = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_SERVER_SEND = Event, #{status := ok}, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_SERVER_SEND = Event, #{status := error}, Opts) ->
    map_severity([Event, error], Opts, warning);
get_event_severity(?EV_INVOKE_SERVICE_HANDLER = Event, _Meta, Opts) ->
    map_severity([Event], Opts, info);
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := ok}, Opts) ->
    map_severity([Event], Opts, info);
%% Business errors are expected outcomes — default to 'info'.
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := business}, Opts) ->
    map_severity([Event, error, business], Opts, info);
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := system, stack := _Stack}, Opts) ->
    map_severity([Event, error, system], Opts, error);
get_event_severity(?EV_SERVICE_HANDLER_RESULT = Event, #{status := error, class := system, result := _Error}, Opts) ->
    map_severity([Event, warning], Opts, warning);
get_event_severity(?EV_CLIENT_CACHE_BEGIN = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_CACHE_END = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_CACHE_HIT = Event, _Meta, Opts) ->
    map_severity([Event], Opts, info);
get_event_severity(?EV_CLIENT_CACHE_MISS = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_CACHE_UPDATE = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
get_event_severity(?EV_CLIENT_CACHE_RESULT = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
%% Internal errors: presence of 'stack' distinguishes system from business.
get_event_severity(?EV_INTERNAL_ERROR = Event, #{error := _, class := _Class, stack := _Stack}, Opts) ->
    map_severity([Event, system], Opts, error);
get_event_severity(?EV_INTERNAL_ERROR = Event, #{error := _, reason := _Reason}, Opts) ->
    map_severity([Event, business], Opts, warning);
get_event_severity(?EV_TRACE = Event, _Meta, Opts) ->
    map_severity([Event], Opts, debug);
%% Unknown events are still loggable, but conspicuously so.
get_event_severity(UnknownEventType, _Meta, Opts) ->
    map_severity([UnknownEventType], Opts, warning).
%%
%% Internal functions
%%
%% @doc Look up the severity configured for an event code in the handler
%% options; fall back to the built-in default when no override is present.
-spec map_severity(nonempty_list(atom()), options(), severity()) -> severity().
map_severity(EventCode, Options, DefaultSeverity) ->
    case maps:find(events_severity, Options) of
        {ok, ConfiguredSeverities} ->
            maps:get(EventCode, ConfiguredSeverities, DefaultSeverity);
        error ->
            DefaultSeverity
    end.
-spec format_service_request(map(), options()) -> msg().
format_service_request(#{service_schema := {Module, Service}, function := Function, args := Args}, Opts) ->
woody_event_formatter:format_call(Module, Service, Function, Args, get_formatter_opts(Opts)).
@ -1582,4 +1593,157 @@ exception_test_() ->
)
].
-spec event_severity_defaults_test_() -> _.
%% Passing a severity map identical to the built-in defaults must yield the
%% same severities as the hard-coded per-clause fallbacks.
event_severity_defaults_test_() ->
    Opts = #{events_severity => default_severity_map()},
    Meta = #{role => server},
    [
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_BEGIN, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_END, Meta, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_CALL_SERVICE, Meta, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => ok}, Opts)),
        ?_assertEqual(error, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, stack => []}, Opts)),
        ?_assertEqual(warning, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, result => {}}, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_SEND, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RESOLVE_RESULT, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => ok}, Opts)),
        ?_assertEqual(warning, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => error}, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_SERVER_RECEIVE, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_SERVER_SEND, Meta#{status => ok}, Opts)),
        ?_assertEqual(warning, get_event_severity(?EV_SERVER_SEND, Meta#{status => error}, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_INVOKE_SERVICE_HANDLER, Meta, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => ok}, Opts)),
        ?_assertEqual(
            info, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => business}, Opts)
        ),
        ?_assertEqual(
            error,
            get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, stack => []}, Opts)
        ),
        ?_assertEqual(
            warning,
            get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, result => {}}, Opts)
        ),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_BEGIN, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_END, Meta, Opts)),
        ?_assertEqual(info, get_event_severity(?EV_CLIENT_CACHE_HIT, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_MISS, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_UPDATE, Meta, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_CLIENT_CACHE_RESULT, Meta, Opts)),
        ?_assertEqual(
            error, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, class => throw, stack => []}, Opts)
        ),
        ?_assertEqual(warning, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, reason => test}, Opts)),
        ?_assertEqual(debug, get_event_severity(?EV_TRACE, Meta, Opts))
    ].
-spec event_severity_all_info_test_() -> _.
%% Overriding every known event code to one severity must make every event
%% resolve to that severity, proving the override map shadows all defaults.
event_severity_all_info_test_() ->
    ExpectAll = info,
    Opts = #{events_severity => replace_all_with(ExpectAll, default_severity_map())},
    Meta = #{role => server},
    [
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_BEGIN, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_END, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CALL_SERVICE, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => ok}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, stack => []}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_RESULT, Meta#{status => error, result => {}}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_SEND, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RESOLVE_BEGIN, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RESOLVE_RESULT, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => ok}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_RECEIVE, Meta#{status => error}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_RECEIVE, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_SEND, Meta#{status => ok}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVER_SEND, Meta#{status => error}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_INVOKE_SERVICE_HANDLER, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => ok}, Opts)),
        ?_assertEqual(
            ExpectAll, get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => business}, Opts)
        ),
        ?_assertEqual(
            ExpectAll,
            get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, stack => []}, Opts)
        ),
        ?_assertEqual(
            ExpectAll,
            get_event_severity(?EV_SERVICE_HANDLER_RESULT, Meta#{status => error, class => system, result => {}}, Opts)
        ),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_BEGIN, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_END, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_HIT, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_MISS, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_UPDATE, Meta, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_CLIENT_CACHE_RESULT, Meta, Opts)),
        ?_assertEqual(
            ExpectAll, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, class => throw, stack => []}, Opts)
        ),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_INTERNAL_ERROR, Meta#{error => test, reason => test}, Opts)),
        ?_assertEqual(ExpectAll, get_event_severity(?EV_TRACE, Meta, Opts))
    ].
%% @doc Return SeverityMap with every value replaced by NewSeverity,
%% keeping the key set intact.
replace_all_with(NewSeverity, SeverityMap) ->
    maps:from_list([{EventCode, NewSeverity} || EventCode <- maps:keys(SeverityMap)]).
%% @doc Default event-code-to-severity mapping, mirroring the per-clause
%% fallbacks hard-coded in get_event_severity/3.
default_severity_map() ->
    #{
        %% Client lifecycle boundary events
        ['client begin'] => debug,
        ['client end'] => debug,
        %% Service call start, before the request is built
        ['call service'] => info,
        %% Service call result on the client side
        ['service result'] => info,
        ['service result', error] => error,
        ['service result', warning] => warning,
        %% Client transport events, including host resolution
        ['client send'] => debug,
        ['client resolve begin'] => debug,
        ['client resolve result'] => debug,
        ['client receive'] => debug,
        ['client receive', error] => warning,
        %% Server-side request servicing events
        ['server receive'] => debug,
        ['server send'] => debug,
        ['server send', error] => warning,
        %% Service handler invocation and its possible outcomes
        ['invoke service handler'] => info,
        ['service handler result'] => info,
        ['service handler result', error, business] => info,
        ['service handler result', error, system] => error,
        ['service handler result', warning] => warning,
        %% Client cache wrapper events
        ['client cache begin'] => debug,
        ['client cache end'] => debug,
        ['client cache hit'] => info,
        ['client cache miss'] => debug,
        ['client cache update'] => debug,
        ['client cache result'] => debug,
        %% Internal errors of system vs business origin
        ['internal error', system] => error,
        ['internal error', business] => warning,
        %% woody-level trace events
        ['trace event'] => debug
    }.
-endif.

View File

@ -22,7 +22,8 @@
handle_event(Event, RpcId, Meta, Opts) ->
EHOpts = get_event_handler_opts(Opts),
{Format, Msg} = woody_event_handler:format_event(Event, Meta, RpcId, EHOpts),
Function = get_logger_function(woody_event_handler:get_event_severity(Event, Meta)),
Severity = woody_event_handler:get_event_severity(Event, Meta, Opts),
Function = get_logger_function(Severity),
_ = error_logger:Function(Format, Msg),
ok.

View File

@ -0,0 +1,100 @@
-module(woody_event_handler_otel).
-include_lib("opentelemetry_api/include/opentelemetry.hrl").
-include("woody_defs.hrl").
-behaviour(woody_event_handler).
-export([handle_event/4]).
-spec handle_event(Event, RpcId, Meta, Opts) -> ok when
Event :: woody_event_handler:event(),
RpcId :: woody:rpc_id() | undefined,
Meta :: woody_event_handler:event_meta(),
Opts :: woody:options().
-define(IS_SPAN_START(Event), Event =:= ?EV_CALL_SERVICE orelse Event =:= ?EV_INVOKE_SERVICE_HANDLER).
-define(IS_SPAN_END(Event), Event =:= ?EV_SERVICE_RESULT orelse Event =:= ?EV_SERVICE_HANDLER_RESULT).
-define(IS_CLIENT_INTERNAL(Event),
Event =:= ?EV_CLIENT_CACHE_HIT orelse
Event =:= ?EV_CLIENT_CACHE_MISS orelse
Event =:= ?EV_CLIENT_CACHE_UPDATE orelse
Event =:= ?EV_CLIENT_SEND orelse
Event =:= ?EV_CLIENT_RECEIVE
).
%% Client events
%% Cache and transport client events become span events attached to the
%% currently open span for this RPC (looked up by the woody span id).
handle_event(Event, RpcID, _Meta = #{url := Url}, _Opts) when ?IS_CLIENT_INTERNAL(Event) ->
    with_span(otel_ctx:get_current(), mk_ref(RpcID), fun(SpanCtx) ->
        _ = otel_span:add_event(SpanCtx, atom_to_binary(Event), #{url => Url})
    end);
%% Internal error handling
handle_event(?EV_INTERNAL_ERROR, RpcID, Meta = #{error := Error, class := Class, reason := Reason}, _Opts) ->
    %% 'stack' is optional in the event meta; default to an empty trace.
    Stacktrace = maps:get(stack, Meta, []),
    Details = io_lib:format("~ts: ~ts", [Error, Reason]),
    with_span(otel_ctx:get_current(), mk_ref(RpcID), fun(SpanCtx) ->
        _ = otel_span:record_exception(SpanCtx, genlib:define(Class, error), Details, Stacktrace, #{}),
        %% A 'final' error additionally ends the span and clears the context.
        otel_maybe_cleanup(Meta, SpanCtx)
    end);
%% Registers span starts/ends for woody client calls and woody server function invocations.
handle_event(Event, RpcID, Meta, _Opts) when ?IS_SPAN_START(Event) ->
    Tracer = opentelemetry:get_application_tracer(?MODULE),
    span_start(Tracer, otel_ctx:get_current(), mk_ref(RpcID), mk_name(Meta), mk_opts(Event));
handle_event(Event, RpcID, Meta, _Opts) when ?IS_SPAN_END(Event) ->
    span_end(otel_ctx:get_current(), mk_ref(RpcID), fun(SpanCtx) ->
        otel_maybe_erroneous_result(SpanCtx, Meta)
    end);
%% All other woody events are not traced.
handle_event(_Event, _RpcID, _Meta, _Opts) ->
    ok.
%%
%% @doc Start a span, remember it in the span stack under Key so the matching
%% end event can retrieve it, and attach a context with it as current span.
span_start(Tracer, Ctx, Key, SpanName, Opts) ->
    SpanCtx = otel_tracer:start_span(Ctx, Tracer, SpanName, Opts),
    Ctx1 = woody_util:span_stack_put(Key, SpanCtx, Ctx),
    Ctx2 = otel_tracer:set_current_span(Ctx1, SpanCtx),
    _ = otel_ctx:attach(Ctx2),
    ok.
%% @doc Pop the span stored under Key, let OnBeforeEnd decorate it (e.g. to
%% record an erroneous result), end it, and restore the parent span as the
%% current one. A missing stack entry is ignored: an end event may arrive
%% without a matching start.
span_end(Ctx, Key, OnBeforeEnd) ->
    case woody_util:span_stack_pop(Key, Ctx) of
        {error, notfound} ->
            ok;
        {ok, SpanCtx, ParentSpanCtx, Ctx1} ->
            SpanCtx1 = OnBeforeEnd(SpanCtx),
            _ = otel_span:end_span(SpanCtx1, undefined),
            Ctx2 = otel_tracer:set_current_span(Ctx1, ParentSpanCtx),
            _ = otel_ctx:attach(Ctx2),
            ok
    end.
%% @doc Run F with the span stored under Key in Ctx's span stack, falling
%% back to the currently active span when no entry exists. F's return value
%% is discarded; always returns ok.
with_span(Ctx, Key, F) ->
    SpanCtx = woody_util:span_stack_get(Key, Ctx, otel_tracer:current_span_ctx(Ctx)),
    _ = F(SpanCtx),
    ok.
%% @doc On a 'final' internal error, end the span and drop the whole OTel
%% context so no stale span leaks into subsequent processing.
otel_maybe_cleanup(#{final := true}, SpanCtx) ->
    _ = otel_span:end_span(SpanCtx, undefined),
    otel_ctx:clear(),
    ok;
otel_maybe_cleanup(_Meta, _SpanCtx) ->
    ok.
%% @doc Record an exception event on the span when the handling result is an
%% error; 'except_class' and 'stack' are optional in the meta (defaults:
%% 'error' and []). Returns the span context unchanged either way.
otel_maybe_erroneous_result(SpanCtx, Meta = #{status := error, result := Reason}) ->
    Class = maps:get(except_class, Meta, error),
    Stacktrace = maps:get(stack, Meta, []),
    _ = otel_span:record_exception(SpanCtx, Class, Reason, Stacktrace, #{}),
    SpanCtx;
otel_maybe_erroneous_result(SpanCtx, _Meta) ->
    SpanCtx.
%% @doc Span kind per span-starting event: a service call opens a client
%% span, a handler invocation opens a server span. No other events start
%% spans, hence no catch-all clause.
mk_opts(?EV_CALL_SERVICE) ->
    #{kind => ?SPAN_KIND_CLIENT};
mk_opts(?EV_INVOKE_SERVICE_HANDLER) ->
    #{kind => ?SPAN_KIND_SERVER}.
%% @doc The woody span id serves as the key into the span stack, correlating
%% the start and end events of one RPC.
mk_ref(RpcID) ->
    #{span_id := WoodySpanId} = RpcID,
    WoodySpanId.
%% @doc Build the span name as "<role> <Service>:<Function>" from the event meta.
mk_name(Meta) ->
    #{role := Role, service := Service, function := Function} = Meta,
    NameParts = [Role, " ", Service, ":", Function],
    woody_util:to_binary(NameParts).

View File

@ -0,0 +1,134 @@
%% Translates hackney metrics for prometheus.
%% See
%% https://github.com/benoitc/hackney/tree/1.18.0?tab=readme-ov-file#metrics
%%
%% For pool metrics see dedicated collector module
%% `woody_hackney_prometheus_collector`.
-module(woody_hackney_prometheus).
-export([
new/2,
delete/1,
increment_counter/1,
increment_counter/2,
decrement_counter/1,
decrement_counter/2,
update_histogram/2,
update_gauge/2,
update_meter/2
]).
-type name() :: any().
-type metric() :: counter | histogram | gauge | meter.
%% API
-define(NB_REQUESTS, hackney_nb_requests).
-define(TOTAL_REQUESTS, hackney_total_requests).
-define(HOST_NB_REQUESTS, hackney_host_nb_requests).
-define(HOST_CONNECT_TIMEOUT, hackney_host_connect_timeout).
-define(HOST_CONNECT_ERROR, hackney_host_connect_error).
-define(HOST_NEW_CONNECTION, hackney_host_new_connection).
-define(HOST_REUSE_CONNECTION, hackney_host_reuse_connection).
%% Ignore unsupported metric
-spec new(metric(), name()) -> ok.
%% Total counters
new(counter, [hackney, nb_requests]) ->
    %% Declared as a prometheus gauge: in-flight request count goes down as well as up.
    true = prometheus_gauge:declare([
        {name, ?NB_REQUESTS},
        {registry, registry()},
        {labels, []},
        {help, "Number of running requests."}
    ]),
    %% Per host counters, see
    %% https://github.com/benoitc/hackney/tree/1.18.0?tab=readme-ov-file#metrics-per-hosts
    %% NOTE Hackney won't call `metrics:new/3` for those counters
    true = prometheus_gauge:declare([
        {name, ?HOST_NB_REQUESTS},
        {registry, registry()},
        {labels, [host]},
        {help, "Number of running requests."}
    ]),
    [
        true = prometheus_counter:declare([
            {name, Name},
            {registry, registry()},
            {labels, [host]},
            {help, Help}
        ])
     || {Name, Help} <- [
            {?HOST_CONNECT_TIMEOUT, "Number of connect timeout."},
            {?HOST_CONNECT_ERROR, "Number of timeout errors."},
            {?HOST_NEW_CONNECTION, "Number of new pool connections per host."},
            {?HOST_REUSE_CONNECTION, "Number of reused pool connections per host."}
        ]
    ],
    ok;
new(counter, [hackney, total_requests]) ->
    true = prometheus_counter:declare([
        {name, ?TOTAL_REQUESTS},
        {registry, registry()},
        {labels, []},
        {help, "Total number of requests."}
    ]),
    ok;
%% Any other metric type/name hackney registers is not exported.
new(_Type, _Name) ->
    ok.
%% @doc Metric deletion is a no-op: prometheus series persist for the
%% lifetime of the registry.
-spec delete(name()) -> ok.
delete(_Name) ->
    ok.
%% @doc Increment a hackney metric by one (delegates to increment_counter/2).
-spec increment_counter(name()) -> ok.
increment_counter(Name) ->
    increment_counter(Name, 1).
-spec increment_counter(name(), pos_integer()) -> ok.
%% In-flight request totals map to gauges (they can decrease later);
%% cumulative totals and per-host error/connection stats map to counters.
increment_counter([hackney, nb_requests], Value) ->
    prometheus_gauge:inc(registry(), ?NB_REQUESTS, [], Value);
increment_counter([hackney, total_requests], Value) ->
    prometheus_counter:inc(registry(), ?TOTAL_REQUESTS, [], Value);
increment_counter([hackney, Host, nb_requests], Value) ->
    prometheus_gauge:inc(registry(), ?HOST_NB_REQUESTS, [Host], Value);
increment_counter([hackney, Host, connect_timeout], Value) ->
    prometheus_counter:inc(registry(), ?HOST_CONNECT_TIMEOUT, [Host], Value);
increment_counter([hackney, Host, connect_error], Value) ->
    prometheus_counter:inc(registry(), ?HOST_CONNECT_ERROR, [Host], Value);
increment_counter([hackney_pool, Host, new_connection], Value) ->
    prometheus_counter:inc(registry(), ?HOST_NEW_CONNECTION, [Host], Value);
increment_counter([hackney_pool, Host, reuse_connection], Value) ->
    prometheus_counter:inc(registry(), ?HOST_REUSE_CONNECTION, [Host], Value);
%% Any other hackney metric is ignored.
increment_counter(_Name, _Value) ->
    ok.
%% @doc Decrement a hackney metric by one (delegates to decrement_counter/2).
-spec decrement_counter(name()) -> ok.
decrement_counter(Name) ->
    decrement_counter(Name, 1).
-spec decrement_counter(name(), pos_integer()) -> ok.
%% Only the in-flight request gauges can be decremented; everything else is
%% a monotonic counter and decrements are ignored.
decrement_counter([hackney, nb_requests], Value) ->
    prometheus_gauge:dec(registry(), ?NB_REQUESTS, [], Value);
decrement_counter([hackney, Host, nb_requests], Value) ->
    prometheus_gauge:dec(registry(), ?HOST_NB_REQUESTS, [Host], Value);
decrement_counter(_Name, _Value) ->
    ok.
%% @doc Histograms are not exported; when hackney passes a zero-arity timing
%% fun we still must run it (its side effects are hackney's), otherwise the
%% plain numeric sample is dropped.
-spec update_histogram(name(), fun(() -> ok) | number()) -> ok.
update_histogram(_Name, Value) ->
    case is_function(Value, 0) of
        true -> Value();
        false -> ok
    end.
%% @doc Gauge updates from hackney are ignored; pool gauges are exported by
%% the dedicated collector module instead (see module header note).
-spec update_gauge(name(), number()) -> ok.
update_gauge(_Name, _Value) ->
    ok.
%% @doc Meter updates are not supported; ignored.
-spec update_meter(name(), number()) -> ok.
update_meter(_Name, _Value) ->
    ok.
%%
%% All metrics are registered in the default prometheus registry.
registry() ->
    default.

View File

@ -0,0 +1,84 @@
%% @doc Custom prometheus collector exposing hackney's connection pool
%% usage (in-use, free and queued connection counts per pool) as the
%% 'woody_hackney_pool_usage' gauge metric family.
-module(woody_hackney_prometheus_collector).
-export([setup/0]).
%%
-behaviour(prometheus_collector).
-export([collect_mf/2]).
-export([collect_metrics/2]).
-export([deregister_cleanup/1]).
%% Name of the gauge metric family this collector publishes.
-define(POOL_USAGE, woody_hackney_pool_usage).
%% Installation
%% @doc Installs custom collector for hackney's pool metrics
-spec setup() -> ok.
setup() ->
    prometheus_registry:register_collector(registry(), ?MODULE).
%% Collector API
-type data() :: [data_item()].
-type data_item() :: {data_labels(), non_neg_integer()}.
-type data_labels() :: [{atom(), atom() | nonempty_string() | binary() | iolist()}].
%% Stats proplist as returned by hackney_pool:get_stats/1.
-type pool_stats() :: [{atom(), any()}].
%% @doc Prometheus callback: gathers usage stats for every known hackney
%% pool and hands them to `Callback' as a single gauge metric family.
-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok.
collect_mf(_Registry, Callback) ->
    Data = lists:flatten([
        make_pool_data(Pool, get_pool_stats(Pool))
     || {Pool, _Pid} <- get_hackney_pools()
    ]),
    _ = Callback(create_gauge(Data)),
    ok.
%% @doc Prometheus callback: renders each collected data item as a gauge sample.
-spec collect_metrics(prometheus_metric:name(), data()) ->
    prometheus_model:'Metric'() | [prometheus_model:'Metric'()].
collect_metrics(_Name, Data) ->
    lists:map(
        fun({Labels, Value}) -> prometheus_model_helpers:gauge_metric(Labels, Value) end,
        Data
    ).
-spec deregister_cleanup(prometheus_registry:registry()) -> ok.
%% Prometheus callback: this collector keeps no state of its own.
deregister_cleanup(_Registry) ->
    %% Nothing to clean up
    ok.
%% Private
%% Prometheus registry the collector registers into.
registry() ->
    default.
%% Fetches usage stats for a single pool, tolerating pools that died
%% between listing and querying.
get_pool_stats(Pool) ->
    %% NOTE It looks like 'hackney_pool' table data can occasionally contain
    %% dead pools
    try
        hackney_pool:get_stats(Pool)
    catch
        %% A dead pool yields no stats rather than crashing the scrape.
        exit:{noproc, _Reason} ->
            []
    end.
%% Wraps the collected data items into one prometheus gauge metric family.
-spec create_gauge(data()) -> prometheus_model:'MetricFamily'().
create_gauge(Data) ->
    Help = "Connection pool status by used, free and queued connections count",
    prometheus_model_helpers:create_mf(?POOL_USAGE, Help, gauge, ?MODULE, Data).
%% @doc Turns a pool stats proplist into labelled gauge data points,
%% keeping only the in_use_count / free_count / queue_count stats.
-spec make_pool_data(atom(), pool_stats()) -> data().
make_pool_data(Pool, Stats) ->
    Relevant = maps:with([in_use_count, free_count, queue_count], maps:from_list(Stats)),
    %% Reversed to reproduce exactly the ordering the previous
    %% foldl-based implementation emitted.
    lists:reverse([make_data_item(Pool, Status, Value) || {Status, Value} <- maps:to_list(Relevant)]).

%% One data point labelled with its pool name and stat kind.
make_data_item(Pool, Status, Value) ->
    {[{pool, Pool}, {status, Status}], Value}.
%% Lists {PoolName, Pid} pairs for all started hackney pools.
get_hackney_pools() ->
    %% Shamelessly pasted from
    %% https://github.com/soundtrackyourbrand/prometheus-hackney-collector
    %%
    %% Basically, we rely on not publicly exposed table containing started pools
    %% under hackney's hood.
    ets:tab2list(hackney_pool).

View File

@ -0,0 +1,84 @@
%% @doc Custom prometheus collector exposing the number of active
%% connections per ranch listener as the
%% 'woody_ranch_listener_connections' gauge metric family.
-module(woody_ranch_prometheus_collector).
-export([setup/0]).
%%
-behaviour(prometheus_collector).
-export([collect_mf/2]).
-export([collect_metrics/2]).
-export([deregister_cleanup/1]).
%% Name of the gauge metric family this collector publishes.
-define(CONNECTIONS, woody_ranch_listener_connections).
%% Installation
%% @doc Registers this collector with the prometheus registry.
-spec setup() -> ok.
setup() ->
    prometheus_registry:register_collector(registry(), ?MODULE).
%% Collector API
-type data() :: [data_item()].
-type data_item() :: {data_labels(), non_neg_integer()}.
-type data_labels() :: [{atom(), atom() | nonempty_string() | binary() | iolist()}].
%% Woody servers register ranch listeners under {Module, Id}; plain ranch
%% refs are possible too.
-type maybe_woody_server_ref() :: {module(), ID :: atom()} | ranch:ref().
%% ranch:info/0 shape differs between ranch v1 (proplists) and v2 (maps).
-type ranch_info() ::
    [{maybe_woody_server_ref(), [{atom(), any()}]}]
    | #{maybe_woody_server_ref() => #{atom() => any()}}.
%% @doc Prometheus callback: emits active connection counts for every
%% ranch listener as one gauge metric family.
-spec collect_mf(prometheus_registry:registry(), prometheus_collector:collect_mf_callback()) -> ok.
collect_mf(_Registry, Callback) ->
    Data = lists:flatten([
        make_listener_data(ListenerRef, ListenerInfo)
     || {ListenerRef, ListenerInfo} <- get_listeners_info()
    ]),
    _ = Callback(create_gauge(Data)),
    ok.
%% @doc Prometheus callback: renders each collected data item as a gauge sample.
-spec collect_metrics(prometheus_metric:name(), data()) ->
    prometheus_model:'Metric'() | [prometheus_model:'Metric'()].
collect_metrics(_Name, Data) ->
    lists:map(
        fun({Labels, Value}) -> prometheus_model_helpers:gauge_metric(Labels, Value) end,
        Data
    ).
-spec deregister_cleanup(prometheus_registry:registry()) -> ok.
%% Prometheus callback: this collector keeps no state of its own.
deregister_cleanup(_Registry) ->
    %% Nothing to clean up
    ok.
%% Private
%% Prometheus registry the collector registers into.
registry() ->
    default.
%% Wraps the collected data items into one prometheus gauge metric family.
-spec create_gauge(data()) -> prometheus_model:'MetricFamily'().
create_gauge(Data) ->
    Help = "Number of active connections",
    prometheus_model_helpers:create_mf(?CONNECTIONS, Help, gauge, ?MODULE, Data).
%% @doc One gauge data point per listener that reports an active
%% connections counter; listeners without it are skipped.
-spec make_listener_data(maybe_woody_server_ref(), #{atom() => any()}) -> data().
make_listener_data(Ref, #{active_connections := Count}) ->
    [{[{listener, Ref}], Count}];
make_listener_data(_Ref, _Info) ->
    [].
%% Lists {Ref, InfoMap} for all ranch listeners exposing an active
%% connections counter.
get_listeners_info() ->
    %% See https://ninenines.eu/docs/en/ranch/1.8/guide/listeners/#_obtaining_information_about_listeners
    [
        Listener
     || {_Ref, #{active_connections := _}} = Listener <- normalize_listeners_info(ranch:info())
    ].
-dialyzer({no_match, normalize_listeners_info/1}).
-spec normalize_listeners_info(ranch_info()) -> [{maybe_woody_server_ref(), #{atom() => any()}}].
%% Ranch v2 hands out a map of maps; ranch v1 a proplist of proplists.
%% Normalize both shapes to a flat list of {Ref, InfoMap}.
normalize_listeners_info(Info) when is_map(Info) ->
    maps:to_list(Info);
normalize_listeners_info(Info) ->
    [{Ref, maps:from_list(ListenerInfo)} || {Ref, ListenerInfo} <- Info].

View File

@ -393,6 +393,7 @@ check_deadline(Deadline, Req, State = #{url := Url, woody_state := WoodyState})
-spec check_metadata_headers(woody:http_headers(), cowboy_req:req(), state()) -> check_result().
%% Extracts woody metadata from request headers and stores it in the woody state.
%% NOTE(review): the result of otel_propagator_text_map:extract/1 is unused;
%% presumably extraction installs the remote context via the process
%% dictionary as a side effect — confirm against opentelemetry-erlang docs.
check_metadata_headers(Headers, Req, State = #{woody_state := WoodyState, regexp_meta := ReMeta}) ->
    _OtelCtx = otel_propagator_text_map:extract(maps:to_list(Headers)),
    WoodyState1 = set_metadata(find_metadata(Headers, ReMeta), WoodyState),
    {ok, Req, update_woody_state(State, WoodyState1, Req)}.

View File

@ -407,6 +407,7 @@ check_deadline(Deadline, Req, State = #{url := Url, woody_state := WoodyState})
-spec check_metadata_headers(woody:http_headers(), cowboy_req:req(), state()) -> check_result().
%% Extracts woody metadata from request headers and stores it in the woody state.
%% NOTE(review): the result of otel_propagator_text_map:extract/1 is unused;
%% presumably extraction installs the remote context via the process
%% dictionary as a side effect — confirm against opentelemetry-erlang docs.
check_metadata_headers(Headers, Req, State = #{woody_state := WoodyState, regexp_meta := ReMeta}) ->
    _OtelCtx = otel_propagator_text_map:extract(maps:to_list(Headers)),
    WoodyState1 = set_metadata(find_metadata(Headers, ReMeta), WoodyState),
    {ok, Req, update_woody_state(State, WoodyState1, Req)}.

View File

@ -11,6 +11,12 @@
-define(DEFAULT_HANDLER_OPTS, undefined).
%%
-export([span_stack_put/3]).
-export([span_stack_get/3]).
-export([span_stack_pop/2]).
%%
%% Internal API
%%
@ -51,3 +57,41 @@ get_rpc_type({Module, Service}, Function) ->
%% Oneway thrift replies imply fire-and-forget semantics ('cast');
%% every other reply type is a regular request/response ('call').
-spec get_rpc_reply_type(_ThriftReplyType) -> woody:rpc_type().
get_rpc_reply_type(oneway_void) -> cast;
get_rpc_reply_type(_ReplyType) -> call.
%% OTEL context span helpers
%% NOTE Those helpers are designed specifically to manage stacking spans during
%% woody client (or server) calls _inside_ one single process context.
%% Thus, use of process dictionary via `otel_ctx'.
-define(OTEL_SPANS_STACK, 'spans_ctx_stack').
-type span_key() :: atom() | binary() | string().
-type maybe_span_ctx() :: opentelemetry:span_ctx() | undefined.

%% @doc Push `SpanCtx' onto the per-process span stack under `Key',
%% remembering the currently active span as its parent.
-spec span_stack_put(span_key(), opentelemetry:span_ctx(), otel_ctx:t()) -> otel_ctx:t().
span_stack_put(Key, SpanCtx, Context) ->
    ParentSpanCtx = otel_tracer:current_span_ctx(Context),
    Stack0 = otel_ctx:get_value(Context, ?OTEL_SPANS_STACK, []),
    otel_ctx:set_value(Context, ?OTEL_SPANS_STACK, [{Key, SpanCtx, ParentSpanCtx} | Stack0]).
%% @doc Look up the span stored under `Key'; return `Default' when absent.
-spec span_stack_get(span_key(), otel_ctx:t(), maybe_span_ctx()) -> maybe_span_ctx().
span_stack_get(Key, Context, Default) ->
    case lists:keyfind(Key, 1, otel_ctx:get_value(Context, ?OTEL_SPANS_STACK, [])) of
        {_Key, SpanCtx, _ParentSpanCtx} ->
            SpanCtx;
        false ->
            Default
    end.
%% @doc Remove the entry stored under `Key' from the stack, returning the
%% stored span, its remembered parent and the updated context.
-spec span_stack_pop(span_key(), otel_ctx:t()) ->
    {ok, opentelemetry:span_ctx(), maybe_span_ctx(), otel_ctx:t()} | {error, notfound}.
span_stack_pop(Key, Context) ->
    case lists:keytake(Key, 1, otel_ctx:get_value(Context, ?OTEL_SPANS_STACK, [])) of
        {value, {_Key, SpanCtx, ParentSpanCtx}, Rest} ->
            {ok, SpanCtx, ParentSpanCtx, otel_ctx:set_value(Context, ?OTEL_SPANS_STACK, Rest)};
        false ->
            {error, notfound}
    end.

View File

@ -0,0 +1,97 @@
%% @doc Common-test helper: a gen_server acting as an in-memory OTEL span
%% collector. Configured as `{traces_exporter, {otel_exporter_pid,
%% woody_ct_otel_collector}}', it receives each finished span as a
%% `{span, #span{}}' info message and lets tests query assembled traces.
-module(woody_ct_otel_collector).
-behaviour(gen_server).
-export([
    start_link/0,
    get_trace/1,
    get_traces/0
]).
-export([
    init/1,
    handle_call/3,
    handle_cast/2,
    handle_info/2
]).
-include_lib("opentelemetry/include/otel_span.hrl").
-type span() :: #span{}.
%% A node of the reconstructed trace tree.
-type span_node() :: #{span := span(), children := [span_node()]}.
-type trace() :: #{
    id := opentelemetry:trace_id(),
    node := span_node()
}.
%
-spec start_link() -> {ok, pid()}.
start_link() ->
    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
%% @doc Fetch the assembled span tree for one trace id.
-spec get_trace(opentelemetry:trace_id()) -> {ok, trace()} | {error, notfound}.
get_trace(TraceId) ->
    gen_server:call(?MODULE, {trace, TraceId}).
%% @doc Fetch all traces collected so far.
-spec get_traces() -> {ok, [trace()]}.
get_traces() ->
    gen_server:call(?MODULE, traces).
-spec init(_) -> {ok, _}.
%% State maps trace id => list of collected #span{} records (newest first).
init(_Opts) ->
    {ok, #{}}.
-spec handle_info(_, T) -> {noreply, T}.
%% Receives spans exported via otel_exporter_pid and groups them by trace
%% id, prepending so the newest span comes first.
handle_info({span, Span}, State0) ->
    State1 = maps:update_with(Span#span.trace_id, fun(V) -> [Span | V] end, [Span], State0),
    {noreply, State1};
handle_info(_Msg, State) ->
    {noreply, State}.
%% NOTE The previous spec claimed only {noreply, T} although both known
%% requests reply; also the 'traces' request used to reply a bare list,
%% violating the documented get_traces/0 contract of {ok, [trace()]}.
%% Both are fixed here.
-spec handle_call(_, _, T) -> {reply, _, T} | {noreply, T}.
handle_call(traces, _From, State) ->
    %% Assemble every known trace; reply {ok, Traces} as get_traces/0 promises.
    Result = maps:map(fun(TraceId, Spans) -> build_trace(TraceId, Spans) end, State),
    {reply, {ok, maps:values(Result)}, State};
handle_call({trace, TraceId}, _From, State) ->
    Result =
        case maps:get(TraceId, State, undefined) of
            undefined -> {error, notfound};
            Spans -> {ok, build_trace(TraceId, Spans)}
        end,
    {reply, Result, State};
handle_call(_Msg, _From, State) ->
    %% Unknown calls are deliberately left unanswered (the caller times out).
    {noreply, State}.
-spec handle_cast(_, T) -> {noreply, T}.
%% No casts are part of this server's protocol; ignore them all.
handle_cast(_Msg, State) ->
    {noreply, State}.
%
%% Assembles the span tree for one trace from the flat list of collected spans.
%% NOTE(review): assumes at least one root span (parent_span_id =:= undefined)
%% is present — the head match below crashes otherwise; confirm the exporter
%% always delivers the root span before a trace is queried.
build_trace(TraceId, Spans0) ->
    %% Sort chronologically so parents are folded in before their children.
    Spans1 = lists:sort(fun(#span{start_time = A}, #span{start_time = B}) -> A < B end, Spans0),
    [RootSpan | _] = lists:filter(
        fun
            (#span{parent_span_id = undefined}) -> true;
            (_) -> false
        end,
        Spans1
    ),
    #{
        id => TraceId,
        node => lists:foldl(fun(Span, RootNode) -> update_node(Span, RootNode) end, new_span_node(RootSpan), Spans1)
    }.
%% Inserts Span under the node whose span id matches its parent id;
%% otherwise recurses into children. Children end up newest-first.
update_node(
    Span = #span{parent_span_id = ParentId},
    SpanNode = #{span := #span{span_id = ParentId}, children := Children}
) ->
    SpanNode#{children => [new_span_node(Span) | Children]};
update_node(Span, SpanNode = #{children := Children}) ->
    SpanNode#{children => lists:map(fun(Child) -> update_node(Span, Child) end, Children)}.
%% Wraps a span into a childless tree node.
new_span_node(Span) ->
    #{span => Span, children => []}.

View File

@ -3,6 +3,9 @@
-include_lib("common_test/include/ct.hrl").
-include_lib("stdlib/include/assert.hrl").
-include_lib("hackney/include/hackney_lib.hrl").
-include_lib("opentelemetry_api/include/opentelemetry.hrl").
-include_lib("opentelemetry/include/otel_span.hrl").
-include_lib("prometheus/include/prometheus_model.hrl").
-include("woody_test_thrift.hrl").
-include("woody_defs.hrl").
@ -111,6 +114,11 @@
-define(WEAPON_STACK_OVERFLOW, pos_out_of_boundaries).
-define(BAD_POWERUP_REPLY, powerup_unknown).
-define(OTEL_SPAN(Name, Children), #{span := #span{name = Name}, children := Children}).
-define(OTEL_SPAN(Name, SpanAttributes, Children), #{
span := #span{name = Name, attributes = SpanAttributes}, children := Children
}).
-type config() :: [{atom(), any()}].
-type case_name() :: atom().
-type group_name() :: atom().
@ -273,6 +281,8 @@ cross_test_groups() ->
}
}.
-define(RANCH_REF, woody_ct).
%%
%% starting/stopping
%%
@ -280,20 +290,13 @@ init_per_suite(C) ->
% dbg:tracer(), dbg:p(all, c),
% dbg:tpl({woody_joint_workers, '_', '_'}, x),
%%Apps = genlib_app:start_application_with(woody, [{trace_http_server, true}]),
application:set_env(hackney, mod_metrics, woody_client_metrics),
application:set_env(woody, woody_client_metrics_options, #{
metric_key_mapping => #{
[hackney, nb_requests] => [hackney, requests_in_process]
}
}),
application:set_env(how_are_you, metrics_handlers, [
{woody_api_hay, #{
interval => 1000
}}
]),
ok = application:set_env(hackney, mod_metrics, woody_hackney_prometheus),
{ok, MetricsApps} = application:ensure_all_started(prometheus),
{ok, Apps} = application:ensure_all_started(woody),
{ok, HayApps} = application:ensure_all_started(how_are_you),
[{apps, HayApps ++ Apps} | C].
ok = woody_ranch_prometheus_collector:setup(),
ok = woody_hackney_prometheus_collector:setup(),
{ok, OtelApps} = setup_opentelemetry(),
[{apps, OtelApps ++ MetricsApps ++ Apps} | C].
end_per_suite(C) ->
% unset so it won't report metrics next suite
@ -326,21 +329,21 @@ init_per_testcase(find_multiple_pools_test, C) ->
{ok, Sup} = start_tc_sup(),
Pool1 = {swords, 15000, 100},
Pool2 = {shields, undefined, 50},
ok = start_woody_server_with_pools(woody_ct, Sup, ['Weapons', 'Powerups'], [Pool1, Pool2], C),
ok = start_woody_server_with_pools(?RANCH_REF, Sup, ['Weapons', 'Powerups'], [Pool1, Pool2], C),
[{sup, Sup} | C];
init_per_testcase(calls_with_cache, C) ->
{ok, Sup} = start_tc_sup(),
{ok, _} = start_caching_client(caching_client_ct, Sup),
{ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], C),
{ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], C),
[{sup, Sup} | C];
init_per_testcase(server_handled_client_timeout_test, C) ->
{ok, Sup} = start_tc_sup(),
{ok, _} = supervisor:start_child(Sup, server_timeout_event_handler:child_spec()),
{ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], server_timeout_event_handler, C),
{ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], server_timeout_event_handler, C),
[{sup, Sup} | C];
init_per_testcase(_, C) ->
{ok, Sup} = start_tc_sup(),
{ok, _} = start_woody_server(woody_ct, Sup, ['Weapons', 'Powerups'], C),
{ok, _} = start_woody_server(?RANCH_REF, Sup, ['Weapons', 'Powerups'], C),
[{sup, Sup} | C].
init_per_group(woody_resolver, Config) ->
@ -352,6 +355,20 @@ init_per_group(Name, Config) ->
end_per_group(_Name, _Config) ->
ok.
%% Configures the OTEL SDK to export finished spans synchronously
%% (simple processor) to the local 'woody_ct_otel_collector' process via
%% otel_exporter_pid, then boots the 'opentelemetry' application.
setup_opentelemetry() ->
    ok = application:set_env([
        {opentelemetry, [
            {text_map_propagators, [
                baggage,
                trace_context
            ]},
            {span_processor, simple},
            {traces_exporter, {otel_exporter_pid, woody_ct_otel_collector}}
        ]}
    ]),
    ok = application:start(opentelemetry),
    {ok, [opentelemetry]}.
%% Starts the per-testcase supervisor, registered under this module's name.
start_tc_sup() ->
    supervisor:start_link({local, ?MODULE}, ?MODULE, []).
@ -359,7 +376,7 @@ start_error_server(TC, Sup) ->
Code = get_fail_code(TC),
Dispatch = cowboy_router:compile([{'_', [{?PATH_WEAPONS, ?MODULE, Code}]}]),
Server = ranch:child_spec(
woody_ct,
?RANCH_REF,
ranch_tcp,
[{ip, ?SERVER_IP}, {port, ?SERVER_PORT}],
cowboy_clear,
@ -673,8 +690,37 @@ call_pass_thru_ok_test(C) ->
Armor = <<"AntiGrav Boots">>,
Context = make_context(<<"call_pass_thru_ok">>),
Expect = {ok, genlib_map:get(Armor, powerups())},
Baggage = #{<<"service">> => <<"Powerups">>},
SpanName = <<"test Powerups:proxy_get_powerup">>,
%% NOTE: `otel_baggage` uses `otel_ctx` that relies on __process dictionary__
ok = otel_baggage:set(Baggage),
Tracer = opentelemetry:get_application_tracer(?MODULE),
%% NOTE: Starts span and sets its context as current span in __process dictionary__
SpanCtx = otel_tracer:set_current_span(otel_tracer:start_span(Tracer, SpanName, #{})),
Expect = call(Context, 'Powerups', proxy_get_powerup, {Armor, self_to_bin()}, C),
{ok, _} = receive_msg(Armor, Context).
%% NOTE: Ends span and puts context into __process dictionary__
_ = otel_tracer:set_current_span(otel_span:end_span(SpanCtx, undefined)),
{ok, _} = receive_msg(Armor, Context),
{ok, #{node := Root}} = woody_ct_otel_collector:get_trace(SpanCtx#span_ctx.trace_id),
%% Prepare otel attributes to match against
ProxyAttrs = mk_otel_attributes(Baggage),
Attrs = mk_otel_attributes(Baggage#{<<"proxied">> => <<"true">>}),
?assertMatch(
?OTEL_SPAN(SpanName, [
?OTEL_SPAN(<<"client Powerups:proxy_get_powerup">>, [
%% Upon invocation event baggage is expected to be put in span attributes
?OTEL_SPAN(<<"server Powerups:proxy_get_powerup">>, ProxyAttrs, [
%% New client starts call here
?OTEL_SPAN(<<"client Powerups:get_powerup">>, [
?OTEL_SPAN(<<"server Powerups:get_powerup">>, Attrs, [
%% Expect no child spans uphere
])
])
])
])
]),
Root
).
call_pass_thru_except_test(C) ->
Armor = <<"Shield Belt">>,
@ -775,7 +821,18 @@ call_fail_w_no_headers(Id, Class, _Code) ->
find_multiple_pools_test(_) ->
true = is_pid(hackney_pool:find_pool(swords)),
true = is_pid(hackney_pool:find_pool(shields)).
true = is_pid(hackney_pool:find_pool(shields)),
MF = smuggle_mf_return_value(fun(F) -> woody_hackney_prometheus_collector:collect_mf(default, F) end),
%% We can't know order and values from hackney's pool info, but we can
%% expect that values for those pools must be provided. However exact number
%% of values can vary based on order of testcase execution and other
%% side-effects.
ValuesCount = length([swords, shields]) * length([queue_count, in_use_count, free_count]),
?assertMatch(
#'MetricFamily'{type = 'GAUGE', name = <<"woody_hackney_pool_usage">>, metric = Values} when
length(Values) >= ValuesCount,
MF
).
call_thrift_multiplexed_test(_) ->
Client = make_thrift_multiplexed_client(
@ -985,7 +1042,7 @@ init(_) ->
{ok,
{
{one_for_one, 1, 1},
[]
[#{id => woody_ct_otel_collector, start => {woody_ct_otel_collector, start_link, []}}]
}}.
%%
@ -1015,6 +1072,8 @@ handle_function(ProxyGetPowerup, {Name, To}, Context, _Opts) when
ProxyGetPowerup =:= proxy_get_powerup orelse
ProxyGetPowerup =:= bad_proxy_get_powerup
->
%% NOTE: Merges baggage, requires #{binary() => binary()}
ok = otel_baggage:set(#{<<"proxied">> => <<"true">>}),
% NOTE
% Client may return `{exception, _}` tuple with some business level exception
% here, yet handler expects us to `throw/1` them. This is expected here it
@ -1054,6 +1113,12 @@ handle_event(
->
_ = handle_proxy_event(Event, Code, TraceId, ParentId),
log_event(Event, RpcId, Meta);
%% Handle invocation
handle_event(Event = ?EV_INVOKE_SERVICE_HANDLER, RpcId, Meta, _) ->
log_event(Event, RpcId, Meta),
SpanCtx = otel_tracer:current_span_ctx(),
_ = otel_span:set_attributes(SpanCtx, maps:map(fun(_Key, {Value, _Metadata}) -> Value end, otel_baggage:get_all())),
ok;
handle_event(Event, RpcId, Meta, _) ->
log_event(Event, RpcId, Meta).
@ -1084,6 +1149,7 @@ handle_proxy_event(Event, Code, Descr) ->
erlang:error(badarg, [Event, Code, Descr]).
log_event(Event, RpcId, Meta) ->
ok = woody_event_handler_otel:handle_event(Event, RpcId, Meta, []),
woody_ct_event_h:handle_event(Event, RpcId, Meta, []).
%%
@ -1169,6 +1235,7 @@ gun_test_basic(Id, Gun, Expect, C) ->
catch
Class:Reason -> ok
end,
ok = assert_connections_metrics(),
check_msg(Gun, Context).
get_except({ok, _}) ->
@ -1285,3 +1352,50 @@ handle_sleep(Context) ->
BinTimer ->
timer:sleep(binary_to_integer(BinTimer))
end.
%% Builds an otel_attributes structure from a plain map, honouring the
%% globally configured span attribute count and value-length limits.
mk_otel_attributes(Attributes) ->
    SpanAttributeCountLimit = otel_span_limits:attribute_count_limit(),
    SpanAttributeValueLengthLimit = otel_span_limits:attribute_value_length_limit(),
    otel_attributes:new(Attributes, SpanAttributeCountLimit, SpanAttributeValueLengthLimit).
%% Scrapes the ranch collector directly and asserts that the gauge family
%% reports at least one active connection for our listener.
assert_connections_metrics() ->
    MF = smuggle_mf_return_value(fun(F) -> woody_ranch_prometheus_collector:collect_mf(default, F) end),
    %% Sadly we can't match on the listener ref since it's an iodata
    %% representation of a tuple.
    ?assertMatch(
        #'MetricFamily'{
            name = <<"woody_ranch_listener_connections">>,
            type = 'GAUGE',
            metric = [
                #'Metric'{
                    label = [#'LabelPair'{name = <<"listener">>, value = _Ref}],
                    gauge = #'Gauge'{value = V}
                }
            ]
        } when V >= 1,
        MF
    ),
    %% NOTE See `prometheus_model_helpers:ensure_binary_or_string/1`
    %% Accept either server module name, depending on which woody server
    %% implementation registered the listener.
    RefImpl1 = io_lib:format("~p", [{woody_server_thrift_http_handler, ?RANCH_REF}]),
    RefImpl2 = io_lib:format("~p", [{woody_server_thrift_v2, ?RANCH_REF}]),
    ?assertMatch(
        Expected when Expected =:= RefImpl1 orelse Expected =:= RefImpl2,
        get_metric_ref(hd(MF#'MetricFamily'.metric))
    ).
%% Extracts the value of the <<"listener">> label from a metric,
%% or 'undefined' when the label is absent.
get_metric_ref(#'Metric'{label = Labels}) ->
    get_metric_ref_(Labels).
get_metric_ref_([]) ->
    undefined;
get_metric_ref_([#'LabelPair'{name = <<"listener">>, value = Ref} | _]) ->
    Ref;
get_metric_ref_([_Label | Labels]) ->
    get_metric_ref_(Labels).
%% The prometheus collector callback contract requires returning 'ok', so
%% the metric family is side-channeled out through the process dictionary
%% (this also keeps dialyzer quiet about the callback's return type).
smuggle_mf_return_value(RunCollector) ->
    CaptureMF = fun(MF) ->
        _ = erlang:put(smuggle, MF),
        ok
    end,
    _ = RunCollector(CaptureMF),
    erlang:get(smuggle).