mirror of
https://github.com/valitydev/hellgate.git
synced 2024-11-06 02:45:20 +00:00
TD-788: Adds woody pool/connections metrics (#107)
* Adds overrides for linter deps
* Bumps woody w/ prometheus collectors
* Bumps woody & scoper
* Raises processing info log messages
* Adds woody event severity mapping example to `sys.config`
This commit is contained in:
parent
df89aad35d
commit
b189c2fc48
@ -21,6 +21,8 @@
|
||||
payproc_errors,
|
||||
erl_health,
|
||||
limiter_proto,
|
||||
prometheus,
|
||||
prometheus_cowboy,
|
||||
opentelemetry_api,
|
||||
opentelemetry_exporter,
|
||||
opentelemetry
|
||||
|
@ -104,8 +104,15 @@ get_prometheus_route() ->
|
||||
|
||||
%% Entry point taking a start type and start arguments (both unused).
%% Sets up metrics first, asserting that `setup_metrics/0` returns `ok`,
%% then starts a supervisor using this module as the callback module
%% with an empty argument list; the supervisor's result is returned as is.
-spec start(normal, any()) -> {ok, pid()} | {error, any()}.
|
||||
start(_StartType, _StartArgs) ->
|
||||
ok = setup_metrics(),
|
||||
supervisor:start_link(?MODULE, []).
|
||||
|
||||
%% Stop hook: ignores the given state and always returns `ok`.
-spec stop(any()) -> ok.
|
||||
stop(_State) ->
|
||||
ok.
|
||||
|
||||
%%
|
||||
|
||||
%% Calls `setup/0` on the woody ranch and woody hackney prometheus collector
%% modules, in that order. Each result is matched against `ok`, so any other
%% return value fails with a badmatch instead of being silently ignored.
%% Returns `ok`.
setup_metrics() ->
|
||||
ok = woody_ranch_prometheus_collector:setup(),
|
||||
ok = woody_hackney_prometheus_collector:setup().
|
||||
|
@ -686,8 +686,8 @@ validate_recurrent_terms(RecurrentTerms, PaymentTool) ->
|
||||
_ =
|
||||
case hg_payment_tool:has_any_payment_method(PaymentTool, PMs) of
|
||||
false ->
|
||||
logger:info("PaymentTool: ~p", [PaymentTool]),
|
||||
logger:info("RecurrentPaymentMethods: ~p", [PMs]),
|
||||
logger:notice("PaymentTool: ~p", [PaymentTool]),
|
||||
logger:notice("RecurrentPaymentMethods: ~p", [PMs]),
|
||||
throw_invalid_request(<<"Invalid payment method">>);
|
||||
true ->
|
||||
ok
|
||||
@ -794,7 +794,7 @@ log_route_choice_meta(#{choice_meta := undefined}, _Revision) ->
|
||||
ok;
|
||||
log_route_choice_meta(#{choice_meta := ChoiceMeta}, Revision) ->
|
||||
Metadata = hg_routing:get_logger_metadata(ChoiceMeta, Revision),
|
||||
logger:log(info, "Routing decision made", #{routing => Metadata}).
|
||||
logger:log(notice, "Routing decision made", #{routing => Metadata}).
|
||||
|
||||
maybe_log_misconfigurations({misconfiguration, _} = Error) ->
|
||||
{Format, Details} = hg_routing:prepare_log_message(Error),
|
||||
@ -810,14 +810,14 @@ log_rejected_routes(all, Routes, VS) ->
|
||||
log_rejected_routes(limit_misconfiguration, Routes, _VS) ->
|
||||
?LOG_MD(warning, "Limiter hold error caused route candidates to be rejected: ~p", [Routes]);
|
||||
log_rejected_routes(limit_overflow, Routes, _VS) ->
|
||||
?LOG_MD(info, "Limit overflow caused route candidates to be rejected: ~p", [Routes]);
|
||||
?LOG_MD(notice, "Limit overflow caused route candidates to be rejected: ~p", [Routes]);
|
||||
log_rejected_routes(adapter_unavailable, Routes, _VS) ->
|
||||
?LOG_MD(info, "Adapter unavailability caused route candidates to be rejected: ~p", [Routes]);
|
||||
?LOG_MD(notice, "Adapter unavailability caused route candidates to be rejected: ~p", [Routes]);
|
||||
log_rejected_routes(provider_conversion_is_too_low, Routes, _VS) ->
|
||||
?LOG_MD(info, "Lacking conversion of provider caused route candidates to be rejected: ~p", [Routes]);
|
||||
?LOG_MD(notice, "Lacking conversion of provider caused route candidates to be rejected: ~p", [Routes]);
|
||||
log_rejected_routes(forbidden, Routes, VS) ->
|
||||
?LOG_MD(info, "Rejected routes found for varset: ~p", [VS]),
|
||||
?LOG_MD(info, "Rejected routes found, rejected routes: ~p", [Routes]);
|
||||
?LOG_MD(notice, "Rejected routes found for varset: ~p", [VS]),
|
||||
?LOG_MD(notice, "Rejected routes found, rejected routes: ~p", [Routes]);
|
||||
log_rejected_routes(_, _Routes, _VS) ->
|
||||
ok.
|
||||
|
||||
@ -1932,7 +1932,7 @@ process_risk_score(Action, St) ->
|
||||
ok ->
|
||||
{next, {Events, hg_machine_action:set_timeout(0, Action)}};
|
||||
{error, risk_score_is_too_high = Reason} ->
|
||||
logger:info("No route found, reason = ~p, varset: ~p", [Reason, VS1]),
|
||||
logger:notice("No route found, reason = ~p, varset: ~p", [Reason, VS1]),
|
||||
handle_choose_route_error(Reason, Events, St, Action)
|
||||
end.
|
||||
|
||||
@ -2030,8 +2030,6 @@ handle_choose_route_error(Error, Events, St, Action) ->
|
||||
|
||||
%% NOTE See damsel payproc errors (proto/payment_processing_errors.thrift) for no route found
|
||||
|
||||
construct_routing_failure({rejected_routes, {forbidden, RejectedRoutes}}) ->
|
||||
construct_routing_failure([forbidden], genlib:format(RejectedRoutes));
|
||||
construct_routing_failure({rejected_routes, {SubCode, RejectedRoutes}}) when
|
||||
SubCode =:= limit_misconfiguration orelse
|
||||
SubCode =:= limit_overflow orelse
|
||||
@ -2039,6 +2037,8 @@ construct_routing_failure({rejected_routes, {SubCode, RejectedRoutes}}) when
|
||||
SubCode =:= provider_conversion_is_too_low
|
||||
->
|
||||
construct_routing_failure([rejected, SubCode], genlib:format(RejectedRoutes));
|
||||
construct_routing_failure({rejected_routes, {_SubCode, RejectedRoutes}}) ->
|
||||
construct_routing_failure([forbidden], genlib:format(RejectedRoutes));
|
||||
construct_routing_failure({misconfiguration = Code, Details}) ->
|
||||
construct_routing_failure([unknown, {unknown_error, atom_to_binary(Code)}], genlib:format(Details));
|
||||
construct_routing_failure(Code = risk_score_is_too_high) ->
|
||||
@ -2336,7 +2336,7 @@ process_failure({payment, Step} = Activity, Events, Action, Failure, St, _Refund
|
||||
Target = get_target(St),
|
||||
case check_retry_possibility(Target, Failure, St) of
|
||||
{retry, Timeout} ->
|
||||
_ = logger:info("Retry session after transient failure, wait ~p", [Timeout]),
|
||||
_ = logger:notice("Retry session after transient failure, wait ~p", [Timeout]),
|
||||
{SessionEvents, SessionAction} = retry_session(Action, Target, Timeout),
|
||||
{next, {Events ++ SessionEvents, SessionAction}};
|
||||
fatal ->
|
||||
@ -3273,7 +3273,7 @@ log_cascade_attempt_context(
|
||||
#domain_PaymentsServiceTerms{attempt_limit = AttemptLimit},
|
||||
#st{routes = AttemptedRoutes}
|
||||
) ->
|
||||
?LOG_MD(info, "Cascade context: merchant payment terms' attempt limit '~p', attempted routes: ~p", [
|
||||
?LOG_MD(notice, "Cascade context: merchant payment terms' attempt limit '~p', attempted routes: ~p", [
|
||||
AttemptLimit, AttemptedRoutes
|
||||
]).
|
||||
|
||||
|
@ -313,7 +313,7 @@ finish_session_processing({Events0, Action}, Session, Refund) ->
|
||||
{finished, ?session_failed(Failure)} ->
|
||||
case check_retry_possibility(Failure, Refund) of
|
||||
{retry, Timeout} ->
|
||||
_ = logger:info("Retry session after transient failure, wait ~p", [Timeout]),
|
||||
_ = logger:notice("Retry session after transient failure, wait ~p", [Timeout]),
|
||||
{SessionEvents, SessionAction} = retry_session(Action, Timeout),
|
||||
{next, {Events1 ++ SessionEvents, SessionAction}};
|
||||
fatal ->
|
||||
|
@ -86,7 +86,7 @@ check_limits_([T | TurnoverLimits], Context, Acc) ->
|
||||
true ->
|
||||
check_limits_(TurnoverLimits, Context, [Limit | Acc]);
|
||||
false ->
|
||||
logger:info("Limit with id ~p overflowed, amount ~p upper boundary ~p", [
|
||||
logger:notice("Limit with id ~p overflowed, amount ~p upper boundary ~p", [
|
||||
LimitID,
|
||||
LimiterAmount,
|
||||
UpperBoundary
|
||||
|
@ -346,7 +346,7 @@ dispatch_repair(Ns, Payload, Machine) ->
|
||||
marshal_repair_result(ok, Result, Machine)
|
||||
catch
|
||||
throw:{exception, Reason} = Error ->
|
||||
logger:info("Process repair failed, ~p", [Reason]),
|
||||
logger:notice("Process repair failed, ~p", [Reason]),
|
||||
woody_error:raise(business, marshal_repair_failed(Error))
|
||||
end.
|
||||
|
||||
|
@ -253,7 +253,7 @@ init(EncodedParams, #{id := RecPaymentToolID}) ->
|
||||
{ChosenRoute, ChoiceContext} = hg_routing:choose_route(NonFailRatedRoutes),
|
||||
ChosenPaymentRoute = hg_route:to_payment_route(ChosenRoute),
|
||||
LoggerMetadata = hg_routing:get_logger_metadata(ChoiceContext, Revision),
|
||||
_ = logger:log(info, "Routing decision made", #{routing => LoggerMetadata}),
|
||||
_ = logger:log(notice, "Routing decision made", #{routing => LoggerMetadata}),
|
||||
RecPaymentTool2 = set_minimal_payment_cost(RecPaymentTool, ChosenPaymentRoute, VS, Revision),
|
||||
{ok, {Changes, Action}} = start_session(),
|
||||
StartChanges = [
|
||||
@ -376,7 +376,7 @@ validate_risk_score(RiskScore) when RiskScore == low; RiskScore == high ->
|
||||
RiskScore.
|
||||
|
||||
handle_route_error(risk_score_is_too_high = Reason, RecPaymentTool) ->
|
||||
_ = logger:log(info, "No route found, reason = ~p", [Reason], logger:get_process_metadata()),
|
||||
_ = logger:log(notice, "No route found, reason = ~p", [Reason], logger:get_process_metadata()),
|
||||
{misconfiguration, {'No route found for a recurrent payment tool', RecPaymentTool}}.
|
||||
handle_route_error({no_route_found, {Reason, RejectedRoutes}}, RecPaymentTool, Varset) ->
|
||||
LogFun = fun(Msg, Param) ->
|
||||
|
@ -483,6 +483,7 @@ get_provider_status(Route, FDStats) ->
|
||||
AvailabilityStatus = get_adapter_availability_status(FdOverrides, AvailabilityServiceID, FDStats),
|
||||
ConversionStatus = get_provider_conversion_status(FdOverrides, ConversionServiceID, FDStats),
|
||||
{AvailabilityStatus, ConversionStatus}.
|
||||
|
||||
get_adapter_availability_status(#domain_RouteFaultDetectorOverrides{enabled = true}, _FDID, _Stats) ->
|
||||
%% ignore fd statistic if set override
|
||||
{alive, 0.0};
|
||||
|
@ -46,6 +46,15 @@
|
||||
event_handler_opts => #{
|
||||
formatter_opts => #{
|
||||
max_length => 1000
|
||||
},
|
||||
events_severity => #{
|
||||
%% Maybe disregard those events by lowering their severity level
|
||||
['call service'] => info,
|
||||
['service result'] => info,
|
||||
['invoke service handler'] => info,
|
||||
['service handler result'] => info,
|
||||
['service handler result', error, business] => warning,
|
||||
['client cache hit'] => info
|
||||
}
|
||||
}
|
||||
}},
|
||||
@ -126,6 +135,10 @@
|
||||
event_handler_opts => #{
|
||||
formatter_opts => #{
|
||||
max_length => 1000
|
||||
},
|
||||
events_severity => #{
|
||||
%% Was 'info'
|
||||
['service handler result', error, business] => warning
|
||||
}
|
||||
}
|
||||
}}
|
||||
@ -150,6 +163,10 @@
|
||||
event_handler_opts => #{
|
||||
formatter_opts => #{
|
||||
max_length => 1000
|
||||
},
|
||||
events_severity => #{
|
||||
%% Was 'info'
|
||||
['service handler result', error, business] => warning
|
||||
}
|
||||
}
|
||||
}}
|
||||
|
20
rebar.config
20
rebar.config
@ -40,6 +40,8 @@
|
||||
{erl_health, {git, "https://github.com/valitydev/erlang-health.git", {branch, "master"}}},
|
||||
{fault_detector_proto, {git, "https://github.com/valitydev/fault-detector-proto.git", {branch, "master"}}},
|
||||
{limiter_proto, {git, "https://github.com/valitydev/limiter-proto.git", {branch, "master"}}},
|
||||
{prometheus, "4.8.1"},
|
||||
{prometheus_cowboy, "0.1.8"},
|
||||
|
||||
%% OpenTelemetry deps
|
||||
{opentelemetry_api, "1.2.1"},
|
||||
@ -72,12 +74,6 @@
|
||||
{profiles, [
|
||||
{prod, [
|
||||
{deps, [
|
||||
%% NOTE
|
||||
%% Because of a dependency conflict, prometheus libs are only included in production build for now
|
||||
%% https://github.com/project-fifo/rebar3_lint/issues/42
|
||||
%% https://github.com/valitydev/hellgate/pull/2/commits/884724c1799703cee4d1033850fe32c17f986d9e
|
||||
{prometheus, "4.8.1"},
|
||||
{prometheus_cowboy, "0.1.8"},
|
||||
% for introspection on production
|
||||
{recon, "2.5.2"},
|
||||
{logger_logstash_formatter,
|
||||
@ -92,8 +88,6 @@
|
||||
{tools, load},
|
||||
{opentelemetry, temporary},
|
||||
logger_logstash_formatter,
|
||||
prometheus,
|
||||
prometheus_cowboy,
|
||||
sasl,
|
||||
hellgate
|
||||
]},
|
||||
@ -111,7 +105,7 @@
|
||||
]}
|
||||
]}.
|
||||
|
||||
{plugins, [
|
||||
{project_plugins, [
|
||||
{covertool, "2.0.4"},
|
||||
{erlfmt, "1.0.0"},
|
||||
{rebar3_lint, "1.0.1"},
|
||||
@ -136,3 +130,11 @@
|
||||
"ct.coverdata"
|
||||
]}
|
||||
]}.
|
||||
|
||||
%% NOTE
|
||||
%% Required for the rebar3 lint plugin: drops rebar3_archive_plugin from the plugins of the deps listed below
|
||||
{overrides, [
|
||||
{del, accept, [{plugins, [{rebar3_archive_plugin, "0.0.2"}]}]},
|
||||
{del, prometheus_cowboy, [{plugins, [{rebar3_archive_plugin, "0.0.1"}]}]},
|
||||
{del, prometheus_httpd, [{plugins, [{rebar3_archive_plugin, "0.0.1"}]}]}
|
||||
]}.
|
||||
|
21
rebar.lock
21
rebar.lock
@ -1,5 +1,6 @@
|
||||
{"1.2.0",
|
||||
[{<<"acceptor_pool">>,{pkg,<<"acceptor_pool">>,<<"1.0.0">>},2},
|
||||
[{<<"accept">>,{pkg,<<"accept">>,<<"0.3.5">>},2},
|
||||
{<<"acceptor_pool">>,{pkg,<<"acceptor_pool">>,<<"1.0.0">>},2},
|
||||
{<<"bender_client">>,
|
||||
{git,"https://github.com/valitydev/bender-client-erlang.git",
|
||||
{ref,"d8837617c8dc36216ce8c4ffc9a56a34e423ca5e"}},
|
||||
@ -79,10 +80,14 @@
|
||||
{git,"https://github.com/valitydev/payproc-errors-erlang.git",
|
||||
{ref,"8ae8586239ef68098398acf7eb8363d9ec3b3234"}},
|
||||
0},
|
||||
{<<"prometheus">>,{pkg,<<"prometheus">>,<<"4.8.1">>},0},
|
||||
{<<"prometheus_cowboy">>,{pkg,<<"prometheus_cowboy">>,<<"0.1.8">>},0},
|
||||
{<<"prometheus_httpd">>,{pkg,<<"prometheus_httpd">>,<<"2.1.11">>},1},
|
||||
{<<"quantile_estimator">>,{pkg,<<"quantile_estimator">>,<<"0.2.1">>},1},
|
||||
{<<"ranch">>,{pkg,<<"ranch">>,<<"1.8.0">>},2},
|
||||
{<<"scoper">>,
|
||||
{git,"https://github.com/valitydev/scoper.git",
|
||||
{ref,"41a14a558667316998af9f49149ee087ffa8bef2"}},
|
||||
{ref,"55a2a32ee25e22fa35f583a18eaf38b2b743429b"}},
|
||||
0},
|
||||
{<<"snowflake">>,
|
||||
{git,"https://github.com/valitydev/snowflake.git",
|
||||
@ -99,10 +104,11 @@
|
||||
{<<"unicode_util_compat">>,{pkg,<<"unicode_util_compat">>,<<"0.7.0">>},2},
|
||||
{<<"woody">>,
|
||||
{git,"https://github.com/valitydev/woody_erlang.git",
|
||||
{ref,"5d46291a6bfcee0bae2a9346a7d927603a909249"}},
|
||||
{ref,"2cbe19998c073c545bf0f6b4b5211639c40b9925"}},
|
||||
0}]}.
|
||||
[
|
||||
{pkg_hash,[
|
||||
{<<"accept">>, <<"B33B127ABCA7CC948BBE6CAA4C263369ABF1347CFA9D8E699C6D214660F10CD1">>},
|
||||
{<<"acceptor_pool">>, <<"43C20D2ACAE35F0C2BCD64F9D2BDE267E459F0F3FD23DAB26485BF518C281B21">>},
|
||||
{<<"cache">>, <<"B23A5FE7095445A88412A6E614C933377E0137B44FFED77C9B3FEF1A731A20B2">>},
|
||||
{<<"certifi">>, <<"D4FB0A6BB20B7C9C3643E22507E42F356AC090A1DCEA9AB99E27E0376D695EBA">>},
|
||||
@ -123,11 +129,16 @@
|
||||
{<<"opentelemetry_exporter">>, <<"1D8809C0D4F4ACF986405F7700ED11992BCBDB6A4915DD11921E80777FFA7167">>},
|
||||
{<<"opentelemetry_semantic_conventions">>, <<"B67FE459C2938FCAB341CB0951C44860C62347C005ACE1B50F8402576F241435">>},
|
||||
{<<"parse_trans">>, <<"16328AB840CC09919BD10DAB29E431DA3AF9E9E7E7E6F0089DD5A2D2820011D8">>},
|
||||
{<<"prometheus">>, <<"FA76B152555273739C14B06F09F485CF6D5D301FE4E9D31B7FF803D26025D7A0">>},
|
||||
{<<"prometheus_cowboy">>, <<"CFCE0BC7B668C5096639084FCD873826E6220EA714BF60A716F5BD080EF2A99C">>},
|
||||
{<<"prometheus_httpd">>, <<"F616ED9B85B536B195D94104063025A91F904A4CFC20255363F49A197D96C896">>},
|
||||
{<<"quantile_estimator">>, <<"EF50A361F11B5F26B5F16D0696E46A9E4661756492C981F7B2229EF42FF1CD15">>},
|
||||
{<<"ranch">>, <<"8C7A100A139FD57F17327B6413E4167AC559FBC04CA7448E9BE9057311597A1D">>},
|
||||
{<<"ssl_verify_fun">>, <<"354C321CF377240C7B8716899E182CE4890C5938111A1296ADD3EC74CF1715DF">>},
|
||||
{<<"tls_certificate_check">>, <<"C76C4C5D79EE79A2B11C84F910C825D6F024A78427C854F515748E9BD025E987">>},
|
||||
{<<"unicode_util_compat">>, <<"BC84380C9AB48177092F43AC89E4DFA2C6D62B40B8BD132B1059ECC7232F9A78">>}]},
|
||||
{pkg_hash_ext,[
|
||||
{<<"accept">>, <<"11B18C220BCC2EAB63B5470C038EF10EB6783BCB1FCDB11AA4137DEFA5AC1BB8">>},
|
||||
{<<"acceptor_pool">>, <<"0CBCD83FDC8B9AD2EEE2067EF8B91A14858A5883CB7CD800E6FCD5803E158788">>},
|
||||
{<<"cache">>, <<"44516CE6FA03594D3A2AF025DD3A87BFE711000EB730219E1DDEFC816E0AA2F4">>},
|
||||
{<<"certifi">>, <<"6AC7EFC1C6F8600B08D625292D4BBF584E14847CE1B6B5C44D983D273E1097EA">>},
|
||||
@ -148,6 +159,10 @@
|
||||
{<<"opentelemetry_exporter">>, <<"2B40007F509D38361744882FD060A8841AF772AB83BB542AA5350908B303AD65">>},
|
||||
{<<"opentelemetry_semantic_conventions">>, <<"D61FA1F5639EE8668D74B527E6806E0503EFC55A42DB7B5F39939D84C07D6895">>},
|
||||
{<<"parse_trans">>, <<"07CD9577885F56362D414E8C4C4E6BDF10D43A8767ABB92D24CBE8B24C54888B">>},
|
||||
{<<"prometheus">>, <<"6EDFBE928D271C7F657A6F2C46258738086584BD6CAE4A000B8B9A6009BA23A5">>},
|
||||
{<<"prometheus_cowboy">>, <<"BA286BECA9302618418892D37BCD5DC669A6CC001F4EB6D6AF85FF81F3F4F34C">>},
|
||||
{<<"prometheus_httpd">>, <<"0BBE831452CFDF9588538EB2F570B26F30C348ADAE5E95A7D87F35A5910BCF92">>},
|
||||
{<<"quantile_estimator">>, <<"282A8A323CA2A845C9E6F787D166348F776C1D4A41EDE63046D72D422E3DA946">>},
|
||||
{<<"ranch">>, <<"49FBCFD3682FAB1F5D109351B61257676DA1A2FDBE295904176D5E521A2DDFE5">>},
|
||||
{<<"ssl_verify_fun">>, <<"FE4C190E8F37401D30167C8C405EDA19469F34577987C76DDE613E838BBC67F8">>},
|
||||
{<<"tls_certificate_check">>, <<"4083B4A298ADD534C96125337CB01161C358BB32DD870D5A893AAE685FD91D70">>},
|
||||
|
Loading…
Reference in New Issue
Block a user