diff --git a/apps/hellgate/src/hellgate.app.src b/apps/hellgate/src/hellgate.app.src index 9f16d04..56a5c29 100644 --- a/apps/hellgate/src/hellgate.app.src +++ b/apps/hellgate/src/hellgate.app.src @@ -21,6 +21,8 @@ payproc_errors, erl_health, limiter_proto, + prometheus, + prometheus_cowboy, opentelemetry_api, opentelemetry_exporter, opentelemetry diff --git a/apps/hellgate/src/hellgate.erl b/apps/hellgate/src/hellgate.erl index bc3b8e1..ea39bf4 100644 --- a/apps/hellgate/src/hellgate.erl +++ b/apps/hellgate/src/hellgate.erl @@ -104,8 +104,15 @@ get_prometheus_route() -> -spec start(normal, any()) -> {ok, pid()} | {error, any()}. start(_StartType, _StartArgs) -> + ok = setup_metrics(), supervisor:start_link(?MODULE, []). -spec stop(any()) -> ok. stop(_State) -> ok. + +%% + +setup_metrics() -> + ok = woody_ranch_prometheus_collector:setup(), + ok = woody_hackney_prometheus_collector:setup(). diff --git a/apps/hellgate/src/hg_invoice_payment.erl b/apps/hellgate/src/hg_invoice_payment.erl index bf0d428..0563769 100644 --- a/apps/hellgate/src/hg_invoice_payment.erl +++ b/apps/hellgate/src/hg_invoice_payment.erl @@ -686,8 +686,8 @@ validate_recurrent_terms(RecurrentTerms, PaymentTool) -> _ = case hg_payment_tool:has_any_payment_method(PaymentTool, PMs) of false -> - logger:info("PaymentTool: ~p", [PaymentTool]), - logger:info("RecurrentPaymentMethods: ~p", [PMs]), + logger:notice("PaymentTool: ~p", [PaymentTool]), + logger:notice("RecurrentPaymentMethods: ~p", [PMs]), throw_invalid_request(<<"Invalid payment method">>); true -> ok @@ -794,7 +794,7 @@ log_route_choice_meta(#{choice_meta := undefined}, _Revision) -> ok; log_route_choice_meta(#{choice_meta := ChoiceMeta}, Revision) -> Metadata = hg_routing:get_logger_metadata(ChoiceMeta, Revision), - logger:log(info, "Routing decision made", #{routing => Metadata}). + logger:log(notice, "Routing decision made", #{routing => Metadata}). maybe_log_misconfigurations({misconfiguration, _} = Error) -> {Format, Details} = hg_routing:prepare_log_message(Error), @@ -810,14 +810,14 @@ log_rejected_routes(all, Routes, VS) -> log_rejected_routes(limit_misconfiguration, Routes, _VS) -> ?LOG_MD(warning, "Limiter hold error caused route candidates to be rejected: ~p", [Routes]); log_rejected_routes(limit_overflow, Routes, _VS) -> - ?LOG_MD(info, "Limit overflow caused route candidates to be rejected: ~p", [Routes]); + ?LOG_MD(notice, "Limit overflow caused route candidates to be rejected: ~p", [Routes]); log_rejected_routes(adapter_unavailable, Routes, _VS) -> - ?LOG_MD(info, "Adapter unavailability caused route candidates to be rejected: ~p", [Routes]); + ?LOG_MD(notice, "Adapter unavailability caused route candidates to be rejected: ~p", [Routes]); log_rejected_routes(provider_conversion_is_too_low, Routes, _VS) -> - ?LOG_MD(info, "Lacking conversion of provider caused route candidates to be rejected: ~p", [Routes]); + ?LOG_MD(notice, "Lacking conversion of provider caused route candidates to be rejected: ~p", [Routes]); log_rejected_routes(forbidden, Routes, VS) -> - ?LOG_MD(info, "Rejected routes found for varset: ~p", [VS]), - ?LOG_MD(info, "Rejected routes found, rejected routes: ~p", [Routes]); + ?LOG_MD(notice, "Rejected routes found for varset: ~p", [VS]), + ?LOG_MD(notice, "Rejected routes found, rejected routes: ~p", [Routes]); log_rejected_routes(_, _Routes, _VS) -> ok. @@ -1932,7 +1932,7 @@ process_risk_score(Action, St) -> ok -> {next, {Events, hg_machine_action:set_timeout(0, Action)}}; {error, risk_score_is_too_high = Reason} -> - logger:info("No route found, reason = ~p, varset: ~p", [Reason, VS1]), + logger:notice("No route found, reason = ~p, varset: ~p", [Reason, VS1]), handle_choose_route_error(Reason, Events, St, Action) end. @@ -2030,8 +2030,6 @@ handle_choose_route_error(Error, Events, St, Action) -> %% NOTE See damsel payproc errors (proto/payment_processing_errors.thrift) for no route found -construct_routing_failure({rejected_routes, {forbidden, RejectedRoutes}}) -> - construct_routing_failure([forbidden], genlib:format(RejectedRoutes)); construct_routing_failure({rejected_routes, {SubCode, RejectedRoutes}}) when SubCode =:= limit_misconfiguration orelse SubCode =:= limit_overflow orelse @@ -2039,6 +2037,8 @@ construct_routing_failure({rejected_routes, {SubCode, RejectedRoutes}}) when SubCode =:= provider_conversion_is_too_low -> construct_routing_failure([rejected, SubCode], genlib:format(RejectedRoutes)); +construct_routing_failure({rejected_routes, {_SubCode, RejectedRoutes}}) -> + construct_routing_failure([forbidden], genlib:format(RejectedRoutes)); construct_routing_failure({misconfiguration = Code, Details}) -> construct_routing_failure([unknown, {unknown_error, atom_to_binary(Code)}], genlib:format(Details)); construct_routing_failure(Code = risk_score_is_too_high) -> @@ -2336,7 +2336,7 @@ process_failure({payment, Step} = Activity, Events, Action, Failure, St, _Refund Target = get_target(St), case check_retry_possibility(Target, Failure, St) of {retry, Timeout} -> - _ = logger:info("Retry session after transient failure, wait ~p", [Timeout]), + _ = logger:notice("Retry session after transient failure, wait ~p", [Timeout]), {SessionEvents, SessionAction} = retry_session(Action, Target, Timeout), {next, {Events ++ SessionEvents, SessionAction}}; fatal -> @@ -3273,7 +3273,7 @@ log_cascade_attempt_context( #domain_PaymentsServiceTerms{attempt_limit = AttemptLimit}, #st{routes = AttemptedRoutes} ) -> - ?LOG_MD(info, "Cascade context: merchant payment terms' attempt limit '~p', attempted routes: ~p", [ + ?LOG_MD(notice, "Cascade context: merchant payment terms' attempt limit '~p', attempted routes: ~p", [ AttemptLimit, AttemptedRoutes ]). diff --git a/apps/hellgate/src/hg_invoice_payment_refund.erl b/apps/hellgate/src/hg_invoice_payment_refund.erl index fb2d247..586c154 100644 --- a/apps/hellgate/src/hg_invoice_payment_refund.erl +++ b/apps/hellgate/src/hg_invoice_payment_refund.erl @@ -313,7 +313,7 @@ finish_session_processing({Events0, Action}, Session, Refund) -> {finished, ?session_failed(Failure)} -> case check_retry_possibility(Failure, Refund) of {retry, Timeout} -> - _ = logger:info("Retry session after transient failure, wait ~p", [Timeout]), + _ = logger:notice("Retry session after transient failure, wait ~p", [Timeout]), {SessionEvents, SessionAction} = retry_session(Action, Timeout), {next, {Events1 ++ SessionEvents, SessionAction}}; fatal -> diff --git a/apps/hellgate/src/hg_limiter.erl b/apps/hellgate/src/hg_limiter.erl index 0741832..dc86d5b 100644 --- a/apps/hellgate/src/hg_limiter.erl +++ b/apps/hellgate/src/hg_limiter.erl @@ -86,7 +86,7 @@ check_limits_([T | TurnoverLimits], Context, Acc) -> true -> check_limits_(TurnoverLimits, Context, [Limit | Acc]); false -> - logger:info("Limit with id ~p overflowed, amount ~p upper boundary ~p", [ + logger:notice("Limit with id ~p overflowed, amount ~p upper boundary ~p", [ LimitID, LimiterAmount, UpperBoundary diff --git a/apps/hellgate/src/hg_machine.erl b/apps/hellgate/src/hg_machine.erl index 05d801a..19aa844 100644 --- a/apps/hellgate/src/hg_machine.erl +++ b/apps/hellgate/src/hg_machine.erl @@ -346,7 +346,7 @@ dispatch_repair(Ns, Payload, Machine) -> marshal_repair_result(ok, Result, Machine) catch throw:{exception, Reason} = Error -> - logger:info("Process repair failed, ~p", [Reason]), + logger:notice("Process repair failed, ~p", [Reason]), woody_error:raise(business, marshal_repair_failed(Error)) end. diff --git a/apps/hellgate/src/hg_recurrent_paytool.erl b/apps/hellgate/src/hg_recurrent_paytool.erl index 87fe3d3..958f9e9 100644 --- a/apps/hellgate/src/hg_recurrent_paytool.erl +++ b/apps/hellgate/src/hg_recurrent_paytool.erl @@ -253,7 +253,7 @@ init(EncodedParams, #{id := RecPaymentToolID}) -> {ChosenRoute, ChoiceContext} = hg_routing:choose_route(NonFailRatedRoutes), ChosenPaymentRoute = hg_route:to_payment_route(ChosenRoute), LoggerMetadata = hg_routing:get_logger_metadata(ChoiceContext, Revision), - _ = logger:log(info, "Routing decision made", #{routing => LoggerMetadata}), + _ = logger:log(notice, "Routing decision made", #{routing => LoggerMetadata}), RecPaymentTool2 = set_minimal_payment_cost(RecPaymentTool, ChosenPaymentRoute, VS, Revision), {ok, {Changes, Action}} = start_session(), StartChanges = [ @@ -376,7 +376,7 @@ validate_risk_score(RiskScore) when RiskScore == low; RiskScore == high -> RiskScore. handle_route_error(risk_score_is_too_high = Reason, RecPaymentTool) -> - _ = logger:log(info, "No route found, reason = ~p", [Reason], logger:get_process_metadata()), + _ = logger:log(notice, "No route found, reason = ~p", [Reason], logger:get_process_metadata()), {misconfiguration, {'No route found for a recurrent payment tool', RecPaymentTool}}. handle_route_error({no_route_found, {Reason, RejectedRoutes}}, RecPaymentTool, Varset) -> LogFun = fun(Msg, Param) -> diff --git a/apps/hellgate/src/hg_routing.erl b/apps/hellgate/src/hg_routing.erl index fe59214..6b8096b 100644 --- a/apps/hellgate/src/hg_routing.erl +++ b/apps/hellgate/src/hg_routing.erl @@ -483,6 +483,7 @@ get_provider_status(Route, FDStats) -> AvailabilityStatus = get_adapter_availability_status(FdOverrides, AvailabilityServiceID, FDStats), ConversionStatus = get_provider_conversion_status(FdOverrides, ConversionServiceID, FDStats), {AvailabilityStatus, ConversionStatus}. + get_adapter_availability_status(#domain_RouteFaultDetectorOverrides{enabled = true}, _FDID, _Stats) -> %% ignore fd statistic if set override {alive, 0.0}; diff --git a/config/sys.config b/config/sys.config index 5489589..86b56f2 100644 --- a/config/sys.config +++ b/config/sys.config @@ -46,6 +46,15 @@ event_handler_opts => #{ formatter_opts => #{ max_length => 1000 + }, + events_severity => #{ + %% Maybe disregard those events by lowering their severity level + ['call service'] => info, + ['service result'] => info, + ['invoke service handler'] => info, + ['service handler result'] => info, + ['service handler result', error, business] => warning, + ['client cache hit'] => info } } }}, @@ -126,6 +135,10 @@ event_handler_opts => #{ formatter_opts => #{ max_length => 1000 + }, + events_severity => #{ + %% Was 'info' + ['service handler result', error, business] => warning } } }} @@ -150,6 +163,10 @@ event_handler_opts => #{ formatter_opts => #{ max_length => 1000 + }, + events_severity => #{ + %% Was 'info' + ['service handler result', error, business] => warning } } }} diff --git a/rebar.config b/rebar.config index 8bc11d8..edeeb8c 100644 --- a/rebar.config +++ b/rebar.config @@ -40,6 +40,8 @@ {erl_health, {git, "https://github.com/valitydev/erlang-health.git", {branch, "master"}}}, {fault_detector_proto, {git, "https://github.com/valitydev/fault-detector-proto.git", {branch, "master"}}}, {limiter_proto, {git, "https://github.com/valitydev/limiter-proto.git", {branch, "master"}}}, + {prometheus, "4.8.1"}, + {prometheus_cowboy, "0.1.8"}, %% OpenTelemetry deps {opentelemetry_api, "1.2.1"}, @@ -72,12 +74,6 @@ {profiles, [ {prod, [ {deps, [ - %% NOTE - %% Because of a dependency conflict, prometheus libs are only included in production build for now - %% https://github.com/project-fifo/rebar3_lint/issues/42 - %% https://github.com/valitydev/hellgate/pull/2/commits/884724c1799703cee4d1033850fe32c17f986d9e - {prometheus, "4.8.1"}, - {prometheus_cowboy, "0.1.8"}, % for introspection on production {recon, "2.5.2"}, {logger_logstash_formatter, @@ -92,8 +88,6 @@ {tools, load}, {opentelemetry, temporary}, logger_logstash_formatter, - prometheus, - prometheus_cowboy, sasl, hellgate ]}, @@ -111,7 +105,7 @@ ]} ]}. -{plugins, [ +{project_plugins, [ {covertool, "2.0.4"}, {erlfmt, "1.0.0"}, {rebar3_lint, "1.0.1"}, @@ -136,3 +130,11 @@ "ct.coverdata" ]} ]}. + +%% NOTE +%% It is needed to use rebar3 lint plugin +{overrides, [ + {del, accept, [{plugins, [{rebar3_archive_plugin, "0.0.2"}]}]}, + {del, prometheus_cowboy, [{plugins, [{rebar3_archive_plugin, "0.0.1"}]}]}, + {del, prometheus_httpd, [{plugins, [{rebar3_archive_plugin, "0.0.1"}]}]} +]}. diff --git a/rebar.lock b/rebar.lock index 4406897..3ec79f9 100644 --- a/rebar.lock +++ b/rebar.lock @@ -1,5 +1,6 @@ {"1.2.0", -[{<<"acceptor_pool">>,{pkg,<<"acceptor_pool">>,<<"1.0.0">>},2}, +[{<<"accept">>,{pkg,<<"accept">>,<<"0.3.5">>},2}, + {<<"acceptor_pool">>,{pkg,<<"acceptor_pool">>,<<"1.0.0">>},2}, {<<"bender_client">>, {git,"https://github.com/valitydev/bender-client-erlang.git", {ref,"d8837617c8dc36216ce8c4ffc9a56a34e423ca5e"}}, @@ -79,10 +80,14 @@ {git,"https://github.com/valitydev/payproc-errors-erlang.git", {ref,"8ae8586239ef68098398acf7eb8363d9ec3b3234"}}, 0}, + {<<"prometheus">>,{pkg,<<"prometheus">>,<<"4.8.1">>},0}, + {<<"prometheus_cowboy">>,{pkg,<<"prometheus_cowboy">>,<<"0.1.8">>},0}, + {<<"prometheus_httpd">>,{pkg,<<"prometheus_httpd">>,<<"2.1.11">>},1}, + {<<"quantile_estimator">>,{pkg,<<"quantile_estimator">>,<<"0.2.1">>},1}, {<<"ranch">>,{pkg,<<"ranch">>,<<"1.8.0">>},2}, {<<"scoper">>, {git,"https://github.com/valitydev/scoper.git", - {ref,"41a14a558667316998af9f49149ee087ffa8bef2"}}, + {ref,"55a2a32ee25e22fa35f583a18eaf38b2b743429b"}}, 0}, {<<"snowflake">>, {git,"https://github.com/valitydev/snowflake.git", @@ -99,10 +104,11 @@ {<<"unicode_util_compat">>,{pkg,<<"unicode_util_compat">>,<<"0.7.0">>},2}, {<<"woody">>, {git,"https://github.com/valitydev/woody_erlang.git", - {ref,"5d46291a6bfcee0bae2a9346a7d927603a909249"}}, + {ref,"2cbe19998c073c545bf0f6b4b5211639c40b9925"}}, 0}]}. [ {pkg_hash,[ + {<<"accept">>, <<"B33B127ABCA7CC948BBE6CAA4C263369ABF1347CFA9D8E699C6D214660F10CD1">>}, {<<"acceptor_pool">>, <<"43C20D2ACAE35F0C2BCD64F9D2BDE267E459F0F3FD23DAB26485BF518C281B21">>}, {<<"cache">>, <<"B23A5FE7095445A88412A6E614C933377E0137B44FFED77C9B3FEF1A731A20B2">>}, {<<"certifi">>, <<"D4FB0A6BB20B7C9C3643E22507E42F356AC090A1DCEA9AB99E27E0376D695EBA">>}, @@ -123,11 +129,16 @@ {<<"opentelemetry_exporter">>, <<"1D8809C0D4F4ACF986405F7700ED11992BCBDB6A4915DD11921E80777FFA7167">>}, {<<"opentelemetry_semantic_conventions">>, <<"B67FE459C2938FCAB341CB0951C44860C62347C005ACE1B50F8402576F241435">>}, {<<"parse_trans">>, <<"16328AB840CC09919BD10DAB29E431DA3AF9E9E7E7E6F0089DD5A2D2820011D8">>}, + {<<"prometheus">>, <<"FA76B152555273739C14B06F09F485CF6D5D301FE4E9D31B7FF803D26025D7A0">>}, + {<<"prometheus_cowboy">>, <<"CFCE0BC7B668C5096639084FCD873826E6220EA714BF60A716F5BD080EF2A99C">>}, + {<<"prometheus_httpd">>, <<"F616ED9B85B536B195D94104063025A91F904A4CFC20255363F49A197D96C896">>}, + {<<"quantile_estimator">>, <<"EF50A361F11B5F26B5F16D0696E46A9E4661756492C981F7B2229EF42FF1CD15">>}, {<<"ranch">>, <<"8C7A100A139FD57F17327B6413E4167AC559FBC04CA7448E9BE9057311597A1D">>}, {<<"ssl_verify_fun">>, <<"354C321CF377240C7B8716899E182CE4890C5938111A1296ADD3EC74CF1715DF">>}, {<<"tls_certificate_check">>, <<"C76C4C5D79EE79A2B11C84F910C825D6F024A78427C854F515748E9BD025E987">>}, {<<"unicode_util_compat">>, <<"BC84380C9AB48177092F43AC89E4DFA2C6D62B40B8BD132B1059ECC7232F9A78">>}]}, {pkg_hash_ext,[ + {<<"accept">>, <<"11B18C220BCC2EAB63B5470C038EF10EB6783BCB1FCDB11AA4137DEFA5AC1BB8">>}, {<<"acceptor_pool">>, <<"0CBCD83FDC8B9AD2EEE2067EF8B91A14858A5883CB7CD800E6FCD5803E158788">>}, {<<"cache">>, <<"44516CE6FA03594D3A2AF025DD3A87BFE711000EB730219E1DDEFC816E0AA2F4">>}, {<<"certifi">>, <<"6AC7EFC1C6F8600B08D625292D4BBF584E14847CE1B6B5C44D983D273E1097EA">>}, @@ -148,6 +159,10 @@ {<<"opentelemetry_exporter">>, <<"2B40007F509D38361744882FD060A8841AF772AB83BB542AA5350908B303AD65">>}, {<<"opentelemetry_semantic_conventions">>, <<"D61FA1F5639EE8668D74B527E6806E0503EFC55A42DB7B5F39939D84C07D6895">>}, {<<"parse_trans">>, <<"07CD9577885F56362D414E8C4C4E6BDF10D43A8767ABB92D24CBE8B24C54888B">>}, + {<<"prometheus">>, <<"6EDFBE928D271C7F657A6F2C46258738086584BD6CAE4A000B8B9A6009BA23A5">>}, + {<<"prometheus_cowboy">>, <<"BA286BECA9302618418892D37BCD5DC669A6CC001F4EB6D6AF85FF81F3F4F34C">>}, + {<<"prometheus_httpd">>, <<"0BBE831452CFDF9588538EB2F570B26F30C348ADAE5E95A7D87F35A5910BCF92">>}, + {<<"quantile_estimator">>, <<"282A8A323CA2A845C9E6F787D166348F776C1D4A41EDE63046D72D422E3DA946">>}, {<<"ranch">>, <<"49FBCFD3682FAB1F5D109351B61257676DA1A2FDBE295904176D5E521A2DDFE5">>}, {<<"ssl_verify_fun">>, <<"FE4C190E8F37401D30167C8C405EDA19469F34577987C76DDE613E838BBC67F8">>}, {<<"tls_certificate_check">>, <<"4083B4A298ADD534C96125337CB01161C358BB32DD870D5A893AAE685FD91D70">>},