hellgate/config/sys.config
Andrew Mayorov c78c98c17b
Fix accessing fault-detector settings (#13)
Also describe various fault detection settings in sys.config.
2022-03-02 14:16:18 +03:00

184 lines
6.5 KiB
Plaintext

[
%% Settings for the `kernel` application: log level and log handlers.
{kernel, [
% Primary log level.
{logger_level, info},
{logger, [
% Single handler registered as `console_logger`, implemented by `logger_std_h`.
% Despite its name, it is configured below to write to a file.
{handler, console_logger, logger_std_h, #{
% Handler-level threshold.
% NOTE(review): this is more verbose than `logger_level` above (info); presumably
% debug events are dropped at the primary level before reaching this handler — confirm.
level => debug,
config => #{
% NOTE(review): `{file, Path}` is the older tuple spelling; newer OTP releases document
% `type => file` together with a separate `file => Path` key — confirm the target OTP version.
type => {file, "/var/log/hellgate/console.json"},
% Presumably the message queue length above which the handler switches to
% synchronous logging (see `logger_std_h` docs) — confirm.
sync_mode_qlen => 20
},
% Formatter module with empty (default) options.
formatter => {logger_logstash_formatter, #{}}
}}
]}
]},
%% Settings for the `scoper` application.
{scoper, [
% Storage backend module used by scoper.
% NOTE(review): by its name this keeps scope data in logger metadata — confirm.
{storage, scoper_storage_logger}
]},
%% Settings for the `hellgate` application itself.
{hellgate, [
% "::" is the IPv6 unspecified address; presumably the address the server listens on.
{ip, "::"},
{port, 8022},
% Presumably milliseconds — confirm against the code reading this setting.
{default_woody_handling_timeout, 30000},
%% 1 sec above cowboy's request_timeout
{shutdown_timeout, 7000},
{protocol_opts, #{
% Keepalive request timeout.
% NOTE(review): this was described as bumping the timeout "up to a minute", but 6000 is
% 6 seconds if the unit is ms. `shutdown_timeout` above is exactly 1000 more than this
% value, so the two are consistent with each other — confirm which duration is intended.
request_timeout => 6000,
% Should be greater than any other timeouts
idle_timeout => infinity
}},
% Options for the scoper event handler; `max_length` caps formatter output.
% NOTE(review): the unit of `max_length` (characters vs bytes) is not visible here.
{scoper_event_handler_options, #{
event_handler_opts => #{
formatter_opts => #{
max_length => 1000
}
}
}},
% Service name => endpoint URL.
{services, #{
automaton => "http://machinegun:8022/v1/automaton",
eventsink => "http://machinegun:8022/v1/event_sink",
accounter => "http://shumway:8022/shumpune",
party_management => "http://party-management:8022/v1/processing/partymgmt",
% Note: this URL and `recurrent_paytool` below point at the `hellgate` host itself.
customer_management => "http://hellgate:8022/v1/processing/customer_management",
% TODO make more consistent
recurrent_paytool => "http://hellgate:8022/v1/processing/recpaytool",
fault_detector => "http://fault-detector:8022/v1/fault-detector"
}},
{proxy_opts, #{
% Empty map: no transport options are overridden here.
transport_opts => #{}
}},
% Named health checks; each value has the shape {Module, Function, Args}.
{health_check, #{
disk => {erl_health, disk, ["/", 99]},
memory => {erl_health, cg_memory, [70]},
% NOTE(review): "{{ service_name }}" looks like an unrendered template placeholder;
% presumably it is substituted at deploy time — confirm, otherwise set a literal name.
service => {erl_health, service, [<<"{{ service_name }}">>]},
dmt_client => {dmt_client, health_check, [<<"hellgate">>]}
}},
% Retry policy per payment status.
% NOTE(review): meaning and units of the `exponential` tuple fields are not visible here.
{payment_retry_policy, #{
processed => {exponential, {max_total_timeout, 30}, 2, 1},
captured => no_retry,
refunded => no_retry
}},
% Presumably milliseconds — confirm.
{inspect_timeout, 3000},
{fault_detector, #{
enabled => true,
% Woody RPC request timeout (ms).
timeout => 4000,
% How to measure technical availability of providers?
availability => #{
% We consider a provider _unavailable_ if its failrate is greater than this
% threshold (0.0 is no failures at all / 1.0 is every attempt to interact fails).
critical_fail_rate => 0.7,
% For how long to account operations in failrate computation (ms)?
sliding_window => 60000,
% If an operation takes too long, when do we consider it a failure (ms)?
operation_time_limit => 10000,
% With which time resolution should we compute failrate (s)?
% Less means quicker reaction to failures at the cost of increased load on the service.
pre_aggregation_size => 2
},
% How to measure providers' conversion (aka business availability)?
conversion => #{
% Which business level error codes aren't treated as failures?
benign_failures => [
insufficient_funds,
rejected_by_issuer,
processing_deadline_reached
],
% We consider a provider _unavailable_ if its failrate is greater than this
% threshold (0.0 is no failures at all / 1.0 is every attempt to interact fails).
critical_fail_rate => 0.7,
% For how long to account operations in failrate computation (ms)?
sliding_window => 60000,
% If an operation takes too long, when do we consider it a failure (ms)?
operation_time_limit => 1200000,
% With which time resolution should we compute failrate (s)?
% Less means quicker reaction to failures at the cost of increased load on the service.
pre_aggregation_size => 2
}
}},
% Intervals are binaries with a unit suffix ("s" / "m").
{binding, #{
max_sync_interval => <<"5s">>,
outdated_sync_interval => <<"1440m">>,
outdate_timeout => <<"180m">>
}}
]},
%% Settings for the `party_management` application.
{party_management, [
% Options for the scoper event handler; `max_length` caps formatter output.
{scoper_event_handler_options, #{
event_handler_opts => #{
formatter_opts => #{
max_length => 1000
}
}
}},
% Service name => endpoint URL.
{services, #{
automaton => "http://machinegun:8022/v1/automaton",
accounter => "http://shumway:8022/shumpune"
}},
%% see `pm_party_cache:cache_options/0`
{cache_options, #{
% 200Mb, cache memory quota in bytes
memory => 209715200,
% NOTE(review): units are not visible here; presumably `ttl` is in seconds and
% `size` is a maximum entry count — confirm against `pm_party_cache`.
ttl => 3600,
size => 3000
}}
]},
%% Settings for the `dmt_client` application.
{dmt_client, [
% milliseconds
{cache_update_interval, 5000},
% Cache limits: both an element count and a memory quota are set.
{max_cache_size, #{
elements => 20,
% 50Mb
memory => 52428800
}},
% Woody event handlers as {Module, Options} pairs; `max_length` caps formatter output.
{woody_event_handlers, [
{scoper_woody_event_handler, #{
event_handler_opts => #{
formatter_opts => #{
max_length => 1000
}
}
}}
]},
% Service name => endpoint URL. Unlike the `services` maps of the other applications
% in this file, the URLs here are binaries rather than strings.
{service_urls, #{
'Repository' => <<"http://dominant:8022/v1/domain/repository">>,
'RepositoryClient' => <<"http://dominant:8022/v1/domain/repository_client">>
}}
]},
%% Settings for the `party_client` application.
{party_client, [
% Service name => endpoint URL.
{services, #{
party_management => "http://party-management:8022/v1/processing/partymgmt"
}},
{woody, #{
% disabled | safe | aggressive
cache_mode => safe,
options => #{
woody_client => #{
% Event handler as a {Module, Options} pair; `max_length` caps formatter output.
event_handler =>
{scoper_woody_event_handler, #{
event_handler_opts => #{
formatter_opts => #{
max_length => 1000
}
}
}}
}
}
}}
]},
%% Settings for the `snowflake` application.
{snowflake, [
% 1 second (i.e. the value is presumably in milliseconds — confirm).
{max_backward_clock_moving, 1000},
% NOTE(review): presumably derives the machine id from a hash of the hostname — confirm.
{machine_id, hostname_hash}
]},
%% Settings for the `prometheus` application.
{prometheus, [
% Only the `default` collector set is enabled.
{collectors, [default]}
]}
].