%% hellgate/config/sys.config

[
    % OTP `kernel` application: logger setup.
    {kernel, [
        % Primary log level. Per the OTP logger docs, events are checked against
        % the primary level before any handler level, so the handler's `debug`
        % level below is effectively capped at `info`.
        {logger_level, info},
        {logger, [
            % Despite its name, `console_logger` is configured to write to a file
            % (see `type` below), not to standard output.
            {handler, console_logger, logger_std_h, #{
                level => debug,
                config => #{
                    type => {file, "/var/log/hellgate/console.json"},
                    % Per the OTP logger docs: handler message-queue length above
                    % which log requests are handled synchronously.
                    sync_mode_qlen => 20
                },
                % Formatter module with an empty options map (i.e. its defaults).
                formatter => {logger_logstash_formatter, #{}}
            }}
        ]}
    ]},

    % scoper: sets the `storage` option to the `scoper_storage_logger` module.
    {scoper, [
        {storage, scoper_storage_logger}
    ]},

    % hg_proto: map of service name => endpoint URL (plain strings).
    % All endpoints listed here use port 8022. `customer_management` and
    % `recurrent_paytool` point at the `hellgate` host itself.
    {hg_proto, [
        {services, #{
            automaton => "http://machinegun:8022/v1/automaton",
            eventsink => "http://machinegun:8022/v1/event_sink",
            accounter => "http://shumway:8022/accounter",
            party_management => "http://party-management:8022/v1/processing/partymgmt",
            customer_management => "http://hellgate:8022/v1/processing/customer_management",
            % TODO make more consistent
            % (presumably the path naming: `recpaytool` here vs the spelled-out
            % `customer_management` above -- confirm)
            recurrent_paytool => "http://hellgate:8022/v1/processing/recpaytool",
            fault_detector => "http://fault-detector:8022/v1/fault-detector"
        }}
    ]},

    % Settings of the hellgate application itself.
    {hellgate, [
        % "::" is the IPv6 unspecified address; presumably the listen address
        % and port of the service -- confirm against the code reading them.
        {ip, "::"},
        {port, 8022},
        % Presumably milliseconds, like the other timeouts here -- confirm.
        {default_woody_handling_timeout, 30000},
        %% 1 sec above cowboy's request_timeout
        {shutdown_timeout, 7000},
        {protocol_opts, #{
            % Keepalive (request) timeout, ms.
            % NOTE(review): this comment used to read "Bump keepalive timeout up
            % to a minute", but the value is 6000 (6 s). 6000 is what the
            % "1 sec above" shutdown_timeout of 7000 expects. Confirm whether
            % 6000 or 60000 is intended; if this is raised, shutdown_timeout
            % has to be raised along with it.
            request_timeout => 6000,
            % Should be greater than any other timeouts
            idle_timeout => infinity
        }},
        {scoper_event_handler_options, #{
            event_handler_opts => #{
                formatter_opts => #{
                    % Presumably a cap on formatted value length -- confirm units.
                    max_length => 1000
                },
                % Map of event (a list of atoms) => log severity.
                events_severity => #{
                    %% Maybe disregard those events by lowering their severity level
                    ['call service'] => info,
                    ['service result'] => info,
                    ['invoke service handler'] => info,
                    ['service handler result'] => info,
                    ['service handler result', error, business] => warning,
                    ['client cache hit'] => info
                }
            }
        }},
        {proxy_opts, #{
            % Empty map: no transport options are overridden here.
            transport_opts => #{}
        }},
        % Map of check name => {Module, Function, Args}.
        {health_check, #{
            disk => {erl_health, disk, ["/", 99]},
            memory => {erl_health, cg_memory, [70]},
            % NOTE(review): `{{ service_name }}` looks like an unrendered template
            % placeholder -- confirm this file is rendered before being loaded.
            service => {erl_health, service, [<<"{{ service_name }}">>]},
            dmt_client => {dmt_client, health_check, [<<"hellgate">>]}
        }},
        % Retry policy per key (`processed`, `captured`, `refunded`); only
        % `processed` retries. The meaning of the positional values in the
        % `exponential` tuple is not visible here -- check the retry policy code.
        {payment_retry_policy, #{
            processed => {exponential, {max_total_timeout, 30}, 2, 1},
            captured => no_retry,
            refunded => no_retry
        }},
        % Presumably milliseconds -- confirm.
        {inspect_timeout, 3000},
        {payment, #{
            % Payment error codes that signal hellgate to attempt another route
            % (default = [<<"preauthorization_failed">>])
            default_transient_errors => [<<"preauthorization_failed">>]
        }},
        {fault_detector, #{
            enabled => true,
            % Woody RPC request timeout (ms).
            timeout => 4000,
            % How to measure technical availability of providers?
            availability => #{
                % We consider provider as _unavailable_ if their failrate is greater than this
                % threshold (0.0 is no failures at all / 1.0 is every attempt to interact fails).
                critical_fail_rate => 0.7,
                % For how long to account operations in failrate computation (ms)?
                sliding_window => 60000,
                % If operation takes too long, when do we consider it as failure (ms)?
                operation_time_limit => 10000,
                % With which time resolution we should compute failrate (s)?
                % Less means quicker reaction to failures at the cost of increased load on service.
                pre_aggregation_size => 2
            },
            % How to measure providers' conversion (aka business availability)?
            conversion => #{
                % Which business level error codes aren't treated as failures?
                benign_failures => [
                    insufficient_funds,
                    rejected_by_issuer,
                    processing_deadline_reached
                ],
                % We consider provider as _unavailable_ if their failrate is greater than this
                % threshold (0.0 is no failures at all / 1.0 is every attempt to interact fails).
                critical_fail_rate => 0.7,
                % For how long to account operations in failrate computation (ms)?
                sliding_window => 60000,
                % If operation takes too long, when do we consider it as failure (ms)?
                % (1200000 ms = 20 minutes, versus 10 s for technical availability.)
                operation_time_limit => 1200000,
                % With which time resolution we should compute failrate (s)?
                % Less means quicker reaction to failures at the cost of increased load on service.
                pre_aggregation_size => 2
            }
        }},
        % Intervals are binaries with a unit suffix (`s`, `m`);
        % 1440m = 24 hours, 180m = 3 hours.
        {binding, #{
            max_sync_interval => <<"5s">>,
            outdated_sync_interval => <<"1440m">>,
            outdate_timeout => <<"180m">>
        }}
    ]},

    % dmt_client: cache settings, woody event handlers and service endpoints.
    {dmt_client, [
        % milliseconds (5 seconds)
        {cache_update_interval, 5000},
        {max_cache_size, #{
            elements => 20,
            % 50 MiB, in bytes (50 * 1024 * 1024)
            memory => 52428800
        }},
        {woody_event_handlers, [
            {scoper_woody_event_handler, #{
                event_handler_opts => #{
                    formatter_opts => #{
                        max_length => 1000
                    },
                    % Map of event (a list of atoms) => log severity.
                    events_severity => #{
                        %% Was 'info'
                        ['service handler result', error, business] => warning
                    }
                }
            }}
        ]},
        % Note: URLs here are binaries, unlike the string URLs used by the
        % `hg_proto` and `party_client` sections of this file.
        {service_urls, #{
            'Repository' => <<"http://dominant:8022/v1/domain/repository">>,
            'RepositoryClient' => <<"http://dominant:8022/v1/domain/repository_client">>
        }}
    ]},

    % party_client: endpoint and woody client options.
    {party_client, [
        {services, #{
            % Same URL as the `party_management` entry under `hg_proto`;
            % keep the two in sync when changing either.
            party_management => "http://party-management:8022/v1/processing/partymgmt"
        }},
        {woody, #{
            % disabled | safe | aggressive
            cache_mode => safe,
            options => #{
                woody_client => #{
                    % {Module, Options} pair for the woody event handler.
                    event_handler =>
                        {scoper_woody_event_handler, #{
                            event_handler_opts => #{
                                formatter_opts => #{
                                    max_length => 1000
                                },
                                % Map of event (a list of atoms) => log severity.
                                events_severity => #{
                                    %% Was 'info'
                                    ['service handler result', error, business] => warning
                                }
                            }
                        }}
                }
            }
        }}
    ]},

    % snowflake: ID generator settings.
    {snowflake, [
        % 1 second (value is in milliseconds)
        {max_backward_clock_moving, 1000},
        % Presumably derives the machine id from a hash of the hostname -- confirm.
        {machine_id, hostname_hash}
    ]},

    % prometheus: only the `default` collector set is listed.
    {prometheus, [
        {collectors, [default]}
    ]},

    % hackney: sets `woody_hackney_prometheus` as the metrics module.
    {hackney, [
        {mod_metrics, woody_hackney_prometheus}
    ]}
].