diff --git a/docs/core_and_modules.rst b/docs/core_and_modules.rst index 1eaae54..f592481 100644 --- a/docs/core_and_modules.rst +++ b/docs/core_and_modules.rst @@ -1121,9 +1121,17 @@ It is supplied with Yandex.Tank. Thanks to https://github.com/influxdata/telegraf for metric collection agent. +To use this plugin, replace the old plugin ``plugin_monitoring=yandextank.plugins.Monitoring`` in your .ini file with this: +:: + + [tank] + plugin_monitoring=yandextank.plugins.Telegraf + +In https://github.com/yandex/yandex-tank/blob/master/yandextank/core/config/00-base.ini it is already done. Please don't use both ``plugin_monitoring=yandextank.plugins.Telegraf`` and ``plugin_monitoring=yandextank.plugins.Monitoring`` simultaneously. + INI file section: **[telegraf]** -You can use old monitoring config format, if you specify it in [monitoring] seciton. Telegraf plugin transparently supports it. +You can use old monitoring config format, if you specify it in [monitoring] section. Telegraf plugin transparently supports it. You can use new monitoring config format, if you specify it in [telegraf] section. Backward compatibility logic: @@ -1190,6 +1198,11 @@ Example: curl -s -H 'Host: host.tld' 'http://localhost:6100/stat' | python -c 'import sys, json; j = json.load(sys.stdin); print "\n".join(`c["values"]["accept"]` for c in j["charts"] if c["name"] == "localqueue_wait_time")' /path/to/file + + [[inputs.ping]] + urls = ["127.0.0.1"] + count = 1 + @@ -1258,6 +1271,7 @@ List of metrics group names and particular metrics in them: * interfaces - default: ",".join(['"eth%s"' % (num) for num in range(6)]). Format sample: ["eth0","eth1"] * Netstat * Kernel +* KernelVmstat * NetResponse * protocol - default: "tcp". Protocol, must be "tcp" or "udp" * address - default: ":80". 
Server address and port @@ -1267,6 +1281,8 @@ List of metrics group names and particular metrics in them: * Custom * diff - default: 0 * measure - default: call - metric value is a command or script execution output. Example: `du -s /var/lib/mysql/ | awk '{print $1}'` +* TelegrafRaw + * raw telegraf TOML format, transparently added to final collector config * Source additional source file in telegraf json format, can be used to add custom metrics that needs complex processing and do not fit into standart custom metrics (like log parsing with aggregation) diff --git a/yandextank/plugins/Telegraf/config.py b/yandextank/plugins/Telegraf/config.py index e854b61..22dbe3a 100644 --- a/yandextank/plugins/Telegraf/config.py +++ b/yandextank/plugins/Telegraf/config.py @@ -87,6 +87,10 @@ class ConfigManager(object): "Kernel": { "name": '[inputs.kernel]', "fielddrop": '["boot_time"]', + }, + "KernelVmstat": { + "name": '[inputs.kernel_vmstat]', + "fieldpass": '["pgfault", "pgmajfault"]', } } defaults_enabled = ['CPU', 'Memory', 'Disk', 'Net', 'System', 'Kernel'] @@ -106,6 +110,7 @@ class ConfigManager(object): startups = [] shutdowns = [] sources = [] + telegrafraw = [] # agent defaults host_config = {} for metric in host: @@ -137,6 +142,8 @@ class ConfigManager(object): shutdowns.append(metric.text) elif (str(metric.tag)).lower() == 'source': sources.append(metric.text) + elif (str(metric.tag)).lower() == 'telegrafraw': + telegrafraw.append(metric.text) if len(host_config) == 0: logging.info('Empty host config, using defaults') for section in defaults_enabled: @@ -153,7 +160,8 @@ class ConfigManager(object): 'host': hostname, 'startup': startups, 'shutdown': shutdowns, - 'source': sources + 'source': sources, + 'telegrafraw': telegrafraw } logger.info("Result config %s", result) return result @@ -170,6 +178,7 @@ class AgentConfig(object): self.sources = config['source'] self.interval = config['interval'] self.comment = config['comment'] + self.telegrafraw = 
config['telegrafraw'] self.host_config = config['host_config'] self.old_style_configs = old_style_configs @@ -341,6 +350,14 @@ class AgentConfig(object): with open(cfg_path, 'a') as fds: fds.write(inputs) + # telegraf raw configuration into xml + telegraf_raw = "" + for element in self.telegrafraw: + telegraf_raw += element + + with open(cfg_path, 'a') as fds: + fds.write(telegraf_raw) + except Exception as exc: logger.error( 'Error trying to create monitoring config. Malformed? %s', diff --git a/yandextank/plugins/Telegraf/decoder.py b/yandextank/plugins/Telegraf/decoder.py index f01b1e5..6e5ed36 100644 --- a/yandextank/plugins/Telegraf/decoder.py +++ b/yandextank/plugins/Telegraf/decoder.py @@ -9,7 +9,7 @@ class MetricsDecoder(object): def __init__(self): """ translates telegraf metric names into common Monitoring metric names - translates `uncommon` names to `custom:`s + translates `uncommon` names to `custom:%s`s """ self.known_metrics = { @@ -25,16 +25,7 @@ class MetricsDecoder(object): 'system_load1': 'System_la1', 'system_load5': 'System_la5', 'system_load15': 'System_la15', - # 'cpu_usage_user': 'CPU_user', - # 'cpu_usage_system': 'CPU_system', - # 'cpu_usage_idle': 'CPU_idle', - # 'cpu_usage_iowait': 'CPU_iowait', - # 'cpu_usage_irq': 'CPU_irq', - # 'cpu_usage_nice': 'CPU_nice', - # 'cpu_usage_softirq': 'CPU_softirq', - # 'cpu_usage_steal': 'CPU_steal', - # 'cpu_usage_guest': 'CPU_guest', - 'nstat_TcpRetransSegs': 'Net_retransmit' + 'nstat_TcpRetransSegs': 'Net_retransmit', # those guys became inactive due to net interface names and disk ids # we don't need unknown id data here # 'net_packets_recv': 'Net_rx', @@ -43,6 +34,17 @@ class MetricsDecoder(object): # 'net_bytes_sent': 'Net_send', # 'diskio_read_bytes': 'Disk_read', # 'diskio_write_bytes': 'Disk_write', + # ---------- + # remove this crunch after front refactoring + # 'cpu-cpu-total_usage_user': 'CPU_user', + # 'cpu-cpu-total_usage_system': 'CPU_system', + # 'cpu-cpu-total_usage_idle': 'CPU_idle', 
+ # 'cpu-cpu-total_usage_iowait': 'CPU_iowait', + # 'cpu-cpu-total_usage_irq': 'CPU_irq', + # 'cpu-cpu-total_usage_nice': 'CPU_nice', + # 'cpu-cpu-total_usage_softirq': 'CPU_softirq', + # 'cpu-cpu-total_usage_steal': 'CPU_steal', + # 'cpu-cpu-total_usage_guest': 'CPU_guest' } self.diff_metrics = { @@ -51,7 +53,7 @@ class MetricsDecoder(object): 'net': ['packets_recv', 'packets_sent', 'bytes_recv', 'bytes_sent'], 'nstat': ['TcpRetransSegs'], 'net_response': [], - 'kernel': ['context_switches', 'interrupts', 'processes_forked'], + 'kernel': ['context_switches', 'interrupts', 'processes_forked', 'vmstat_pgfault', 'vmstat_pgmajfault'], 'diskio': [ 'read_bytes', 'write_bytes', 'io_time', 'read_time', 'reads', 'write_time', 'writes' diff --git a/yandextank/plugins/Telegraf/reader.py b/yandextank/plugins/Telegraf/reader.py index 78a636d..f4e0a9a 100644 --- a/yandextank/plugins/Telegraf/reader.py +++ b/yandextank/plugins/Telegraf/reader.py @@ -41,21 +41,15 @@ class MonitoringReader(object): # key_group sample: diskio # key_name sample: io_time try: - key_group, key_name = key.split('_')[ - 0].split('-')[0], '_'.join( - key.split('_')[1:]) + key_group, key_name = key.split('_')[0].split('-')[0], '_'.join(key.split('_')[1:]) except: - key_group, key_name = key.split('_')[ - 0], '_'.join(key.split('_')[1:]) + key_group, key_name = key.split('_')[0], '_'.join(key.split('_')[1:]) if key_group in decoder.diff_metrics.keys(): - if key_name in decoder.diff_metrics[ - key_group]: - decoded_key = decoder.find_common_names( - key) + if key_name in decoder.diff_metrics[key_group]: + decoded_key = decoder.find_common_names(key) if self.prev_check: try: - value = jsn[ts][ - key] - self.prev_check[key] + value = jsn[ts][key] - self.prev_check[key] except KeyError: logger.debug( 'There is no diff value for metric %s.\n' @@ -64,11 +58,9 @@ class MonitoringReader(object): ts, exc_info=True) value = 0 - prepared_results[ - decoded_key] = value + prepared_results[decoded_key] = value else: - 
decoded_key = decoder.find_common_names( - key) + decoded_key = decoder.find_common_names(key) prepared_results[decoded_key] = value else: decoded_key = decoder.find_common_names(key)