Merge branch 'master' of github.com:yandex/yandex-tank

This commit is contained in:
Alexey Lavrenuke 2017-01-19 16:59:54 +03:00
commit 64b6739dcf
4 changed files with 56 additions and 29 deletions

View File

@ -1121,9 +1121,17 @@ It is supplied with Yandex.Tank.
Thanks to https://github.com/influxdata/telegraf for metric collection agent.
For using this plugin, replace old plugin ``plugin_monitoring=yandextank.plugins.Monitoring`` in .ini file with this:
::
[tank]
plugin_monitoring=yandextank.plugins.Telegraf
In https://github.com/yandex/yandex-tank/blob/master/yandextank/core/config/00-base.ini it is already done. Please, don't use both ``plugin_monitoring=yandextank.plugins.Telegraf`` and ``plugin_monitoring=yandextank.plugins.Monitoring`` simultaneously.
INI file section: **[telegraf]**
You can use old monitoring config format, if you specify it in [monitoring] seciton. Telegraf plugin transparently supports it.
You can use old monitoring config format, if you specify it in [monitoring] section. Telegraf plugin transparently supports it.
You can use new monitoring config format, if you specify it in [telegraf] section.
Backward compatibility logic:
@ -1190,6 +1198,11 @@ Example:
<Netstat />
<Custom diff="1" measure="call" label="test">curl -s -H 'Host: host.tld' 'http://localhost:6100/stat' | python -c 'import sys, json; j = json.load(sys.stdin); print "\n".join(`c["values"]["accept"]` for c in j["charts"] if c["name"] == "localqueue_wait_time")'</Custom>
<Source>/path/to/file</Source>
<TelegrafRaw>
[[inputs.ping]]
urls = ["127.0.0.1"]
count = 1
</TelegrafRaw>
</Host>
<Host address="localhost" telegraf="/usr/bin/telegraf">
@ -1258,6 +1271,7 @@ List of metrics group names and particular metrics in them:
* interfaces - default: ",".join(['"eth%s"' % (num) for num in range(6)]). Format sample: ["eth0","eth1"]
* Netstat
* Kernel
* KernelVmstat
* NetResponse
* protocol - default: "tcp". Protocol, must be "tcp" or "udp"
* address - default: ":80". Server address and port
@ -1267,6 +1281,8 @@ List of metrics group names and particular metrics in them:
* Custom
* diff - default: 0
* measure - default: call - metric value is a command or script execution output. Example: `<Custom measure="call" diff="1" label="Base size">du -s /var/lib/mysql/ | awk '{print $1}'</Custom>`
* TelegrafRaw
* raw telegraf TOML format, transparently added to final collector config
* Source additional source file in telegraf json format, can be used to add custom metrics that need complex processing and do not fit into standard custom metrics (like log parsing with aggregation)

View File

@ -87,6 +87,10 @@ class ConfigManager(object):
"Kernel": {
"name": '[inputs.kernel]',
"fielddrop": '["boot_time"]',
},
"KernelVmstat": {
"name": '[inputs.kernel_vmstat]',
"fieldpass": '["pgfault", "pgmajfault"]',
}
}
defaults_enabled = ['CPU', 'Memory', 'Disk', 'Net', 'System', 'Kernel']
@ -106,6 +110,7 @@ class ConfigManager(object):
startups = []
shutdowns = []
sources = []
telegrafraw = []
# agent defaults
host_config = {}
for metric in host:
@ -137,6 +142,8 @@ class ConfigManager(object):
shutdowns.append(metric.text)
elif (str(metric.tag)).lower() == 'source':
sources.append(metric.text)
elif (str(metric.tag)).lower() == 'telegrafraw':
telegrafraw.append(metric.text)
if len(host_config) == 0:
logging.info('Empty host config, using defaults')
for section in defaults_enabled:
@ -153,7 +160,8 @@ class ConfigManager(object):
'host': hostname,
'startup': startups,
'shutdown': shutdowns,
'source': sources
'source': sources,
'telegrafraw': telegrafraw
}
logger.info("Result config %s", result)
return result
@ -170,6 +178,7 @@ class AgentConfig(object):
self.sources = config['source']
self.interval = config['interval']
self.comment = config['comment']
self.telegrafraw = config['telegrafraw']
self.host_config = config['host_config']
self.old_style_configs = old_style_configs
@ -341,6 +350,14 @@ class AgentConfig(object):
with open(cfg_path, 'a') as fds:
fds.write(inputs)
# telegraf raw configuration into xml
telegraf_raw = ""
for element in self.telegrafraw:
telegraf_raw += element
with open(cfg_path, 'a') as fds:
fds.write(telegraf_raw)
except Exception as exc:
logger.error(
'Error trying to create monitoring config. Malformed? %s',

View File

@ -9,7 +9,7 @@ class MetricsDecoder(object):
def __init__(self):
"""
translates telegraf metric names into common Monitoring metric names
translates `uncommon` names to `custom:`s
translates `uncommon` names to `custom:%s`s
"""
self.known_metrics = {
@ -25,16 +25,7 @@ class MetricsDecoder(object):
'system_load1': 'System_la1',
'system_load5': 'System_la5',
'system_load15': 'System_la15',
# 'cpu_usage_user': 'CPU_user',
# 'cpu_usage_system': 'CPU_system',
# 'cpu_usage_idle': 'CPU_idle',
# 'cpu_usage_iowait': 'CPU_iowait',
# 'cpu_usage_irq': 'CPU_irq',
# 'cpu_usage_nice': 'CPU_nice',
# 'cpu_usage_softirq': 'CPU_softirq',
# 'cpu_usage_steal': 'CPU_steal',
# 'cpu_usage_guest': 'CPU_guest',
'nstat_TcpRetransSegs': 'Net_retransmit'
'nstat_TcpRetransSegs': 'Net_retransmit',
# those guys became inactive due to net interface names and disk ids
# we don't need unknown id data here
# 'net_packets_recv': 'Net_rx',
@ -43,6 +34,17 @@ class MetricsDecoder(object):
# 'net_bytes_sent': 'Net_send',
# 'diskio_read_bytes': 'Disk_read',
# 'diskio_write_bytes': 'Disk_write',
# ----------
# remove this crunch after front refactoring
# 'cpu-cpu-total_usage_user': 'CPU_user',
# 'cpu-cpu-total_usage_system': 'CPU_system',
# 'cpu-cpu-total_usage_idle': 'CPU_idle',
# 'cpu-cpu-total_usage_iowait': 'CPU_iowait',
# 'cpu-cpu-total_usage_irq': 'CPU_irq',
# 'cpu-cpu-total_usage_nice': 'CPU_nice',
# 'cpu-cpu-total_usage_softirq': 'CPU_softirq',
# 'cpu-cpu-total_usage_steal': 'CPU_steal',
# 'cpu-cpu-total_usage_guest': 'CPU_guest'
}
self.diff_metrics = {
@ -51,7 +53,7 @@ class MetricsDecoder(object):
'net': ['packets_recv', 'packets_sent', 'bytes_recv', 'bytes_sent'],
'nstat': ['TcpRetransSegs'],
'net_response': [],
'kernel': ['context_switches', 'interrupts', 'processes_forked'],
'kernel': ['context_switches', 'interrupts', 'processes_forked', 'vmstat_pgfault', 'vmstat_pgmajfault'],
'diskio': [
'read_bytes', 'write_bytes', 'io_time', 'read_time', 'reads',
'write_time', 'writes'

View File

@ -41,21 +41,15 @@ class MonitoringReader(object):
# key_group sample: diskio
# key_name sample: io_time
try:
key_group, key_name = key.split('_')[
0].split('-')[0], '_'.join(
key.split('_')[1:])
key_group, key_name = key.split('_')[0].split('-')[0], '_'.join(key.split('_')[1:])
except:
key_group, key_name = key.split('_')[
0], '_'.join(key.split('_')[1:])
key_group, key_name = key.split('_')[0], '_'.join(key.split('_')[1:])
if key_group in decoder.diff_metrics.keys():
if key_name in decoder.diff_metrics[
key_group]:
decoded_key = decoder.find_common_names(
key)
if key_name in decoder.diff_metrics[key_group]:
decoded_key = decoder.find_common_names(key)
if self.prev_check:
try:
value = jsn[ts][
key] - self.prev_check[key]
value = jsn[ts][key] - self.prev_check[key]
except KeyError:
logger.debug(
'There is no diff value for metric %s.\n'
@ -64,11 +58,9 @@ class MonitoringReader(object):
ts,
exc_info=True)
value = 0
prepared_results[
decoded_key] = value
prepared_results[decoded_key] = value
else:
decoded_key = decoder.find_common_names(
key)
decoded_key = decoder.find_common_names(key)
prepared_results[decoded_key] = value
else:
decoded_key = decoder.find_common_names(key)