change logger in monitoring plugin

This commit is contained in:
Alexey Lavrenuke 2015-03-17 14:56:38 +03:00
parent 930bde237b
commit 1e0ca516ff
2 changed files with 56 additions and 53 deletions

View File

@ -109,13 +109,13 @@ class AgentClient(object):
def start(self): def start(self):
"""Start remote agent""" """Start remote agent"""
logging.debug('Start monitoring: %s', self.host) logger.debug('Start monitoring: %s', self.host)
if not self.run: if not self.run:
raise ValueError("Empty run string") raise ValueError("Empty run string")
self.run += ['-t', str(int(time.time()))] self.run += ['-t', str(int(time.time()))]
logging.debug(self.run) logger.debug(self.run)
pipe = self.ssh.get_ssh_pipe(self.run) pipe = self.ssh.get_ssh_pipe(self.run)
logging.debug("Started: %s", pipe) logger.debug("Started: %s", pipe)
return pipe return pipe
def create_agent_config(self, loglevel): def create_agent_config(self, loglevel):
@ -156,21 +156,21 @@ class AgentClient(object):
def install(self, loglevel): def install(self, loglevel):
"""Create folder and copy agent and metrics scripts to remote host""" """Create folder and copy agent and metrics scripts to remote host"""
logging.info("Installing monitoring agent at %s@%s...", self.username, self.host) logger.info("Installing monitoring agent at %s@%s...", self.username, self.host)
agent_config = self.create_agent_config(loglevel) agent_config = self.create_agent_config(loglevel)
self.ssh.set_host_port(self.host, self.port, self.username) self.ssh.set_host_port(self.host, self.port, self.username)
# getting remote temp dir # getting remote temp dir
cmd = [self.python + ' -c "import tempfile; print tempfile.mkdtemp();"'] cmd = [self.python + ' -c "import tempfile; print tempfile.mkdtemp();"']
logging.debug("Get remote temp dir: %s", cmd) logger.debug("Get remote temp dir: %s", cmd)
pipe = self.ssh.get_ssh_pipe(cmd) pipe = self.ssh.get_ssh_pipe(cmd)
err = pipe.stderr.read().strip() err = pipe.stderr.read().strip()
if err: if err:
raise RuntimeError("[%s] ssh error: '%s'" % (self.host, err)) raise RuntimeError("[%s] ssh error: '%s'" % (self.host, err))
pipe.wait() pipe.wait()
logging.debug("Return code [%s]: %s", self.host, pipe.returncode) logger.debug("Return code [%s]: %s", self.host, pipe.returncode)
if pipe.returncode: if pipe.returncode:
raise RuntimeError("Failed to get remote dir via SSH at %s@%s, code %s: %s" % ( raise RuntimeError("Failed to get remote dir via SSH at %s@%s, code %s: %s" % (
self.username, self.host, pipe.returncode, pipe.stdout.read().strip())) self.username, self.host, pipe.returncode, pipe.stdout.read().strip()))
@ -178,16 +178,16 @@ class AgentClient(object):
remote_dir = pipe.stdout.read().strip() remote_dir = pipe.stdout.read().strip()
if remote_dir: if remote_dir:
self.path['AGENT_REMOTE_FOLDER'] = remote_dir self.path['AGENT_REMOTE_FOLDER'] = remote_dir
logging.debug("Remote dir at %s:%s", self.host, self.path['AGENT_REMOTE_FOLDER']) logger.debug("Remote dir at %s:%s", self.host, self.path['AGENT_REMOTE_FOLDER'])
# Copy agent # Copy agent
cmd = [self.path['AGENT_LOCAL_FOLDER'] + '/agent.py', cmd = [self.path['AGENT_LOCAL_FOLDER'] + '/agent.py',
self.username+'@'+'['+self.host+']' + ':' + self.path['AGENT_REMOTE_FOLDER']] self.username+'@'+'['+self.host+']' + ':' + self.path['AGENT_REMOTE_FOLDER']]
logging.debug("Copy agent to %s: %s", self.host, cmd) logger.debug("Copy agent to %s: %s", self.host, cmd)
pipe = self.ssh.get_scp_pipe(cmd) pipe = self.ssh.get_scp_pipe(cmd)
pipe.wait() pipe.wait()
logging.debug("AgentClient copy exitcode: %s", pipe.returncode) logger.debug("AgentClient copy exitcode: %s", pipe.returncode)
if pipe.returncode != 0: if pipe.returncode != 0:
raise RuntimeError("AgentClient copy exitcode: %s" % pipe.returncode) raise RuntimeError("AgentClient copy exitcode: %s" % pipe.returncode)
@ -200,11 +200,11 @@ class AgentClient(object):
dirname=self.path['AGENT_REMOTE_FOLDER'] dirname=self.path['AGENT_REMOTE_FOLDER']
) )
] ]
logging.debug("[%s] Copy config: %s", cmd, self.host) logger.debug("[%s] Copy config: %s", cmd, self.host)
pipe = self.ssh.get_scp_pipe(cmd) pipe = self.ssh.get_scp_pipe(cmd)
pipe.wait() pipe.wait()
logging.debug("AgentClient copy config exitcode: %s", pipe.returncode) logger.debug("AgentClient copy config exitcode: %s", pipe.returncode)
if pipe.returncode != 0: if pipe.returncode != 0:
raise RuntimeError("AgentClient copy config exitcode: %s" % pipe.returncode) raise RuntimeError("AgentClient copy config exitcode: %s" % pipe.returncode)
@ -221,11 +221,11 @@ class AgentClient(object):
fhandle, log_file = tempfile.mkstemp('.log', "agent_" + self.host + "_") fhandle, log_file = tempfile.mkstemp('.log', "agent_" + self.host + "_")
os.close(fhandle) os.close(fhandle)
cmd = [self.host + ':' + self.path['AGENT_REMOTE_FOLDER'] + "_agent.log", log_file] cmd = [self.host + ':' + self.path['AGENT_REMOTE_FOLDER'] + "_agent.log", log_file]
logging.debug("Copy agent log from %s@%s: %s", self.username, self.host, cmd) logger.debug("Copy agent log from %s@%s: %s", self.username, self.host, cmd)
remove = self.ssh.get_scp_pipe(cmd) remove = self.ssh.get_scp_pipe(cmd)
remove.wait() remove.wait()
logging.info("Removing agent from: %s@%s...", self.username, self.host) logger.info("Removing agent from: %s@%s...", self.username, self.host)
cmd = ['rm', '-r', self.path['AGENT_REMOTE_FOLDER']] cmd = ['rm', '-r', self.path['AGENT_REMOTE_FOLDER']]
remove = self.ssh.get_ssh_pipe(cmd) remove = self.ssh.get_ssh_pipe(cmd)
remove.wait() remove.wait()
@ -271,9 +271,9 @@ class MonitoringCollector:
logger.debug("Filter mask: %s", self.filter_mask) logger.debug("Filter mask: %s", self.filter_mask)
# Creating agent for hosts # Creating agent for hosts
logging.debug('Creating agents') logger.debug('Creating agents')
for adr in agent_config: for adr in agent_config:
logging.debug('Creating agent: %s', adr) logger.debug('Creating agent: %s', adr)
agent = AgentClient() agent = AgentClient()
agent.host = adr['host'] agent.host = adr['host']
agent.username = adr['username'] agent.username = adr['username']
@ -288,11 +288,11 @@ class MonitoringCollector:
self.agents.append(agent) self.agents.append(agent)
# Mass agents install # Mass agents install
logging.debug("Agents: %s", self.agents) logger.debug("Agents: %s", self.agents)
conf = Config(self.config) conf = Config(self.config)
for agent in self.agents: for agent in self.agents:
logging.debug('Install monitoring agent. Host: %s', agent.host) logger.debug('Install monitoring agent. Host: %s', agent.host)
self.artifact_files.append(agent.install(conf.loglevel())) self.artifact_files.append(agent.install(conf.loglevel()))
def start(self): def start(self):
@ -311,18 +311,18 @@ class MonitoringCollector:
fcntl.fcntl(fds, fcntl.F_SETFL, flags | os.O_NONBLOCK) fcntl.fcntl(fds, fcntl.F_SETFL, flags | os.O_NONBLOCK)
self.excepts.append(pipe.stderr) self.excepts.append(pipe.stderr)
logging.debug("Pipes: %s", self.agent_pipes) logger.debug("Pipes: %s", self.agent_pipes)
def poll(self): def poll(self):
"""Poll agents for data""" """Poll agents for data"""
readable, writable, exceptional = select.select(self.outputs, self.inputs, self.excepts, 0) readable, writable, exceptional = select.select(self.outputs, self.inputs, self.excepts, 0)
logging.debug("Streams: %s %s %s", readable, writable, exceptional) logger.debug("Streams: %s %s %s", readable, writable, exceptional)
# if empty run - check children # if empty run - check children
if (not readable) or exceptional: if (not readable) or exceptional:
for pipe in self.agent_pipes: for pipe in self.agent_pipes:
if pipe.returncode: if pipe.returncode:
logging.debug("Child died returncode: %s", pipe.returncode) logger.debug("Child died returncode: %s", pipe.returncode)
self.outputs.remove(pipe.stdout) self.outputs.remove(pipe.stdout)
self.agent_pipes.remove(pipe) self.agent_pipes.remove(pipe)
@ -330,7 +330,7 @@ class MonitoringCollector:
for excepted in exceptional: for excepted in exceptional:
data = excepted.readline() data = excepted.readline()
while data: while data:
logging.error("Got exception [%s]: %s", excepted, data) logger.error("Got exception [%s]: %s", excepted, data)
data = excepted.readline() data = excepted.readline()
while readable: while readable:
@ -344,9 +344,9 @@ class MonitoringCollector:
lines = [] lines = []
for data in lines: for data in lines:
logging.debug("Got data from agent: %s", data.strip()) logger.debug("Got data from agent: %s", data.strip())
self.send_data += self.filter_unused_data(self.filter_conf, self.filter_mask, data) self.send_data += self.filter_unused_data(self.filter_conf, self.filter_mask, data)
logging.debug("Data after filtering: %s", self.send_data) logger.debug("Data after filtering: %s", self.send_data)
if not self.first_data_received and self.send_data: if not self.first_data_received and self.send_data:
self.first_data_received = True self.first_data_received = True
@ -358,12 +358,12 @@ class MonitoringCollector:
def stop(self): def stop(self):
"""Shutdown agents""" """Shutdown agents"""
logging.debug("Initiating normal finish") logger.debug("Initiating normal finish")
for pipe in self.agent_pipes: for pipe in self.agent_pipes:
try: try:
pipe.stdin.write("stop\n") pipe.stdin.write("stop\n")
except IOError as exc: except IOError as exc:
logging.warn("Problems stopping agent: %s", traceback.format_exc(exc)) logger.warn("Problems stopping agent: %s", traceback.format_exc(exc))
time.sleep(1) time.sleep(1)
@ -373,7 +373,7 @@ class MonitoringCollector:
delay = 1 delay = 1
while tankcore.pid_exists(pipe.pid): while tankcore.pid_exists(pipe.pid):
if first_try: if first_try:
logging.debug("Killing %s with %s", pipe.pid, signal.SIGTERM) logger.debug("Killing %s with %s", pipe.pid, signal.SIGTERM)
os.killpg(pipe.pid, signal.SIGTERM) os.killpg(pipe.pid, signal.SIGTERM)
pipe.communicate() pipe.communicate()
first_try = False first_try = False
@ -381,7 +381,7 @@ class MonitoringCollector:
else: else:
time.sleep(delay) time.sleep(delay)
delay *= 2 delay *= 2
logging.warn("Killing %s with %s", pipe.pid, signal.SIGKILL) logger.warn("Killing %s with %s", pipe.pid, signal.SIGKILL)
os.killpg(pipe.pid, signal.SIGKILL) os.killpg(pipe.pid, signal.SIGKILL)
for agent in self.agents: for agent in self.agents:
@ -409,7 +409,7 @@ class MonitoringCollector:
if hostname == '[target]': if hostname == '[target]':
if not target_hint: if not target_hint:
raise ValueError("Can't use [target] keyword with no target parameter specified") raise ValueError("Can't use [target] keyword with no target parameter specified")
logging.debug("Using target hint: %s", target_hint) logger.debug("Using target hint: %s", target_hint)
hostname = target_hint.lower() hostname = target_hint.lower()
stats = [] stats = []
startups = [] startups = []
@ -445,10 +445,10 @@ class MonitoringCollector:
elif (str(metric.tag)).lower() == 'shutdown': elif (str(metric.tag)).lower() == 'shutdown':
shutdowns.append(metric.text) shutdowns.append(metric.text)
logging.debug("Metrics count: %s", metrics_count) logger.debug("Metrics count: %s", metrics_count)
logging.debug("Host len: %s", len(host)) logger.debug("Host len: %s", len(host))
logging.debug("keys: %s", host.attrib.keys()) logger.debug("keys: %s", host.attrib.keys())
logging.debug("values: %s", host.attrib.values()) logger.debug("values: %s", host.attrib.values())
# use default metrics for host # use default metrics for host
if metrics_count == 0: if metrics_count == 0:
@ -485,7 +485,7 @@ class MonitoringCollector:
try: try:
tree = parse_xml(filename) tree = parse_xml(filename)
except IOError as exc: except IOError as exc:
logging.error("Error loading config: %s", exc) logger.error("Error loading config: %s", exc)
raise RuntimeError("Can't read monitoring config %s" % filename) raise RuntimeError("Can't read monitoring config %s" % filename)
hosts = tree.findall('Host') hosts = tree.findall('Host')
@ -531,7 +531,7 @@ class MonitoringCollector:
out = 'start;' out = 'start;'
out += self.filtering(filter_mask, keys[1:]).rstrip(';') + '\n' out += self.filtering(filter_mask, keys[1:]).rstrip(';') + '\n'
elif re.match('^\[debug\]', data): # log debug output elif re.match('^\[debug\]', data): # log debug output
logging.debug('agent debug: %s', data.rstrip()) logger.debug('agent debug: %s', data.rstrip())
else: else:
filtered = self.filtering(filter_mask, keys) filtered = self.filtering(filter_mask, keys)
if filtered: if filtered:
@ -622,7 +622,7 @@ class MonitoringDataDecoder:
data.pop(0) # remove 'start' data.pop(0) # remove 'start'
host = data.pop(0) host = data.pop(0)
if not data: if not data:
logging.warn("Wrong mon data line: %s", line) logger.warn("Wrong mon data line: %s", line)
else: else:
timestamp = data.pop(0) timestamp = data.pop(0)
self.metrics[host] = [] self.metrics[host] = []

View File

@ -5,6 +5,9 @@ import time
import traceback import traceback
import fnmatch import fnmatch
import datetime import datetime
import logging
logger = logging.getLogger(__name__)
from pkg_resources import resource_string from pkg_resources import resource_string
from collector import MonitoringCollector, \ from collector import MonitoringCollector, \
@ -74,34 +77,34 @@ class MonitoringPlugin(AbstractPlugin):
autostop.add_criteria_class(MetricHigherCriteria) autostop.add_criteria_class(MetricHigherCriteria)
autostop.add_criteria_class(MetricLowerCriteria) autostop.add_criteria_class(MetricLowerCriteria)
except KeyError: except KeyError:
self.log.debug("No autostop plugin found, not adding instances criteria") logger.debug("No autostop plugin found, not adding instances criteria")
def prepare_test(self): def prepare_test(self):
try: try:
phantom = self.core.get_plugin_of_type(PhantomPlugin) phantom = self.core.get_plugin_of_type(PhantomPlugin)
if phantom.phout_import_mode: if phantom.phout_import_mode:
self.log.info("Phout import mode, disabling monitoring") logger.info("Phout import mode, disabling monitoring")
self.config = None self.config = None
self.monitoring = None self.monitoring = None
info = phantom.get_info() info = phantom.get_info()
if info: if info:
self.default_target = info.address self.default_target = info.address
self.log.debug("Changed monitoring target to %s", self.default_target) logger.debug("Changed monitoring target to %s", self.default_target)
except KeyError, ex: except KeyError, ex:
self.log.debug("Phantom plugin not found: %s", ex) logger.debug("Phantom plugin not found: %s", ex)
if self.address_resolver: if self.address_resolver:
try: try:
self.default_target = self.address_resolver.resolve_virtual(self.default_target) self.default_target = self.address_resolver.resolve_virtual(self.default_target)
except Exception, exc: except Exception, exc:
self.log.error("Failed to get target info: %s", exc) logger.error("Failed to get target info: %s", exc)
if not self.config or self.config == 'none': if not self.config or self.config == 'none':
self.log.info("Monitoring has been disabled") logger.info("Monitoring has been disabled")
else: else:
self.log.info("Starting monitoring with config: %s", self.config) logger.info("Starting monitoring with config: %s", self.config)
self.core.add_artifact_file(self.config, True) self.core.add_artifact_file(self.config, True)
self.monitoring.config = self.config self.monitoring.config = self.config
if self.default_target: if self.default_target:
@ -115,7 +118,7 @@ class MonitoringPlugin(AbstractPlugin):
try: try:
console = self.core.get_plugin_of_type(ConsoleOnlinePlugin) console = self.core.get_plugin_of_type(ConsoleOnlinePlugin)
except Exception, ex: except Exception, ex:
self.log.debug("Console not found: %s", ex) logger.debug("Console not found: %s", ex)
console = None console = None
if console: if console:
widget = MonitoringWidget(self) widget = MonitoringWidget(self)
@ -131,11 +134,11 @@ class MonitoringPlugin(AbstractPlugin):
self.monitoring.poll() self.monitoring.poll()
count += 1 count += 1
except Exception, exc: except Exception, exc:
self.log.debug("Problems starting monitoring: %s", traceback.format_exc(exc)) logger.debug("Problems starting monitoring: %s", traceback.format_exc(exc))
if self.die_on_fail: if self.die_on_fail:
raise exc raise exc
else: else:
self.log.warning("Failed to start monitoring: %s", exc) logger.warning("Failed to start monitoring: %s", exc)
self.monitoring = None self.monitoring = None
@ -144,20 +147,20 @@ class MonitoringPlugin(AbstractPlugin):
if self.die_on_fail: if self.die_on_fail:
raise RuntimeError("Monitoring died unexpectedly") raise RuntimeError("Monitoring died unexpectedly")
else: else:
self.log.warn("Monitoring died unexpectedly") logger.warn("Monitoring died unexpectedly")
self.monitoring = None self.monitoring = None
return -1 return -1
def end_test(self, retcode): def end_test(self, retcode):
self.log.info("Finishing monitoring") logger.info("Finishing monitoring")
if self.monitoring: if self.monitoring:
self.monitoring.stop() self.monitoring.stop()
for log in self.monitoring.artifact_files: for log in self.monitoring.artifact_files:
self.core.add_artifact_file(log) self.core.add_artifact_file(log)
while self.monitoring.send_data: while self.monitoring.send_data:
self.log.info("Sending monitoring data rests...") logger.info("Sending monitoring data rests...")
self.monitoring.send_collected_data() self.monitoring.send_collected_data()
if self.mon_saver: if self.mon_saver:
@ -222,7 +225,7 @@ class MonitoringWidget(AbstractInfoWidget, MonitoringDataListener, MonitoringDat
def monitoring_data(self, data_string): def monitoring_data(self, data_string):
self.log.debug("Mon widget data: %s", data_string) logger.debug("Mon widget data: %s", data_string)
for line in data_string.split("\n"): for line in data_string.split("\n"):
if not line.strip(): if not line.strip():
continue continue
@ -270,7 +273,7 @@ class AbstractMetricCriteria(AbstractCriteria, MonitoringDataListener, Monitorin
if self.mon.monitoring: if self.mon.monitoring:
self.mon.monitoring.add_listener(self) self.mon.monitoring.add_listener(self)
except KeyError: except KeyError:
self.log.warning("No monitoring module, mon autostop disabled") logger.warning("No monitoring module, mon autostop disabled")
self.triggered = False self.triggered = False
self.autostop = autostop self.autostop = autostop
@ -297,17 +300,17 @@ class AbstractMetricCriteria(AbstractCriteria, MonitoringDataListener, Monitorin
if self.metric not in data.keys() or not data[self.metric] or data[self.metric] == self.NA: if self.metric not in data.keys() or not data[self.metric] or data[self.metric] == self.NA:
data[self.metric] = 0 data[self.metric] = 0
self.log.debug("Compare %s %s/%s=%s to %s", self.get_type_string(), host, self.metric, data[self.metric], logger.debug("Compare %s %s/%s=%s to %s", self.get_type_string(), host, self.metric, data[self.metric],
self.value_limit) self.value_limit)
if self.comparison_fn(float(data[self.metric]), self.value_limit): if self.comparison_fn(float(data[self.metric]), self.value_limit):
if not self.seconds_count: if not self.seconds_count:
self.cause_second = self.last_second self.cause_second = self.last_second
self.log.debug(self.explain()) logger.debug(self.explain())
self.seconds_count += 1 self.seconds_count += 1
if self.seconds_count >= self.seconds_limit: if self.seconds_count >= self.seconds_limit:
self.log.debug("Triggering autostop") logger.debug("Triggering autostop")
self.triggered = True self.triggered = True
return return
else: else: