Source code for alignak.daemons.schedulerdaemon

#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2015-2015: Alignak team, see AUTHORS.txt file for contributors
#
# This file is part of Alignak.
#
# Alignak is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Alignak is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with Alignak.  If not, see <http://www.gnu.org/licenses/>.
#
#
# This file incorporates work covered by the following copyright and
# permission notice:
#
#  Copyright (C) 2009-2014:
#     xkilian, fmikus@acktomic.com
#     David Moreau Simard, dmsimard@iweb.com
#     Guillaume Bour, guillaume@bour.cc
#     aviau, alexandre.viau@savoirfairelinux.com
#     Hartmut Goebel, h.goebel@goebel-consult.de
#     Nicolas Dupeux, nicolas@dupeux.net
#     Grégory Starck, g.starck@gmail.com
#     Sebastien Coavoux, s.coavoux@free.fr
#     Olivier Hanesse, olivier.hanesse@gmail.com
#     Jean Gabes, naparuba@gmail.com
#     Romain Forlot, rforlot@yahoo.com

#  This file is part of Shinken.
#
#  Shinken is free software: you can redistribute it and/or modify
#  it under the terms of the GNU Affero General Public License as published by
#  the Free Software Foundation, either version 3 of the License, or
#  (at your option) any later version.
#
#  Shinken is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU Affero General Public License for more details.
#
#  You should have received a copy of the GNU Affero General Public License
#  along with Shinken.  If not, see <http://www.gnu.org/licenses/>.
"""
This module provides the Alignak class, which is the main scheduling daemon class
"""

import os
import signal
import time
import traceback
import cPickle
from multiprocessing import process


from alignak.scheduler import Scheduler
from alignak.macroresolver import MacroResolver
from alignak.external_command import ExternalCommandManager
from alignak.daemon import Daemon
from alignak.http.scheduler_interface import SchedulerInterface
from alignak.property import PathProp, IntegerProp
from alignak.log import logger
from alignak.satellite import BaseSatellite
from alignak.stats import statsmgr


class Alignak(BaseSatellite):
    """Scheduler daemon class.

    Interface objects usually reference an instance of this class as "app".
    """

    # Start from a copy of the BaseSatellite properties, then add or replace
    # the scheduler specific entries
    properties = BaseSatellite.properties.copy()
    properties.update(
        pidfile=PathProp(default='schedulerd.pid'),
        port=IntegerProp(default=7768),
        local_log=PathProp(default='schedulerd.log'),
    )

    def __init__(self, config_file, is_daemon, do_replace, debug, debug_file, profile=''):
        """Build the scheduler daemon and its empty satellite link tables.

        All arguments except ``profile`` are forwarded as-is to
        ``BaseSatellite.__init__`` together with the 'scheduler' daemon name.

        :param config_file: forwarded to BaseSatellite
        :param is_daemon: forwarded to BaseSatellite
        :param do_replace: forwarded to BaseSatellite
        :param debug: forwarded to BaseSatellite
        :param debug_file: forwarded to BaseSatellite
        :param profile: accepted but not used by this initializer
        """
        BaseSatellite.__init__(self, 'scheduler', config_file, is_daemon,
                               do_replace, debug, debug_file)

        self.http_interface = SchedulerInterface(self)
        self.sched = Scheduler(self)

        self.must_run = True

        # Interface URIs: not known at this point
        self.uri = self.uri2 = None

        # Links to the satellites, filled when a configuration is set up
        self.pollers, self.reactionners, self.brokers = {}, {}, {}
[docs] def compensate_system_time_change(self, difference): """Compensate a system time change of difference for all hosts/services/checks/notifs :param difference: difference in seconds :type difference: int :return: None """ logger.warning("A system time change of %d has been detected. Compensating...", difference) # We only need to change some value self.program_start = max(0, self.program_start + difference) if not hasattr(self.sched, "conf"): # Race condition where time change before getting conf return # Then we compensate all host/services for host in self.sched.hosts: host.compensate_system_time_change(difference) for serv in self.sched.services: serv.compensate_system_time_change(difference) # Now all checks and actions for chk in self.sched.checks.values(): # Already launch checks should not be touch if chk.status == 'scheduled' and chk.t_to_go is not None: t_to_go = chk.t_to_go ref = chk.ref new_t = max(0, t_to_go + difference) if ref.check_period is not None: # But it's no so simple, we must match the timeperiod new_t = ref.check_period.get_next_valid_time_from_t(new_t) # But maybe no there is no more new value! 
Not good :( # Say as error, with error output if new_t is None: chk.state = 'waitconsume' chk.exit_status = 2 chk.output = '(Error: there is no available check time after time change!)' chk.check_time = time.time() chk.execution_time = 0 else: chk.t_to_go = new_t ref.next_chk = new_t # Now all checks and actions for act in self.sched.actions.values(): # Already launch checks should not be touch if act.status == 'scheduled': t_to_go = act.t_to_go # Event handler do not have ref ref = getattr(act, 'ref', None) new_t = max(0, t_to_go + difference) # Notification should be check with notification_period if act.is_a == 'notification': if ref.notification_period: # But it's no so simple, we must match the timeperiod new_t = ref.notification_period.get_next_valid_time_from_t(new_t) # And got a creation_time variable too act.creation_time += difference # But maybe no there is no more new value! Not good :( # Say as error, with error output if new_t is None: act.state = 'waitconsume' act.exit_status = 2 act.output = '(Error: there is no available check time after time change!)' act.check_time = time.time() act.execution_time = 0 else: act.t_to_go = new_t
[docs] def manage_signal(self, sig, frame): """Manage signals caught by the daemon signal.SIGUSR1 : dump_memory signal.SIGUSR2 : dump_object (nothing) signal.SIGTERM, signal.SIGINT : terminate process :param sig: signal caught by daemon :type sig: str :param frame: current stack frame :type frame: :return: None TODO: Refactor with Daemon one """ logger.warning("%s > Received a SIGNAL %s", process.current_process(), sig) # If we got USR1, just dump memory if sig == signal.SIGUSR1: self.sched.need_dump_memory = True elif sig == signal.SIGUSR2: # usr2, dump objects self.sched.need_objects_dump = True else: # if not, die :) self.sched.die() self.must_run = False Daemon.manage_signal(self, sig, frame)
[docs] def do_loop_turn(self): """Scheduler loop turn Basically wait initial conf and run :return: None """ # Ok, now the conf self.wait_for_initial_conf() if not self.new_conf: return logger.info("New configuration received") self.setup_new_conf() logger.info("New configuration loaded") self.sched.run()
[docs] def setup_new_conf(self): """Setup new conf received for scheduler :return: None """ with self.conf_lock: new_c = self.new_conf conf_raw = new_c['conf'] override_conf = new_c['override_conf'] modules = new_c['modules'] satellites = new_c['satellites'] instance_name = new_c['instance_name'] push_flavor = new_c['push_flavor'] skip_initial_broks = new_c['skip_initial_broks'] accept_passive_unknown_chk_res = new_c['accept_passive_unknown_check_results'] api_key = new_c['api_key'] secret = new_c['secret'] http_proxy = new_c['http_proxy'] statsd_host = new_c['statsd_host'] statsd_port = new_c['statsd_port'] statsd_prefix = new_c['statsd_prefix'] statsd_enabled = new_c['statsd_enabled'] # horay, we got a name, we can set it in our stats objects statsmgr.register(self.sched, instance_name, 'scheduler', api_key=api_key, secret=secret, http_proxy=http_proxy, statsd_host=statsd_host, statsd_port=statsd_port, statsd_prefix=statsd_prefix, statsd_enabled=statsd_enabled) t00 = time.time() conf = cPickle.loads(conf_raw) logger.debug("Conf received at %d. 
Unserialized in %d secs", t00, time.time() - t00) self.new_conf = None # Tag the conf with our data self.conf = conf self.conf.push_flavor = push_flavor self.conf.instance_name = instance_name self.conf.skip_initial_broks = skip_initial_broks self.conf.accept_passive_unknown_check_results = accept_passive_unknown_chk_res self.cur_conf = conf self.override_conf = override_conf self.modules = modules self.satellites = satellites # self.pollers = self.app.pollers if self.conf.human_timestamp_log: logger.set_human_format() # Now We create our pollers for pol_id in satellites['pollers']: # Must look if we already have it already_got = pol_id in self.pollers poll = satellites['pollers'][pol_id] self.pollers[pol_id] = poll if poll['name'] in override_conf['satellitemap']: poll = dict(poll) # make a copy poll.update(override_conf['satellitemap'][poll['name']]) proto = 'http' if poll['use_ssl']: proto = 'https' uri = '%s://%s:%s/' % (proto, poll['address'], poll['port']) self.pollers[pol_id]['uri'] = uri self.pollers[pol_id]['last_connection'] = 0 # Now We create our reactionners for reac_id in satellites['reactionners']: # Must look if we already have it already_got = reac_id in self.reactionners reac = satellites['reactionners'][reac_id] self.reactionners[reac_id] = reac if reac['name'] in override_conf['satellitemap']: reac = dict(reac) # make a copy reac.update(override_conf['satellitemap'][reac['name']]) proto = 'http' if poll['use_ssl']: proto = 'https' uri = '%s://%s:%s/' % (proto, reac['address'], reac['port']) self.reactionners[reac_id]['uri'] = uri self.reactionners[reac_id]['last_connection'] = 0 # First mix conf and override_conf to have our definitive conf for prop in self.override_conf: val = self.override_conf[prop] setattr(self.conf, prop, val) if self.conf.use_timezone != '': logger.debug("Setting our timezone to %s", str(self.conf.use_timezone)) os.environ['TZ'] = self.conf.use_timezone time.tzset() if len(self.modules) != 0: logger.debug("I've got %s 
modules", str(self.modules)) # TODO: if scheduler had previous modules instanciated it must clean them! self.modules_manager.set_modules(self.modules) self.do_load_modules() logger.info("Loading configuration.") self.conf.explode_global_conf() # we give sched it's conf self.sched.reset() self.sched.load_conf(self.conf) self.sched.load_satellites(self.pollers, self.reactionners) # We must update our Config dict macro with good value # from the config parameters self.sched.conf.fill_resource_macros_names_macros() # print "DBG: got macros", self.sched.conf.macros # Creating the Macroresolver Class & unique instance m_solver = MacroResolver() m_solver.init(self.conf) # self.conf.dump() # self.conf.quick_debug() # Now create the external commander # it's a applyer: it role is not to dispatch commands, # but to apply them ecm = ExternalCommandManager(self.conf, 'applyer') # Scheduler need to know about external command to # activate it if necessary self.sched.load_external_command(ecm) # External command need the sched because he can raise checks ecm.load_scheduler(self.sched) # We clear our schedulers managed (it's us :) ) # and set ourself in it self.schedulers = {self.conf.instance_id: self.sched}
[docs] def what_i_managed(self): """Get my managed dict (instance id and push_flavor) :return: dict containing instance_id key and push flavor value :rtype: dict """ if hasattr(self, 'conf'): return {self.conf.instance_id: self.conf.push_flavor} else: return {}
[docs] def main(self): """Main function for Scheduler, launch after the init:: * Init daemon * Load module manager * Launch main loop * Catch any Exception that occurs :return: None """ try: self.load_config_file() # Setting log level logger.setLevel(self.log_level) # Force the debug level if the daemon is said to start with such level if self.debug: logger.setLevel('DEBUG') self.look_for_early_exit() self.do_daemon_init_and_start() self.load_modules_manager() self.uri = self.http_daemon.uri logger.info("[scheduler] General interface is at: %s", self.uri) self.do_mainloop() except Exception, exp: self.print_unrecoverable(traceback.format_exc()) raise