# mon
#
# Source: http://cpansearch.perl.org/src/TROCKIJ/mon-0.99.2/mon.d/
#!/usr/bin/perl
#
# mon - schedules service tests and triggers alerts upon failures
#
# Jim Trocki, trockij@transmeta.com
#
# $Id: mon 1.27 Sat, 08 Sep 2001 09:42:05 -0400 trockij $
#
# Copyright (C) 1998 Jim Trocki
#
#    This program is free software; you can redistribute it and/or modify
#    it under the terms of the GNU General Public License as published by
#    the Free Software Foundation; either version 2 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with this program; if not, write to the Free Software
#    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
#
use strict;

#
# identification strings; $RCSID and $RELEASE are what the -v
# option prints before exiting
#
my $RCSID='$Id: mon 1.27 Sat, 08 Sep 2001 09:42:05 -0400 trockij $';
my $AUTHOR='trockij@transmeta.com';
my $RELEASE='$ProjectVersion: mon-0-99-2.6 $';

#
# modules in the perl distribution
#
use Getopt::Std;
use Text::ParseWords;
use POSIX;
use Fcntl;
use Socket;
use Sys::Hostname;
use Sys::Syslog qw(:DEFAULT setlogsock);
use FileHandle;

use Data::Dumper;

#
# CPAN modules
#
use Time::HiRes qw(gettimeofday tv_interval usleep);
use Time::Period;
use Mon::SNMP;
#use SNMP in read_cf()

#
# forward declarations of the subroutines defined in this file
#
# declaring the names up front lets the code call them as bare
# words, without parentheses, before the definitions are seen
# (for example "collect_output;" in the main loop) while still
# compiling under "use strict"
#
sub auth;
sub call_alert;
sub check_auth;
sub clear_timers;
sub client_accept;
sub client_close;
sub client_command;
sub client_dopending;
sub client_write_opstatus;
sub collect_output;
sub daemon;
sub debug;
sub debug_dir;
sub dep_ok;
sub depend;
sub dhmstos;
sub die_die;
sub disen_host;
sub disen_service;
sub disen_watch;
sub do_alert;
sub do_startup_alerts;
sub err_startup;
sub esc_str;
sub gen_scriptdir_hash;
sub handle_io;
sub handle_snmp_trap;
sub handle_trap;
sub handle_trap_timeout;
sub host_exists;
sub inRange;
sub init_cf_globals;
sub init_globals;
sub load_auth;
sub load_oncall;
sub load_state;
sub normalize_paths;
sub init_dtlog;
sub pam_conv_func;
sub proc_cleanup;
sub randomize_startdelay;
sub read_cf;
sub readhistoricfile;
sub reload;
sub remove_proc;
sub reset_server;
sub run_monitor;
sub save_state;
sub set_last_test;
sub set_op_status;
sub reset_timer;
sub setup_server;
sub sock_write;
sub syslog_die;
sub un_esc_str;
sub usage;
sub write_dtlog;

#
# globals
#
my %opt;		# cmdline arguments, filled in by getopts
my %CF;			# configuration directives, indexed by
			# upper-case name (e.g. $CF{"BASEDIR"})
my $PWD;		# current working directory
my $HOSTNAME;		# system hostname
my $STOPPED;		# 1 = scheduler stopped, 0 = not stopped
my $STOPPED_TIME;	# time(2) scheduler was stopped, if stopped
my $SLEEPINT;		# how long handle_io sleeps when no I/O is
			# ready (per the main loop comment); the -i
			# option overrides it. don't touch
my %oncall;		# currently unused
my %watch_disabled;	# watches disabled, indexed by watch
my %watch;		# main configuration file data structure,
			# $watch{group}->{service}->{variable}
my %alias;		# aliases, indexed by alias name; each value
			# is the list of lines of the alias record
my %groups;		# hostgroups, indexed by group; each value is
			# a list of hostnames, where a leading "*"
			# marks a host which is disabled

#
# I/O routine globals
#
my %clients;		# fds of connected clients
my $numclients;		# count of connected clients
my %running;		# procs which are forked and running,
			# indexed by "group/service"
my $iovec;		# used for select loop
my %runningpid;		# procs which are forked and running,
			# indexed by PID; the value is the
			# "group/service" string of the proc
my $procs;		# number of outstanding procs
my %fhandles;		# input file handles of children
my %ibufs;		# buffer structure to hold data from children
my ($fdset_rbits, $fdset_ebits);	# both start out as empty strings

#
# history globals
#
my @last_alerts;	# alert history, in memory
my @last_failures;	# failure history, in memory

#
# misc. globals
#
my $i;			# loop iteration counter, used for debugging only
my $lasttm;		# the last time(2) the mon loop finished
			# stepping through the timers
my $pid_file_owner;	# PID which wrote the pid file; only that
			# process removes the file at exit
my $tm;			# time(2) at the top of the current pass of
			# the main loop

#
# authentication structure globals
#
my %AUTHCMDS;
my %NOAUTHCMDS;
my %AUTHTRAPS;
my %AUTHSNMPTRAPS;

#
# PAM authentication globals (must not be lexically scoped)
#
use vars qw ( $PAM_username $PAM_password ) ;


#
# opstatus globals
#
my (%OPSTAT, %FAILURE, %SUCCESS, %WARNING);	# operational statuses
my ($TRAP_COLDSTART, $TRAP_WARMSTART,		# trap types
	$TRAP_LINKDOWN, $TRAP_LINKUP,
	$TRAP_AUTHFAIL, $TRAP_EGPNEIGHBORLOSS,
	$TRAP_ENTERPRISE, $TRAP_HEARTBEAT);

my ($STAT_FAIL, $STAT_OK, $STAT_COLDSTART,	# _op_status values
	$STAT_WARMSTART, $STAT_LINKDOWN,
	$STAT_UNKNOWN, $STAT_TIMEOUT,
	$STAT_UNTESTED, $STAT_DEPEND, $STAT_WARN);

my ($FL_MONITOR, $FL_UPALERT,			# alert type flags, tested
	$FL_TRAP, $FL_TRAPTIMEOUT,		# as bits of the $flags
	$FL_STARTUPALERT, $FL_TEST);		# argument of do_alert

my $TRAP_PDU;
my (%ALERTHASH, %MONITORHASH);			# hash of pathnames for
						# alerts/monitors
my $PROT_VERSION;
my $START_TIME;					# time(2) server started
my $TRAP_PRO_VERSION;				# trap protocol version
my $DEP_EVAL_SANDBOX;				# perl environment for
						# dep evals

#
# argument parsing
#
#
# options followed by ":" take a value.  "k:" is part of the list
# because the command line override section reads $opt{"k"} to set
# $CF{"MAX_KEEP"}; without it getopts rejects -k as an unknown
# option and the override can never take effect
#
getopts ("fhlMSvda:A:b:B:c:D:i:k:L:m:O:o:p:P:r:s:t:", \%opt);

#
# these two things can be taken care of without
# initializing things further
#
if ($opt{"v"}) {
    print "$RCSID\n$RELEASE\n";
    exit;
}

if ($opt{"h"}) {
    usage();
    exit;
}

#
# debugging needs Data::Dumper; refuse to start without it
#
if ($opt{"d"})
{
    eval 'require Data::Dumper;';

    if ($@ ne "")
    {
    	die "error: $@\n";
    }
}

#
# linux and openbsd log through the unix socket
#
if ($^O eq "linux" || $^O eq "openbsd") {
    setlogsock ('unix');
}

openlog ("mon", "cons,pid", $CF{"SYSLOG_FACILITY"});

#
# definitions
#
if ($opt{"b"} && ! -d $opt{"b"}) {
    die "basedir $opt{b} does not exist\n";
}

init_globals();
init_cf_globals();

unless (-f $CF{"CF"}) {
    syslog_die ("config file $CF{CF} does not exist");
}

#
# read config file, and give up if it does not parse
#
{
    my $cf_error = read_cf ($CF{"CF"}, 1);
    syslog_die ("$cf_error") if ($cf_error ne "");
}

#
# reopen the log, now that the config file has had its say
# about the syslog facility
#
closelog;
openlog ("mon", "cons,pid", $CF{"SYSLOG_FACILITY"});

#
# cmdline args override config file
#
# each pair is (configuration directive, option letter); the
# directive is replaced only if the option has a true value
#
{
    my @overrides = (
	[ "ALERTDIR",  "a" ],
	[ "BASEDIR",   "b" ],
	[ "AUTHFILE",  "A" ],
	[ "LOGDIR",    "L" ],
	[ "STATEDIR",  "D" ],
	[ "SCRIPTDIR", "s" ],
	[ "OCFILE",    "o" ],
	[ "MAX_KEEP",  "k" ],
	[ "MAXPROCS",  "m" ],
	[ "SERVPORT",  "p" ],
	[ "TRAPPORT",  "t" ],
    );

    foreach my $pair (@overrides) {
	my ($directive, $letter) = @{$pair};
	next if (!$opt{$letter});
	$CF{$directive} = $opt{$letter};
    }
}

# allow empty pidfile
if (defined ($opt{"P"})) {
    $CF{"PIDFILE"} = $opt{"P"};
}

if ($opt{"i"}) {
    $SLEEPINT = $opt{"i"};
}

if ($opt{"r"}) {
    my $randstart = dhmstos ($opt{"r"});
    syslog_die ("bad randstart value") if (!defined ($randstart));
    $CF{"RANDSTART"} = $randstart;
}

#
# -S starts the server with the scheduler stopped
#
if ($opt{"S"}) {
    $STOPPED = 1;
    $STOPPED_TIME = time;
}


#
# do some path cleanups and
# build lookup tables for alerts and monitors
#
normalize_paths();
gen_scriptdir_hash();

debug_dir() if ($opt{"d"});

#
# load the auth control, oncall, bind, and listen
#
load_auth (1);
%oncall = ();
#load_oncall (1);

#
# init client interface
#   %clients is an I/O structure, indexed by the fd of the client
#   $numclients is the number of clients currently connected
#   $iovec is fd_set for clients and traps
#
%clients = ();
$numclients = 0;
$iovec = '';
setup_server();

#
# fork and become a daemon
#
if ($CF{"DTLOGGING"}) {
    init_dtlog();
}

if ($opt{"f"}) {
    daemon();
}

#
# record our PID, remembering that this process owns the file
#
if ($CF{"PIDFILE"} ne '') {
    if (open (PID, ">$CF{PIDFILE}")) {
	$pid_file_owner = $$;
	print PID "$pid_file_owner\n";
	close PID;
    }
}

set_last_test ();

#
# randomize startup checks if asked to
#
if ($CF{"RANDSTART"}) {
    randomize_startdelay();
}

@last_alerts = ();
@last_failures = ();
readhistoricfile ();

$procs = 0;
$i = 0;
$lasttm = time;
$fdset_rbits = '';
$fdset_ebits = '';
%watch_disabled = ();

$SIG{HUP}  = \&reset_server;
$SIG{INT}  = \&handle_sigterm;		# for interactive debugging
$SIG{TERM} = \&handle_sigterm;
$SIG{PIPE} = 'IGNORE';

#
# load previously saved state
#
if ($opt{"l"}) {
    load_state ("disabled");
}

syslog ('info', "mon server started");

#
# startup alerts
#
do_startup_alerts();

#
# main monitoring loop
#
for (;;) {
    debug (1, $i . ($STOPPED ? " (stopped)" : "") . "\n");
    $i++;
    $tm = time;

    #
    # step through the watch groups, decrementing and
    # handling expired timers (not done while the scheduler
    # is stopped)
    #
    unless ($STOPPED) {
	foreach my $group (keys %watch) {
	    #
	    # skip over disabled watch
	    #
	    next if ($watch_disabled{$group} == 1);

	    foreach my $service (keys %{$watch{$group}}) {

		my $svc = \%{$watch{$group}->{$service}};

		#
		# seconds since the previous pass; never less than
		# one, so every pass moves the timers forward
		#
		my $elapsed = $tm - $lasttm;
		if ($elapsed <= 0) {
		    $elapsed = 1;
		}

		#
		# trap timer
		#
		if ($svc->{"traptimeout"}) {
		    $svc->{"_trap_timer"} -= $elapsed;

		    my $timer_expired = ($svc->{"_trap_timer"} <= 0);

		    if ($timer_expired &&
			    $tm - $svc->{"_last_uptrap"} > $svc->{"traptimeout"}) {
			$svc->{"_trap_timer"} = $svc->{"traptimeout"};
			handle_trap_timeout ($group, $service);
		    }
		}

		#
		# trap duration timer; the service goes back to
		# OK once it runs out
		#
		if (defined ($svc->{"_trap_duration_timer"})) {
		    $svc->{"_trap_duration_timer"} -= $elapsed;

		    if ($svc->{"_trap_duration_timer"} <= 0) {
			set_op_status ($group, $service, $STAT_OK);
			undef $svc->{"_trap_duration_timer"};
		    }
		}

		#
		# polling monitor timer
		#
		# a monitor is due if the service is polled, its
		# timer has run out, and it is not already running
		#
		my $due = ($svc->{"interval"} && $svc->{"_timer"} <= 0 &&
			!$running{"$group/$service"});

		if (!$due) {
		    #
		    # not due yet, keep counting down (but not below zero)
		    #
		    $svc->{"_timer"} -= $elapsed;
		    $svc->{"_timer"} = 0 if ($svc->{"_timer"} < 0);
		    next;
		}

		#
		# respect the limit on outstanding processes
		#
		if ($CF{"MAXPROCS"} && !($procs < $CF{"MAXPROCS"})) {
		    syslog ('info', "throttled at $procs processes");
		    next;
		}

		if ($svc->{"exclude_period"} ne "" &&
			inPeriod (time, $svc->{"exclude_period"})) {
		    debug (1, "not running $group,$service because of exclude_period\n");
		}

		#
		# with dep_behavior "m" a failed dependency keeps
		# the monitor from running at all
		#
		elsif ($svc->{"dep_behavior"} eq "m" &&
			$svc->{"depend"} ne "" &&
			!dep_ok ($svc)) {
		    debug (1, "not running $group,$service because of depend\n");
		}

		else {
		    run_monitor ($group, $service);
		}
	    }
	}
    }

    $lasttm = time;

    #
    # collect any output from subprocs
    #
    collect_output ();

    #
    # clean up after exited processes, and trigger alerts
    #
    proc_cleanup ();

    #
    # handle client, server, and trap I/O
    # this routine sleeps for $SLEEPINT if no I/O is ready
    #
    handle_io ();
}

die "not reached";

#
# remove the pid file at exit, but only from the process
# which created it
#
END {
    if ($$ == $pid_file_owner && $CF{"PIDFILE"} ne '') {
	unlink $CF{"PIDFILE"};
    }
}


##############################################################################

#
# startup alerts
#
sub do_startup_alerts {
    #
    # hand every service of every watch to do_alert, flagged as
    # a startup alert, with empty output and an exit value of 0
    #
    for my $grp (keys %watch) {
	for my $svc (keys %{$watch{$grp}}) {
	    do_alert ($grp, $svc, "", 0, $FL_STARTUPALERT);
	}
    }
}


#
# handle alert event, throttling the alert call if necessary
#
sub do_alert {
    #
    # arguments:
    #   $group, $service	the service in %watch the event is for
    #   $output		text of the event; its first line is the summary
    #   $retval		exit value, matched against the "exit=" range
    #			which may prefix an alert line
    #   $flags		bitmask of $FL_* alert type flags
    #
    # nothing is sent if the service is disabled, if the failure has
    # been ack'd (upalerts excepted), or if dep_behavior is "a" and
    # the dependency status is false (upalerts and startup alerts
    # excepted).  otherwise every period which is active right now
    # is checked against numalerts / alertevery / alertafter, and
    # the alerts which are left are handed to call_alert
    #
    my ($group, $service, $output, $retval, $flags) = @_;
    my ($sref, @alerts);

    $sref = \%{$watch{$group}->{$service}};

    my $tmnow = time;

    #
    # if the alarm is disabled, ignore it
    #
    if ($sref->{"disable"} == 1)
    {
	syslog ("notice", "ignoring alert for $group,$service");
	return;
    }

    #
    # dependency check
    #
    if (!($flags & $FL_STARTUPALERT) &&
	    !($flags & $FL_UPALERT) &&
	    defined $sref->{"depend"} &&
	    $sref->{"dep_behavior"} eq "a")
    {
	if (!$sref->{"_depend_status"})
	{
	    debug (1, "alert for $group,$service supressed because of dep fail\n");
	    return;
	}
    }

    #
    # no alerts for ack'd failures, except for upalerts
    #
    if ($sref->{"_ack"} == 1 && !($flags & $FL_UPALERT))
    {
	syslog ("notice", "no alert for $group.$service" .
		" because of ack'd failure");
	return;
    }

    #
    # the summary is the first line of the output
    #
    my ($summary) = split("\n", $output);
    $summary = "(NO SUMMARY)" if ($summary =~ /^\s*$/m);

    #
    # check each time period for pending alerts
    #
    foreach my $periodlabel (keys %{$sref->{"periods"}})
    {
	#
	# only send alerts that are in the proper period
	#
    	next if (!inPeriod ($tmnow, $sref->{"periods"}->{$periodlabel}->{"period"}));

    	my $pref = \%{$sref->{"periods"}->{$periodlabel}};

	#
	# skip upalerts not paired with down alerts
	# disable by setting "no_comp_alerts" in period section
	#
	if (!$pref->{"no_comp_alerts"} && ($flags & $FL_UPALERT) && !$pref->{"_alert_sent"})
	{
	    next;
	}

	#
	# do this if we're not handling an upalert or startupalert
	#
	if (!($flags & $FL_UPALERT) && !($flags & $FL_STARTUPALERT))
	{
	    #
	    # alert only numalerts
	    #
	    if ($pref->{"numalerts"} &&
	    	     $pref->{"_alert_sent"} >= $pref->{"numalerts"})
	    {
	    	next;
	    }

	    #
	    # only alert once every "alertevery" seconds, unless
	    # output from monitor is different
	    #
	    # with observe_detail the whole output is compared,
	    # otherwise only the summary lines are
	    #
	    my ($prevsumm) = split("\n", $sref->{"_failure_output"});
	    if	(
		    $pref->{"alertevery"} != 0 &&
		    (
			($tmnow - $pref->{"_last_alert"} < $pref->{"alertevery"}) &&
			(
			    ($pref->{"_observe_detail"} && $sref->{"_failure_output"} eq $output) ||
			    (!$pref->{"_observe_detail"} && $prevsumm eq $summary)
			)
		    )
		)
	    {
		syslog ("info", "not alerting for failure of $group/$service");
		next;
	    }

	    #
	    # alertafter NUM
	    #
	    if (defined $pref->{"alertafter_consec"})
	    {
	    	next if ($sref->{"_consec_failures"} < $pref->{"alertafter_consec"});
	    }

	    #
	    # alertafter timeval
	    #
	    elsif ( (!defined ($pref->{"alertafter"})) && (defined ($pref->{"alertafterival"})) )
	    {
	    	$pref->{'_1stfailtime'} = $tmnow if $pref->{'_1stfailtime'} == 0;
		if ($tmnow - $pref->{'_1stfailtime'} <= $pref->{'alertafterival'})
		{
		    next;
		}
	    }

	    #
	    # alertafter NUM timeval
	    #
	    elsif (defined ($pref->{"alertafter"}))
	    {
		$pref->{"_failcount"}++;

		if ($tmnow - $pref->{'_1stfailtime'} <= $pref->{'alertafterival'} &&
		    $pref->{"_failcount"} < $pref->{"alertafter"})
		{
		    next;
		}

		#
		# start a new time interval
		#
		if ($tmnow - $pref->{'_1stfailtime'} > $pref->{'alertafterival'})
		{
		    $pref->{"_failcount"} = 1;
		}

		if ($pref->{"_failcount"} == 1)
		{
		    $pref->{"_1stfailtime"} = $tmnow;
		}

		if ($pref->{"_failcount"} < $pref->{"alertafter"})
		{
		    next;
		}
	    }
	}

	#
	# at this point, no alerts are blocked,
	# so send the alerts
	#

	#
	# trigger multiple alerts in this period
	#
	if ($flags & $FL_UPALERT)
	{
	    @alerts = @{$pref->{"upalerts"}};
	}
	elsif ($flags & $FL_STARTUPALERT)
	{
	    @alerts = @{$pref->{"startupalerts"}};
	}
	else
	{
	    @alerts = @{$pref->{"alerts"}};
	}

	my $called = 0;

	for (my $i=0;$i<@alerts;$i++)
	{
	    my ($range, $fac, $args);

	    #
	    # an alert may be prefixed by "exit=N" or "exit=N-M", in
	    # which case it is only called if $retval is in that range.
	    #
	    # the alert and its arguments are taken from what follows
	    # the prefix as matched here, so that whitespace around
	    # the "=" (which this pattern accepts) does not shift the
	    # fields, as it would if the whole line were split on
	    # whitespace
	    #
	    if ($alerts[$i] =~ /^exit\s*=\s*(\d+(?:-\d+)?)\s+(.*)$/is)
	    {
		$range = $1;
		my $rest = $2;
		next if (!inRange($retval, $range));
		($fac, $args) = split (/\s+/, $rest, 2);
	    }
	    else
	    {
		($fac, $args) = split (/\s+/, $alerts[$i], 2);
	    }

	    $called++ if (call_alert (
		    group	=> $group,
		    service	=> $service,
		    output	=> $output,
		    retval	=> $retval,
		    flags	=> $flags,

		    pref	=> $pref,
		    alert	=> $fac,
		    args	=> $args,
		)
	    );
	}

	#
	# reset _alert_sent if up alert was sent from a trap
	#
	# NOTE(review): "$FL_TRAP | $flags" is true whenever either
	# value has a bit set, so in practice the reset happens for
	# every upalert and not only for those which come from a
	# trap.  "&" may have been intended; it is left as it was
	# because code outside of this routine may depend on the
	# current behavior -- confirm before changing
	#
        if ($called)
        {
            if( (($FL_TRAP | $flags) && ($FL_UPALERT & $flags)) ) {
	        $pref->{"_alert_sent"} = 0;
            }
            else {
                $pref->{"_alert_sent"}++;
            }
        }
    }
}



#
# walk through the watch list and set the countdown timer
# ("_timer") of each service to the interval of the service
#
sub set_last_test {
    #
    # takes no arguments; every service in %watch gets its
    # "_timer" set to its "interval" (which is left undefined
    # for a service without one)
    #
    foreach my $group (keys %watch)
    {
    	foreach my $service (keys %{$watch{$group}})
	{
	    $watch{$group}->{$service}->{"_timer"} =
		$watch{$group}->{$service}->{"interval"};
	}
    }

}


#
# parse configuration file
#
# build the following data structures:
#
# %groups
#       each element of %groups is an array of hostnames
#       group records are terminated by a blank line in the
#       configuration file
# %watch{"group"}->{"service"}->{"variable"} = value
# %alias
#
sub read_cf {
    #
    # arguments:
    #   $CF	pathname of the configuration file; it is read through
    #		m4 if -M was given or if the name ends in ".m4"
    #   $commit	if true, a config which parsed without error replaces
    #		the live %alias, %groups, %watch and %CF
    #
    # returns "" on success, or a message describing the first
    # error which was found.  nothing is committed on error
    #
    my ($CF, $commit) = @_;
    my ($var, $watchgroup, $ingroup, $curgroup, $inwatch,
	$args, $hosts, %disabled, $h, $i,
	$inalias, $curalias);
    my ($sref, $pref);
    my ($service, $period);
    my ($authtype, @authtypes);
    my $line_num = 0;

    #
    # parse configuration file
    #
    if ($opt{"M"} || $CF =~ /\.m4$/)
    {
	return "could not open m4 pipe of cf file: $CF: $!"
	    if (!open (CFG, "m4 $CF |"));
    }
    
    else
    {
	return "could not open cf file: $CF: $!"
	    if (!open (CFG, $CF));
    }

    #
    # buffers to hold the new un-committed config
    #
    my %new_alias = ();
    my %new_CF = %CF;
    my %new_groups;
    my %new_watch;

    my %is_watch;

    my $servnum = 0;

    my $DEP_BEHAVIOR = "a";

    my $incomplete_line = 0;
    my $linepart = "";
    my $l = "";
    my $acc_line = "";

    for (;;)
    {
	#
	# read in a logical "line", which may span actual lines
	#
	# a bare do-block is not a loop, so the "last" and "next"
	# in it act on the enclosing for(;;): "last" ends the parse
	# at end of file, and "next" goes on to read another line
	# while keeping what has been accumulated in $acc_line
	#
	do
	{
	    $line_num++;
	    last if (!defined ($linepart = <CFG>));
	    next if $linepart =~ /^\s*#/;

	    #
	    # accumulate multi-line lines (ones which are \-escaped)
	    #
	    if ($incomplete_line) { $linepart =~ s/^\s*//; }

	    if ($linepart =~ /^(.*)\\\s*$/)
	    {
		$incomplete_line = 1;
		$acc_line .= $1;
		chomp $acc_line;
		next;
	    }

	    else
	    {
		$acc_line .= $linepart;
	    }

	    $l = $acc_line;
	    $acc_line = "";

	    chomp $l;
	    $l =~ s/^\s*//;
	    $l =~ s/\s*$//;

	    $incomplete_line = 0;
	    $linepart = "";
	};

	#
	# global variables which can be overridden by the command line
	#
	if (!$inwatch && $l =~ /^(\w+) \s* = \s* (.*) \s*$/ix)
	{
	    if ($1 eq "alertdir") {
		$new_CF{"ALERTDIR"} = $2;

	    } elsif ($1 eq "basedir") {
		$new_CF{"BASEDIR"} = $2;
		$new_CF{"BASEDIR"} = "$PWD/$new_CF{BASEDIR}" if ($new_CF{"BASEDIR"} !~ m{^/});
		$new_CF{"BASEDIR"} =~ s{/$}{};

	    } elsif ($1 eq "cfbasedir") {
		$new_CF{"CFBASEDIR"} = $2;
		$new_CF{"CFBASEDIR"} = "$PWD/$new_CF{CFBASEDIR}" if ($new_CF{"CFBASEDIR"} !~ m{^/});
		$new_CF{"CFBASEDIR"} =~ s{/$}{};

	    } elsif ($1 eq "mondir") {
		$new_CF{"SCRIPTDIR"} = $2;

	    } elsif ($1 eq "logdir") {
		$new_CF{"LOGDIR"} = $2;

	    } elsif ($1 eq "histlength") {
		$new_CF{"MAX_KEEP"} = $2;

	    } elsif ($1 eq "serverport") {
		$new_CF{"SERVPORT"} = $2;

	    } elsif ($1 eq "trapport") {
		$new_CF{"TRAPPORT"} = $2;

	    } elsif ($1 eq "serverbind") {
	    	$new_CF{"SERVERBIND"} = $2;

	    } elsif ($1 eq "trapbind") {
	    	$new_CF{"TRAPBIND"} = $2;

	    } elsif ($1 eq "pidfile") {
		$new_CF{"PIDFILE"} = $2;

	    } elsif ($1 eq "randstart") {
		$new_CF{"RANDSTART"} = dhmstos($2);
		if (!defined ($new_CF{"RANDSTART"})) {
		    close (CFG);
		    return "cf error: bad value '$2' for randstart option (syntax: randstart = timeval), line $line_num";
		}

	    } elsif ($1 eq "maxprocs") {
		$new_CF{"MAXPROCS"} = $2;

	    } elsif ($1 eq "statedir") {
		$new_CF{"STATEDIR"} = $2;

	    } elsif ($1 eq "authfile") {
		$new_CF{"AUTHFILE"} = $2;
                if (! -r $new_CF{"AUTHFILE"}) {
                    close (CFG);
                    return "cf error: authfile '$2' does not exist or is not readable, line $line_num";
                }

	    } elsif ($1 eq "authtype") {
		$new_CF{"AUTHTYPE"} = $2;
		@authtypes = split(' ' , $new_CF{"AUTHTYPE"}) ;
		foreach $authtype (@authtypes) {
		    if ($authtype eq "pam") {
			eval 'use Authen::PAM qw(:constants);' ;
			if ($@ ne "") {
			    close (CFG);
			    return "cf error: could not use PAM authentication: $@";
			}
		    }
		}

	    } elsif ($1 eq "pamservice") {
		$new_CF{"PAMSERVICE"} = $2;

	    } elsif ($1 eq "userfile") {
		$new_CF{"USERFILE"} = $2;
                if (! -r $new_CF{"USERFILE"}) {
                    close (CFG);
                    return "cf error: userfile '$2' does not exist or is not readable, line $line_num";
                }

	    } elsif ($1 eq "ocfile") {
		$new_CF{"OCFILE"} = $2;

	    } elsif ($1 eq "historicfile") {
	    	$new_CF{"HISTORICFILE"} = $2;

	    } elsif ($1 eq "historictime") {
	    	$new_CF{"HISTORICTIME"} = dhmstos($2);
		if (!defined $new_CF{"HISTORICTIME"}) {
		    close (CFG);
		    return "cf error: bad value '$2' for historictime command (syntax: historictime = timeval), line $line_num";
		}

	    } elsif ($1 eq "cltimeout") {
		$new_CF{"CLIENT_TIMEOUT"} = dhmstos($2);
		if (!defined ($new_CF{"CLIENT_TIMEOUT"})) {
		    close (CFG);
		    return "cf error: bad value '$2' for cltimeout command (syntax: cltimeout = secs), line $line_num";
		}

	    } elsif ($1 eq "snmp") {
		#
		# the alternatives are grouped so that the anchors
		# apply to each of them; ungrouped, any value which
		# merely contained "yes" or "on" switched SNMP on
		#
		if ($2 =~ /^(?:1|yes|on|true)$/i) {
		    $new_CF{"SNMP"} = 1;
		    eval "use SNMP";
		    if ($@ ne "") {
			close (CFG);
			return "cf error: could not use SNMP: $@";
		    }
		} else {
		    $new_CF{"SNMP"} = 0;
		}

	    } elsif ($1 eq "monerrfile") {
	    	$new_CF{"MONERRFILE"} = $2;

	    } elsif ($1 eq "dtlogfile") {
		$new_CF{"DTLOGFILE"} = $2;

	    } elsif ($1 eq "dtlogging") {
		$new_CF{"DTLOGGING"} = 0;
		if ($2 == 1 || $2 eq "yes" || $2 eq "true") {
		    $new_CF{"DTLOGGING"} = 1;
		}

	    } elsif ($1 eq "snmpport") {
		$new_CF{"SNMPPORT"} = $2;

	    } elsif ($1 eq "dep_recur_limit") {
	    	$new_CF{"DEP_RECUR_LIMIT"} = $2;

	    } elsif ($1 eq "dep_behavior") {
		if ($2 ne "m" && $2 ne "a") {
		    close (CFG);
		    return "cf error: unknown dependency behavior '$2', line $line_num";
		}
		$DEP_BEHAVIOR = $2;

	    } elsif ($1 eq "syslog_facility") {
	    	$new_CF{"SYSLOG_FACILITY"} = $2;

	    } elsif ($1 eq "startupalerts_on_reset") {
		# grouped for the same reason as the "snmp" value
		if ($2 =~ /^(?:1|yes|true|on)$/i) {
		    $new_CF{"STARTUPALERTS_ON_RESET"} = 1;
		} else {
		    $new_CF{"STARTUPALERTS_ON_RESET"} = 0;
		}

	    } else {
		close (CFG);
		return "cf error: unknown variable '$1', line $line_num";
	    }

	    next;
	}

	#
	# end of record
	#
	if ($l eq "")
	{
	    $ingroup    = 0;
	    $inalias	= 0;
	    $inwatch    = 0;
	    $period	= 0;

	    $curgroup   = "";
	    $curalias	= "";
	    $watchgroup = "";

	    $servnum	= 0;
	    next;
	}

	#
	# hostgroup record
	#
	if ($l =~ /^hostgroup\s+([a-zA-Z0-9_.-]+)\s*(.*)/)
	{
	    $curgroup = $1;

	    $ingroup = 1;
	    $inalias = 0;
	    $inwatch = 0;
	    $period  = 0;


	    $hosts = $2;
	    %disabled = ();

	    #
	    # remember which hosts of the live hostgroup are
	    # disabled (marked by a leading "*")
	    #
	    foreach $h (grep (/^\*/, @{$groups{$curgroup}}))
	    {
		# We have to make $i = $h because $h is actually
		# a pointer to %groups and will modify it.
		$i = $h;
		$i =~ s/^\*//;
		$disabled{$i} = 1;
	    }

	    @{$new_groups{$curgroup}} = split(/\s+/, $hosts);

	    #
	    # keep hosts which were previously disabled
	    #
	    for ($i=0;$i<@{$new_groups{$curgroup}};$i++)
	    {
		$new_groups{$curgroup}[$i] = "*$new_groups{$curgroup}[$i]"
		    if ($disabled{$new_groups{$curgroup}[$i]});
	    }

	    next;
	}

	#
	# further lines of a hostgroup record
	#
	if ($ingroup)
	{
	    push (@{$new_groups{$curgroup}}, split(/\s+/, $l));

	    for ($i=0;$i<@{$new_groups{$curgroup}};$i++)
	    {
		$new_groups{$curgroup}[$i] = "*$new_groups{$curgroup}[$i]"
		    if ($disabled{$new_groups{$curgroup}[$i]});
	    }

	    next;
	}

	#
	# alias record
	#
	if ($l =~ /^alias\s+([a-zA-Z0-9_.-]+)\s*$/)
	{
	    $inalias = 1;
	    $ingroup = 0;
	    $inwatch = 0;
	    $period  = 0;

	    $curalias = $1;
	    next;
	}
	
	if ($inalias)
	{
	    if ($l =~ /\A(.*)\Z/)
	    {
		push (@{$new_alias{$curalias}}, $1);
		next;
	    }
	}

	#
	# watch record
	#
	if ($l =~ /^watch\s+([a-zA-Z0-9_.-]+)\s*/)
	{
	    $watchgroup = $1;
	    $inwatch = 1;
	    $inalias = 0;
	    $ingroup = 0;
	    $period  = 0;

	    if (!defined ($new_groups{$watchgroup}))
	    {
		#
		# This hostgroup doesn't exist yet, we'll create it and warn
		#
	    	@{$new_groups{$watchgroup}} = ($watchgroup);
		print STDERR "Warning: watch group $watchgroup defined with no corresponding hostgroup.\n";
	    }
	    if ($new_watch{$watchgroup})
	    {
		close (CFG);
		return "cf error: watch '$watchgroup' already defined, line $line_num";
	    }

	    $curgroup   = "";
	    $service = "";

	    next;
	}
	
	if ($inwatch)
	{
	    #
	    # env variables
	    #
	    if ($l =~ /^([A-Z_][A-Z0-9_]*)=(.*)/)
	    {
		if ($service eq "") {
		    close (CFG);
		    return "cf error: environment variable defined without a service, line $line_num";
		}
		$new_watch{$watchgroup}->{$service}->{"ENV"}->{$1} = $2;

		next;
	    }

	    #
	    # non-env variables
	    #
	    # the match is checked, so that a line which does not
	    # begin with a word is reported, and is not parsed using
	    # whatever an earlier match happened to leave behind
	    #
	    ($var, $args) = ($l =~ /^(\w+)\s*(.*)$/);

	    if (!defined ($var))
	    {
		close (CFG);
		return "cf error: unknown syntax [$l], line $line_num";
	    }

	    #
	    # service entry
	    #
	    if ($var eq "service")
	    {
		$service = $args;

		if ($service !~ /^[a-zA-Z0-9_.-]+$/) {
		    close (CFG);
		    return "cf error: invalid service tag '$args', line $line_num";
		}

		$period = 0;
		$sref = \%{$new_watch{$watchgroup}->{$service}};
		$sref->{"service"} = $args;
		$sref->{"interval"} = undef;
		$sref->{"randskew"} = 0;
		$sref->{"dep_behavior"} = $DEP_BEHAVIOR;
		$sref->{"exclude_period"} = "";
		$sref->{"exclude_hosts"} = {};
		$sref->{"_op_status"} = $STAT_UNTESTED;
		$sref->{"_last_op_status"} = $STAT_UNTESTED;
		$sref->{"_ack"} = 0;
		$sref->{"_ack_comment"} = '';
		$sref->{"_consec_failures"} = 0;
		$sref->{"_failure_count"} = 0 if (!defined($sref->{"_failure_count"}));
		$sref->{"_start_of_monitor"} = time if (!defined($sref->{"_start_of_monitor"}));
		$sref->{"_alert_count"} = 0 if (!defined($sref->{"_alert_count"}));
		$sref->{"_last_failure"} = 0 if (!defined($sref->{"_last_failure"}));
		$sref->{"_last_success"} = 0 if (!defined($sref->{"_last_success"}));
		$sref->{"_last_trap"} = 0 if (!defined($sref->{"_last_trap"}));
		$sref->{"_exitval"} = "undef" if (!defined($sref->{"_exitval"}));
		$sref->{"_last_check"} = undef;
		$sref->{"_depend_status"} = undef;
		$sref->{"failure_interval"} = undef;
		$sref->{"_old_interval"} = undef;
		next;
	    }

	    if ($service eq "")
	    {
		close (CFG);
		return "cf error: need to specify service in watch record, line $line_num";
	    }


	    #
	    # period definition
	    #
	    # for each service there can be one or more alert periods
	    # this is stored as an array of hashes named
	    #     %{$watch{$watchgroup}->{$service}->{"periods"}}
	    # each index for this hash is a unique tag for the period as
	    # defined by the user or named after the period (such as
	    # "wd {Mon-Fri} hr {7am-11pm}")
	    #
	    # the value of the hash is an array containing the list of alert commands
	    # and arguments, so
	    #
	    # @alerts = @{$watch{$watchgroup}->{$service}->{"periods"}->{"TAG"}}
	    #
	    if ($var eq "period")
	    {
		$period = 1;

		my $periodstr;

		if ($args =~ /^([a-z_]\w*) \s* : \s* (.*)$/ix)
		{
		    $periodstr = $1;
		    $args = $2;
		}
		
		else
		{
		    $periodstr = $args;
		}

		$pref = \%{$sref->{"periods"}->{$periodstr}};

		if (inPeriod (time, $args) == -1)
		{
		    close (CFG);
		    return "cf error: malformed period '$args' (the specified time period is not valid as per Time::Period::inPeriod), line $line_num";
		}

		$pref->{"period"} = $args;
		$pref->{"alertevery"} = 0;
		$pref->{"numalerts"} = 0;
		$pref->{"_alert_sent"} = 0;
		$pref->{"no_comp_alerts"} = 0;
		@{$pref->{"alerts"}} = ();
		@{$pref->{"upalerts"}} = ();
		@{$pref->{"startupalerts"}} = ();
		next;
	    }

	    #
	    # period variables
	    #
	    # a variable which is not recognized here is ignored
	    # without an error
	    #
	    if ($period)
	    {
		if ($var eq "alert")
		{
		    push @{$pref->{"alerts"}}, $args;
		}
		
		elsif ($var eq "upalert")
		{
		    $sref->{"_upalert"} = 1;
		    push @{$pref->{"upalerts"}}, $args;
		}
		
		elsif ($var eq "startupalert")
		{
		    push @{$pref->{"startupalerts"}}, $args;
		}
		
		elsif ($var eq "alertevery")
		{
		    my $observe_detail = 0;

		    if ($args =~ /(\S+) \s+ observe_detail \s*$/ix)
		    {
			$observe_detail = 1;
			$args = $1;
		    }

		    #
		    # for backwards-compatibility with <= 0.38.21
		    #
		    elsif ($args =~ /(\S+) \s+ summary/ix)
		    {
			$args = $1;
		    }

		    #
		    # convert into a separate variable, so that the
		    # error message can still show what was given
		    #
		    my $secs = dhmstos ($args);
		    if (!$secs) {
			close (CFG);
			return "cf error: invalid time interval '$args' (syntax: alertevery {positive number}{smhd}), line $line_num";
		    }

		    $pref->{"alertevery"} = $secs;
		    $pref->{"_observe_detail"} = $observe_detail;
		    next;
		}

		elsif ($var eq "alertafter")
		{
		    my ($p1, $p2);

		    #
		    # alertafter NUM
		    #
		    if ($args =~ /^(\d+)$/)
		    {
			$p1 = $1;
			$pref->{"alertafter_consec"} = $p1;
		    }

		    #
		    # alertafter timeval
		    #
		    elsif ($args =~ /^(\d+[hms])$/)
		    {
			$p1 = $1;
			if (!($p1 = dhmstos ($p1)))
			{
			    close (CFG);
			    return "cf error: invalid time interval '$args' (syntax: alertafter = [{positive integer}] [{positive number}{smhd}]), line $line_num";
			}

			$pref->{"alertafterival"} = $p1;
			$pref->{"_1stfailtime"} = 0;
		    }

		    #
		    # alertafter NUM timeval
		    #
		    elsif ($args =~ /(\d+)\s+(\d+[hms])$/)
		    {
			($p1, $p2) = ($1, $2);
			if (($p1 - 1) * $sref->{"interval"} >= dhmstos($p2))
			{
			    close (CFG);
			    return "cf error:  interval & alertafter not sensible. No alerts can be generated with those parameters, line $line_num";
			}
			$pref->{"alertafter"} = $p1;
			$pref->{"alertafterival"} = dhmstos ($p2);

			$pref->{"_1stfailtime"} = 0;
			$pref->{"_failcount"} = 0;
		    }

		    else
		    {
			close (CFG);
			return "cf error: invalid interval specification '$args', line $line_num";
		    }
		}
	    
		elsif ($var eq "upalertafter")
		{
		    #
		    # the value is only checked for validity here,
		    # it is not stored in the period
		    #
		    if (!dhmstos ($args)) {
			close (CFG);
			return "cf error: invalid upalertafter specification '$args' (syntax: upalertafter = {positive number}{smhd}), line $line_num";
		    }
		}
		
		elsif ($var eq "numalerts")
		{
		    if ($args !~ /^\d+$/) {
			close (CFG);
			return "cf error: non-numeric arg '$args' (syntax: numalerts = {positive integer}), line $line_num";
		    }
		    $pref->{"numalerts"} = $args;
		    next;
		}

		elsif ($var eq "no_comp_alerts")
		{
		    $pref->{"no_comp_alerts"} = 1;
		    next;
		}
	    }

	    #
	    # non-period variables
	    #
	    # time values are converted into a separate variable
	    # first, so that an error message can still show what
	    # was given; the value which ends up in $args is what
	    # gets stored for the service
	    #
	    else
	    {
		if ($var eq "interval")
		{
		    my $secs = dhmstos ($args);
		    if (!$secs) {
			close (CFG);
			return "cf error: invalid time interval '$args' (syntax: interval = {positive number}{smhd}), line $line_num";
		    }
		    $args = $secs;
		}

		elsif ($var eq "failure_interval")
		{
		    my $secs = dhmstos ($args);
		    if (!$secs) {
			close (CFG);
			return "cf error: invalid interval '$args' (syntax: failure_interval = {positive number}{smhd}), line $line_num";
		    }
		    $args = $secs;
		}

		elsif ($var eq "monitor")
		{
		    # valid
		}

		elsif ($var eq "allow_empty_group")
		{
		    # valid
		}

		elsif ($var eq "description")
		{
		    # valid
		}

		elsif ($var eq "traptimeout")
		{
		    my $secs = dhmstos ($args);
		    if (!$secs) {
			close (CFG);
			return "cf error: invalid traptimeout interval '$args' (syntax: traptimeout = {positive number}{smhd}), line $line_num";
		    }
		    $args = $secs;
		    $sref->{"_trap_timer"} = $args;
		}

		elsif ($var eq "trapduration")
		{
		    my $secs = dhmstos ($args);
		    if (!$secs) {
			close (CFG);
			return "cf error: invalid trapduration interval '$args' (syntax: trapduration = {positive number}{smhd}), line $line_num";
		    }
		    $args = $secs;
		}
		
		elsif ($var eq "randskew")
		{
		    my $secs = dhmstos ($args);
		    if (!$secs) {
			close (CFG);
			return "cf error: invalid randskew time interval '$args' (syntax: randskew = {positive number}{smhd}), line $line_num";
		    }
		    $args = $secs;
		}

		elsif ($var eq "dep_behavior")
		{
		    if ($args ne "m" && $args ne "a")
		    {
			close (CFG);
			return "cf error: unknown dependency behavior '$args' (syntax: dep_behavior = {m|a}), line $line_num";
		    }
		}

		elsif ($var eq "depend")
		{
		    $args =~ s/SELF:/$watchgroup:/g;
		}

		elsif ($var eq "exclude_hosts")
		{
		    my $ex = {};
		    foreach my $h (split (/\s+/, $args))
		    {
			$ex->{$h} = 1;
		    }
		    $args = $ex;
		}

		#
		# the validity test is inside the branch, so that a
		# well-formed exclude_period is stored below and does
		# not end up in the "unknown syntax" case
		#
		elsif ($var eq "exclude_period")
		{
		    if (inPeriod (time, $args) == -1)
		    {
			close (CFG);
			return "cf error: malformed exclude_period '$args' (the specified time period is not valid as per Time::Period::inPeriod), line $line_num";
		    }
		}

		else
		{
		    close (CFG);
		    return "cf error: unknown syntax [$l], line $line_num";
		}

		$sref->{$var} = $args;
	    }
	}

	next;
    }

    close (CFG) || return "Could not open pipe to m4 (check that m4 is properly installed and in your PATH): $!";

    #
    # Go through each defined hostgroup and check that there is a 
    #  watch associated with that hostgroup record.
    #
    # hostgroups without associated watches are not a violation of 
    #  mon config syntax, but it's usually not what you want.
    #
    for (keys(%new_watch)) { $is_watch{$_} = 1 };
    foreach $watchgroup ( keys (%new_groups) ) {
	print STDERR "Warning: hostgroup $watchgroup has no watch assigned to it!\n" unless $is_watch{$watchgroup};
    }

    #
    # no errors, commit new config if $commit was specified
    #
    return "" unless $commit;
    %alias = %new_alias;
    %groups = %new_groups;
    %watch = %new_watch;
    %CF = %new_CF;

    "";
}


#
# convert a string like "20m" into seconds
#
sub dhmstos {
    my ($str) = @_;

    #
    # Accepted input: an unsigned, optionally fractional number followed
    # by exactly one unit letter (d, h, m or s, in either case), with
    # optional surrounding whitespace. Anything else yields undef.
    #
    my $spec = lc ($str);
    return undef
	unless ($spec =~ /^\s*(\d+(?:\.\d+)?)([dhms])\s*$/i);

    my ($count, $unit) = ($1, $2);

    #
    # Successive factors applied for each unit. Seconds have no factor,
    # so the number is handed back exactly as it was written.
    #
    my %factors = (
	"s" => [],
	"m" => [60],
	"h" => [60, 60],
	"d" => [60, 60, 24],
    );

    my $secs = $count;
    foreach my $f (@{$factors{$unit}}) {
	$secs *= $f;
    }

    return $secs;
}


#
# reset the state of the server on SIGHUP, and reread config
# file.
#
sub reset_server {
    my ($keepstate) = @_;

    #
    # Send SIGTERM to every monitor process we still track, wait for it
    # to exit, and drop it from the bookkeeping tables.
    #
    foreach my $child (keys %runningpid) {
	kill 15, $child;
	waitpid ($child, 0);
	syslog ('info', "reset killed child $child, exit status $?");
	remove_proc ($child);
    }

    $procs = 0;

    syslog ('info', "resetting, and re-reading configuration $CF{CF}");

    #
    # read_cf hands back an empty string when no errors were found;
    # anything else is the error text. On error nothing below runs
    # and undef is returned to the caller.
    #
    my $err = read_cf ($CF{"CF"}, 1);
    if ($err ne "") {
	syslog ('err', "error reading config file: $err");
	return undef;
    }

    normalize_paths ();
    gen_scriptdir_hash ();

    $lasttm = time;		# the last time(2) the loop started
    $fdset_rbits = $fdset_ebits = '';
    set_last_test ();

    if ($CF{"RANDSTART"}) {
	randomize_startdelay ();
    }

    # only the "disabled" state is reloaded, and only when asked to
    if ($keepstate) {
	load_state ("disabled");
    }

    init_dtlog () if ($CF{"DTLOGGING"});

    readhistoricfile ();

    do_startup_alerts () if ($CF{"STARTUPALERTS_ON_RESET"});

    return 1;
}


#
# write a start marker to the downtime log, or switch downtime
# logging off if the log file cannot be opened for appending
#
sub init_dtlog {
    my $started = time;

    return if (!$CF{"DTLOGGING"});

    if (open (DTLOG, ">>$CF{DTLOGFILE}")) {
	$CF{"DTLOGGING"} = 1;

	# comment header: start time plus the column layout of the log
	print DTLOG <<EOF;
#
# downtime log start $started
# time back up, group, service, first failure, downtime, interval, summary
#
EOF
	close (DTLOG);
    } else {
	syslog ('err', "could not append to $CF{DTLOGFILE}: $!");
	$CF{"DTLOGGING"} = 0;
    }
}


#
# remove a process from our state
#
sub remove_proc {
    my ($pid) = @_;

    #
    # %runningpid maps a PID to the key ("group/service", judging by
    # how callers split it) under which the process is filed in
    # %fhandles and %running. Unknown PIDs are ignored.
    #
    my $key = $runningpid{$pid};
    return if (!defined $key);

    # stop watching the monitor's output handle, then close it
    vec ($fdset_rbits, fileno ($fhandles{$key}), 1) = 0;
    close ($fhandles{$key});

    delete $fhandles{$key};
    delete $running{$key};
    delete $runningpid{$pid};

    $procs--;
}


#
# exit on SIGTERM
#
sub handle_sigterm {
    # log why we are going away, then leave with exit status 1
    syslog ('info', 'caught TERM signal, exiting');
    exit 1;
}


#
# set O_NONBLOCK and FD_CLOEXEC on the given filehandle
#
sub configure_filehandle {
    my ($fh) = @_;
    my ($fl);

    #
    # Returns 1 on success and nothing (undef / empty list) as soon as
    # one of the fcntl calls fails; $! then holds the reason.
    #
    # fcntl reports the current flags through its return value (undef
    # on failure, the string "0 but true" for zero), not through its
    # third argument. The flags read here are OR-ed with the new bit so
    # that status flags already set on the handle (O_APPEND, for
    # instance) survive the F_SETFL call.
    #
    $fl = fcntl ($fh, F_GETFL, 0);
    return if (!defined $fl);
    $fl = int ($fl) | O_NONBLOCK;
    fcntl ($fh, F_SETFL, $fl)          || return;

    # same procedure for the descriptor flags
    $fl = fcntl ($fh, F_GETFD, 0);
    return if (!defined $fl);
    $fl = int ($fl) | FD_CLOEXEC;
    fcntl ($fh, F_SETFD, $fl)          || return;

    return 1;
}


#
# setup server
#
sub setup_server {
    my ($tcpproto, $udpproto);

    #
    # Creates up to three listening sockets, all non-blocking and
    # close-on-exec (see configure_filehandle):
    #
    #   SERVER      TCP, port $CF{SERVPORT}, for client commands
    #   TRAPSERVER  UDP, port $CF{TRAPPORT}, for remote monitor traps
    #   SNMPSERVER  UDP, port $CF{SNMPPORT}, only if $CF{SNMP} is set
    #
    # Any failure is fatal and goes through die_die.
    #
    if (!defined ($tcpproto = getprotobyname ('tcp')))
    {
    	die_die ("err", "could not get protocol for tcp");
    }

    if (!defined ($udpproto = getprotobyname ('udp')))
    {
    	die_die ("err", "could not get protocol for udp");
    }

    #
    # client server, such as moncmd
    #
    # bind to the configured address if there is one, otherwise to
    # all local addresses (for gethost* failures perl reports h_errno
    # through $?)
    #
    my $bindaddr;
    if (defined $CF{"SERVERBIND"})
    {
	if (!($bindaddr = gethostbyname ($CF{"SERVERBIND"})))
	{
	    die_die ("err", "error returned by gethostbyname for serverbind: $?");
	}
    }

    else
    {
    	$bindaddr = INADDR_ANY;
    }

    socket (SERVER, PF_INET, SOCK_STREAM, $tcpproto) ||
    	die_die ("err", "could not create TCP socket: $!");

    setsockopt (SERVER, SOL_SOCKET, SO_REUSEADDR, pack ("l", 1)) ||
    	die_die ("err", "could not setsockopt: $!");

    bind (SERVER, sockaddr_in ($CF{"SERVPORT"}, $bindaddr)) ||
    	die_die ("err", "could not bind TCP server port $CF{'SERVPORT'}: $!");

    # a socket that is not listening can never accept a client
    listen (SERVER, SOMAXCONN) ||
    	die_die ("err", "could not listen on TCP server port $CF{'SERVPORT'}: $!");

    configure_filehandle (*SERVER) ||
    	die_die ("err", "could not configure TCP server port: $!");

    #
    # remote monitor traps
    #
    if (defined $CF{"TRAPBIND"})
    {
	if (!($bindaddr = gethostbyname ($CF{"TRAPBIND"})))
	{
	    die_die ("err", "error returned by gethostbyname for trapbind: $?");
	}
    }

    else
    {
    	$bindaddr = INADDR_ANY;
    }

    socket (TRAPSERVER, PF_INET, SOCK_DGRAM, $udpproto) ||
    	die_die ("err", "could not create UDP socket: $!");
    bind (TRAPSERVER, sockaddr_in ($CF{"TRAPPORT"}, $bindaddr)) ||
    	die_die ("err", "could not bind UDP server port: $!");
    configure_filehandle (*TRAPSERVER) ||
    	die_die ("err", "could not configure UDP trap port: $!");
    
    return if (!$CF{"SNMP"});

    #
    # SNMP traps (always bound to all local addresses)
    #
    socket (SNMPSERVER, PF_INET, SOCK_DGRAM, $udpproto) ||
    	die_die ("err", "could not create UDP socket: $!");
    bind (SNMPSERVER, sockaddr_in ($CF{"SNMPPORT"}, INADDR_ANY)) ||
    	die_die ("err", "could not bind UDP server port: $!");
    configure_filehandle (*SNMPSERVER) ||
    	die_die ("err", "could not configure UDP SNMP port: $!");
}


#
# set up a client connection if necessary
#
sub client_accept {
    my $CLIENT = FileHandle->new;

    #
    # accept one pending connection on the SERVER socket; the return
    # value is the packed address of the peer
    #
    my $peer = accept ($CLIENT, SERVER);
    if (!defined $peer) {
	syslog ('err', "accept returned error: $!");
	return;
    }

    debug (1, "accepted client $CLIENT\n");

    # client records in %clients are keyed by file descriptor number
    my $fno = fileno ($CLIENT);

    #
    # set socket to nonblocking
    #
    unless (configure_filehandle ($CLIENT)) {
	syslog ("err", "could not configure for client: $!");
	close ($CLIENT);
	return;
    }

    my ($port, $addr) = unpack_sockaddr_in ($peer);
    syslog ('info', join ("", "client connection from ",
	    inet_ntoa ($addr), ":", $port));

    # turn on autoflush for the client handle, then reselect STDOUT
    select ($CLIENT);
    $| = 1;
    select (STDOUT);

    #
    # fhandle    the connection itself
    # user       username if authenticated
    # timeout    taken from the CLIENT_TIMEOUT setting
    # last_read  last time data was read
    # buf        input buffer, starts out empty
    #
    @{$clients{$fno}}{"fhandle", "user", "timeout", "last_read", "buf"} =
	($CLIENT, undef, $CF{"CLIENT_TIMEOUT"}, time, '');

    $numclients++;
}


#
# do all pending client commands
#
sub client_dopending {
    #
    # At most one command per client is handled on each call: the text
    # up to the first run of CR/LF characters is cut off the front of
    # the client's buffer and handed to client_command.
    #
    foreach my $cl (keys %clients) {
	next if ($clients{$cl}->{"buf"} !~ /^([^\r\n]*)[\r\n]+/s);

	my $line = $1;

	# the match starts at offset 0, so $+[0] is the length to remove
	substr ($clients{$cl}->{"buf"}, 0, $+[0], '');

	client_command ($cl, $line);
    }
}


#
# close a client connection
#
sub client_close {
    my ($cl, $reason) = @_;

    # the reason is optional; without one nothing is logged
    if (defined $reason) {
	syslog ('info', "closing client $cl: $reason");
    }

    # a client record without a file handle is treated as fatal
    my $fh = $clients{$cl}->{"fhandle"};
    die if (!defined $fh);

    close ($fh);
    delete $clients{$cl};

    # clear this client's bit in $iovec
    vec ($iovec, $cl, 1) = 0;

    $numclients--;
}


#
# Handle a connection from a client
#
sub client_command {
    my ($cl, $l) = @_;
    my ($cmd, $args, $group, $service, $s, $sname, $stchanged);
    my ($var, $value, $msg, @l, $sock, $port, $addr, $sref, $auth, $fh);
    my ($user, $pass, @argsList, $comment);
    my ($authtype, @authtypes);
    my $is_auth = 0;    #flag for multiple auth types

    #
    # $cl  key of the client's record in %clients
    # $l   one command line from that client, without line terminator
    #
    # Replies are written to the client with sock_write. Every command
    # is terminated by a status line: "220 ..." on success, "520 ..."
    # (or "530 ..." for a failed login) on error. Most commands are
    # only carried out if check_auth allows them for the client's
    # user; otherwise the final "520 command could not be executed"
    # branch answers. If a command changed any disabled/enabled
    # state, the "disabled" state is saved before returning.
    #

    #
    # The command line is client-supplied, so it is passed to syslog
    # as an argument and never as the format string. Login lines are
    # not logged because they carry the password.
    #
    syslog ('info', 'client command "%s"', $l)
	if ($l !~ /^\s*login/i);

    $fh = $clients{$cl}->{"fhandle"};

    if ($l !~ /^(dump|login|disable|enable|quit|list|set|get|
		    stop|start|loadstate|savestate|reset|clear|checkauth|
		    reload|term|test|servertime|ack|version|protid)\s*(.*)?$/ix) {
	sock_write ($fh, "520 invalid command\n");
	return;
    }
    ($cmd, $args) = ("\L$1", $2);

    $stchanged = 0;

    #
    # quit command
    #
    if ($cmd eq "quit") {
	sock_write ($fh, "220 quitting\n");
	client_close ($cl);

    } elsif ($opt{"d"} && $cmd eq "dump") {
    	print STDERR Dumper (\%watch), "\n\n";

    #
    # protocol identification
    #
    } elsif ($cmd eq "protid") {
    	if ($args != int ($PROT_VERSION))
	{	
	    sock_write ($fh, "520 protocol mismatch\n");
	}

	else
	{
	    sock_write ($fh, "220 protocol match\n");
	}

    #
    # login
    #
    } elsif ($cmd eq "login") {
	($user, $pass) = split (/\s+/, $args, 2);
	@authtypes = split(' ' , $CF{"AUTHTYPE"}) ;
	# Check each form of authentication in order, and stop checking
	# as soon as we get a positive authentication result.
	foreach $authtype (@authtypes) {
	    if (defined auth ($authtype, $user, $pass)) {
		$is_auth = 1;
		last;
	    }
	}
	if ($is_auth != 1) {
	    sock_write ($fh,  "530 login unsuccessful\n");
	} else {
	    $clients{$cl}->{"user"} = $user;
	    # user name comes from the client: argument, not format
	    syslog ("info", "authenticated %s", $user);
	    sock_write ($fh,  "220 login accepted\n");
	}

    #
    # reset
    #
    } elsif ($cmd eq "reset" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	my ($keepstate);
	if ($args =~ /stopped/i) {
	    $STOPPED = 1;
	    $STOPPED_TIME = time;
	}

	if ($args =~ /keepstate/) {
	    $keepstate = 1;
	}

	if (reset_server ($keepstate)) {
	    sock_write ($fh,  "220 reset PID $$\@$HOSTNAME\n");
	} else {
	    sock_write ($fh,  "520 reset PID $$\@$HOSTNAME failed, error in config file\n");
	}

    #
    # reload
    #
    } elsif ($cmd eq "reload" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	if (!defined reload (split (/\s+/, $args))) {
	    sock_write ($fh,  "520 unknown reload command\n");
	} else {
	    sock_write ($fh,  "220 reload completed\n");
	}

    #
    # clear
    #
    } elsif ($cmd eq "clear" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
    	if ($args =~ /^timers \s+ ([a-zA-Z0-9_.-]+) \s+ ([a-zA-Z0-9_.-]+)/ix) {
	    if (!defined $watch{$1}->{$2}) {
		sock_write ($fh,  "520 unknown group\n");
	    } else {
		clear_timers ($1, $2);
		sock_write ($fh,  "220 clear timers completed\n");
	    }

	} else {
	    # nothing else to do; fall through to the end of the sub
	    sock_write ($fh,  "520 unknown clear command\n");
	}

    #
    # test
    #
    } elsif ($cmd eq "test" && check_auth ($clients{$cl}->{"user"}, $cmd))  {
	# the right-hand side still refers to the outer $args
	my ($cmd, $args) = split (/\s+/, $args, 2);

	#
	# test monitor
	#
	if ($cmd eq "monitor") {
	    my ($group, $service) = split (/\s+/, $args);

	    if (!defined $watch{$group}->{$service}) {
		sock_write ($fh,  "$group $service not defined\n");
	    } else {
		# a zero timer makes the service due for a test
		$watch{$group}->{$service}->{"_timer"} = 0;
	    }
	    sock_write ($fh,  "220 test monitor completed\n");
	
	#
	# test alert
	#
	# the alternation is grouped so that both anchors apply to
	# every alternative
	#
	} elsif ($cmd =~ /^(?:alert|startupalert|upalert)$/) {
	    my ($group, $service, $retval, $period) = split (/\s+/, $args, 4);

	    if (!defined $watch{$group}->{$service}) {
		sock_write ($fh,  "520 $group $service not defined\n");

	    } elsif (!defined $watch{$group}->{$service}->{"periods"}->{$period}) {
		    sock_write ($fh,  "520 period not defined\n");

	    } else {
		my $f = 0;
		my $a;

		if ($cmd eq "alert") {
		    $a = $watch{$group}->{$service}->{"periods"}->{$period}->{"alerts"};
		} elsif ($cmd eq "startupalert") {
		    $f = $FL_STARTUPALERT;
		    $a = $watch{$group}->{$service}->{"periods"}->{$period}->{"startupalerts"};
		} elsif ($cmd eq "upalert") {
		    $f = $FL_UPALERT;
		    $a = $watch{$group}->{$service}->{"periods"}->{$period}->{"upalerts"};
		}

		for (@{$a}) {
		    my ($alert, $args) = split (/\s+/, $_, 2);

		    # a leading "exit=..." word is not passed to the alert
		    if ($args =~ /^exit=/) {
		    	$args =~ s/^exit=\S+ \s+//x;
		    }

		    call_alert (
			group	=> $group,
			service	=> $service,
			output	=> "test\ntest detail\n",
			retval	=> $retval,
			flags	=> $f | $FL_TEST,
			alert	=> $alert,
			args	=> $args,
		    );
		}

		sock_write ($fh,  "220 test alert completed\n");
	    }
	
	#
	# test config file
	#
	} elsif ($cmd =~ /^config$/) {
	    # parse only; the second argument 0 means "do not commit"
	    if ((my $err = read_cf ($CF{"CF"}, 0))  ne "") {
		sock_write ($fh,  $err);
		sock_write ($fh,  "\n520 test config completed, errors found in config file\n");
	    }

	    else
	    {
		sock_write ($fh,  "220 test config completed OK, no errors found\n");
	    }

	} else {
	    sock_write ($fh,  "520 test error\n");
	}

    #
    # version
    #
    } elsif ($cmd eq "version") {
    	sock_write ($fh, "version " . int ($PROT_VERSION) . "\n");
    	sock_write ($fh, "220 version completed\n");

    #
    # load state
    #
    } elsif ($cmd eq "loadstate" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	foreach (split (/\s+/, $args)) {
	    load_state ($_);
	}
	sock_write ($fh,  "220 loadstate completed\n");

    #
    # save state
    #
    } elsif ($cmd eq "savestate" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	if ($args =~ /\S/)
	{
	    foreach (split (/\s+/, $args))
	    {
		save_state ($_);
	    }
	    sock_write ($fh,  "220 savestate completed\n");
	}

	else
	{
	    sock_write ($fh,  "520 savestate error, arguments required\n");
	}

    #
    # term
    #
    } elsif ($cmd eq "term"  && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	sock_write ($fh,  "220 terminating server\n");
	client_close ($cl, "terminated by user command");
	syslog ("info", "terminating by user command");
	exit;

    #
    # stop testing
    #
    } elsif ($cmd eq "stop"&& check_auth ($clients{$cl}->{"user"}, $cmd)) {
	$STOPPED = 1;
	$STOPPED_TIME = time;
	sock_write ($fh,  "220 stop completed\n");

    #
    # start testing
    #
    } elsif ($cmd eq "start" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	$STOPPED = 0;
	$STOPPED_TIME = 0;
	sock_write ($fh,  "220 start completed\n");

    #
    # set
    #
    } elsif ($cmd eq "set" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	if ($args =~ /^maxkeep\s+(\d+)/) {
	    $CF{"MAX_KEEP"} = $1;
	    sock_write ($fh,  "220 set completed\n");
	} else {
	    ($group, $service, $var, $value) = split (/\s+/, $args, 4);
	    if (!defined $watch{$group}->{$service}) {
		sock_write ($fh,  "520 $group,$service not defined\n");
	    } elsif ($var eq "opstatus") {
		if (!defined ($OPSTAT{$value})) {
		    sock_write ($fh,  "520 undefined opstatus\n");
		} else {
		    set_op_status ($group, $service,
		    	un_esc_str ((parse_line ('\s+', 0, $value))[0]));
		    sock_write ($fh,  "220 set completed\n");
		}

	    } else {
		$value = un_esc_str ((parse_line ('\s+', 0, $value))[0]);
		$watch{$group}->{$service}->{$var} = $value;
		sock_write ($fh,  "$group $service $var='$value'\n");
		sock_write ($fh,  "220 set completed\n");
	    }
	}

    #
    # get
    #
    } elsif ($cmd eq "get" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	if ($args =~ /^maxkeep\s*$/) {
	    sock_write ($fh,  "maxkeep = $CF{MAX_KEEP}\n");
	    # NOTE(review): the reply text says "set" although this is
	    # the get command; left unchanged in case clients match it
	    sock_write ($fh,  "220 set completed\n");
	} else {
	    ($group, $service, $var) = split (/\s+/, $args, 3);
	    if (!defined $watch{$group}->{$service}) {
		sock_write ($fh,  "520 $group,$service not defined\n");
	    } else {
		sock_write ($fh,  "$group $service $var='" .
			esc_str ($watch{$group}->{$service}->{$var}, 1) . "'\n");
		sock_write ($fh,  "220 get completed\n");
	    }
	}

    #
    # list
    #
    } elsif ($cmd eq "list" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	@argsList = split(/\s+/, $args);
	($cmd, $args) = split (/\s+/, $args, 2);

	#
	# list service descriptions
	#
	if ($cmd eq "descriptions") {
	    foreach $group (keys %watch) {
		foreach $service (keys %{$watch{$group}}) {
		    sock_write ($fh,  "$group $service '" .
			esc_str ($watch{$group}->{$service}->{"description"}, 1) .
		    	"'\n");
		}
	    }
	    sock_write ($fh,  "220 list descriptions completed\n");

	#
	# list group members
	#
	} elsif ($cmd eq "group") {
	    if ($groups{$args}) {
		sock_write ($fh,  "hostgroup $args @{$groups{$args}}\n");
		sock_write ($fh,  "220 list group completed\n");
	    } else {
		sock_write ($fh,  "520 list group error, undefined group\n");
	    }

	#
	# list status of all services
	#
	} elsif ($cmd eq "opstatus") {
	    if ($args eq "")
	    {
		foreach $group (keys %watch) {
		    foreach $service (keys %{$watch{$group}}) {
			client_write_opstatus ($fh, $group, $service);
		    }
		}
		sock_write ($fh,  "220 list opstatus completed\n");
	    }

	    else
	    {
	    	my $err = 0;
		my @g = ();
		my ($group, $service);

		#
		# validate every "group,service" pair first; stop at
		# the first unknown one so that $group and $service
		# name the offending pair in the error reply
		#
		foreach my $gs (split (/\s+/, $args))
		{
		    ($group, $service) = split (/,/, $gs);
		    if (!defined $watch{$group}->{$service})
		    {
			$err++;
			last;
		    }
		    push (@g, [$group, $service]);
		}

		if (!$err)
		{
		    foreach my $gs (@g)
		    {
			client_write_opstatus ($fh, $gs->[0], $gs->[1]);
		    }
		    sock_write ($fh,  "220 list opstatus completed\n");
		}

		else
		{
		    sock_write ($fh,  "520 $group,$service does not exist\n");
		}
	    }

	#
	# list disabled hosts and services
	#
	} elsif ($cmd eq "disabled") {
	    foreach $group (keys %groups) {
		# disabled hosts carry a leading "*" in the group list
		@l = grep (/^\*/, @{$groups{$group}});
		if (@l) {
		    grep (s/^\*//, @l);
		    sock_write ($fh,  "group $group: @l\n");
		}
	    }
	    foreach $group (keys %watch) {
		if ($watch_disabled{$group} == 1) {
		    sock_write ($fh,  "watch $group\n");
		}
		foreach $service (keys %{$watch{$group}}) {
		    if ($watch{$group}->{$service}->{'disable'} == 1) {
			sock_write ($fh,  "watch $group service " .
			    "$service\n");
		    }
		}
	    }
	    sock_write ($fh,  "220 list disabled completed\n");

	#
	# list last alert history
	#
	} elsif ($cmd eq "alerthist") {
	    foreach my $l (@last_alerts)
	    {
		sock_write ($fh,  esc_str ($l) . "\n");
	    }
	    sock_write ($fh,  "220 list alerthist completed\n");

	#
	# list time of last failures for each service
	#
	} elsif ($cmd eq "failures") {
	    foreach $group (keys %watch) {
		foreach $service (keys %{$watch{$group}}) {
		    my $sref = \%{$watch{$group}->{$service}};
		    client_write_opstatus ($fh, $group, $service)
			if ($FAILURE{$sref->{"_op_status"}});
		}
	    }
	    sock_write ($fh,  "220 list failures completed\n");

	#
	# list the failure history
	#
	} elsif ($cmd eq "failurehist") {
	    foreach my $l (@last_failures)
	    {
		sock_write ($fh, esc_str ($l) . "\n");
	    }
	    sock_write ($fh,  "220 list failurehist completed\n");

	#
	# list the time of last successes for each service
	#
	} elsif ($cmd eq "successes") {
	    foreach $group (keys %watch) {
		foreach $service (keys %{$watch{$group}}) {
		    my $sref = \%{$watch{$group}->{$service}};
		    client_write_opstatus ($fh, $group, $service)
			if ($SUCCESS{$sref->{"_op_status"}});
		}
	    }
	    sock_write ($fh,  "220 list successes completed\n");

	#
	# list warnings
	#
	} elsif ($cmd eq "warnings") {
	    foreach $group (keys %watch) {
		foreach $service (keys %{$watch{$group}}) {
		    my $sref = \%{$watch{$group}->{$service}};
		    client_write_opstatus ($fh, $group, $service)
			if ($WARNING{$sref->{"_op_status"}});
		}
	    }
	    # NOTE(review): the reply text says "successes" for the
	    # warnings list; left unchanged in case clients match it
	    sock_write ($fh,  "220 list successes completed\n");

	#
	# list process IDs
	#
	} elsif ($cmd eq "pids") {
	    sock_write ($fh,  "server $$\n");
	    foreach $value (keys %runningpid) {
		($group, $service) = split (/\//, $runningpid{$value});
		sock_write ($fh,  "$group $service $value\n");
	    }
	    sock_write ($fh,  "220 list pids completed\n");

	#
	# list watch groups and services
	#
	} elsif ($cmd eq "watch") {
	    foreach $group (keys %watch) {
		foreach $service (keys %{$watch{$group}}) {
		    if (!defined $watch{$group}->{$service}) {
			sock_write ($fh,  "$group (undefined service)\n");
		    } else {
			sock_write ($fh,  "$group $service\n");
		    }
		}
	    }
	    sock_write ($fh,  "220 list watch completed\n");

	#
	# list server state
	#
	} elsif ($cmd eq "state") {
	    if ($STOPPED) {
		sock_write ($fh,  "scheduler stopped since $STOPPED_TIME\n");
	    } else {
		sock_write ($fh,  "scheduler running\n");
	    }
	    sock_write ($fh,  "220 list state completed\n");

	#
	# list aliases
	#
	} elsif ($cmd eq "aliases") {
	    my (@listAliasesRequest) = @argsList;

	    # drop the leading "aliases" word itself
	    shift (@listAliasesRequest);

	    # if no alias request, all alias are responded
	    unless (@listAliasesRequest) {
	    	@listAliasesRequest = keys (%alias);
	    }

	    foreach my $alias (@listAliasesRequest){
	    	sock_write ($fh, "alias $alias\n");
		foreach $value (@{$alias{$alias}}) {
		    sock_write ($fh,  "$value\n");
		}
		sock_write ($fh, "\n");
	    }
	    sock_write ($fh,  "220 list aliases completed\n");

	#
	# list aliasgroups
	#
	} elsif ($cmd eq "aliasgroups") {
	    my (@listAliasesRequest);
	    @listAliasesRequest = keys (%alias);

	    sock_write ($fh,  "@listAliasesRequest\n")
	    	unless (@listAliasesRequest == 0);
	    sock_write ($fh,  "220 list aliasgroups completed\n");
	
	#
	# list deps
	#
	# per service: an "exp" line with the dependency expression and
	# a "cmp" line with the group:service pairs found in it
	#
	} elsif ($cmd eq "deps") {
	    foreach my $g (keys %watch) {
	    	foreach my $s (keys %{$watch{$g}}) {
		    my $sref = \%{$watch{$g}->{$s}};
		    if ($sref->{"depend"} ne "") {
			sock_write ($fh, "exp $g $s '" .
				esc_str ($sref->{"depend"}, 1) . "'\n");
		    } else {
			sock_write ($fh, "exp $g $s 'NONE'\n");
		    }
		    my @u =
		    	($sref->{"depend"} =~ /[a-zA-Z0-9_.-]+:[a-zA-Z0-9_.-]+/g);
		    if (@u) {
			sock_write ($fh, "cmp $g $s @u\n");
		    } else {
			sock_write ($fh, "cmp $g $s NONE\n");
		    }
		}
	    }

	    sock_write ($fh,  "220 list deps completed\n");

	#
	# downtime log
	#
	} elsif ($cmd eq "dtlog") {
	    if ($CF{"DTLOGGING"}) {
	    	if (!open (DTLOGTMP, "<  $CF{DTLOGFILE}")) {
		    sock_write ($fh, "520 list dtlog error, cannot open dtlog\n");

		} else {
		    # comment lines in the log are not sent to the client
		    while (<DTLOGTMP>) {
		    	sock_write ($fh, $_ ) if (!/^#/);
		    }

		    close (DTLOGTMP);

		    sock_write ($fh, "220 list dtlog completed\n");
		}

	    } else {
	    	sock_write ($fh, "520 list dtlog error, dtlogging is not turned on\n");
	    }

	} else {
	    sock_write ($fh,  "520 unknown list command\n");
	}

    #
    # acknowledge a failure
    #
    } elsif ($cmd eq "ack" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	my ($group, $service, $comment) = split (/\s+/, $args, 3);

	#
	# exactly one reply per command: an unknown group or service
	# ends the command with its 520 line
	#
	if (!defined ($watch{$group})) {
	    sock_write ($fh,  "520 unknown group\n");

	} elsif (!defined $watch{$group}->{$service}) {
	    sock_write ($fh,  "520 unknown service\n");

	} else {
	    my $sref = \%{$watch{$group}->{$service}};

	    if ($sref->{"_op_status"} == $STAT_OK ||
		      $sref->{"_op_status"} == $STAT_UNTESTED) {
		sock_write ($fh,  "520 service is in a non-failure state\n");

	    } else {
		$sref->{"_ack"} = 1;
		$sref->{"_ack_comment"} =
			un_esc_str ((parse_line ('\s+', 0, $comment))[0]);
		sock_write ($fh,  "220 ack completed\n");
	    }
	}

    #
    # disable watch, service or host
    #
    } elsif ($cmd eq "disable" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	($cmd, $args) = split (/\s+/, $args, 2);

	#
	# disable watch
	#
	if ($cmd eq "watch") {
	    if (!defined (disen_watch($args, 0))) {
		sock_write ($fh,  "520 disable error, unknown watch \"$args\"\n");
	    } else {
		$stchanged++;
		sock_write ($fh,  "220 disable watch completed\n");
	    }

	#
	# disable service
	#
	} elsif ($cmd eq "service") {
	    ($group, $service) = split (/\s+/, $args, 2);

	    if (!defined (disen_service ($group, $service, 0))) {
		sock_write ($fh,  "520 disable error, unknown service\n");
	    } else {
		$stchanged++;
		sock_write ($fh,  "220 disable service completed\n");
	    }

	#
	# disable host
	#
	} elsif ($cmd eq "host") {
	    my @notfound = ();

	    my @hosts = split (/\s+/, $args);

	    # all or nothing: every named host has to exist
	    foreach my $h (@hosts)
	    {
	    	if (!host_exists ($h))
		{
		    push @notfound, $h;
		}
	    }

	    if (@notfound == 0)
	    {
		foreach my $h (@hosts)
		{
		    disen_host ($h, 0);
		    $stchanged++;
		}

		# one status line for the whole command, as for enable host
		sock_write ($fh, "220 disable host completed\n");
	    }
	    else
	    {
	    	sock_write ($fh, "520 disable host failed, @notfound does not exist\n");
	    }

	} else {
	    sock_write ($fh,  "520 command could not be executed\n");
	}

    #
    # enable watch, service or host
    #
    } elsif ($cmd eq "enable" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	($cmd, $args) = split (/\s+/, $args, 2);

	#
	# enable watch
	#
	if ($cmd eq "watch") {
	    if (!defined (disen_watch ($args, 1))) {
		sock_write ($fh,  "520 enable error, unknown watch\n");
	    } else {
		$stchanged++;
		sock_write ($fh,  "220 enable watch completed\n");
	    }


	#
	# enable service
	#
	} elsif ($cmd eq "service") {
	    ($group, $service) = split (/\s+/, $args, 2);

	    if (!defined (disen_service ($group, $service, 1))) {
		sock_write ($fh,  "520 enable error, unknown group\n");
	    } else {
		$stchanged++;
		sock_write ($fh,  "220 enable completed\n");
	    }

	#
	# enable host
	#
	} elsif ($cmd eq "host") {
	    foreach $var (split (/\s+/, $args)) {
		disen_host ($var, 1);
		$stchanged++;
	    }
	    sock_write ($fh,  "220 enable completed\n");

	} else {
	    sock_write ($fh,  "520 command could not be executed\n");
	}

    #
    # server time
    #
    } elsif ($cmd eq "servertime" && check_auth ($clients{$cl}->{"user"}, $cmd)) {
	sock_write ($fh,  join ("", time, " ", scalar (localtime), "\n"));
	sock_write ($fh,  "220 servertime completed\n");

    #
    # check auth
    #
    } elsif ($cmd eq "checkauth") {
	#
	# the command to check is the first word of the arguments
	# (split in void context no longer fills @_ on current perls,
	# so the word is taken from the returned list)
	#
	($cmd) = split (' ', $args);
	$user = $clients{$cl}->{"user"};
	#  Note that we call check_auth without syslogging here.
	if (check_auth($clients{$cl}->{"user"}, $cmd, 1))
	{
	    sock_write ($fh, "220 command authorized\n");
	}
	
	else
	{
	    sock_write ($fh, "520 command could not be executed\n");
	}


    } else {
	sock_write ($fh,  "520 command could not be executed\n");
    }

    save_state ("disabled") if ($stchanged);
}


#
# send one line of space-separated key=value pairs describing the
# current state of a service to the client handle $fh
#
sub client_write_opstatus {
    my ($fh, $group, $service) = @_;

    my $sref = \%{$watch{$group}->{$service}};

    # free-text values are passed through esc_str and sent in quotes
    my $summary = esc_str ($sref->{"_last_summary"}, 1);
    my $detail = esc_str ($sref->{"_last_detail"}, 1);
    my $depend = esc_str ($sref->{"depend"}, 1);
    my $monitor = esc_str ($sref->{"monitor"}, 1);

    # the ack comment is only reported while the service is acknowledged
    my $comment = ($sref->{"_ack"} == 1)
	? esc_str ($sref->{"_ack_comment"}, 1)
	: '';

    #
    # over all periods of the service: total number of alerts sent
    # and the largest _last_alert value
    #
    my ($alerts_sent, $last_alert) = (0, 0);
    foreach my $p (keys %{$sref->{"periods"}})
    {
	$alerts_sent += $sref->{"periods"}->{$p}->{"_alert_sent"};

	if ($sref->{"periods"}->{$p}->{"_last_alert"} > $last_alert)
	{
	    $last_alert = $sref->{"periods"}->{$p}->{"_last_alert"};
	}
    }

    # fields that are always present
    my @fields = (
	"group=$group",
	"service=$service",
	"opstatus=$sref->{_op_status}",
	"last_opstatus=$sref->{_last_op_status}",
	"exitval=$sref->{_exitval}",
	"timer=$sref->{_timer}",
	"last_success=$sref->{_last_success}",
	"last_trap=$sref->{_last_trap}",
	"last_check=$sref->{_last_check}",
	"ack=$sref->{_ack}",
	"ackcomment='$comment'",
	"alerts_sent=$alerts_sent",
	"depstatus=" . int ($sref->{"_depend_status"}),
	"depend='$depend'",
	"monitor='$monitor'",
	"last_summary='$summary'",
	"last_detail='$detail'",
    );

    # fields that are only sent when they carry a value
    if ($sref->{"_last_failure"})
    {
	push (@fields, "last_failure=$sref->{_last_failure}");
    }

    if ($sref->{"interval"})
    {
	push (@fields, "interval=$sref->{interval}");
    }

    if ($sref->{"exclude_period"} ne "")
    {
	push (@fields, "exclude_period='$sref->{exclude_period}'");
    }

    if (keys %{$sref->{"exclude_hosts"}})
    {
	push (@fields, "exclude_hosts='" .
	    join (" ", keys %{$sref->{exclude_hosts}}) . "'");
    }

    if ($sref->{"randskew"})
    {
	push (@fields, "randskew=$sref->{randskew}");
    }

    if ($last_alert)
    {
	push (@fields, "last_alert=$last_alert");
    }

    if ($sref->{"_first_failure"})
    {
	# how long the service has been failing, in seconds
	my $elapsed = time - $sref->{"_first_failure"};

	push (@fields, "first_failure=$sref->{_first_failure}",
	    "failure_duration=$elapsed");
    }

    sock_write ($fh, join (" ", @fields) . "\n");
}


#
# show usage
#
sub usage {
    #
    # the help text ends with the bug report address ($AUTHOR) and
    # the revision string ($RCSID) and is printed to the currently
    # selected output handle
    #
    my $text = <<"EOF";
usage: mon [-a dir] [-c config] [-d] [-f] [-i secs] [-k num]
	[-m num] [-p num] [-P file] [-r num] [-s dir] 
       mon -v

  -a dir	alert script dir
  -A file	authorization file
  -b dir	base directory for alerts and monitors (basedir)
  -B dir	base directory for configuration files (cfbasedir)
  -c config	config file, defaults to "mon.cf"
  -d		debug
  -D dir	state directory (statedir)
  -f		fork and become a daemon
  -h		this help
  -i secs	sleep interval (seconds), defaults to 1
  -k num	keep history of last num events
  -l		load old state from statedir
  -L dir	log directory (logdir)
  -M		pre-process config file with m4
  -m num	throttle at maximum number of monitor processes
  -O facility	syslog facility to use
  -o file       on-call schedule
  -p num	server listens on port num
  -P file	PID file
  -r num	randomize startup schedule
  -s dir	monitor script dir
  -S		start with scheduler stopped
  -t port	trap port
  -v		print version

Report bugs to $AUTHOR
$RCSID
EOF

    print $text;
}


#
# become a daemon
#
sub daemon {
    #
    # first fork: the parent exits, the child carries on and starts
    # a new session
    #
    my $pid = fork();
    die "could not fork: $!\n" if (!defined $pid);
    exit (0) if ($pid);

    setsid();

    #
    # make it so that we cannot regain a controlling terminal
    #
    $pid = fork();
    if (!defined $pid) {
	syslog ('err', "could not fork: $!");
	exit 1;
    }
    exit (0) if ($pid);

    chdir ('/');
    umask (022);

    #
    # the error output file is opened for reading and appending and
    # then takes the place of STDOUT, STDIN and STDERR
    #
    unless (open (N, "+>>" . $CF{"MONERRFILE"}))
    {
	syslog ("err", "could not open error output file $CF{'MONERRFILE'}: %m");
	exit (1);
    }

    unless (open (STDOUT, ">&N") &&
	    open (STDIN, "<&N") &&
	    open (STDERR, ">&N")) {
	syslog ("err", "could not redirect: %m");
	exit (1);
    }

    syslog ('info', "running as daemon");
}


#
# debug
#
sub debug {
    my ($level, @l) = @_;

    # drop messages whose level is above the value of the "d" option
    return if ($opt{"d"} < $level);

    #
    # with "d" set and "f" unset the text goes to STDERR,
    # in every other case it is sent to syslog
    #
    if (!$opt{"d"} || $opt{"f"}) {
	syslog ('debug', join ('', @l));
    } else {
	print STDERR @l;
    }
}


#
# die_die
#
sub die_die {
    my ($level, $msg) = @_;

    # with the "d" option set, die with the message instead of logging it
    if ($opt{"d"}) {
	die "[$level] $msg\n";
    }

    # otherwise log at the given syslog level, close the log and exit 1
    syslog ($level, "fatal, $msg");
    closelog();
    exit (1);
}


#
# handle cleanup of exited processes
# trigger alerts on failures (or send no alert if disabled)
# do some accounting
#
# Reaps, without blocking, every child that has exited. For each one
# the remaining output is drained from its pipe, and the exit value and
# check time are stored in the service's %watch entry. Then either the
# failure path (counters, op status, @last_failures, do_alert, switch
# to "failure_interval") or the success path (downtime log, upalert,
# reset of counters and alert timers, switch back to the original
# interval) is taken. Finally the output is saved, the timer is reset
# and the process is removed from the bookkeeping tables.
#
sub proc_cleanup {
    my ($tmnow, $buf);

    $tmnow = time;
    return if (keys %running == 0);

    while ((my $p = waitpid (-1, &WNOHANG)) > 0)
    {
        #
        # remember the wait status right away, so that nothing
        # called further down can overwrite $? before it is examined
        #
        my $status = $?;

        #
        # a child which was never registered by run_monitor has no
        # group/service to account against
        #
        my $id = $runningpid{$p};
        if (!defined ($id)) {
            debug (1, "PID $p exited but is not a known monitor\n");
            next;
        }

        my ($group, $service) = split (/\//, $id);
        my $sref = \%{$watch{$group}->{$service}};

        #
        # suck in any extra data
        #
        my $fh = $fhandles{$id};
        while (my $z = sysread ($fh, $buf, 8192)) {
            $ibufs{$id} .= $buf;
        }

        $sref->{"_exitval"} = int ($status >> 8);
        debug (1, "PID $p ($id) exited with [$sref->{'_exitval'}]\n");

        $sref->{"_last_checked"} = $tmnow;

        if ($sref->{"depend"} ne "" &&
                $sref->{"dep_behavior"} eq "a")
        {
            dep_ok ($sref);
        }

        #
        # error exit value
        #
        if ($status)
        {
            #
            # accounting
            #
            $sref->{"_failure_count"}++;
            $sref->{"_consec_failures"}++;
            $sref->{"_last_failure"} = $tmnow;
            if ($sref->{"_op_status"} == $STAT_OK ||
                    $sref->{"_op_status"} == $STAT_UNKNOWN ||
                    $sref->{"_op_status"} == $STAT_UNTESTED) {
                $sref->{"_first_failure"} = $tmnow;
            }
            set_op_status ($group, $service, $STAT_FAIL);

            #
            # the first line of output is the summary, the rest is detail
            #
            my ($summary, $detail) = split ("\n", $ibufs{$id}, 2);
            $summary = "(NO SUMMARY)" if ($summary =~ /^\s*$/m);
            $sref->{"_last_summary"} = $summary;
            $sref->{"_last_detail"} = $detail;
            shift @last_failures if (@last_failures > $CF{"MAX_KEEP"});
            push @last_failures, "$group $service" .
                " $tm $summary";
            syslog ('crit', "failure for $last_failures[-1]");

            #
            # send an alert if necessary
            #
            do_alert ($group, $service, $ibufs{$id},
                    $status >> 8, $FL_MONITOR);

            #
            # change interval if needed; "_old_interval" is only
            # defined while the failure interval is in effect
            #
            if (defined ($sref->{"failure_interval"}) &&
                    !defined ($sref->{"_old_interval"}))
            {
                $sref->{"_old_interval"} = $sref->{"interval"};
                $sref->{"interval"} = $sref->{"failure_interval"};
                $sref->{"_next_check"} = 0;
            }

            $sref->{"_failure_output"} = $ibufs{$id};
        }

        #
        # success exit value
        #
        else
        {
            if ($CF{"DTLOGGING"} && defined ($sref->{"_op_status"}) &&
                   $sref->{"_op_status"} == $STAT_FAIL)
            {
                write_dtlog ($sref, $group, $service);
            }

            #
            # if this service has just come back up and
            # we are paying attention to this event,
            # let someone know
            #
            if (defined ($sref->{"_op_status"}) &&
                    $sref->{"_op_status"} == $STAT_FAIL)
            {
                if (defined ($sref->{"_upalert"}) &&
                        $tmnow - $sref->{"_first_failure"} >=
                        $sref->{"upalertafter"})
                {
                    #
                    # "_last_output" still holds the output of the
                    # previous run here; it is replaced further down
                    #
                    do_alert ($group, $service, $sref->{"_last_output"}, 0, $FL_UPALERT);
                }
            }

            $sref->{"_ack"} = 0;
            $sref->{"_ack_comment"} = '';
            $sref->{"_first_failure"} = 0;
            $sref->{"_last_failure"} = 0;
            $sref->{"_consec_failures"} = 0;
            my ($summary, $detail) = split ("\n", $ibufs{$id}, 2);
            $sref->{"_last_summary"} = $summary;
            $sref->{"_last_detail"} = $detail;

            #
            # reset the alertevery timer
            #
            foreach my $period (keys %{$sref->{"periods"}})
            {
                $sref->{"periods"}->{$period}->{"_last_alert"} = 0;
                $sref->{"periods"}->{$period}->{"_alert_sent"} = 0;
            }

            #
            # change interval back to original
            #
            if (defined ($sref->{"failure_interval"}) &&
                    defined ($sref->{"_old_interval"}))
            {
                $sref->{"interval"} = $sref->{"_old_interval"};
                $sref->{"_old_interval"} = undef;
                $sref->{"_next_check"} = 0;
            }

            $sref->{"_last_success"} = $tmnow;
            set_op_status ($group, $service, $STAT_OK);
        }

        #
        # save the output
        #
        $sref->{"_last_output"} = $ibufs{$id};

        reset_timer ($group, $service);

        remove_proc ($p);
    }
}


#
# collect output from running processes
#
# Polls (zero timeout) the read set in $fdset_rbits and appends whatever
# can be read from each ready handle in %fhandles to the matching entry
# in %ibufs. A handle that reports EOF or a read error is taken out of
# $fdset_rbits. Does nothing when no monitor is running.
#
sub collect_output {
    my ($buf, $rout);

    return if (!keys %running);

    my $nfound = select ($rout=$fdset_rbits, undef, undef, 0);

    #
    # errno only means something when select itself failed, and it has
    # to be saved before debug/syslog get a chance to change it
    #
    my ($errno, $errstr) = ($! + 0, "$!");

    debug (1, "select returned $nfound file handles\n");

    if ($nfound < 0) {
        syslog ('err', "select failed while collecting output: $errstr")
            if ($errno != &EINTR);
        return;
    }

    return if (!$nfound);

    #
    # look for the file descriptors that are readable,
    # and try to read as much as possible from them
    #
    foreach my $k (keys %fhandles) {
        my $fh = $fhandles{$k};
        next if (vec ($rout, fileno ($fh), 1) != 1);

        my $z = 0;
        while ($z = sysread ($fh, $buf, 8192)) {
            $ibufs{$k} .= $buf;
            debug (1, "[$buf] from $fh\n");
        }

        if (!defined ($z)) {
            #
            # ignore if EAGAIN, since we're nonblocking
            #
            next if ($! == &EAGAIN);

            #
            # error on this descriptor
            #
            my $err = "$!";
            debug (1, "error on $fh: $err\n");
            syslog ('err', "error on $fh: $err");
            vec ($fdset_rbits, fileno ($fh), 1) = 0;
        }

        #
        # sysread returned 0: EOF encountered, stop trying to
        # get input from this file descriptor
        #
        else {
            debug (1, "EOF on $fh\n");
            vec ($fdset_rbits, fileno ($fh), 1) = 0;
        }
    }
}




#
# handle forking a monitor process, and set up variables
#
# Looks up the monitor program of $group/$service in %MONITORHASH,
# builds its argument list (monitor arguments followed by the enabled,
# non-excluded hosts of the group, unless the monitor line ends in
# ";;"), forks it with its output connected to a pipe, and registers
# the pipe and pid in %fhandles, $fdset_rbits, %running, %runningpid
# and %ibufs.
#
# Returns undef when the monitor program is not available and 0 when
# the fork fails.
#
sub run_monitor {
    my ($group, $service) = @_;
    my (@args, $pid, @ghosts, $monitor, $monitorargs);

    my $sref = \%{$watch{$group}->{$service}};
    my $id = "$group/$service";

    ($monitor, $monitorargs) = ($sref->{"monitor"} =~ /^(\S+)(\s+(.*))?$/);

    if (!defined $MONITORHASH{$monitor} || ! -f $MONITORHASH{$monitor}) {
        syslog ('err', "no monitor found while trying to run [$monitor]");
        return undef;
    }

    $monitor = $MONITORHASH{$monitor};
    $monitor .= " " . $monitorargs if ($monitorargs);

    @ghosts = ();

    #
    # if monitor ends with ";;", do not append groups
    # to command line
    #
    if ($monitor =~ /;;\s*$/) {
        $monitor =~ s/\s*;;\s*$//;
        @args = quotewords ('\s+', 0, $monitor);
        @ghosts = (1);

    #
    # exclude disabled hosts
    #
    } else {
        @ghosts = grep (!/^\*/, @{$groups{$group}});

        #
        # per-service excludes
        #
        if (keys %{$sref->{"exclude_hosts"}})
        {
            @ghosts = grep { !$sref->{"exclude_hosts"}->{$_} } @ghosts;
        }

        @args = (quotewords ('\s+', 0, $monitor), @ghosts);
    }

    if (@ghosts == 0 && !defined ($sref->{"allow_empty_group"})) {
        syslog ('err', "monitor for $group/$service" .
                " not called because of no host arguments\n");

    } else {
        $fhandles{$id} = FileHandle->new;

        $pid = open ($fhandles{$id}, '-|');
        if (!defined $pid) {
            syslog ('err', "Could not fork: $!");
            delete $fhandles{$id};
            return 0;

        } elsif ($pid == 0) {
            #
            # child: stderr joins stdout on the pipe, stdin is empty
            #
            open (STDERR, '>&STDOUT')
                or syslog ('err', "Could not dup stderr: $!");
            open (STDIN, "</dev/null")
                or syslog ('err', "Could not connect stdin to /dev/null: $!");
            foreach my $v (keys %{$sref->{"ENV"}}) {
                $ENV{$v} = $sref->{"ENV"}->{$v};
            }
            $ENV{"MON_LAST_SUMMARY"} = $sref->{"_last_summary"};
            $ENV{"MON_LAST_OUTPUT"} = $sref->{"_last_output"};
            $ENV{"MON_LAST_FAILURE"} = $sref->{"_last_failure"};
            $ENV{"MON_FIRST_FAILURE"} = $sref->{"_first_failure"};
            $ENV{"MON_DEPEND_STATUS"} = $sref->{"_depend_status"};
            $ENV{"MON_LAST_SUCCESS"} = $sref->{"_last_success"};
            $ENV{"MON_STATEDIR"} = $CF{"STATEDIR"};
            $ENV{"MON_LOGDIR"} = $CF{"LOGDIR"};

            #
            # the child must never return into the server code, so
            # exit unconditionally when exec fails, whatever syslog
            # happens to return
            #
            exec @args or do {
                syslog ('err', "could not exec '@args': $!");
                exit (1);
            };
        }

        $sref->{"_last_check"} = scalar (time);

        if ($sref->{"_next_check"})
        {
            $sref->{"_next_check"} += $sref->{"interval"};
        }

        else
        {
            $sref->{"_next_check"} =
                $sref->{"_last_check"} + $sref->{"interval"};
        }

        debug (1, "watching file handle ", fileno ($fhandles{$id}),
            " for $group/$service\n");

        #
        # set nonblocking I/O and setup bit vector for select(2)
        #
        configure_filehandle ($fhandles{$id}) ||
                syslog ("err", "could not configure filehandle for $group/$service: $!");
        vec ($fdset_rbits, fileno ($fhandles{$id}), 1) = 1;
        $fdset_ebits |= $fdset_rbits;

        #
        # note that this is running
        #
        $running{$id} = 1;
        $runningpid{$pid} = $id;
        $ibufs{$id} = "";
        $procs++;
    }
}


#
# set the countdown timer for this service
#
# "_timer" is chosen as follows:
#   - with a non-zero "randskew": the interval plus or minus a random
#     whole number (at least 1) derived from randskew
#   - otherwise, when "_next_check" is set: the time left until then
#   - otherwise: the plain interval
#
sub reset_timer {
    my ($group, $service) = @_;

    my $svc = \%{$watch{$group}->{$service}};

    if ($svc->{"randskew"} != 0) {
        my $sign = (int (rand (2)) == 0) ? -1 : 1;
        my $skew = int (rand ($svc->{"randskew"}) + 1);
        $svc->{"_timer"} = $svc->{"interval"} + $sign * $skew;

    } elsif ($svc->{"_next_check"}) {
        $svc->{"_timer"} = $svc->{"_next_check"} - time ();

    } else {
        $svc->{"_timer"} = $svc->{"interval"};
    }
}


#
# randomize the delay before each test
#
# Every service of every watch gets a "_timer" of a random whole
# number of seconds below $CF{"RANDSTART"}.
#
sub randomize_startdelay {
    for my $group (keys %watch) {
        for my $service (keys %{$watch{$group}}) {
            my $delay = int (rand ($CF{"RANDSTART"}));
            $watch{$group}->{$service}->{"_timer"} = $delay;
        }
    }
}


#
# return 1 if $val is within $range, 0 otherwise,
# where $range = "number" or "number-number"
#
# A single number must equal $val; a "low-high" range (white space
# around the dash is allowed) is inclusive at both ends. Any other
# form of $range yields 0.
#
sub inRange {
    my ($val, $range) = @_;

    return 1
        if ($range =~ /^(\d+)$/ && $val == $1);

    return 1
        if ($range =~ /^(\d+)\s*-\s*(\d+)$/ && $val >= $1 && $val <= $2);

    return 0;
}


#
# disable ($cmd==0) or enable a watch
#
# Returns undef when watch $w is not defined, otherwise the new value
# stored in %watch_disabled (1 = disabled, 0 = enabled).
#
sub disen_watch {
    my ($w, $cmd) = @_;

    if (!defined ($watch{$w})) {
        return undef;
    }

    return $watch_disabled{$w} = ($cmd == 0) ? 1 : 0;
}


#
# disable ($cmd==0) or enable a service
#
# Returns undef when watch $g or its service $s is not defined,
# otherwise the new value of the service's "disable" flag
# (1 = disabled, 0 = enabled).
#
sub disen_service {
    my ($g, $s, $cmd) = @_;

    return undef
        unless (defined ($watch{$g}) && defined ($watch{$g}->{$s}));

    return $watch{$g}->{$s}->{"disable"} = ($cmd == 0) ? 1 : 0;
}


#
# disable ($cmd==0) or enable a host
#
# A disabled host is kept in its group lists with a leading "*".
# Every entry naming $h in any group is switched to the requested
# state. Returns 1 if at least one entry was changed, undef otherwise.
#
# The host name is compared literally rather than being used as a
# regular expression, so a "." in the name matches only a dot and a
# name containing regex metacharacters cannot raise a pattern error.
#
sub disen_host {
    my ($h, $cmd) = @_;

    my $found = undef;

    my ($from, $to) = ($cmd == 0) ? ($h, "*$h") : ("*$h", $h);

    foreach my $g (keys %groups) {
        # $member aliases the list element, so assigning updates the group
        foreach my $member (@{$groups{$g}}) {
            next if ($member ne $from);
            $member = $to;
            $found = 1;
        }
    }

    $found;
}


#
# return 1 if $host is a member of any group, 0 otherwise
#
# Entries of disabled hosts carry a leading "*" and therefore do not
# count as a match for the plain host name. The name is compared
# literally instead of being used as a regular expression.
#
sub host_exists {
    my $host = shift;

    foreach my $g (keys %groups) {
        return 1 if (grep { $_ eq $host } @{$groups{$g}});
    }

    return 0;
}


#
# save state
#
# For each name in @states a file of that name is written below
# $CF{STATEDIR}:
#   "disabled"  one "disable host|watch|service ..." line for every
#               disabled host, watch and service
#   "opstatus"  one line per service with its op status and failure
#               count
# Other names are ignored. Failures to open or to finish writing a
# file are reported through syslog.
#
sub save_state {
    my (@states) = @_;

    foreach my $state (@states) {
        if ($state eq "disabled") {
            if (!open (STATE, ">$CF{STATEDIR}/disabled")) {
                syslog ("err", "could not write to state file: $!");
                next;
            }

            #
            # disabled hosts are the group members with a leading "*"
            #
            foreach my $group (keys %groups) {
                foreach my $member (@{$groups{$group}}) {
                    next if ($member !~ /^\*/);
                    (my $host = $member) =~ s/^\*//;
                    print STATE "disable host $host\n";
                }
            }
            foreach my $group (keys %watch) {
                if ($watch_disabled{$group} == 1) {
                    print STATE "disable watch $group\n";
                }
                foreach my $service (keys %{$watch{$group}}) {
                    if ($watch{$group}->{$service}->{'disable'} == 1) {
                        print STATE "disable service $group $service\n";
                    }
                }
            }

            #
            # buffered write errors only show up at close time
            #
            close (STATE)
                or syslog ("err", "could not finish writing state file: $!");

        } elsif ($state eq "opstatus") {
            if (!open (STATE, ">$CF{STATEDIR}/opstatus")) {
                syslog ("err", "could not write to opstatus state file: $!");
                next;
            }
            foreach my $group (keys %watch) {
                foreach my $service (keys %{$watch{$group}}) {
                    print STATE "group=$group service=$service" .
                        " op_status=$watch{$group}->{$service}->{_op_status}" .
                        " failure_count=$watch{$group}->{$service}->{_failure_count}" .
                        " alert_count=\n";
                }
            }
            close (STATE)
                or syslog ("err", "could not finish writing opstatus state file: $!");
        }
    }
}


#
# load state
#
# Only the "disabled" state is read back: every "disable host|watch|
# service ..." line of $CF{STATEDIR}/disabled is applied through
# disen_host, disen_watch or disen_service. Those are called without
# a command argument, which they treat as "disable".
#
sub load_state {
    my (@states) = @_;

    foreach my $state (@states) {
        next unless ($state eq "disabled");

        if (!open (STATE, "$CF{STATEDIR}/disabled")) {
            syslog ("err", "could not read state file: $!");
            next;
        }

        while (defined (my $line = <STATE>)) {
            chomp $line;
            my ($cmd, $what, $args) = split (/\s+/, $line, 3);

            next if ($cmd ne "disable");

            if ($what eq "host") {
                disen_host ($args);

            } elsif ($what eq "watch") {
                if (!defined disen_watch ($args)) {
                    syslog ("err", "undefined watch reading state file: $line");
                }

            } elsif ($what eq "service") {
                my ($group, $service) = split (/\s+/, $args, 2);
                if (!defined disen_service ($group, $service)) {
                    syslog ("err",
                        "undefined group or service reading state file: $line");
                }
            }
        }

        syslog ("info", "state '$state' loaded");
        close (STATE);
    }
}


#
# authenticate a login
#
# $type selects the method:
#   "getpwnam"  compare against the password field from getpwnam
#   "shadow"    not implemented, always fails
#   "userfile"  compare against the "user : crypted" lines of
#               $CF{"USERFILE"}
#   "pam"       ask PAM, service $CF{'PAMSERVICE'}
# Returns 1 when $plaintext is accepted for $user, undef otherwise
# (including an empty user or password, or an unknown $type).
#
sub auth {
    my ($type, $user, $plaintext) = @_;

    if ($user eq "" || $plaintext eq "") {
        syslog ('err', "an undef username or password supplied");
        return undef;
    }

    #
    # standard UNIX passwd
    #
    if ($type eq "getpwnam") {
        my (undef, $pass) = getpwnam ($user);
        return undef if (!defined $pass);
        return undef if (!_auth_crypt_ok ($plaintext, $pass));
        return 1;

    #
    # shadow password
    #
    } elsif ($type eq "shadow") {
        # nothing here yet, falls through to the failure return below

    #
    # "mon" authentication
    #
    } elsif ($type eq "userfile") {
        if (!open (U, $CF{"USERFILE"})) {
            syslog ('err', "could not open user file '$CF{USERFILE}': $!");
            return undef;
        }

        # a later line for the same user overrides an earlier one
        my %crypted_for;
        while (defined (my $line = <U>)) {
            next if ($line =~ /^\s*#/ || $line =~ /^\s*$/);
            chomp $line;
            my ($name, $crypted) = split (/\s*:\s*/, $line, 2);
            $crypted_for{$name} = $crypted;
        }
        close (U);

        # user was not found in userfile
        return undef if (!defined ($crypted_for{$user}));

        # user gave wrong password
        return undef if (!_auth_crypt_ok ($plaintext, $crypted_for{$user}));

        return 1;

    #
    # PAM authentication
    #
    } elsif ($type eq "pam") {
        # these two are localized globals, presumably read by
        # pam_conv_func -- TODO confirm
        local $PAM_username = $user;
        local $PAM_password = $plaintext;
        my $pamh;
        if (!ref ($pamh = Authen::PAM->new ($CF{'PAMSERVICE'}, $PAM_username, \&pam_conv_func))) {
            syslog ('err', "Error code $pamh during PAM init!: $!");
            return undef;
        }
        my $res = $pamh->pam_authenticate;
        return undef if ($res != &Authen::PAM::PAM_SUCCESS);
        return 1;

    } else {
        syslog ('err', "authentication type '$type' not known");
    }

    return undef;
}


#
# return 1 if $plaintext crypts to the stored value $crypted, else 0
#
# An undefined or empty stored value never matches, and neither does
# an undefined result from crypt (which would otherwise compare equal
# to an empty stored value).
#
sub _auth_crypt_ok {
    my ($plaintext, $crypted) = @_;

    return 0 if (!defined ($crypted) || $crypted eq "");

    my $result = crypt ($plaintext, $crypted);

    return 0 if (!defined ($result));
    return ($result eq $crypted) ? 1 : 0;
}


#
# load the table of who can do which commands
#
# Reads $CF{"AUTHFILE"} and rebuilds %AUTHCMDS, %NOAUTHCMDS, %AUTHTRAPS
# and %AUTHSNMPTRAPS from it. The file consists of three kinds of
# section, introduced by "command section" (the default), "trap
# section" and "snmp trap section":
#   command    "cmd : user, !user, AUTH_ANY, ..."; "!user" denies
#   trap       "host user password"; host is "*", a name or an address
#              and is stored as a dotted quad (or "*")
#   snmptrap   "host password"
# Problems are reported through err_startup (fatal when $startup is
# true) or syslog. Returns undef when the file cannot be opened.
#
sub load_auth {
    my ($startup) = @_;
    my ($l, $cmd, $users, $u, $host, $user, $password, $sect);

    %AUTHCMDS = ();
    %NOAUTHCMDS = ();
    %AUTHTRAPS = ();
    %AUTHSNMPTRAPS = ();
    $sect = "command";

    if (!open (C, $CF{"AUTHFILE"})) {
        err_startup ($startup, "could not open $CF{AUTHFILE}: $!");
        return undef;
    }

    while (defined ($l = <C>)) {
        next if ($l =~ /^\s*#/ || $l =~ /^\s*$/);
        chomp $l;
        $l =~ s/^\s*//;
        $l =~ s/\s*$//;

        if ($l =~ /^command\s+section/) {
            $sect = "command";
            next;
        } elsif ($l =~ /^trap\s+section/) {
            $sect = "trap";
            next;
        } elsif ($l =~ /^snmp\s+trap\s+section/) {
            $sect = "snmptrap";
            next;
        }

        if ($sect eq "command") {
            ($cmd, $users) = split (/\s*:\s*/, $l, 2);
            if (!defined $users) {
                err_startup ($startup, "could not parse line $. of auth file\n");
                next;
            }
            foreach $u (split (/\s*,\s*/, $users)) {
                if ($u =~ /^!(.*)/) {
                    # Directive is to "deny-user"
                    $NOAUTHCMDS{"\L$cmd"}{$1} = 1;
                } else {
                    # Directive is to "allow-user"; AUTH_ANY (all
                    # authenticated users) is stored the same way
                    $AUTHCMDS{"\L$cmd"}{$u} = 1;
                }
            }

        } elsif ($sect eq "trap") {
            if ($l !~ /^(\S+)\s+(\S+)\s+(\S+)$/) {
                syslog ('err', "invalid entry in trap sect of $CF{AUTHFILE}, line $.");
                next;
            }
            ($host, $user, $password) = ($1, $2, $3);

            #
            # "*" allows traps from all hosts; anything else has to
            # be an address or a resolvable name, both of which
            # inet_aton accepts
            #
            if ($host ne "*")
            {
                my $packed = inet_aton ($host);
                if (!defined ($packed)) {
                    syslog ('err', "invalid host in $CF{AUTHFILE}, line $.");
                    next;
                }
                $host = inet_ntoa ($packed);
            }

            $AUTHTRAPS{$host}{$user} = $password;

        } elsif ($sect eq "snmptrap") {

            if ($l !~ /^(\S+)\s+(\S+)$/) {
                syslog ('err', "invalid line in $CF{AUTHFILE}, line $.");
                next;
            }

            ($host, $password) = ($1, $2);
            $AUTHSNMPTRAPS{$host}{$password} = 1;

        } else {
            syslog ('err', "unknown section in $CF{AUTHFILE}: $l");
        }
    }
    close (C);
}


#
# return undef if $user isn't permitted to perform $cmd, 1 otherwise
#
# An explicit denial of an authenticated user (by name, or through a
# denied AUTH_ANY) wins over everything else and is always logged.
# Otherwise the command is allowed when it is open to "all", or when
# the user is authenticated and either AUTH_ANY or the user's own
# name is allowed.
#
# The optional third argument controls logging of the final refusal:
#  check_auth("joe", "disable")
#   complains to syslog if joe is not authorized
#  check_auth("joe", "disable", 1)
#   does NOT complain to syslog if joe is not authorized
#
sub check_auth {
    my ($user, $cmd, $no_syslog) = @_;

    my $authenticated = defined ($user);

    #
    # specifically denied?
    #
    if ($authenticated &&
            ($NOAUTHCMDS{$cmd}{$user} || $NOAUTHCMDS{$cmd}{"AUTH_ANY"}))
    {
        syslog ("err", "user '$user' tried '$cmd', denied");
        return undef;
    }

    #
    # "all" admits any client, authenticated or not; AUTH_ANY and a
    # user name only count for authenticated clients
    #
    my $granted = $AUTHCMDS{$cmd}{"all"} ||
        ($authenticated &&
            ($AUTHCMDS{$cmd}{"AUTH_ANY"} || $AUTHCMDS{$cmd}{$user}));

    if (!$granted) {
        syslog ("err", "user '$user' tried '$cmd', not authenticated") unless defined($no_syslog);
        return undef;
    }

    return 1;
}


#
# reload things
#
# Each element of @what names something to reload: "auth" (load_auth)
# or "oncall" (load_oncall). The elements are handled in order; the
# first unknown name stops processing and makes the sub return undef.
# Returns 1 when every name was known.
#
sub reload {
    my (@what) = @_;

    my %loader = (
        "auth"   => \&load_auth,
        "oncall" => \&load_oncall,
    );

    for (@what) {
        my $load = $loader{$_};
        return undef if (!defined ($load));
        $load->();
    }

    return 1;
}


#
# (re)load the oncall schedule
#
# Parses $CF{"OCFILE"}; every non-comment line has the form
#   group service time who
# where time is "default", "none" or a three-letter word followed by
# h:mm or hh:mm. Group and service must be defined in the
# configuration. Errors are reported through err_startup (fatal when
# $startup is true) and make the sub return undef. Returns 1 on
# success.
#
# NOTE(review): nothing is stored in %newoncall yet, so %oncall ends
# up empty after a successful load, and each parsed line is printed
# to standard output -- this looks unfinished.
#
sub load_oncall {
    my ($startup) = @_;
    my ($group, $service, $time, $who, %newoncall);

    if (!open (ONCALL, $CF{"OCFILE"})) {
        err_startup ($startup, "could not open $CF{OCFILE}: $!");
        return undef;
    }

    %newoncall = ();
    while (defined (my $line = <ONCALL>)) {
        next if ($line =~ /^\s*$/ || $line =~ /^\s*#/);
        chomp $line;
        if ($line !~ /^\s* ([a-zA-Z0-9_.-]+) \s+
                ([a-zA-Z0-9_.-]+) \s+
                (\w{3} \s+ \d{1,2}:\d\d|default|none) \s+
                (.*) \s*$/xi) {

            # hand over the message itself, not the result of syslog
            err_startup ($startup,
                "error in oncall configuration, line $.");
            close (ONCALL);
            return undef;
        }

        ($group, $service, $time, $who) = ($1, $2, $3, $4);
        $group =~ tr/A-Z/a-z/;
        $service =~ tr/A-Z/a-z/;
        $time =~ tr/A-Z/a-z/;

        if (!defined ($groups{$group})) {
            err_startup ($startup,
                "group $group in oncall line $. not defined in $CF{OCFILE}");
            close (ONCALL);
            return undef;
        } elsif (!defined $watch{$group}->{$service}) {
            err_startup ($startup,
                "service $service in oncall line $. not defined in $CF{OCFILE}");
            close (ONCALL);
            return undef;
        }

        print "[$group] [$service] [$time] [$who]\n";
    }
    close (ONCALL);

    %oncall = %newoncall;
    1;
}


#
# report an error: fatally (die) while starting up, i.e. when $startup
# is true, and through syslog at priority "err" otherwise
#
sub err_startup {
    my ($startup, $msg) = @_;

    die "$msg\n" if ($startup);

    syslog ('err', $msg);
}


#
# handle SNMP trap
#
# $buf is the received datagram and $from the packed sender address.
# The sender must have an entry in %AUTHSNMPTRAPS, the datagram must
# decode, and the community, crypted with itself as the salt, must be
# one of the passwords listed for the sender. Returns undef when any
# of these checks fails and 1 otherwise.
#
# NOTE(review): nothing is done with an accepted trap yet.
#
sub handle_snmp_trap {
    my ($buf, $from) = @_;
    my ($port, $addr, $fromip);
    my (%traphash);

    ($port, $addr) = sockaddr_in ($from);
    $fromip = inet_ntoa ($addr);

    if (!defined ($AUTHSNMPTRAPS{$fromip})) {
        syslog ('err', "got SNMP trap from unauthorized agent: $fromip");
        return undef;
    }

    $TRAP_PDU->buffer ($buf);
    %traphash = $TRAP_PDU->decode;

    if (! keys %traphash) {
        syslog ('err', "error decoding SNMP trap: " . $TRAP_PDU->error);
        return undef;
    }

    #
    # the passwords of an agent are the keys of a hash, so the crypted
    # community has to be looked up rather than compared to the entry
    #
    my $community = $traphash{"community"};
    my $crypted = defined ($community)
        ? crypt ($community, $community)
        : undef;

    if (!defined ($crypted) || !$AUTHSNMPTRAPS{$fromip}{$crypted}) {
        syslog ('err', "unauthorized community from agent: $fromip");
        return undef;
    }

    #
    # here's the real meat
    #

    return 1;
}


#
# handle a trap
#
# $buf holds the trap as "tag=value" lines and $from is the packed
# address of the sender. The trap is authenticated against %AUTHTRAPS,
# its protocol version and type are checked, and it is then applied to
# the group/service it names (or to default/default when that exists
# and the named group and service are both unknown): the op status and
# bookkeeping fields are updated, @last_failures gets a new entry and
# do_alert is called unless the trap is an "ok" trap for which no
# upalert is due.
#
# Returns undef when the trap is rejected by authentication, version
# or type checks, and an empty return for an unknown group, service or
# specific type.
#
sub handle_trap {
    my ($buf, $from) = @_;

    my $time = time;
    my $noalert = 0;
    my %trap = ();
    my $flags = 0;
    my $tmnow = $time;

#
# MON-specific tags
# pro	protocol
# aut	auth
# usr	username
# pas	password
# typ	type  ("failure", "up", "startup", "trap", "traptimeout")
# spc	specific type (TRAP_*)
# seq	sequence
# grp	group
# svc	service
# hst	host
# sta	status (opstatus)
# tsp	timestamp as time(2) value
# sum	summary output
# dtl	detail
#

    foreach my $l (split (/\n/, $buf))
    {
        if ($l =~ /^(\w+)=(.*)/)
        {
            my ($tag, $trap_val) = ($1, $2);
            $trap{$tag} = un_esc_str ((parse_line ('\s+', 0, $trap_val))[0]);
        }

        else
        {
            syslog ('err', "unspecified tag in trap: $l");
        }
    }

    $trap{"sum"} = "$trap{sum}\n" if ($trap{"sum"} !~ /\n$/);

    my ($port, $addr) = sockaddr_in ($from);
    my $fromip = inet_ntoa ($addr);

    #
    # trap authentication
    #
    # %AUTHTRAPS is keyed by "*" or by dotted-quad address. The host
    # entry is tested with exists before anything below it is looked
    # at, so that an unknown sender does not create an entry.
    #
    my ($traphost, $trapuser, $trappass);

    if (exists ($AUTHTRAPS{"*"}))
    {
        $traphost = "*";
    }

    elsif (exists ($AUTHTRAPS{$fromip}))
    {
        $traphost = $fromip;
    }

    else
    {
        syslog ('err', "received trap from unauthorized host: $fromip");
        return undef;
    }

    if (defined ($AUTHTRAPS{$traphost}{"*"}))
    {
        $trapuser = "*";
        $trappass = "";
    }

    else
    {
        $trapuser = $trap{"usr"};
        $trappass = $trap{"pas"};
    }

    if ($trapuser ne "*")
    {
        #
        # an unknown user, a missing password or a failing crypt
        # must all count as a mismatch
        #
        my $stored = defined ($trapuser)
            ? $AUTHTRAPS{$traphost}{$trapuser}
            : undef;
        my $crypted = (defined ($stored) && $stored ne "" &&
                defined ($trappass))
            ? crypt ($trappass, $stored)
            : undef;

        if (!defined ($crypted) || $crypted ne $stored)
        {
            syslog ('err', "received trap from unauthorized user $trapuser, host $traphost");
            return undef;
        }
    }

    #
    # protocol version
    #
    if ($trap{"pro"} < $TRAP_PRO_VERSION)
    {
        syslog ('err', "cannot handle traps from version less than $TRAP_PRO_VERSION");
        return undef;
    }

    #
    # validate trap type
    #
    if (!defined $trap{"typ"} || !defined ($trap{"spc"}))
    {
        syslog ('err', "no trap type specified from $fromip");
        return undef;
    }

    #
    # test for a service without creating a %watch entry for a
    # group name which was supplied by the sender
    #
    my $service_defined = sub {
        my ($g, $s) = @_;
        return (exists ($watch{$g}) && defined ($watch{$g}->{$s})) ? 1 : 0;
    };

    #
    # if mon receives a trap for an unknown group/service, then the
    # default/default group/service should catch these if it is defined
    #
    my $intended;
    if ((!defined ($groups{$trap{"grp"}}) &&
            !$service_defined->($trap{"grp"}, $trap{"svc"})) &&
            (defined ($groups{'default'}) &&
            $service_defined->('default', 'default')))
    {
        $intended = "$trap{'grp'}:$trap{'svc'}";
        $trap{"grp"} = "default";
        $trap{"svc"} = "default";
    }

    if (!defined ($groups{$trap{"grp"}}))
    {
        syslog ('err', "trap received for undefined group $trap{grp}");
        return;
    }

    elsif (!$service_defined->($trap{"grp"}, $trap{"svc"}))
    {
        syslog ('err', "trap received for undefined service type $trap{grp}/$trap{svc}");
        return;
    }

    my $sref = \%{$watch{$trap{"grp"}}->{$trap{"svc"}}};
    $sref->{"_last_trap"} = $time;
    $sref->{"_last_detail"} = $trap{"dtl"};
    $sref->{"_last_summary"} = $trap{"sum"};

    if ($intended)
    {
       $sref->{"_intended"} = $intended;
    }

    #
    # status before this trap; needed below to tell the start of a
    # failure from a repeated failure
    #
    my $old_status = $sref->{"_op_status"};

    syslog ('info', "trap $trap{typ} $trap{spc} from " .
        "$fromip for $trap{grp} $trap{svc}, status $trap{sta}");

    my $group = $trap{"grp"};
    my $service = $trap{"svc"};

    #
    # Not sure what I want to do with this. It's not done, and
    # just because it's here doesn't mean that it is meant to work
    # how it is coded.
    #
    if ($trap{"spc"} == $STAT_COLDSTART)
    {
        set_op_status ($group, $service, $STAT_COLDSTART);
        $sref->{"_trap_duration_timer"} = $sref->{"trapduration"}
            if ($sref->{"trapduration"});
    }

    elsif ($trap{"spc"} == $STAT_WARMSTART)
    {
        set_op_status ($group, $service, $STAT_WARMSTART);
        $sref->{"_trap_duration_timer"} = $sref->{"trapduration"}
            if ($sref->{"trapduration"});
        $sref->{"_last_uptrap"} = $time;
    }

    elsif ($trap{"spc"} == $STAT_LINKDOWN)
    {
        set_op_status ($group, $service, $STAT_LINKDOWN);
        $sref->{"_failure_count"}++;
        $sref->{"_first_failure"} = $tm if ($old_status != $STAT_FAIL);
        $sref->{"_trap_duration_timer"} = $sref->{"trapduration"}
            if ($sref->{"trapduration"});
    }

    elsif ($trap{"spc"} == $STAT_OK)
    {
        if ($CF{"DTLOGGING"} && defined ($sref->{"_op_status"}) &&
               $sref->{"_op_status"} == $STAT_FAIL)
        {
            write_dtlog ($sref, $group, $service);
        }

        set_op_status ($group, $service, $STAT_OK);
        $sref->{"_last_uptrap"} = $time;
        $sref->{"_trap_duration_timer"} = $sref->{"trapduration"}
            if ($sref->{"trapduration"});
    }

    elsif ($trap{"spc"} == $STAT_FAIL)
    {
        set_op_status ($group, $service, $STAT_FAIL);
        $sref->{"_first_failure"} = $tm if ($old_status != $STAT_FAIL);
        $sref->{"_trap_duration_timer"} = $sref->{"trapduration"}
            if ($sref->{"trapduration"});
    }

    elsif ($trap{"spc"} == $STAT_WARN)
    {
        set_op_status ($group, $service, $STAT_WARN);

# 	} elsif ($trap{"spc"} == $STAT_HEARTBEAT) {
# 	    set_op_status ($group, $service, $STAT_OK);
# 	    $sref->{"_last_uptrap"} = $time;
# 	    $noalert++;
    }

    else
    {
        syslog ('err', "trap received from $fromip" .
                " for undefined type $trap{typ} $trap{spc} $trap{grp}");
        return;
    }

    shift @last_failures if (@last_failures > $CF{"MAX_KEEP"});

    push @last_failures, "$trap{grp} $trap{svc}" .
        " $tm $trap{typ} $trap{spc} $trap{sum}";

    if ($sref->{"depend"} ne "" &&
            $sref->{"dep_behavior"} eq "a")
    {
        dep_ok ($sref);
    }

    #
    #	if trap is FAIL, send an alert
    #	if trap is OK send upalert
    #	  upalert only gets sent if an upalert for this
    #	  trap is actually defined, and if the
    #	  upalertafter config is satisfied
    #

    $flags = 0;

    if ($trap{"spc"} == $STAT_OK) {

        $flags = $FL_UPALERT;

        if (defined ($sref->{"_upalert"})) {

            if ($tmnow - $sref->{"_first_failure"} <
                    $sref->{"upalertafter"})
            {
                $noalert++;
            }
        }
        else {
            $noalert++;
        }
    }
    #### else just fall through and send alert

    do_alert (
            $trap{"grp"},
            $trap{"svc"},
            $trap{"sum"} . $trap{"dtl"},
            $trap{"sta"},
            $FL_TRAP | $flags,
    ) unless ($noalert);

    if (defined ($sref->{"_intended"}))
    {
        undef ($sref->{"_intended"});
    }
}


#
# trap timeout
#
# Records a failure for $group/$service: bumps the failure count,
# stamps the failure times (the first-failure time only when the
# service was not already failing), sets the op status to failed with
# the summary "trap timeout", appends to @last_failures, logs the
# failure and calls do_alert with the trap timeout flag.
#
sub handle_trap_timeout {
    my ($group, $service) = @_;

    my $now = time;
    my $svc = \%{$watch{$group}->{$service}};

    $svc->{"_failure_count"}++;
    $svc->{"_last_failure"} = $now;
    if ($svc->{"_op_status"} != $STAT_FAIL) {
        $svc->{"_first_failure"} = $now;
    }
    set_op_status ($group, $service, $STAT_FAIL);

    $svc->{"_last_summary"} = "trap timeout";
    $svc->{"_last_detail"} = "";

    if (@last_failures > $CF{"MAX_KEEP"}) {
        shift @last_failures;
    }
    push @last_failures, "$group $service $tm $svc->{_last_summary}";
    syslog ('crit', "failure for $last_failures[-1]");

    do_alert ($group, $service, undef, undef, $FL_TRAPTIMEOUT);
}


#
# write to a socket
#
# Writes all of $buf to $sock, repeating syswrite until nothing is
# left. A write interrupted by a signal is retried at once, and a
# write that would block is retried after a pause of a tenth of a
# second. Returns undef as soon as any other error occurs.
#
sub sock_write {
    my ($sock, $buf) = @_;
    my ($nleft, $nwritten);

    $nleft = length ($buf);
    while ($nleft) {
        $nwritten = syswrite ($sock, $buf, $nleft);
        if (!defined ($nwritten)) {
            # interrupted by a signal, nothing was written: try again
            next if ($! == &EINTR);

            return undef if ($! != &EAGAIN);
            usleep (100000);
            next;
        }
        $nleft -= $nwritten;
        substr ($buf, 0, $nwritten) = "";
    }
}


#
# do I/O processing for traps and client connections
#
# Waits up to $SLEEPINT seconds in total for activity on the trap
# socket, the SNMP trap socket (when $CF{"SNMP"} is set), the server
# socket and all client connections. Traps are handed to handle_trap
# or handle_snmp_trap, new connections to client_accept, and data from
# clients is appended to their buffers and then processed through
# client_dopending. Afterwards clients which have been idle for
# $CF{"CLIENT_TIMEOUT"} seconds or more are closed.
#
sub handle_io {

    #
    # build iovec for server connections, traps, and clients
    #
    $iovec = '';
    my $niovec = '';
    vec ($iovec, fileno (TRAPSERVER), 1) = 1;
    vec ($iovec, fileno (SERVER), 1) = 1;
    vec ($iovec, fileno (SNMPSERVER), 1) = 1 if ($CF{"SNMP"});
    foreach my $cl (keys %clients) {
        vec ($iovec, $cl, 1) = 1;
    }

    #
    # handle client I/O while there is some to handle
    #
    my $sleep = $SLEEPINT;
    my $tm0 = [gettimeofday];
    my $n;
    while ($n = select ($niovec = $iovec, undef, undef, $sleep)) {
        #
        # errno is only meaningful when select failed (returned -1)
        #
        my ($errno, $errstr) = ($n < 0) ? ($! + 0, "$!") : (0, "");
        my $tm1 = [gettimeofday];

        if ($n < 0)
        {
            #
            # an interrupted select is not an error, there is just
            # nothing to read; anything else ends this round, since
            # the contents of $niovec cannot be trusted
            #
            if ($errno != &EINTR) {
                syslog ('err', "select returned an error for I/O loop: $errstr");
                last;
            }
        }

        else
        {
            #
            # mon trap
            #
            if (vec ($niovec, fileno (TRAPSERVER), 1)) {
                my ($from, $trapbuf);
                if (!defined ($from = recv (TRAPSERVER, $trapbuf, 65536, 0))) {
                    syslog ('err', "error trying to recv a trap: $!");
                } else {
                    handle_trap ($trapbuf, $from);
                }
                next;

            #
            # SNMP trap
            #
            } elsif ($CF{"SNMP"} && vec ($niovec, fileno (SNMPSERVER), 1)) {
                my ($from, $trapbuf);
                if (!defined ($from = recv (SNMPSERVER, $trapbuf, 65536, 0))) {
                    syslog ('err', "error trying to recv an SNMP trap: $!");
                } else {
                    handle_snmp_trap ($trapbuf, $from);
                }
                next;

            #
            # client connections
            #
            } elsif (vec ($niovec, fileno (SERVER), 1)) {
                client_accept;
            }

            #
            # read data from clients if any exists
            #
            if ($numclients) {
                foreach my $cl (keys %clients) {
                    next if (!vec ($niovec, $cl, 1));

                    my $buf = '';
                    my $nread = sysread ($clients{$cl}->{"fhandle"}, $buf, 8192);

                    if (!defined ($nread)) {
                        #
                        # nothing to read after all, keep the client
                        #
                        next if ($! == &EAGAIN || $! == &EINTR);
                        client_close ($cl, "read error: $!");

                    } elsif ($nread == 0) {
                        #
                        # the client closed the connection
                        #
                        client_close ($cl);

                    } else {
                        $clients{$cl}->{"buf"} .= $buf;
                        $clients{$cl}->{"timeout"} = $CF{"CLIENT_TIMEOUT"};
                        $clients{$cl}->{"last_read"} = time;
                    }
                }
            }
        }

        #
        # execute client commands which have been read
        #
        client_dopending if ($numclients);

        last if (tv_interval ($tm0, $tm1) >= $SLEEPINT);

        $sleep = $SLEEPINT - tv_interval ($tm0, $tm1);
    }

    #
    # count down client inactivity timeouts and close expired connections
    #
    if ($numclients) {
        foreach my $cl (keys %clients) {
            my $timenow = time;
            $clients{$cl}->{"timeout"} = $timenow - $clients{$cl}->{"last_read"};

            if ($clients{$cl}->{"timeout"} >= $CF{"CLIENT_TIMEOUT"}) {
                client_close ($cl, "timeout after $CF{CLIENT_TIMEOUT}s");
            }
        }
    }
}


#
# generate alert and monitor path hashes
#
# Rebuilds %MONITORHASH and %ALERTHASH, which map the name of a
# monitor or alert program to its full path. The directories in the
# colon-separated lists $CF{"SCRIPTDIR"} and $CF{"ALERTDIR"} are
# searched in order and the first one holding an executable of that
# name wins. Unusable directories and programs which are not found
# are reported through syslog.
#
sub gen_scriptdir_hash {
    my (@scriptdirs, @alertdirs);

    %MONITORHASH = ();
    %ALERTHASH = ();

    foreach my $d (split (/\s*:\s*/, $CF{"SCRIPTDIR"})) {
        if (-d "$d" && -x "$d") {
            push (@scriptdirs, $d);
        } else {
            syslog ('err', "scriptdir $d is not usable");
        }
    }

    foreach my $d (split (/\s*:\s*/, $CF{"ALERTDIR"})) {
        if (-d $d && -x $d) {
            push (@alertdirs, $d);
        } else {
            syslog ('err', "alertdir $d is not usable");
        }
    }

    #
    # monitors
    #
    foreach my $group (keys %watch) {
        foreach my $service (keys %{$watch{$group}}) {
            next if (!defined $watch{$group}->{$service}->{"monitor"});

            # the program name is the first word of the monitor line
            my $monitor = (split (/\s+/, $watch{$group}->{$service}->{"monitor"}))[0];
            my $found = 0;
            foreach my $dir (@scriptdirs) {
                if (-x "$dir/$monitor") {
                    $MONITORHASH{$monitor} = "$dir/$monitor"
                        unless (defined $MONITORHASH{$monitor});
                    $found++;
                    last;
                }
            }
            if (!$found) {
                syslog ('err', "$monitor not found in one of (\@scriptdirs[@scriptdirs])");
            }
        }
    }

    #
    # alerts
    #
    foreach my $group (keys %watch) {
        foreach my $service (keys %{$watch{$group}}) {
            foreach my $period (keys %{$watch{$group}->{$service}->{"periods"}}) {
                my $pref = $watch{$group}->{$service}->{"periods"}->{$period};
                foreach my $my_alert (
                        @{$pref->{"alerts"}},
                        @{$pref->{"upalerts"}},
                        @{$pref->{"startupalerts"}},
                            ) {
                    #
                    # strip leading "name=value " words and anything
                    # after the program name
                    #
                    my $alert = $my_alert;
                    $alert =~ s/^(\S+=\S+ )*(\S+).*$/$2/;
                    my $found = 0;
                    foreach my $dir (@alertdirs) {
                        if (-x "$dir/$alert") {
                            $ALERTHASH{$alert} = "$dir/$alert"
                                unless (defined $ALERTHASH{$alert});
                            $found++;
                            last;
                        }
                    }
                    if (!$found) {
                        syslog ('err', "$alert not found in one of (\@alertdirs[@alertdirs])");
                    }
                }
            }
        }
    }

}


#
# do some processing on dirs
#
# Turns the relative paths in %CF into absolute ones and complains
# through syslog about those which do not exist:
#   STATEDIR, LOGDIR                 relative to BASEDIR
#   AUTHFILE, OCFILE, USERFILE       relative to CFBASEDIR (USERFILE
#                                    only when AUTHTYPE lists "userfile")
#   DTLOGFILE, HISTORICFILE          relative to LOGDIR (HISTORICFILE
#                                    only when it is not empty)
#   SCRIPTDIR, ALERTDIR              colon-separated lists, each
#                                    element relative to BASEDIR and
#                                    with one trailing "/" removed
#
sub normalize_paths {

    #
    # put $base in front of $CF{$key} unless it starts with "/"
    #
    my $anchor = sub {
        my ($key, $base) = @_;
        if ($CF{$key} !~ m{^/}) {
            $CF{$key} = "$base/$CF{$key}";
        }
    };

    #
    # do some sanity checks on dirs
    #
    $anchor->("STATEDIR", $CF{"BASEDIR"});
    if (! -d $CF{"STATEDIR"}) {
        syslog ('err', "$CF{STATEDIR} does not exist");
    }

    $anchor->("LOGDIR", $CF{"BASEDIR"});
    if (! -d $CF{"LOGDIR"}) {
        syslog ('err', "$CF{LOGDIR} does not exist");
    }

    $anchor->("AUTHFILE", $CF{"CFBASEDIR"});
    if (! -f $CF{"AUTHFILE"}) {
        syslog ('err', "$CF{AUTHFILE} does not exist");
    }

    $anchor->("OCFILE", $CF{"CFBASEDIR"});

    foreach my $authtype (split (' ', $CF{"AUTHTYPE"})) {
        next if ($authtype ne "userfile");

        $anchor->("USERFILE", $CF{"CFBASEDIR"});
        if (! -f $CF{"USERFILE"}) {
            syslog ('err', "$CF{USERFILE} does not exist");
        }
    }

    # LOGDIR has been made absolute above
    $anchor->("DTLOGFILE", $CF{"LOGDIR"});

    if ($CF{"HISTORICFILE"} ne "") {
        $anchor->("HISTORICFILE", $CF{"LOGDIR"});
    }

    #
    # script and alert dirs may have multiple paths
    #
    foreach my $key ("SCRIPTDIR", "ALERTDIR") {
        my @dirs;
        foreach my $d (split (/\s*:\s*/, $CF{$key})) {
            $d =~ s{/$}{};
            $d = "$CF{BASEDIR}/$d" if ($d !~ m{^/});
            syslog ('err', "$d does not exist, check your alertdir and mondir paths")
                unless (-d $d);
            push @dirs, $d;
        }
        $CF{$key} = join (":", @dirs);
    }
}


#
# Record a new operational status for $group/$service, remembering
# the status being replaced in "_last_op_status".
#
sub set_op_status {
    my ($group, $service, $status) = @_;

    my $sref = \%{$watch{$group}->{$service}};

    @{$sref}{"_last_op_status", "_op_status"} =
	($sref->{"_op_status"}, $status);
}


#
# Dump the path-related configuration and the monitor/alert lookup
# tables to STDERR, for debugging.
#
sub debug_dir {
    print STDERR <<EOF;
    basedir	[$CF{BASEDIR}]
    cfbasedir	[$CF{CFBASEDIR}]

    cf		[$CF{CF}]
    statedir	[$CF{STATEDIR}]
    logdir	[$CF{LOGDIR}]
    authfile	[$CF{AUTHFILE}]
    ocfile	[$CF{OCFILE}]
    userfile	[$CF{USERFILE}]
    dtlogfile	[$CF{DTLOGFILE}]
    historicfile[$CF{HISTORICFILE}]
    monerrfile  [$CF{MONERRFILE}]
    scriptdir	[$CF{SCRIPTDIR}]
    alertdir	[$CF{ALERTDIR}]
EOF

    #
    # one line per entry, monitors ("M") first, then alerts ("A")
    #
    foreach my $table (["M", \%MONITORHASH], ["A", \%ALERTHASH]) {
	my ($tag, $lookup) = @{$table};

	foreach my $name (keys %{$lookup}) {
	    print STDERR "$tag $name=[$lookup->{$name}]\n";
	}
    }
}


#
# Every global which the config file is allowed to change lives in
# %CF.  This routine loads the built-in defaults, taking the base
# directories, the config file name and the log directory from the
# command line options (%opt) when they were supplied.
#
sub init_cf_globals {
    #
    # return the first candidate which is an existing directory,
    # or the fallback if none of them are
    #
    my $pick_dir = sub {
	my ($fallback, @candidates) = @_;

	foreach my $candidate (@candidates) {
	    return $candidate if (-d $candidate);
	}
	return $fallback;
    };

    #
    # base directories and the config file itself
    #
    my $basedir = $opt{"b"} || "/usr/lib/mon";
    $basedir =~ s{/$}{};
    $CF{"BASEDIR"} = $basedir;
    $CF{"CFBASEDIR"} = $opt{"B"} || "/etc/mon";

    my $cf = $opt{"c"} || "$CF{CFBASEDIR}/mon.cf";
    $cf = "$PWD/$cf" unless ($cf =~ /^\//);
    $CF{"CF"} = $cf;

    #
    # locations which depend on what exists on this machine
    #
    $CF{"LOGDIR"} = $opt{"L"} || $pick_dir->("log.d", "/var/log/mon");
    $CF{"STATEDIR"} = $pick_dir->("state.d", "/var/state/mon", "/var/lib/mon");
    $CF{"PIDFILE"} = $pick_dir->("/etc", "/var/run/mon", "/var/run") . "/mon.pid";

    #
    # ports come from the services database when "mon" is listed there
    #
    my $servport = getservbyname ("mon", "tcp") || 2583;
    my $trapport = getservbyname ("mon", "udp") || 2583;

    #
    # everything else is a fixed default
    #
    my %defaults = (
	"SCRIPTDIR"		=> "/usr/local/lib/mon/mon.d:mon.d",
	"ALERTDIR"		=> "/usr/local/lib/mon/alert.d:alert.d",
	"AUTHFILE"		=> "auth.cf",
	"AUTHTYPE"		=> "getpwnam",
	"PAMSERVICE"		=> "passwd",
	"USERFILE"		=> "monusers.cf",
	"OCFILE"		=> "oncall.cf",
	"MONERRFILE"		=> "/dev/null",
	"DTLOGFILE"		=> "downtime.log",
	"DTLOGGING"		=> 0,
	"MAX_KEEP"		=> 100,
	"CLIENT_TIMEOUT"	=> 30,
	"SERVPORT"		=> $servport,
	"TRAPPORT"		=> $trapport,
	"MAXPROCS"		=> 0,
	"SNMP"			=> 0,
	"SNMPPORT"		=> 34000,
	"HISTORICFILE"		=> "",
	"HISTORICTIME"		=> 0,
	"DEP_RECUR_LIMIT"	=> 10,
	"SYSLOG_FACILITY"	=> "daemon",
	"STARTUPALERTS_ON_RESET" => 0,
    );

    foreach my $key (keys %defaults) {
	$CF{$key} = $defaults{$key};
    }
}


#
# globals not affected by config file
#
# Sets the protocol versions, run-state variables, the $FL_* flag
# bits, the $TRAP_* and $STAT_* enumerations, the status
# classification hashes (%FAILURE, %SUCCESS, %WARNING), the
# name-to-status map %OPSTAT, empties the monitor/alert lookup
# hashes and creates the trap PDU object.
#
sub init_globals {
    $TRAP_PRO_VERSION = 0.3807;
    $SLEEPINT  = 1;
    $STOPPED   = 0;
    $STOPPED_TIME = 0;
    $START_TIME = time;
    $PROT_VERSION = 0x2611;
    $HOSTNAME  = hostname;
    $PWD = getcwd;

    #
    # flags (bit values, tested with "&" elsewhere)
    #
    $FL_MONITOR = 1;
    $FL_UPALERT = 2;
    $FL_TRAP = 4;
    $FL_TRAPTIMEOUT = 8;
    $FL_STARTUPALERT = 16;
    $FL_TEST = 32;

    #
    # specific trap types
    #
    ($TRAP_COLDSTART, $TRAP_WARMSTART, $TRAP_LINKDOWN, $TRAP_LINKUP,
	$TRAP_AUTHFAIL, $TRAP_EGPNEIGHBORLOSS, $TRAP_ENTERPRISE, $TRAP_HEARTBEAT) = (0..7);

    #
    # operational statuses
    #
    ($STAT_FAIL, $STAT_OK, $STAT_COLDSTART, $STAT_WARMSTART, $STAT_LINKDOWN,
	$STAT_UNKNOWN, $STAT_TIMEOUT, $STAT_UNTESTED, $STAT_DEPEND, $STAT_WARN) = (0..9);

    #
    # classification of the operational statuses; note that a
    # status may appear in more than one of these hashes
    #
    %FAILURE = (
    	$STAT_FAIL => 1,
	$STAT_LINKDOWN => 1,
	$STAT_TIMEOUT => 1,
    );

    %SUCCESS = (
    	$STAT_OK => 1,
	$STAT_COLDSTART => 1,
	$STAT_WARMSTART => 1,
	$STAT_UNKNOWN => 1,
	$STAT_UNTESTED => 1,
    );

    %WARNING = (
    	$STAT_COLDSTART => 1,
	$STAT_WARMSTART => 1,
	$STAT_UNKNOWN => 1,
	$STAT_WARN => 1,
    );

    #
    # textual status name => status code
    # NOTE(review): there are no "depend" or "warn" entries here even
    # though $STAT_DEPEND and $STAT_WARN exist -- confirm intentional
    #
    %OPSTAT = ("fail" => $STAT_FAIL, "ok" => $STAT_OK, "coldstart" => $STAT_COLDSTART,
	    "warmstart" => $STAT_WARMSTART, "linkdown" => $STAT_LINKDOWN,
	    "unknown" => $STAT_UNKNOWN, "timeout" => $STAT_TIMEOUT,
	    "untested" => $STAT_UNTESTED);

    #
    # fast lookup hashes for alerts and monitors
    #
    %MONITORHASH = ();
    %ALERTHASH = ();

    #
    # direct method call rather than the indirect-object form
    # ("new Mon::SNMP"), whose parse depends on what subs happen
    # to be in scope
    #
    $TRAP_PDU = Mon::SNMP->new;
}


#
# clear timers
#
# Resets the countdown timers of $group/$service to their configured
# starting values ("traptimeout", "trapduration", "interval") and,
# for every period of the service, zeroes the alert bookkeeping
# ("_last_alert", "_consec_failures", "_1stfailtime") where the
# corresponding option is set.
#
# returns undef if the group or service is not in %watch
#         1 otherwise
#
sub clear_timers {
    my ($group, $service) = @_;

    #
    # test the group first so that asking about an unknown group
    # does not autovivify an empty entry in %watch
    #
    return undef
	if (!exists $watch{$group} || !defined $watch{$group}->{$service});

    my $sref = \%{$watch{$group}->{$service}};

    $sref->{"_trap_timer"} = $sref->{"traptimeout"}
    	if ($sref->{"traptimeout"});

    $sref->{"_trap_duration_timer"} = $sref->{"trapduration"}
    	if ($sref->{"trapduration"});

    $sref->{"_timer"} = $sref->{"interval"}
    	if ($sref->{"interval"});

    foreach my $period (keys %{$sref->{"periods"}}) {
    	my $pref = \%{$sref->{"periods"}->{$period}};

	$pref->{"_last_alert"} = 0
	    if ($pref->{"alertevery"});

	$pref->{"_consec_failures"} = 0
	    if ($pref->{"alertafter_consec"});

	$pref->{'_1stfailtime'} = 0
	    if ($pref->{"alertafterival"});
    }

    #
    # explicit success value: falling off the end of the loop would
    # leave the return value unspecified, which a caller cannot
    # reliably tell apart from the undef returned on error
    #
    return 1;
}


#
# load some amount of the alert history into memory
#
# Replaces @last_alerts with the non-blank, non-comment lines of
# $CF{HISTORICFILE} whose fourth whitespace-separated field (the
# alert time) is not older than $CF{HISTORICTIME} seconds; a
# HISTORICTIME of 0 keeps every line.  If $CF{MAX_KEEP} is defined
# only the newest MAX_KEEP lines are retained.
#
# Does nothing if no history file is configured; logs to syslog and
# leaves @last_alerts alone if the file cannot be opened.
#
sub readhistoricfile {
    return if ($CF{"HISTORICFILE"} eq "");

    #
    # three-argument open: the configured name is always treated as
    # a plain file to read, whatever characters it contains
    #
    my $histfh;
    if (!open ($histfh, "<", $CF{"HISTORICFILE"})) {
	syslog ('err', "%s",
	    "Could not read history from $CF{HISTORICFILE} : $!");
	return;
    }

    my $epochLimit = 0;
    if ($CF{"HISTORICTIME"} != 0) {
	$epochLimit = time - $CF{"HISTORICTIME"};
    }

    @last_alerts = ();

    #
    # a lexical line variable keeps the caller's $_ intact
    #
    while (defined (my $line = <$histfh>)) {
	next if ($line =~ /^\s*$/ || $line =~ /^\s*#/);
	chomp $line;
	my $epochAlert = (split (/\s+/, $line))[3];
	push (@last_alerts, $line) if ($epochAlert >= $epochLimit);
    }

    close ($histfh);

    #
    # drop the oldest entries, but only when there really are more
    # than MAX_KEEP of them
    #
    if (defined $CF{"MAX_KEEP"}) {
	my $excess = @last_alerts - $CF{"MAX_KEEP"};
	splice (@last_alerts, 0, $excess) if ($excess > 0);
    }
}


#
# This routine simply calls an alert.
#
# call with %args = (
#       group		=> "name of group",
#       service		=> "name of service",
#       pref		=> "optional period reference",
#	alert		=> "alert script",
#	args		=> "args to alert script",
# 	flags		=> "flags, as in $FL_*",
#	retval		=> "return value of monitor",
#	output		=> "output of monitor",
# )
#
# group, service, flags, retval, alert and output are mandatory.
#
# The alert is looked up in %ALERTHASH, forked, and run with
#     -s service -g group -h "hosts" -t time [-u|-T|-O] [args...]
# with the monitor output on its standard input and the MON_*
# variables in its environment.  The parent waits for it to finish.
#
# Unless this is a test alert ($FL_TEST) the alert is then tallied
# in the service and period, remembered in @last_alerts, and
# appended to the history file if one is configured.
#
# returns undef if a mandatory argument is missing, the alert
#               cannot be found, or the fork fails
#         1     otherwise
#
sub call_alert {
    my (%args) = @_;

    foreach my $mandatory_arg (qw(
		group service flags
		retval alert output
	    )) {
    	return (undef) if (!defined $args{$mandatory_arg});
    }

    #
    # group members beginning with "*" are not passed to the alert
    #
    my @groupargs = grep (!/^\*/, @{$groups{$args{"group"}}});

    my $tmnow = time;
    my ($summary) = split("\n", $args{"output"});
    $summary = "(NO SUMMARY)" if ($summary =~ /^\s*$/m);

    my $sref = \%{$watch{$args{"group"}}->{$args{"service"}}};
    my $pref;

    if (defined $args{"pref"}) {
	$pref = $args{"pref"};
    }

    my $alert = "";
    if (!defined $ALERTHASH{$args{"alert"}} ||
	    ! -f $ALERTHASH{$args{"alert"}}) {
	syslog ('err', "%s",
	    "no alert found while trying to run $args{alert}");
	return undef;
    } else {
	$alert = $ALERTHASH{$args{"alert"}};
    }

    my $alerttype = "";           # sent to syslog and stored in @last_alerts
    my $alert_type = "failure";   # MON_ALERTTYPE set to this
    if ($args{"flags"} & $FL_UPALERT) {
    	$alerttype = "upalert";
	$alert_type = "up";
    } elsif ($args{"flags"} & $FL_STARTUPALERT) {
    	$alerttype = "startupalert";
	$alert_type = "startup";
    } elsif ($args{"flags"} & $FL_TRAPTIMEOUT) {
    	$alerttype = "traptimeoutalert";
	$alert_type = "traptimeout";
    } elsif ($args{"flags"} & $FL_TRAP) {
    	$alerttype = "trapalert";
	$alert_type = "trap";
    } elsif ($args{"flags"} & $FL_TEST) {
    	$alerttype = "testalert";
	$alert_type = "test";
    } else {
    	$alerttype = "alert";
    }

    #
    # log why we are triggering an alert
    #
    # the text contains monitor output, so it is handed to syslog
    # as an argument to "%s" and never as the format itself
    #
    my $alertname = $alert;
    $alertname =~ s{^.*/([^/]+)$}{$1};
    syslog ("alert", "%s", "calling $alerttype $alertname for" .
	" $args{group}/$args{service} ($alert,$args{args}) $summary");

    my $pid = open (ALERT, "|-");
    if (!defined $pid) {
    	syslog ('err', "%s", "could not fork: $!");
	return undef;
    }

    #
    # child, the actual alert
    #
    if ($pid == 0) {
	#
	# set env variables to pass to the alert
	#
	foreach my $v (keys %{$sref->{"ENV"}}) {
	    $ENV{$v} = $sref->{"ENV"}->{$v};
	}

	$ENV{"MON_LAST_SUMMARY"}	= $sref->{"_last_summary"};
	$ENV{"MON_LAST_OUTPUT"}		= $sref->{"_last_output"};
	$ENV{"MON_LAST_FAILURE"}	= $sref->{"_last_failure"};
	$ENV{"MON_FIRST_FAILURE"}	= $sref->{"_first_failure"};
	$ENV{"MON_LAST_SUCCESS"}	= $sref->{"_last_success"};
	$ENV{"MON_DESCRIPTION"}		= $sref->{"description"};
	$ENV{"MON_GROUP"}		= $args{"group"};
	$ENV{"MON_SERVICE"}		= $args{"service"};
	$ENV{"MON_RETVAL"}		= $args{"retval"};
	$ENV{"MON_OPSTATUS"}		= $sref->{"_op_status"};
	$ENV{"MON_ALERTTYPE"}		= $alert_type;
	$ENV{"MON_STATEDIR"}		= $CF{"STATEDIR"};
	$ENV{"MON_LOGDIR"}		= $CF{"LOGDIR"};

	#
	# MON_TRAP_INTENDED is only present when the service has an
	# "_intended" value; delete (rather than undef) removes the
	# variable from the environment altogether
	#
	if (defined ($sref->{"_intended"})) {
	    $ENV{"MON_TRAP_INTENDED"} = $sref->{"_intended"};
	} else {
	    delete $ENV{"MON_TRAP_INTENDED"};
	}

	#
	# the last matching flag wins, as before
	#
	my $t;
	$t = "-u" if ($args{"flags"} & $FL_UPALERT);
	$t = "-T" if ($args{"flags"} & $FL_TRAP);
	$t = "-O" if ($args{"flags"} & $FL_TRAPTIMEOUT);

	my @execargs = (
	    $alert,
	    "-s", "$args{service}",
	    "-g", "$args{group}",
	    "-h", "@groupargs",
	    "-t", "$tmnow",
	);

	if ($t) {
	    push @execargs, $t;
	}

	if ($args{"args"} ne "") {
	    push @execargs, quotewords('\s+',0,$args{"args"});
	}

	#
	# exec only returns on failure.  The child must never return
	# into the caller: it would carry on running the server's
	# code as a second copy, and the parent would wait on it
	# forever in close() below.
	#
	exec (@execargs)
	    or syslog ('err', "%s", "could not exec alert $alert: $!");
	exit (1);
    }

    #
    # this will block if the alert is sucking gas
    #
    # an alert which exits without reading its input must not be
    # able to kill us with SIGPIPE
    #
    {
	local $SIG{"PIPE"} = "IGNORE";
	print ALERT $args{"output"};
	close (ALERT);
    }
    waitpid $pid, 0;

    #
    # test alerts don't count
    #
    return (1) if ($args{"flags"} & $FL_TEST);

    #
    # tally this alert
    #
    if (defined $args{"pref"}) {
	$pref->{"_last_alert"} = $tmnow;
    }
    $sref->{"_alert_count"}++;

    #
    # store this in the log, making room first so that the list
    # never holds more than MAX_KEEP entries
    #
    shift @last_alerts if (@last_alerts >= $CF{"MAX_KEEP"});

    my $alertline = "$alerttype $args{group} $args{service}" .
	" $tmnow $alert ($args{args}) $summary";
    push @last_alerts, $alertline;

    #
    # append to alert history file
    #
    if ($CF{"HISTORICFILE"} ne "") {
	my $histfh;
	if (!open ($histfh, ">>", $CF{"HISTORICFILE"})) {
	    syslog ('err', "%s",
		"Could not append alert history to $CF{HISTORICFILE}: $!");
	} else {
	    #
	    # buffered write errors only show up at close
	    #
	    my $written = print {$histfh} $alertline, "\n";
	    if (!close ($histfh) || !$written) {
		syslog ('err', "%s",
		    "Could not append alert history to $CF{HISTORICFILE}: $!");
	    }
	}
    }

    return 1;
}


#
# recursively evaluate a dependency expression
# substitutes "GROUP:SERVICE" with "1" or "0" if the service is pass/fail, resp.
#
# $depend is the expression, $depth the current recursion depth
# (callers start at 0).
#
# A GROUP:SERVICE term is "1" when
#   - the watch or the service is disabled, or
#   - the service has no dependencies of its own and its
#     _op_status is one of the %SUCCESS statuses, or
#   - the service's own dependencies evaluate true and its
#     _op_status is one of the %SUCCESS statuses.
# A term naming a group or service which is not in %watch is "0".
#
# returns an anonymous hash reference
#
# {
#	status =>,           # "D"  recursion depth exceeded
#                            # "O"  everything is OK
#                            # "E"  eval error
#	depend =>,           # 1 for success (no deps in a failure state)
#                            # 0 if any deps failed
#	error =>,            # the textual error associated with "D" or "E" status
# }
#
sub depend {
    my ($depend, $depth) = @_;
    debug (1, "checking DEP [$depend]\n");

    if ($depth > $CF{"DEP_RECUR_LIMIT"}) {
	return {
	    status => "D",
	    depend => undef,
	    error  => "recursion too deep for ($depend)",
	};
    }

    foreach my $depstr ($depend =~ /[a-zA-Z0-9_.-]+:[a-zA-Z0-9_.-]+/g)
    {
	my ($group ,$service) = split(':', $depstr);

	#
	# look the service up without autovivifying: a typo in a
	# dependency must not create an empty entry in %watch.  An
	# unknown service is represented by an empty hash, which
	# the code below evaluates as a failed root dependency.
	#
	my $sref = {};
	if (exists $watch{$group} && defined $watch{$group}->{$service})
	{
	    $sref = $watch{$group}->{$service};
	}
	else
	{
	    debug (1, "  unknown dep $group,$service\n");
	}

	my $depval = undef;

	#
	# disabled watches and services are counted as "passing"
	#
	if ($watch_disabled{$group} || $sref->{"disable"} == 1)
	{
	    $depval = 1;
	}

	#
	# root dependency found
	#
	elsif ($sref->{"depend"} eq "")
	{
	    debug (1, "  found root dep $group,$service\n");

	    $depval = $SUCCESS{$sref->{"_op_status"}};
	}

	#
	# not a root dep, recurse
	#
	else
	{
	    my $dstatus = depend ($sref->{"depend"}, $depth + 1);
	    debug (1,
	    	"recur depth $depth returned $dstatus->{status},$dstatus->{depend}\n");

	    #
	    # a bad thing happened, bail out
	    #
	    if ($dstatus->{"status"} ne "O")
	    {
		debug (1,
		    "recursive dep failure for $group,$service (status=$dstatus->{status})\n");
		return $dstatus;
	    }

	    #
	    # _op_status is a status code, not a boolean (several
	    # failure codes are non-zero), so it has to be
	    # classified through %SUCCESS exactly as for a root dep
	    #
	    $depval = $dstatus->{"depend"}
		&& $SUCCESS{$sref->{"_op_status"}};
	}

	my $v = $depval ? 1 : 0;
	debug (1, "  ($group,$service) $depth depend=[$v][$depend]");

	#
	# replace this term only: \Q..\E stops "." in a name from
	# acting as a wildcard, and the look-arounds stop the term
	# from matching inside a longer name such as "x-GROUP:SERVICE"
	#
	$depend =~ s/(?<![a-zA-Z0-9_.-])\Q$depstr\E(?![a-zA-Z0-9_.-])/$v/g;
	debug (1, "  depend=[$depend]\n");
    }

    #
    # NOTE(review): what remains of the expression is evaluated as
    # Perl code after $DEP_EVAL_SANDBOX.  It comes from the
    # configuration; it must never be built from untrusted input.
    #
    debug (1, "  before eval: [$depend]");
    my $e = eval("$DEP_EVAL_SANDBOX $depend");
    my $evalerr = $@;
    debug (1, "  after eval: [$e]\n");

    if ($evalerr eq "")
    {
	return {
	    status	=> "O",
	    depend	=> $e,
	};
    }

    return {
	status	=> "E",
	depend	=> $e,
	error	=> $evalerr,
    };
}


#
# Evaluate the dependency expression of the service referenced by
# $sref and record the outcome in its "_depend_status".
#
# returns undef on error (recursion too deep, or eval error);
#               _depend_status is left untouched
#         0     if a dependency is failing
#         1     if the dependencies are OK
#
sub dep_ok
{
    my ($sref) = @_;

    my $result = depend ($sref->{"depend"}, 0);
    my $status = $result->{"status"};

    if ($status eq "D")
    {
	debug (1, "dep recursion too deep\n");
	return undef;
    }

    if ($status eq "E")
    {
	syslog ("notice", "eval error for dependency starting at $sref->{depend}");
	return undef;
    }

    my $ok = ($status eq "O" && !$result->{"depend"}) ? 0 : 1;

    $sref->{"_depend_status"} = $ok;

    return $ok;
}


#
# Return a hex-escaped copy of a string.
#
# Every character below 32 or above 126, and both kinds of quote,
# is replaced by a backslash followed by its code in lower-case hex
# (at least two digits).
#
# $str is the string to be escaped
# if $inquotes is true, backslashes are doubled, making
#       the escaped string suitable to be enclosed in
#       single quotes and later passed to Text::quotewords.
#       For example,   var='quoted value'
#
sub esc_str {
    my ($str, $inquotes) = @_;

    my @pieces;

    foreach my $ch (split (//, $str))
    {
	my $code = ord ($ch);

	if ($code < 32 || $code > 126 || $ch eq "\"" || $ch eq "\'")
	{
	    push @pieces, sprintf ("\\%02x", $code);
	}
	elsif ($inquotes && $ch eq "\\")
	{
	    push @pieces, "\\\\";
	}
	else
	{
	    push @pieces, $ch;
	}
    }

    return join ("", @pieces);
}


#
# Return an unescaped copy of a hex-escaped string: each backslash
# which is followed by two lower-case hex digits is replaced by the
# character with that code.  Anything else is left as it is.
#
sub un_esc_str {
    my ($escaped) = @_;

    $escaped =~ s{\\([0-9a-f]{2})}{chr(hex($1))}eg;

    return $escaped;
}


#
# Log $msg to syslog at priority "err", then die with the same
# message (newline-terminated, so no file/line is appended).
#
sub syslog_die {
    my $msg = shift;

    #
    # the message is arbitrary text, so it is passed as the argument
    # to "%s"; used as the format itself, any "%" in it would be
    # interpreted by syslog
    #
    syslog ("err", "%s", $msg);
    die "$msg\n";
}


#
# Conversation callback for the PAM authentication module, letting
# us authenticate non-interactively.
#
# Called with a list of (code, message) pairs.  Each pair is
# answered with (PAM_SUCCESS, answer), where the answer is
# $PAM_username for an echo-on prompt, $PAM_password for an echo-off
# prompt, and the empty string for anything else.  A final
# PAM_SUCCESS is appended to the returned list.
#
sub pam_conv_func {
    my @reply;

    while (@_) {
	# the message text itself is not used
	my ($code, $msg) = splice (@_, 0, 2);

	my $echo_on = Authen::PAM::PAM_PROMPT_ECHO_ON();
	my $echo_off = Authen::PAM::PAM_PROMPT_ECHO_OFF();

	my $answer = $code == $echo_off ? $PAM_password
		   : $code == $echo_on  ? $PAM_username
		   : "";

	push @reply, Authen::PAM::PAM_SUCCESS();
	push @reply, $answer;
    }

    push @reply, Authen::PAM::PAM_SUCCESS();
    return @reply;
}


#
# Append one record to the downtime log, $CF{DTLOGFILE}:
#
#   now group service first-failure duration interval last-summary
#
# where duration is now minus first-failure.  A "_first_failure" of
# 0 is replaced by the time the server started.
#
# $CF{DTLOGGING} is set to 0 if the log cannot be opened, and to 1
# when it can.  Errors are reported via syslog only.
#
sub write_dtlog
{
    my ($sref, $group, $service) = @_;

    my $tmnow = time;

    $sref->{"_first_failure"} = $START_TIME
       if ($sref->{"_first_failure"} == 0);

    #
    # three-argument open: the configured name is always treated as
    # a plain file to append to, whatever characters it contains
    #
    my $dtlog;
    if (!open ($dtlog, ">>", $CF{"DTLOGFILE"}))
    {
    	syslog ('err', "%s", "could not append to $CF{DTLOGFILE}: $!");
	$CF{"DTLOGGING"} = 0;
    }

    else
    {
	$CF{"DTLOGGING"} = 1;

	my $record = join ("",
	   $tmnow,
	   " $group",
	   " $service",
	   " ", 0 + $sref->{"_first_failure"},
	   " ", 0 + $tmnow - $sref->{"_first_failure"},
	   " $sref->{'interval'}",
	   " $sref->{'_last_summary'}\n");

	#
	# output is buffered, so a failed write may only be
	# reported by close
	#
	my $written = print {$dtlog} $record;
	if (!close ($dtlog) || !$written)
	{
	    syslog ('err', "%s", "error writing to $CF{DTLOGFILE}: $!");
	}
    }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
提供的源码资源涵盖了Java应用等多个领域,每个领域都包含了丰富的实例和项目。这些源码都是基于各自平台的最新技术和标准编写,确保了在对应环境下能够无缝运行。同时,源码中配备了详细的注释和文档,帮助用户快速理解代码结构和实现逻辑。 适用人群: 适合毕业设计、课程设计作业。这些源码资源特别适合大学生群体。无论你是计算机相关专业的学生,还是对其他领域编程感兴趣的学生,这些资源都能为你提供宝贵的学习和实践机会。通过学习和运行这些源码,你可以掌握各平台开发的基础知识,提升编程能力和项目实战经验。 使用场景及目标: 在学习阶段,你可以利用这些源码资源进行课程实践、课外项目或毕业设计。通过分析和运行源码,你将深入了解各平台开发的技术细节和最佳实践,逐步培养起自己的项目开发和问题解决能力。此外,在求职或创业过程中,具备跨平台开发能力的大学生将更具竞争力。 其他说明: 为了确保源码资源的可运行性和易用性,特别注意了以下几点:首先,每份源码都提供了详细的运行环境和依赖说明,确保用户能够轻松搭建起开发环境;其次,源码中的注释和文档都非常完善,方便用户快速上手和理解代码;最后,我会定期更新这些源码资源,以适应各平台技术的最新发展和市场需求。 所有源码均经过严格测试,可以直接运行,可以放心下载使用。有任何使用问题欢迎随时与博主沟通,第一时间进行解答!
提供的源码资源涵盖了小程序应用等多个领域,每个领域都包含了丰富的实例和项目。这些源码都是基于各自平台的最新技术和标准编写,确保了在对应环境下能够无缝运行。同时,源码中配备了详细的注释和文档,帮助用户快速理解代码结构和实现逻辑。 适用人群: 适合毕业设计、课程设计作业。这些源码资源特别适合大学生群体。无论你是计算机相关专业的学生,还是对其他领域编程感兴趣的学生,这些资源都能为你提供宝贵的学习和实践机会。通过学习和运行这些源码,你可以掌握各平台开发的基础知识,提升编程能力和项目实战经验。 使用场景及目标: 在学习阶段,你可以利用这些源码资源进行课程实践、课外项目或毕业设计。通过分析和运行源码,你将深入了解各平台开发的技术细节和最佳实践,逐步培养起自己的项目开发和问题解决能力。此外,在求职或创业过程中,具备跨平台开发能力的大学生将更具竞争力。 其他说明: 为了确保源码资源的可运行性和易用性,特别注意了以下几点:首先,每份源码都提供了详细的运行环境和依赖说明,确保用户能够轻松搭建起开发环境;其次,源码中的注释和文档都非常完善,方便用户快速上手和理解代码;最后,我会定期更新这些源码资源,以适应各平台技术的最新发展和市场需求。 所有源码均经过严格测试,可以直接运行,可以放心下载使用。有任何使用问题欢迎随时与博主沟通,第一时间进行解答!
提供的源码资源涵盖了Java应用等多个领域,每个领域都包含了丰富的实例和项目。这些源码都是基于各自平台的最新技术和标准编写,确保了在对应环境下能够无缝运行。同时,源码中配备了详细的注释和文档,帮助用户快速理解代码结构和实现逻辑。 适用人群: 适合毕业设计、课程设计作业。这些源码资源特别适合大学生群体。无论你是计算机相关专业的学生,还是对其他领域编程感兴趣的学生,这些资源都能为你提供宝贵的学习和实践机会。通过学习和运行这些源码,你可以掌握各平台开发的基础知识,提升编程能力和项目实战经验。 使用场景及目标: 在学习阶段,你可以利用这些源码资源进行课程实践、课外项目或毕业设计。通过分析和运行源码,你将深入了解各平台开发的技术细节和最佳实践,逐步培养起自己的项目开发和问题解决能力。此外,在求职或创业过程中,具备跨平台开发能力的大学生将更具竞争力。 其他说明: 为了确保源码资源的可运行性和易用性,特别注意了以下几点:首先,每份源码都提供了详细的运行环境和依赖说明,确保用户能够轻松搭建起开发环境;其次,源码中的注释和文档都非常完善,方便用户快速上手和理解代码;最后,我会定期更新这些源码资源,以适应各平台技术的最新发展和市场需求。 所有源码均经过严格测试,可以直接运行,可以放心下载使用。有任何使用问题欢迎随时与博主沟通,第一时间进行解答!

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值