/*-
 * avail.c --
 *	Functions to check the status of the local machine to see
 *	if it can accept processes.
 *
 * Copyright (c) 1988, 1989 by the Regents of the University of California
 * Copyright (c) 1988, 1989 by Adam de Boor
 * Copyright (c) 1989 by Berkeley Softworks
 *
 * Permission to use, copy, modify, and distribute this
 * software and its documentation for any non-commercial purpose
 * and without fee is hereby granted, provided that the above copyright
 * notice appears in all copies.  The University of California,
 * Berkeley Softworks and Adam de Boor make no representations about
 * the suitability of this software for any purpose.  It is provided
 * "as is" without express or implied warranty.
 */
#ifndef lint
/* RCS identification string for this file; omitted when `lint' is defined. */
static char *rcsid =
"$Id: avail.c,v 1.50 2001/02/17 01:09:58 stolcke Exp $ ICSI (Berkeley)";
#endif /* not lint */

#include    <signal.h>
#include    <sys/time.h>
#include    <sys/resource.h>

#include    "customsInt.h"

/*
 * Availability criteria and job limits. The DEF_* initializers are not
 * defined in this file. AvailSet() can overwrite every one of these at
 * run time except maxExclusives.
 */
static unsigned long   	maxLoad = DEF_MAX_LOAD * LOADSCALE;
					/* Load limit, in LOADSCALE units;
					 * 0 disables the load check */
static int  	  	minSwapFree = DEF_MIN_SWAP;
					/* Minimum free swap, in percent;
					 * 0 disables the check */
static int  	  	minIdle = DEF_MIN_IDLE;
					/* Minimum idle time, in seconds;
					 * 0 disables the check */
static int  	  	idleCrit = DEF_IDLECRIT;
					/* Argument handed to OS_Idle() when
					 * the idle time is measured */
int  	  		maxImports = DEF_MAX_IMPORTS;
					/* Import count at which the host is
					 * reported AVAIL_IMPORTS; 0 means
					 * no limit */
static int  	  	minProcFree = DEF_MIN_PROC;
					/* Smallest acceptable OS_Proc()
					 * value; 0 disables the check */
int			cpuLimit = DEF_CPU_LIMIT;
int			memLimit = DEF_MEM_LIMIT;
int			niceLevel = DEF_NICE_LEVEL;
int			npriLevel = DEF_NPRI_LEVEL;
int			checkUser = DEF_CHECK_USER;
					/* The five settings above are only
					 * stored and reported by AvailSet()
					 * in this file; presumably they are
					 * consumed by other modules --
					 * TODO confirm */
static int		evictDelay = DEF_EVICT_DELAY;
					/* Seconds between the eviction
					 * notice and the eviction signal;
					 * evictions are scheduled only
					 * when this is > 0 */
int			localJobs = DEF_LOCAL_JOBS;
					/* Non-zero: Avail_Send() sets
					 * AVAIL_TOLOCAL in its report */
int			maxExclusives = 1;
					/* avail_Exclusive count at which the
					 * host is reported AVAIL_EXCLUSIVE */

static Rpc_Event  	availEvent;	/* Event for checking availability */
static struct timeval	availInterval;	/* Interval at which checks should
					 * be made for the availability of
					 * this host. */
static int  	    	availCheck;  	/* Mask of criteria to examine */
int	    	    	avail_Bias; 	/* Bias for rating calculation */

double			avail_LoadBias = 0.0; /* Bias for load correction */
static Rpc_Event	decayEvent;	/* Event for decaying load bias */

int			avail_Exclusive;/* No of 'exclusive' jobs on server */
static Rpc_Event	evictEvent;	/* Event for evicting imported jobs */
static struct timeval	evictInterval;	/* Delay till eviction */

/*-
 *-----------------------------------------------------------------------
 * AvailDecayBias --
 *	Event callback that shrinks the load-correction bias: every call
 *	multiplies avail_LoadBias by LOADDECAY_FACTOR. Avail_Init
 *	registers it with an interval of LOADDECAY_TIME seconds.
 *
 * Results:
 *	Always FALSE.
 *
 * Side Effects:
 *	avail_LoadBias is rescaled in place.
 *
 *-----------------------------------------------------------------------
 */
static Boolean
AvailDecayBias ()
{
    double	decayed;	/* Bias after one decay step */

    decayed = avail_LoadBias * (LOADDECAY_FACTOR);
    avail_LoadBias = decayed;

    return (FALSE);
}

/*-
 *-----------------------------------------------------------------------
 * Avail_Send --
 *	Evaluate the availability of the local host with Avail_Local and
 *	report it to the master through a CUSTOMS_AVAIL call.
 *
 * Results:
 *	Always FALSE.
 *
 * Side Effects:
 *	stats.availChecks is incremented, as is stats.avail when the host
 *	is available. Unless an election is in progress, an availability
 *	packet is sent to the master; if that call does not succeed,
 *	Elect_GetMaster() is invoked. Whatever Avail_Local does as a side
 *	effect happens here too.
 *
 *-----------------------------------------------------------------------
 */
Boolean
Avail_Send ()
{
    /*
     * XXX: Re-entrancy guard. The original author observed the avail
     * event firing again while the CUSTOMS_AVAIL call was still
     * outstanding (for reasons never determined), after which the calls
     * kept nesting and never completed. While a report is being sent,
     * any nested invocation therefore returns immediately.
     */
    static int	  sending = 0;
    Avail         report;	/* Packet delivered to the master */

    if (sending) {
	return (FALSE);
    }
    sending = 1;

    /*
     * Fill in the report. The advertised interval is one second longer
     * than our own check interval.
     */
    report.addr = localAddr.sin_addr;
    report.interval.tv_sec = availInterval.tv_sec + 1;
    report.interval.tv_usec = availInterval.tv_usec;
    report.avail = Avail_Local(AVAIL_EVERYTHING, &report.rating);
#ifdef DOUBLECHECK_TIMEOUT
    report.clock = time((time_t *)0);
#else
    report.clock = 0;
#endif
    report.nJobs = Import_NJobs();

    /*
     * Book-keeping: one more check made, and one more "available"
     * answer if no criterion was violated.
     */
    stats.availChecks++;
    if (!report.avail) {
	stats.avail++;
    }

    if (verbose) {
	xlog (XLOG_DEBUG, "Avail_Send: localhost %s available",
		report.avail ? "not" : "is");
    }

    /*
     * The local-jobs flag is added only after the statistics and the
     * log message, so it affects neither.
     */
    if (localJobs) {
	report.avail |= AVAIL_TOLOCAL;
    }

    /*
     * No report is sent during an election. A failed report triggers
     * Elect_GetMaster().
     */
    if (!Elect_InProgress()) {
	if (Rpc_Call(udpSocket, &masterAddr,
		     (Rpc_Proc)CUSTOMS_AVAIL,
		     sizeof(report), (Rpc_Opaque)&report,
		     0, (Rpc_Opaque)0,
		     CUSTOMSINT_NRETRY, &retryTimeOut) != RPC_SUCCESS) {
	    Elect_GetMaster();
	}
    }

    sending = 0;
    return (FALSE);
}

/*-
 *-----------------------------------------------------------------------
 * AvailSet --
 *	Set the availability criteria. Returns an OR of bits if the
 *	parameters are out-of-range.
 *
 * Results:
 *	Any of the AVAIL_* bits or'ed together (or 0 if things are ok).
 *
 * Side Effects:
 *	The availability criteria are altered.
 *
 *-----------------------------------------------------------------------
 */
/*ARGSUSED*/
static void
AvailSet (from, msg, len, adPtr, fromRemote)
    struct sockaddr_in	*from;	    /* Address of sender */
    Rpc_Message	  	msg;	    /* Message for return */
    int	    	  	len;	    /* Length of criteria */
    Avail_Data	  	*adPtr;	    /* New criteria */
    Boolean 	  	fromRemote; /* TRUE if from remote call */
{
    int	    	  result;

    if (fromRemote && !Local(from)) {
	Rpc_Error(msg, RPC_ACCESS);
    } else if (len != sizeof(Avail_Data)) {
	Rpc_Error(msg, RPC_BADARGS);
    } else {
	/*
	 * Criteria change only by root.
	 */
	if (fromRemote && adPtr->changeMask) {
	    CustomsReserved("AvailSet", from, msg);
	}
	/*
	 * Bounds-check the passed parameters, setting bits in result to
	 * correspond to bad values.
	 */
	result = 0;
	if ((adPtr->changeMask & AVAIL_IDLE) && (adPtr->idleTime > MAX_IDLE)) {
	    result |= AVAIL_IDLE;
	}
	if ((adPtr->changeMask & AVAIL_SWAP) && (adPtr->swapPct > MAX_SWAP)) {
	    result |= AVAIL_SWAP;
	}
	if ((adPtr->changeMask & AVAIL_LOAD) &&
	    (adPtr->loadAvg < MIN_LOAD) &&
	    (adPtr->loadAvg != 0))
	{
	    result |= AVAIL_LOAD;
	}
	if ((adPtr->changeMask & AVAIL_IMPORTS) &&
	    (adPtr->imports < MIN_IMPORTS) &&
	    (adPtr->imports != 0))
	{
	    result |= AVAIL_IMPORTS;
	}
	if ((adPtr->changeMask & AVAIL_PROC) && (adPtr->procs > MAX_PROC)) {
	    result |= AVAIL_PROC;
	}
	if ((adPtr->changeMask & AVAIL_CPU) &&
#ifdef RLIMIT_CPU
	    (adPtr->cpuLimit < MIN_CPU) &&
#endif 
	    (adPtr->cpuLimit != 0))
	{
	    result |= AVAIL_CPU;
	}
	if ((adPtr->changeMask & AVAIL_MEMORY) &&
#ifdef RLIMIT_RSS
	    (adPtr->memLimit < MIN_MEMORY) &&
#endif
	    (adPtr->memLimit != 0))
	{
	    result |= AVAIL_MEMORY;
	}
	if ((adPtr->changeMask & AVAIL_NICE) &&
	    ((adPtr->niceLevel > MAX_NICE) ||
	     (adPtr->niceLevel < 0)))
	{
	    result |= AVAIL_NICE;
	}
	if ((adPtr->changeMask & AVAIL_NPRI) &&
#ifdef sgi
	    ((adPtr->npriLevel < NDPLOMAX) ||
	     (adPtr->npriLevel > NDPLOMIN)) &&
#endif
	    (adPtr->npriLevel != 0))
	{
	    result |= AVAIL_NPRI;
	}
	if ((adPtr->changeMask & AVAIL_CHECK) &&
	    ((adPtr->checkUser < DONT_CHECK) ||
	     (adPtr->checkUser > MAX_CHECK)))
	{
	    result |= AVAIL_CHECK;
	}
	if ((adPtr->changeMask & AVAIL_EVICT) &&
	    (adPtr->evictDelay < MIN_EVICT) &&
	    (adPtr->evictDelay != 0))
	{
	    result |= AVAIL_EVICT;
	}
	if (result == 0) {
	    /*
	     * Everything ok -- change what needs changing.
	     */
	    if (adPtr->changeMask & AVAIL_TOLOCAL) {
		localJobs = adPtr->localJobs;
	    }
	    if (adPtr->changeMask & AVAIL_IDLE) {
		minIdle = adPtr->idleTime;
	    }
	    if (adPtr->changeMask & AVAIL_IDLECRIT) {
		idleCrit = adPtr->idleCrit;
	    }
	    if (adPtr->changeMask & AVAIL_SWAP) {
		minSwapFree = adPtr->swapPct;
	    }
	    if (adPtr->changeMask & AVAIL_LOAD) {
		maxLoad = adPtr->loadAvg;
	    }
	    if (adPtr->changeMask & AVAIL_IMPORTS) {
		maxImports = adPtr->imports;
	    }
	    if (adPtr->changeMask & AVAIL_PROC) {
		minProcFree = adPtr->procs;
	    }
	    if (adPtr->changeMask & AVAIL_CPU) {
		cpuLimit = adPtr->cpuLimit;
	    }
	    if (adPtr->changeMask & AVAIL_MEMORY) {
		memLimit = adPtr->memLimit;
	    }
	    if (adPtr->changeMask & AVAIL_NICE) {
		niceLevel = adPtr->niceLevel;
	    }
	    if (adPtr->changeMask & AVAIL_NPRI) {
		npriLevel = adPtr->npriLevel;
	    }
	    if (adPtr->changeMask & AVAIL_CHECK) {
		checkUser = adPtr->checkUser;
	    }
	    if (adPtr->changeMask & AVAIL_EVICT) {
		evictDelay = adPtr->evictDelay;
	    }
	}
	/*
	 * Set return value: changeMask gets error bits. the other fields get
	 * the current criteria.
	 */
	adPtr->changeMask = result;
	adPtr->localJobs = localJobs;
	adPtr->idleTime = availCheck & AVAIL_IDLE ? minIdle : 0;
	adPtr->idleCrit = idleCrit;
	adPtr->swapPct = availCheck & AVAIL_SWAP ? minSwapFree : 0;
	adPtr->loadAvg = availCheck & AVAIL_LOAD ? maxLoad : 0;
	adPtr->imports = availCheck & AVAIL_IMPORTS ? maxImports : 0;
	adPtr->procs = availCheck & AVAIL_PROC ? minProcFree : 0;
	adPtr->cpuLimit = cpuLimit;
	adPtr->memLimit = memLimit;
	adPtr->niceLevel = niceLevel;
	adPtr->npriLevel = npriLevel;
	adPtr->checkUser = checkUser;
	adPtr->evictDelay = evictDelay;

	/*
	 * Only send a reply if the call was actually remote (it's not
	 * when called from main...)
	 */
	if (fromRemote) {
	    Rpc_Return(msg, len, (Rpc_Opaque)adPtr);
	}
    }
}

/*-
 *-----------------------------------------------------------------------
 * AvailSetInterval --
 *	RPC handler that changes the interval between availability
 *	checks.
 *
 * Results:
 *	None. The caller is answered over msg: RPC_ACCESS if Local(from)
 *	fails, RPC_BADARGS if the argument has the wrong size or asks
 *	for fewer than MIN_CHECK seconds, an empty reply on success.
 *
 * Side Effects:
 *	availInterval is overwritten and availEvent is reset to the new
 *	interval.
 *
 *-----------------------------------------------------------------------
 */
static void
AvailSetInterval (from, msg, len, intervalPtr)
    struct sockaddr_in	*from;		/* Address of sender */
    Rpc_Message	  	msg;		/* Message for the reply */
    int	    	  	len;		/* Length of *intervalPtr */
    Time_Interval	*intervalPtr;	/* Requested interval */
{
    if (!Local(from)) {
	Rpc_Error(msg, RPC_ACCESS);
	return;
    }

    /*
     * The size is tested first so that a short argument is never
     * dereferenced.
     */
    if ((len != sizeof(Time_Interval)) ||
	(intervalPtr->tv_sec < MIN_CHECK))
    {
	Rpc_Error(msg, RPC_BADARGS);
	return;
    }

    /*
     * Allow change only by root.
     */
    CustomsReserved("AvailSetInterval", from, msg);

    availInterval.tv_usec = intervalPtr->tv_usec;
    availInterval.tv_sec = intervalPtr->tv_sec;
    Rpc_EventReset(availEvent, &availInterval);
    Rpc_Return(msg, 0, (Rpc_Opaque)0);
}

/*-
 *-----------------------------------------------------------------------
 * AvailEvict --
 *	Event callback that carries out one stage of evicting the
 *	imported jobs and schedules the following stage:
 *	    EVICT_NOTIFY  signal the imports, then EVICT_SIGNAL after
 *			  evictDelay seconds;
 *	    EVICT_SIGNAL  signal the imports, then SIGKILL after
 *			  GRACE_CPU seconds;
 *	    SIGKILL	  signal the imports, nothing further.
 *	Any other value is only logged (when verbose).
 *
 * Results:
 *	Always FALSE.
 *
 * Side Effects:
 *	The pending evictEvent is deleted and, for the first two stages,
 *	replaced by the event for the next stage. The stage's signal is
 *	passed to Import_Kill.
 *
 *-----------------------------------------------------------------------
 */
static Boolean
AvailEvict (signo)
    int		signo;		/* Stage to perform, i.e. signal to send */
{
    int importCount;		/* Number of imports, for the log only */

    importCount = Import_NJobs();

    /*
     * This stage is being handled now, so its event is finished.
     */
    Rpc_EventDelete(evictEvent);
    evictEvent = (Rpc_Event)0;

    if (signo == (EVICT_NOTIFY)) {
	if (verbose) {
	    xlog (XLOG_DEBUG,
		    "AvailEvict: notifying %d import(s) with signal %d",
		    importCount, signo);
	}
	(void)Import_Kill(0, signo, (struct sockaddr_in *)0);

	/*
	 * Give jobs that ignore the warning a grace period, then
	 * proceed to orderly termination.
	 */
	evictInterval.tv_sec = evictDelay;
	evictInterval.tv_usec = 0;
	evictEvent = Rpc_EventCreate(&evictInterval, AvailEvict,
				     (Rpc_Opaque)EVICT_SIGNAL);
    } else if (signo == (EVICT_SIGNAL)) {
	if (verbose || importCount > 0) {
	    xlog (verbose ? XLOG_DEBUG : XLOG_INFO,
		    "AvailEvict: evicting %d import(s) with signal %d",
		    importCount, signo);
	}
	(void)Import_Kill(0, signo, (struct sockaddr_in *)0);

	/*
	 * Whatever survives this gets killed unconditionally.
	 */
	evictInterval.tv_sec = GRACE_CPU;
	evictInterval.tv_usec = 0;
	evictEvent = Rpc_EventCreate(&evictInterval, AvailEvict,
				     (Rpc_Opaque)SIGKILL);
    } else if (signo == (SIGKILL)) {
	if (verbose || importCount > 0) {
	    xlog (verbose ? XLOG_DEBUG : XLOG_INFO,
		    "AvailEvict: killing %d import(s) with signal %d",
		    importCount, signo);
	}
	/*
	 * Final stage: no further event is scheduled.
	 */
	(void)Import_Kill(0, signo, (struct sockaddr_in *)0);
    } else {
	if (verbose) {
	    xlog (XLOG_DEBUG,
		    "AvailEvict: not sure what to do with signal %d", signo);
	}
    }

    return (FALSE);
}

/*-
 *-----------------------------------------------------------------------
 * Avail_Init --
 *	Set up the availability module: create the load-bias decay event
 *	and the periodic availability check, register the RPC handlers
 *	for CUSTOMS_AVAILINTV and CUSTOMS_SETAVAIL, and install the
 *	initial criteria.
 *
 * Results:
 *	None. *criteria is rewritten by AvailSet: on return its
 *	changeMask holds the bits of any rejected initial values.
 *
 * Side Effects:
 *	decayEvent, availEvent, availCheck and availInterval are set and
 *	evictEvent is cleared. A warning is logged if the initial
 *	criteria were rejected.
 *	NOTE(review): the previous header said the daemon exits if
 *	initialization fails; no exit is visible in this function, so
 *	that presumably happens inside OS_Init() -- confirm.
 *
 *-----------------------------------------------------------------------
 */
void
Avail_Init(criteria, checkTime)
    Avail_Data	  *criteria;	    /* Initial criteria */
    int	    	  checkTime;	    /* Initial check interval, in seconds;
				     * 0 selects DEF_CHECK_TIME */
{
    /*
     * Load bias decay. availInterval is only borrowed here to describe
     * the decay period; it receives its real value below.
     */
    availInterval.tv_usec = 0;
    availInterval.tv_sec = LOADDECAY_TIME;
    decayEvent = Rpc_EventCreate(&availInterval, AvailDecayBias,
				 (Rpc_Opaque)0);

    /*
     * The OS module reports which criteria it is able to check.
     */
    availCheck = OS_Init();

    /*
     * Periodic availability report to the master.
     */
    availInterval.tv_usec = 0;
    availInterval.tv_sec = checkTime ? checkTime : DEF_CHECK_TIME;
    availEvent = Rpc_EventCreate(&availInterval, Avail_Send, (Rpc_Opaque)0);

    /*
     * No eviction is pending at startup.
     */
    evictEvent = (Rpc_Event)0;

    Rpc_ServerCreate(udpSocket, CUSTOMS_AVAILINTV, AvailSetInterval,
		     Swap_Timeval, Rpc_SwapNull, (Rpc_Opaque)0);
    Rpc_ServerCreate(udpSocket, CUSTOMS_SETAVAIL, AvailSet,
		     Swap_Avail, Swap_Avail, (Rpc_Opaque)TRUE);

    /*
     * Install the initial criteria through the regular path, as a local
     * (non-RPC) call, so that they get the same range checks.
     */
    AvailSet(&localAddr, (Rpc_Message)0, sizeof(Avail_Data), criteria,
	     FALSE);
    if (criteria->changeMask != 0) {
	xlog (XLOG_WARNING,
	      "Default availability criteria contain illegal values");
    }
}

/*-
 *-----------------------------------------------------------------------
 * Avail_Local --
 *	See if the local host is available for migration. The criteria
 *	are examined in a fixed order (idle time, exclusive use, server
 *	reachability, import quota, swap, processes, load) and the first
 *	violated one is reported.
 *
 * Results:
 *	0 if the host is available, else the AVAIL_* bit of the first
 *	criterion that wasn't satisfied. *ratingPtr is always set: to
 *	the rating of the host when it is available, otherwise to the
 *	value that violated the criterion (for AVAIL_SERVER, the server
 *	address in host byte order).
 *
 * Side Effects:
 *	When the host turns from idle to non-idle and evictDelay > 0 an
 *	eviction event is scheduled; when the idle test passes (or is
 *	not performed) a pending eviction event is deleted. If a server
 *	address is configured, a CUSTOMS_PING call is made to it. An
 *	avail_Bias of 0 is replaced by the default of 1.
 *
 *-----------------------------------------------------------------------
 */
int
Avail_Local(what, ratingPtr)
    int		what; 		/* Mask of things to check */
    Rpc_Long	*ratingPtr;	/* Place to store rating of current
				 * availability, or value of violated
				 * criterion. */
{
    static Boolean wasIdle = FALSE;
			/* Idle last time we checked ? */
    int freeCpus;	/* estimated number of unloaded CPUs, in `load' units */

    /*
     * Mask out bits the OS module says it can't check.
     */
    what &= availCheck;

    /*
     * If a minimum idle time was specified, check to make sure the
     * keyboard idle time exceeds that.
     * NOTE: It is important to check for idleness before the other 
     * criteria, because eviction processing depends on the result.
     */
    if ((what & AVAIL_IDLE) && minIdle) {
	long idleTime = OS_Idle(idleCrit);

	if (idleTime < minIdle) {
	    if (verbose) {
		xlog (XLOG_DEBUG,
			"Avail_Local: only %ld seconds idle (minimum %d)",
			idleTime, minIdle);
	    }
	    if (wasIdle) {
		/*
		 * First non-idle check after an idle one:
		 * schedule evictions, starting with the notification
		 * stage one second from now.
		 */
		wasIdle = FALSE;
		if (evictDelay > 0) {
		    evictInterval.tv_sec = 1;
		    evictInterval.tv_usec = 0;
		    evictEvent = Rpc_EventCreate(&evictInterval, AvailEvict,
		                                 (Rpc_Opaque)EVICT_NOTIFY);
		}
	    }
	    *ratingPtr = idleTime;
	    return AVAIL_IDLE;
	}
    }

    /*
     * Idle again -- abort any evictions.
     * NOTE(review): this point is also reached when the idle test was
     * skipped (AVAIL_IDLE not in `what', or minIdle == 0), so such a
     * call retracts a pending eviction as well.
     */
    wasIdle = TRUE;
    if (evictEvent) {
	if (verbose) {
	    xlog (XLOG_DEBUG, "Avail_Local: retracting eviction notice");
	}
	Rpc_EventDelete(evictEvent);
	evictEvent = (Rpc_Event)0;
    }

    /*
     * Check if host is in exclusive use (client has requested that it be
     * the sole user of this machine). This test does not depend on
     * `what'.
     */
    if (avail_Exclusive >= maxExclusives) {
	if (verbose) {
	    xlog (XLOG_DEBUG, "Avail_Local: in exclusive use");
	}
	/*
	 * Return number of exclusive jobs
	 */
	*ratingPtr = avail_Exclusive;
	return AVAIL_EXCLUSIVE;
    }

    /*
     * If a server host was specified, check that it's alive, to prevent
     * hanging on imports. For simplicity, we assume the server also runs
     * customs, so we ping it.
     */
    if (serverAddr.sin_addr.s_addr != 0) {
	int rstat;

	rstat = Rpc_Call(udpSocket, &serverAddr,
			 (Rpc_Proc)CUSTOMS_PING,
			 0, (Rpc_Opaque)0, 0, (Rpc_Opaque)0,
			 CUSTOMSINT_NRETRY, &retryTimeOut);
	if (rstat != RPC_SUCCESS) {
	    if (verbose) {
		xlog (XLOG_DEBUG,
			"Avail_Local: server ping: %s",
			Rpc_ErrorMessage(rstat));
	    }
	    /*
	     * Return the server address, so reginfo can be more
	     * informative. Note that the rating is byte-swapped, so
	     * the address needs to be transmitted in host order.
	     */
	    *ratingPtr = ntohl(serverAddr.sin_addr.s_addr);
	    return AVAIL_SERVER;
	}
    }

    /*
     * Check that we're not exceeding our import quota for maximum number
     * of simultaneous jobs.
     */
    if ((what & AVAIL_IMPORTS) && maxImports) {
        int nImports = Import_NJobs();

	if (nImports >= maxImports) {
	    *ratingPtr = nImports;
	    return AVAIL_IMPORTS;
	}
    }

    /*
     * Either the machine has been idle long enough or the user didn't
     * specify an idle time, so now, if the user gave a free swap space
     * percentage beyond which the daemon may not go, tally up the total
     * free blocks in the swap map and see if it's too few.
     */
    if ((what & AVAIL_SWAP) && minSwapFree) {
	int swapPct = OS_Swap();
	
	if (swapPct < minSwapFree) {
	    if (verbose) {
		xlog (XLOG_DEBUG, "Avail_Local: only %d%% free swap blocks",
			swapPct);
	    }
	    *ratingPtr = swapPct;
	    return AVAIL_SWAP;
	}
    }

    /*
     * A new criterion for availability is that a minimum additional number of
     * processes can still be created.
     */
    if ((what & AVAIL_PROC) && minProcFree) {
	int procs = OS_Proc();
	
	if (procs < minProcFree) {
	    if (verbose) {
		xlog (XLOG_DEBUG, "Avail_Local: only %d processes left",
			procs);
	    }
	    *ratingPtr = procs;
	    return AVAIL_PROC;
	}
    }

    /*
     * So far so good. Now if the user gave some maximum load average (note
     * that it can't be 0) which the daemon may not exceed, check the current
     * load to make sure it doesn't exceed the limit.
     */
    if ((what & AVAIL_LOAD) && maxLoad > 0) {
	int load = OS_Load() + (int)(avail_LoadBias * LOADSCALE);

	/*
	 * Avoid negative values due to bias
	 */
	if (load < 0) load = 0;

	/*
	 * load is non-negative here, so the conversion to maxLoad's
	 * unsigned type (which the comparison performs anyway) is exact.
	 */
	if ((unsigned long)load >= maxLoad) { /* avoid index 0, so make it >= */
	    if (verbose) {
		xlog (XLOG_DEBUG, "Avail_Local: load = %f (load bias = %.2f)",
			(double) load/LOADSCALE, avail_LoadBias);
	    }
	    *ratingPtr = load;
	    return AVAIL_LOAD;
	}
	/*
	 * We take the maximum load value to be an indication of the number of
	 * available CPUs. The current load value (after bias application)
	 * is an estimate of how many of those CPUs are busy.  Hence ...
	 */
	freeCpus = maxLoad - load;
    } else {
	/*
	 * If load is not an issue we rate the machine as if it had load 0
	 */
	freeCpus = LOADSCALE;
    }

    /*
     * The rating is proportional to the number of `free' CPUs and the fixed
     * bias for the machine.  Except that more than one free processor isn't
     * really helping a typical (single-threaded) application, so correct for
     * that.
     */
    if (freeCpus > LOADSCALE) {
	freeCpus = LOADSCALE;
    }
    if (avail_Bias == 0) {
	avail_Bias = 1;	/* default bias */
    }
    *ratingPtr = freeCpus * avail_Bias;

    /*
     * Great! This machine is available.
     */
    if (verbose) {
	/*
	 * Rpc_Long is a project typedef that need not be `long', so the
	 * value is converted explicitly to match the %ld conversion.
	 */
	xlog (XLOG_DEBUG,
		"Avail_Local: overall rating = %ld (bias factor = %d, load bias = %.2f)",
		(long)*ratingPtr, avail_Bias, avail_LoadBias);
    }

    return 0;
}
