
/*
 *         PVM version 3.3:  Parallel Virtual Machine System
 *               University of Tennessee, Knoxville TN.
 *           Oak Ridge National Laboratory, Oak Ridge TN.
 *                   Emory University, Atlanta GA.
 *      Authors:  A. L. Beguelin, J. J. Dongarra, G. A. Geist,
 *    W. C. Jiang, R. J. Manchek, B. K. Moore, and V. S. Sunderam
 *                   (C) 1992 All Rights Reserved
 *
 *                              NOTICE
 *
 * Permission to use, copy, modify, and distribute this software and
 * its documentation for any purpose and without fee is hereby granted
 * provided that the above copyright notice appear in all copies and
 * that both the copyright notice and this permission notice appear in
 * supporting documentation.
 *
 * Neither the Institutions (Emory University, Oak Ridge National
 * Laboratory, and University of Tennessee) nor the Authors make any
 * representations about the suitability of this software for any
 * purpose.  This software is provided ``as is'' without express or
 * implied warranty.
 *
 * PVM version 3 was funded in part by the U.S. Department of Energy,
 * the National Science Foundation and the State of Tennessee.
 */

/*
 *	startup.c
 *
 *	Exec more pvmds.  It's good for you.
 *
$Log: startup.c,v $
 * Revision 1.9  1994/10/15  19:31:31  manchek
 * fixed typo in log message
 *
 * Revision 1.8  1994/06/04  21:45:24  manchek
 * added unix domain sockets.
 * ripped out old (serial) startup code
 *
 * Revision 1.7  1994/06/03  20:38:27  manchek
 * version 3.3.0
 *
 * Revision 1.6  1993/11/30  19:54:41  manchek
 * check the default entry in filehosts when adding new hosts
 *
 * Revision 1.5  1993/11/30  16:46:10  manchek
 * pass whole remote command as a single arg to rsh
 *
 * Revision 1.4  1993/11/30  15:54:37  manchek
 * master pvmd once again doesn't close fds 0..2 -
 * this broke rexec startup
 *
 * Revision 1.3  1993/10/25  20:53:51  manchek
 * fixed a few typos in error log messages.
 * added code to close all fds and reopen 0..2 as /dev/null
 *
 * Revision 1.2  1993/10/04  20:30:30  manchek
 * mksocks() now uses pvmdsockfile() instead of TDSOCKNAME
 *
 * Revision 1.1  1993/08/30  23:26:51  manchek
 * Initial revision
 *
 */

#ifdef IMA_BSD386
#include <machine/endian.h>
#endif
#ifdef IMA_LINUX
#include <endian.h>
#endif
#ifdef IMA_TITN
#include <bsd/sys/types.h>
#else
#include <sys/types.h>
#endif
#include <sys/time.h>
#if defined(IMA_RS6K) || defined(IMA_POWER4)
#include <sys/select.h>
#endif
#include <sys/socket.h>
#ifndef NOUNIXDOM
#include <sys/un.h>
#endif
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <fcntl.h>
#include <stdio.h>
#include <errno.h>
#include <ctype.h>
#ifdef	SYSVSTR
#include <string.h>
#define	CINDEX(s,c)	strchr(s,c)
#else
#include <strings.h>
#define	CINDEX(s,c)	index(s,c)
#endif
#include <netdb.h>
#include <signal.h>

#include "global.h"
#include "fromlib.h"
#include "ddpro.h"
#include "protoglarp.h"
#include "pvmalloc.h"
#include "mesg.h"
#include "host.h"
#include "listmac.h"
#include "tvdefs.h"
#include "bfunc.h"

#ifndef	RSHCOMMAND
#define	RSHCOMMAND	"/usr/ucb/rsh"
#endif

#ifndef	RSHTIMEOUT
#define	RSHTIMEOUT	60
#endif

/* number of slots pl_startup() has, i.e. how many hosts it starts in parallel */

#ifndef	RSHNPLL
#define	RSHNPLL	5
#endif

#ifndef	max
#define	max(a,b)	((a)>(b)?(a):(b))
#endif

#ifndef	min
#define	min(a,b)	((a)<(b)?(a):(b))
#endif

#ifndef	SOMAXCONN
#define	SOMAXCONN	5
#endif


struct hst {
	int h_tid;
	char *h_name;
	char *h_login;
	char *h_sopts;
	int h_flag;
#define	HST_PASSWORD	1		/* ask for a password */
#define	HST_MANUAL		2		/* do manual startup */
	char *h_cmd;
	char *h_result;
};

#ifdef	NOTMPNAM
#define	TMPNAMFUN(x)	pvmtmpnam(x)
#define	LEN_OF_TMP_NAM	32
char *pvmtmpnam();

#else	/*NOTMPNAM*/
#define	TMPNAMFUN(x)	tmpnam(x)
#ifdef	L_tmpnam
#define	LEN_OF_TMP_NAM	L_tmpnam
#else
#define	LEN_OF_TMP_NAM	64
#endif
#endif	/*NOTMPNAM*/


/***************
 **  Globals  **
 **           **
 ***************/

char *getenv();

extern int errno;

extern char *inadport_hex();
extern void pvmbailout();
char *pvmdsockfile();
char *pvmgethome();

extern int debugmask;				/* from pvmd.c */
extern char *debugger;				/* from pvmd.c */
extern char **epaths;				/* from pvmd.c */
extern struct htab *filehosts;		/* from pvmd.c */
extern struct htab *hosts;			/* from pvmd.c */
extern char *loclsnam;				/* from pvmd.c */
extern int loclsock;				/* from pvmd.c */
#ifndef NOUNIXDOM
extern char *loclspath;				/* from pvmd.c */
#endif
extern int log_fd;					/* from logging.c */
extern int log_how;					/* from logging.c */
extern char *myarchname;			/* from pvmd.c */
extern int mytid;					/* from pvmd.c */
extern int myunixpid;				/* from pvmd.c */
extern int netsock;					/* from pvmd.c */
extern struct htab *oldhosts;		/* from pvmd.c */
extern int ourudpmtu;				/* from pvmd.c */
extern int ppnetsock;				/* from pvmd.c */
extern int runstate;				/* from pvmd.c */
extern int tidhmask;				/* from pvmd.c */
extern char *username;				/* from pvmd.c */


/***************
 **  Private  **
 **           **
 ***************/

static char rcsid[] = "$Id: startup.c,v 1.9 1994/10/15 19:31:31 manchek Exp $";
static char pvmtxt[1024];		/* scratch for error log */


/*	mksocs()
*
*	Make UDP sockets netsock and ppnetsock, make the local task socket
*	loclsock, and publish the local socket address.
*
*	netsock is bound to the address in the local host entry and
*	ppnetsock to the address in host entry 0.  Both are bound with
*	port 0 and the resulting address is read back into the host entry.
*
*	loclsock is a TCP socket (loopback address, falling back to the
*	local host address) when NOUNIXDOM is defined; otherwise it is a
*	Unix domain socket bound to a temporary file name, which is also
*	saved in loclspath.
*
*	The address text is written to the file named by pvmdsockfile()
*	(file name saved in loclsnam) and exported as envar PVMSOCK.
*
*	Returns 0 if ok,
*	else 2 if the address file already exists (pvmd already running?)
*	and OVERLOADHOST is not defined,
*	else 1.
*/

int
mksocs()
{
	struct hostd *hp = hosts->ht_hosts[hosts->ht_local];
	struct hostd *hp0 = hosts->ht_hosts[0];
	struct sockaddr_in sin;
	char buf[128];
	char *sfn;
	int d;
#ifndef NOSOCKOPT
	int bsz;
#endif
	char *p;
	int cc;
#ifndef NOUNIXDOM
	char spath[LEN_OF_TMP_NAM];	/* local socket path */
	struct sockaddr_un uns;
#endif

	/*
	* make pvmd-pvmd socket
	*/

	if ((netsock = socket(AF_INET, SOCK_DGRAM, 0)) == -1) {
		pvmlogperror("mksocs() socket netsock");
		return 1;
	}

	/* bind with port 0, then read the bound address back into the host entry */
	hp->hd_sad.sin_port = 0;
	if (bind(netsock, (struct sockaddr*)&hp->hd_sad, sizeof(hp->hd_sad)) == -1)
	{
		pvmlogperror("mksocs() bind netsock");
		return 1;
	}
	cc = sizeof(hp->hd_sad);
	if (getsockname(netsock, (struct sockaddr*)&hp->hd_sad, &cc) == -1) {
		pvmlogperror("mksocs() getsockname netsock");
		return 1;
	}

	/*
	* make pvmd-pvmd' socket
	*/

	if ((ppnetsock = socket(AF_INET, SOCK_DGRAM, 0)) == -1) {
		pvmlogperror("mksocs() socket ppnetsock");
		return 1;
	}

	/* same scheme as netsock, using host entry 0 */
	hp0->hd_sad.sin_port = 0;
	if (bind(ppnetsock, (struct sockaddr*)&hp0->hd_sad, sizeof(hp0->hd_sad))
	== -1) {
		pvmlogperror("mksocs() bind ppnetsock");
		return 1;
	}
	cc = sizeof(hp0->hd_sad);
	if (getsockname(ppnetsock, (struct sockaddr*)&hp0->hd_sad, &cc) == -1) {
		pvmlogperror("mksocs() getsockname ppnetsock");
		return 1;
	}

	/*
	* make pvmd-local task socket
	*/

#ifdef NOUNIXDOM
	if ((loclsock = socket(AF_INET, SOCK_STREAM, 0)) == -1) {
		pvmlogperror("mksocs() socket loclsock");
		return 1;
	}

	/*
	* first try localhost address (loopback) then regular address
	* XXX 127.0.0.1 is a hack, we should really gethostbyaddr()
	*/

	BZERO((char*)&sin, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(0x7f000001);
	sin.sin_port = 0;

	if (bind(loclsock, (struct sockaddr*)&sin, sizeof(sin)) == -1) {
		sin = hp->hd_sad;
		if (bind(loclsock, (struct sockaddr*)&sin, sizeof(sin)) == -1) {
			pvmlogperror("mksocs() bind loclsock");
			return 1;
		}
	}
	cc = sizeof(sin);
	if (getsockname(loclsock, (struct sockaddr*)&sin, &cc) == -1) {
		pvmlogperror("mksocs() getsockname loclsock");
		return 1;
	}

	if (listen(loclsock, SOMAXCONN) == -1) {
		pvmlogperror("mksocs() listen loclsock");
		return 1;
	}

	/*
	* set send and receive buffers of both UDP sockets to twice our MTU.
	* NOTE(review): this sits inside the NOUNIXDOM branch, so it is not
	* compiled when Unix domain sockets are used - confirm that is intended.
	*/
#ifndef NOSOCKOPT
	bsz = ourudpmtu * 2;
	if (setsockopt(netsock, SOL_SOCKET, SO_SNDBUF,
			(char*)&bsz, sizeof(bsz)) == -1
	|| setsockopt(netsock, SOL_SOCKET, SO_RCVBUF,
			(char*)&bsz, sizeof(bsz)) == -1
	|| setsockopt(ppnetsock, SOL_SOCKET, SO_SNDBUF,
			(char*)&bsz, sizeof(bsz)) == -1
	|| setsockopt(ppnetsock, SOL_SOCKET, SO_RCVBUF,
			(char*)&bsz, sizeof(bsz)) == -1) {
		pvmlogperror("mksocs() setsockopt");
		return 1;
	}
#endif /*NOSOCKOPT*/

	/* p is the address text published below */
	p = inadport_hex(&sin);

#else /*NOUNIXDOM*/
	if ((loclsock = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) {
		pvmlogperror("mksocs() socket loclsock");
		return 1;
	}

	BZERO((char*)&uns, sizeof(uns));
	uns.sun_family = AF_UNIX;
	spath[0] = 0;
	(void)TMPNAMFUN(spath);
	strcpy(uns.sun_path, spath);
/*
XXX len?  nothing checks that spath fits in uns.sun_path
*/

	if (bind(loclsock, (struct sockaddr*)&uns, sizeof(uns)) == -1) {
		pvmlogperror("mksocs() bind loclsock");
		return 1;
	}

	if (listen(loclsock, SOMAXCONN) == -1) {
		pvmlogperror("mksocs() listen loclsock");
		return 1;
	}

	/* keep a copy of the socket path; p is the address text published below */
	loclspath = STRALLOC(spath);
	p = spath;

#endif /*NOUNIXDOM*/

	/*
	* make pvmd-local task socket address file
	*/

	if (!(sfn = pvmdsockfile())) {
		pvmlogerror("mksocs() pvmdsockfile() failed\n");
		pvmbailout(0);
	}

	/* O_EXCL makes an existing address file show up as EEXIST */
	if ((d = open(sfn, O_CREAT|O_EXCL|O_WRONLY|O_TRUNC, 0600)) == -1) {
		if (errno == EEXIST) {
	/*
	* NOTE(review): with OVERLOADHOST defined this branch is empty, so we
	* carry on without writing the file or setting loclsnam.
	*/
#ifndef	OVERLOADHOST
			(void)sprintf(pvmtxt,
					"mksocs() %s exists.  pvmd already running?\n", sfn);
			pvmlogerror(pvmtxt);
			return 2;
#endif

		} else {
			pvmlogperror(sfn);
			pvmlogerror("mksocs() can't write address file\n");
			return 1;
		}

	} else {
		cc = write(d, p, strlen(p));
		if (cc != strlen(p)) {
			if (cc == -1) {
				pvmlogperror(sfn);
				pvmlogerror("mksocs() can't write address file\n");

			} else {
				(void)sprintf(pvmtxt,
						"mksocs() aargh, short write on %s: %d\n", sfn, cc);
				pvmlogerror(pvmtxt);
				pvmlogerror("mksocs() is the partition full?\n");
			}
			/* don't leave a partial address file behind */
			(void)close(d);
			(void)unlink(sfn);
			return 1;
		}
		loclsnam = STRALLOC(sfn);
		(void)close(d);
	}

	/* set PVMSOCK envar */

	/* the string handed to pvmputenv() is a fresh copy that is not freed here */
	sprintf(buf, "PVMSOCK=%s", p);
	p = STRALLOC(buf);
	pvmputenv(p);

	return 0;
}


/*	colonsep()
*
*	Split a string in place at every ':' character.
*	Each separator is overwritten with a null, so the caller's string
*	is modified and the pointers returned refer into it.
*	Returns a newly allocated, null-terminated array of pointers
*	to the pieces.
*/

char **
colonsep(s)
	char *s;	/* the string to break up */
{
	char **vec;				/* result vector */
	int cnt;				/* vector length, then fill index */
	char *cur;				/* start of current piece */
	char *sep;				/* next separator or null */

	/* one piece, plus one per ':', plus the terminating null entry */
	cnt = 2;
	cur = s;
	while ((sep = CINDEX(cur, ':')) != 0) {
		cnt++;
		cur = sep + 1;
	}
	vec = TALLOC(cnt, char*, "path");

	/* chop the string up, recording the start of each piece */
	cnt = 0;
	cur = s;
	while (cur) {
		sep = CINDEX(cur, ':');
		if (sep)
			*sep++ = 0;
		vec[cnt++] = cur;
		cur = sep;
	}
	vec[cnt] = 0;
	return vec;
}


/*	varsub()
*
*	Substitute environment variables into string.
*	Variables named by $NAME or ${NAME}, where NAME is a run of
*	letters, digits and underscores.
*	A reference to a variable that is not set is copied through as is.
*	The input string is written to (and restored) while each name is
*	looked up, so it must be writable.
*	Return string in new malloc'd space.
*/

char *
varsub(s)
	char *s;
{
	int rm = 8;		/* length of result string space */
	char *r;		/* result string */
	int rl = 0;		/* length of result so far */
	char *p;
	char *vn, *vv;	/* variable name and value */
	char c;
	int l;

	r = TALLOC(rm, char, "var");
	while (*s) {

		/* copy literal text up to the next '$' or end of string */

		for (p = s; *p && *p != '$'; p++) ;
		if ((l = p - s) != 0) {
			/* grow so there is always room for the text plus a null */
			if (rl + l >= rm) {
				rm = rl + l + 1;
				r = TREALLOC(r, rm, char);
			}
			strncpy(r + rl, s, l);
			rl += l;
		}
		s = p++;
		if (*s == '$') {
			if (*p == '{')
				p++;
			vn = p;
			/*
			* cast so that chars with the high bit set are not passed
			* to isalnum() as negative values
			*/
			while (isalnum((unsigned char)*p) || *p == '_')
				p++;

			/* terminate the name just long enough to look it up */
			c = *p;
			*p = 0;
			vv = getenv(vn);
			*p = c;
			if (*p == '}')
				p++;
			if (vv)
				l = strlen(vv);
			else {
				/* not set: pass the reference itself through */
				vv = s;
				l = p - s;
			}
			if (l) {
				if (rl + l >= rm) {
					rm = rl + l + 1;
					r = TREALLOC(r, rm, char);
				}
				strncpy(r + rl, vv, l);
				rl += l;
			}
			s = p;
		}
	}
	r[rl] = 0;
	return r;
}


/*	crunchzap()
*
*	Parse a string into words delimited by <> pairs.
*	A word is the text between a '<' and the next '>' (or the end of
*	the string if there is no '>').  Text outside <> pairs is skipped.
*	Max number of words is original value of *acp.
*
*	Trashes out the original string (each closing '>' becomes a null).
*	Returns 0 with av[0]..av[*acp - 1] pointing to the words.
*	Returns 1 if too many words; *acp is then the number stored in av.
*/

int
crunchzap(s, acp, av)
	char *s;		/* the string to parse */
	int *acp;		/* max words in, ac out */
	char **av;		/* pointers to args */
{
	int ac = 0;			/* words found so far */
	int n = *acp;		/* room in av */
	char *p = s;

	/* separate out words of command */

	while (*p) {
		/* skip to just past the next '<' */
		while (*p && *p++ != '<') ;
		if (*p) {
			if (ac >= n) {
	/* command too long */
				*acp = ac;
				return 1;
			}
			av[ac++] = p;
			while (*p && *p != '>')
				p++;
			if (*p)
				*p++ = 0;
		}
	}
	*acp = ac;
	return 0;
}


/*	master_config()
*
*	Master pvmd.  Config a host table with length 1.
*
*	Reads the host file named by argv[1], if given.  Makes a host
*	table holding entry 1 (this host, which is master, console and
*	local) and entry 0 ("pvmd'").  Takes this host's options from its
*	host file entry, or from host file entry 0 if it has none, then
*	sets epaths and debugger, changes to the working directory, looks
*	up our address and makes the sockets.
*
*	Bails out on bad args, failed name lookup or failed mksocs().
*	Leaves runstate at PVMDNORMAL.  Returns 0.
*/

master_config(hn, argc, argv)
	char *hn;			/* hostname or null */
	int argc;
	char **argv;
{
	struct hostent *he;
	struct hostd *hp;
	struct hostd *hp2;
	int i;
	char *s;

	if (argc > 2) {
		pvmlogerror("usage: pvmd3 [-ddebugmask] [-nhostname] [hostfile]\n");
		pvmbailout(0);
	}
	if (argc == 2) {
		filehosts = readhostfile(argv[1]);
	}
	if (debugmask & PDMSTARTUP) {
		if (filehosts) {
			pvmlogerror("master_config() host file:\n");
			ht_dump(filehosts);

		} else
			pvmlogerror("master_config() null host file\n");
	}

	hosts = ht_new(1);
	hosts->ht_serial = 1;
	hosts->ht_master = 1;
	hosts->ht_cons = 1;
	hosts->ht_local = 1;

	/* entry 1 is this host */
	/* NOTE(review): hn is documented as possibly null - confirm STRALLOC copes */
	hp = hd_new(1);
	hp->hd_name = STRALLOC(hn);
	hp->hd_arch = STRALLOC(myarchname);
	hp->hd_mtu = UDPMAXLEN;
	ht_insert(hosts, hp);
	hd_unref(hp);

	/* entry 0 is pvmd' */
	hp = hd_new(0);
	hp->hd_name = STRALLOC("pvmd'");
	hp->hd_arch = STRALLOC(myarchname);
	hp->hd_mtu = UDPMAXLEN;
	ht_insert(hosts, hp);
	hd_unref(hp);

	/*
	* get attributes from host file if available
	* (our own entry if there is one, else the entry in slot 0)
	*/

	hp = hosts->ht_hosts[1];
	if (filehosts &&
			((hp2 = nametohost(filehosts, hp->hd_name))
			|| (hp2 = filehosts->ht_hosts[0]))) {
		applydefaults(hp, hp2);
	}

	/* fill in whatever the host file didn't set, expanding envars */
	if (!hp->hd_epath)
		hp->hd_epath = STRALLOC(DEFBINDIR);
	epaths = colonsep(varsub(hp->hd_epath));
	if (!hp->hd_bpath)
		hp->hd_bpath = STRALLOC(DEFDEBUGGER);
	debugger = varsub(hp->hd_bpath);
	if (!hp->hd_wdir)
		hp->hd_wdir = STRALLOC(pvmgethome());
	s = varsub(hp->hd_wdir);
	/* a failed chdir is logged but is not fatal */
	if (chdir(s) == -1)
		pvmlogperror(s);
	PVM_FREE(s);

	/* look up hd_aname if set, else hd_name; note the message prints hn */
	if (!(he = gethostbyname(hp->hd_aname ? hp->hd_aname : hp->hd_name))) {
		sprintf(pvmtxt, "master_config() %s: can't gethostbyname\n", hn);
		pvmlogerror(pvmtxt);
		pvmbailout(0);
	}

	/* entries 1 and 0 both get the first address returned */
	BCOPY(he->h_addr_list[0], (char*)&hp->hd_sad.sin_addr,
		sizeof(struct in_addr));

	hp = hosts->ht_hosts[0];
	BCOPY(he->h_addr_list[0], (char*)&hp->hd_sad.sin_addr,
		sizeof(struct in_addr));

	if (mksocs())
		pvmbailout(0);

	/* print local socket address on stdout in case someone cares */

	printf("%s\n", getenv("PVMSOCK"));
	fflush(stdout);

	/* close everything but our sockets */

	/* only descriptors above 2 are closed here */
	for (i = getdtablesize(); --i > 2; )
/* XXX don't like this - hard to maintain */
		if (i != netsock && i != ppnetsock && i != loclsock && i != log_fd)
			(void)close(i);

	/* reopen 0, 1, 2*/

	/*
	* NOTE(review): 0..2 were not closed above, so if they are still open
	* these open() calls return new descriptors rather than 0 and 1, and
	* the dup2() points 2 at whatever 1 already is - confirm intent.
	*/
	(void)open("/dev/null", O_RDONLY, 0);
	(void)open("/dev/null", O_WRONLY, 0);
	(void)dup2(1, 2);

	/* clear bit 0 of log_how (presumably "log to stderr" - see logging.c) */
	log_how &= ~1;

	runstate = PVMDNORMAL;
	return 0;
}


/*	slave_config()
*
*	Slave pvmd being started by master.  Trade minimal config info
*	so we can send packets back and forth.
*
*	Switches:
*		-S	manual startup, don't wait for a byte on stdin
*		-f	don't fork
*	Other args, after switches are removed:
*		argv[1]	master host index
*		argv[2]	master address, as read by hex_inadport()
*		argv[3]	master mtu
*		argv[4]	our host index
*		argv[5]	our address, as read by hex_inadport()
*
*	Prints one line "ddpro<..> arch<..> ip<..> mtu<..>" on stdout, or
*	"PvmDupHost" if mksocs() says a pvmd is already running.
*	Bails out on bad args or failed mksocs().
*	Leaves runstate at PVMDSTARTUP.  Returns 0.
*/

slave_config(hn, argc, argv)
	char *hn;
	int argc;
	char **argv;
{
	int lh;			/* local host index */
	int mh;			/* master host index */
	struct hostd *hp;
	int i, j;
	int ac;
	int ms = 0;		/* manual (humanoid) startup */
	int dof = 1;	/* fork, exit parent (default) */
	int bad = 0;
	char *p;

	/* pick out switches, packing the remaining args down in argv */
	for (i = j = ac = 1; i < argc; i++) {
		if (argv[i][0] == '-') {
			switch (argv[i][1]) {

			case 'S':
				ms = 1;
				break;

			case 'f':
				dof = 0;
				break;

			default:
				sprintf(pvmtxt, "slave_config() unknown switch: %s\n", argv[i]);
				pvmlogerror(pvmtxt);
				bad++;
			}

		} else {
			argv[j++] = argv[i];
			ac++;
		}
	}
	argc = ac;

	if (bad || argc != 6) {
		pvmlogerror("slave_config: bad args\n");
		pvmbailout(0);
	}

	mh = atoi(argv[1]);
	lh = atoi(argv[4]);
	hosts = ht_new(1);
	hosts->ht_serial = 1;
	hosts->ht_master = mh;
	hosts->ht_cons = mh;
	hosts->ht_local = lh;

	/* master entry: name not known yet, address and mtu from args */
	hp = hd_new(mh);
	hp->hd_name = STRALLOC("?");
	hex_inadport(argv[2], &hp->hd_sad);
	hp->hd_mtu = atoi(argv[3]);
	ht_insert(hosts, hp);
	hd_unref(hp);

	/* pvmd' entry: gets our own address */
	hp = hd_new(0);
	hp->hd_name = STRALLOC("pvmd'");
	hp->hd_arch = STRALLOC(myarchname);
	hp->hd_mtu = UDPMAXLEN;
	hex_inadport(argv[5], &hp->hd_sad);
	ht_insert(hosts, hp);
	hd_unref(hp);

	/* our entry; hp is still used below after hd_unref() */
	hp = hd_new(lh);
	hp->hd_name = STRALLOC(hn);
	hp->hd_arch = STRALLOC(myarchname);
	hp->hd_mtu = UDPMAXLEN;
	hex_inadport(argv[5], &hp->hd_sad);
	ht_insert(hosts, hp);
	hd_unref(hp);

	if (i = mksocs()) {
		if (i == 2) {
			printf("PvmDupHost\n");
			fflush(stdout);
		}
		pvmbailout(0);
	}

	/* report our config; hd_sad now holds the address mksocs() bound */
	printf("ddpro<%d> arch<%s> ip<%s> mtu<%d>\n",
		DDPROTOCOL,
		myarchname,
		inadport_hex(&hp->hd_sad),
		ourudpmtu);
	fflush(stdout);

	/* read one byte from stdin; the value and any error are ignored */
	if (!ms)
		(void)read(0, (char*)&i, 1);

	if (dof) {
		/* parent exits, also when fork fails; only the child continues */
		if (i = fork()) {
			if (i == -1)
				pvmlogperror("slave_config() fork");
			exit(0);
		}

	/* close everything but our sockets */

		/* unlike the master, this includes 0..2 (and ppnetsock) */
		for (i = getdtablesize(); --i >= 0; )
/* XXX don't like this - hard to maintain */
			if (i != netsock && i != loclsock && i != log_fd)
				(void)close(i);
	}

	/* reopen 0, 1, 2*/

	/* NOTE(review): with -f nothing was closed above - confirm intent */
	(void)open("/dev/null", O_RDONLY, 0);
	(void)open("/dev/null", O_WRONLY, 0);
	(void)dup2(1, 2);

	/* clear bit 0 of log_how (presumably "log to stderr" - see logging.c) */
	log_how &= ~1;

	/* defaults are copied first because varsub() writes to its argument */
	p = STRALLOC(DEFBINDIR);
	epaths = colonsep(varsub(p));
	PVM_FREE(p);
	p = STRALLOC(DEFDEBUGGER);
	debugger = varsub(p);
	PVM_FREE(p);
	runstate = PVMDSTARTUP;
	return 0;
}


int
hoster(mp)
	struct mesg *mp;
{
	struct mesg *mp2;
	int num;
	int i;
	struct hst **hostlist;
	struct hst *hp;
	char *p;

	/*
	* unpack the startup message
	*/

	upkint(mp, &num);
	if (debugmask & PDMSTARTUP) {
		sprintf(pvmtxt, "hoster() %d to start\n", num);
		pvmlogerror(pvmtxt);
	}
	if (num > 0) {
		hostlist = TALLOC(num, struct hst *, "xxx");
		for (i = 0; i < num; i++) {
			hp = TALLOC(1, struct hst, "xxx");
			hostlist[i] = hp;
			hp->h_flag = 0;
			hp->h_result = 0;
			if (upkint(mp, &hp->h_tid)
			|| upkstralloc(mp, &hp->h_sopts)
			|| upkstralloc(mp, &hp->h_login)
			|| upkstralloc(mp, &hp->h_cmd)) {
				pvmlogerror("hoster() bad message format\n");
				pvmbailout(0);
			}
			if (debugmask & PDMSTARTUP) {
				sprintf(pvmtxt, "%d. t%x %s so=\"%s\"\n", i,
						hp->h_tid,
						hp->h_login,
						hp->h_sopts);
				pvmlogerror(pvmtxt);
			}
			if (p = CINDEX(hp->h_login, '@')) {
				hp->h_name = STRALLOC(p + 1);
				*p = 0;
				p = STRALLOC(hp->h_login);
				PVM_FREE(hp->h_login);
				hp->h_login = p;

			} else {
				hp->h_name = hp->h_login;
				hp->h_login = 0;
			}
			if (!strcmp(hp->h_sopts, "pw"))
				hp->h_flag |= HST_PASSWORD;
			if (!strcmp(hp->h_sopts, "ms"))
				hp->h_flag |= HST_MANUAL;
		}
	}

	/*
	* do it
	*/

	pl_startup(num, hostlist);

	/*
	* send results back to pvmd
	*/

	mp2 = mesg_new(0);
	mp2->m_dst = mp->m_src;
	mp2->m_cod = DM_STARTACK;
	mp2->m_wid = mp->m_wid;
	pkint(mp2, num);
	for (i = 0; i < num; i++) {
		pkint(mp2, hostlist[i]->h_tid);
		pkstr(mp2, hostlist[i]->h_result
				? hostlist[i]->h_result : "PvmDSysErr");
	}
	if (debugmask & PDMSTARTUP)
		pvmlogerror("hoster() pvmd' sending back host table\n");
	sendmessage(mp2);
	work();		/* no return */
	return 0;	/* not reached */
}


/********************************************
*  this is the new (parallel) startup code  *
*                                           *
********************************************/

/*
*	Context for one host startup in progress.
*	A slot is on either the free list or the active list.
*/
struct slot {
	struct slot *s_link, *s_rlink;		/* free/active list */
	struct hst *s_hst;					/* host table entry */
	struct timeval s_bail;				/* timeout time */
	int s_rfd, s_wfd, s_efd;			/* slave stdin/out/err */
	char s_buf[256];					/* config reply line */
	int s_len;							/* length of s_buf */
};


/* RSHNPLL usable slots, plus two that serve as the active and free list heads */
static struct slot slots[RSHNPLL+2];	/* state var/context for each slot */
static struct slot *slfree = 0;			/* free list of slots */

/*	close_slot()
*
*	Close the descriptors of a slot, take it off the list it is on
*	and put it back on the free list.
*	Returns 0.
*/

int
close_slot(sp)
	struct slot *sp;
{
	if (sp->s_wfd != -1)
		(void)close(sp->s_wfd);

	/* rexec startup uses one descriptor for both directions; close it once */
	if (sp->s_rfd != -1 && sp->s_rfd != sp->s_wfd)
		(void)close(sp->s_rfd);
	if (sp->s_efd != -1)
		(void)close(sp->s_efd);

	/* don't leave stale descriptor numbers in a free slot */
	sp->s_wfd = sp->s_rfd = sp->s_efd = -1;

	LISTDELETE(sp, s_link, s_rlink);
	LISTPUTBEFORE(slfree, sp, s_link, s_rlink);
	return 0;
}


/*	pl_startup()
*
*	Start slave pvmds on the hosts in hostlist, up to RSHNPLL at a time.
*
*	Each host is given a slot and started with phase1().  If that
*	leaves a result (error, or manual startup done) the slot is freed
*	at once.  Otherwise the slot goes on the active list and we wait,
*	for at most RSHTIMEOUT seconds, for a line on the slave's stdout.
*	That line, less its newline, becomes the host's h_result.
*	On timeout, read error, EOF or an overlong line the result is
*	"PvmCantStart".  Whatever arrives on a slave's stderr is logged,
*	prefixed by the host name.
*
*	Returns 0 when every host has been tried.
*/

int
pl_startup(num, hostlist)
	int num;
	struct hst **hostlist;
{
	int nxth = 0;						/* next host in list to start */
	struct slot *slact = 0;				/* active list of slots */
	struct hst *hp;
	struct slot *sp, *sp2;
	struct timeval tnow;
	struct timeval tout;
	fd_set rfds;
	int nfds;
	int i;
	int n;
	char *p;
	char ebuf[256];						/* for reading stderr */

	/* init slot free list */

	slfree = &slots[RSHNPLL+1];
	slfree->s_link = slfree->s_rlink = slfree;
	slact = &slots[RSHNPLL];
	slact->s_link = slact->s_rlink = slact;
	for (i = RSHNPLL; i-- > 0; ) {
		LISTPUTAFTER(slfree, &slots[i], s_link, s_rlink);
	}

	/*
	* keep at this until all hosts in table are completed
	*/

	for (; ; ) {

		/*
		* if empty slots, start on new hosts
		*/

		for (; ; ) {

			/* find a host for slot */

			if (slfree->s_link != slfree && nxth < num)
				hp = hostlist[nxth++];
			else
				break;

			sp = slfree->s_link;
			LISTDELETE(sp, s_link, s_rlink);
			sp->s_hst = hp;
			sp->s_len = 0;
			if (debugmask & PDMSTARTUP) {
				sprintf(pvmtxt, "pl_startup() trying %s\n", hp->h_name);
				pvmlogerror(pvmtxt);
			}
			phase1(sp);
			if (hp->h_result) {
				/* error or fully started (manual startup) */

				LISTPUTBEFORE(slfree, sp, s_link, s_rlink);

			} else {
				/* partially started */

				LISTPUTBEFORE(slact, sp, s_link, s_rlink);
				gettimeofday(&sp->s_bail, (struct timezone*)0);
				tout.tv_sec = RSHTIMEOUT;
				tout.tv_usec = 0;
				TVXADDY(&sp->s_bail, &sp->s_bail, &tout);
			}
		}

		/* if no hosts in progress, we are finished */

		if (slact->s_link == slact)
			break;

		/*
		* until next timeout, get output from any slot
		*/

		FD_ZERO(&rfds);
		nfds = 0;
		TVCLEAR(&tout);
		gettimeofday(&tnow, (struct timezone*)0);
		for (sp = slact->s_link; sp != slact; sp = sp->s_link) {
			if (TVXLTY(&sp->s_bail, &tnow)) {
				sprintf(pvmtxt, "pl_startup() %s timed out after %d secs\n",
						sp->s_hst->h_name, RSHTIMEOUT);
				pvmlogerror(pvmtxt);
				sp->s_hst->h_result = STRALLOC("PvmCantStart");

				/* step back first so the loop carries on from our neighbor */
				sp2 = sp->s_rlink;
				close_slot(sp);
				sp = sp2;
				continue;
			}

			/* tout collects the earliest bail time of the active slots */
			if (!TVISSET(&tout) || TVXLTY(&sp->s_bail, &tout))
				tout = sp->s_bail;
			if (sp->s_rfd >= 0)
				FD_SET(sp->s_rfd, &rfds);
			if (sp->s_rfd > nfds)
				nfds = sp->s_rfd;
			if (sp->s_efd >= 0)
				FD_SET(sp->s_efd, &rfds);
			if (sp->s_efd > nfds)
				nfds = sp->s_efd;
		}

		/*
		* every active slot just timed out.  go around again rather than
		* quit, because there may be hosts left that now have a free slot.
		* the check at the top of the loop ends things if there are none.
		*/

		if (slact->s_link == slact)
			continue;

		nfds++;

		/* make tout relative to now, or zero if already past */

		if (TVXLTY(&tnow, &tout)) {
			TVXSUBY(&tout, &tout, &tnow);
		} else {
			TVCLEAR(&tout);
		}
		if (debugmask & PDMSTARTUP) {
			sprintf(pvmtxt, "pl_startup() select timeout is %ld.%06ld\n",
					(long)tout.tv_sec, (long)tout.tv_usec);
			pvmlogerror(pvmtxt);
		}
		if ((n = select(nfds, &rfds, (fd_set*)0, (fd_set*)0, &tout)) == -1) {
			if (errno != EINTR) {
				pvmlogperror("pl_startup() select");
				pvmbailout(0);	/* XXX this is too harsh */
			}
		}
		if (debugmask & PDMSTARTUP) {
			(void)sprintf(pvmtxt, "pl_startup() select returns %d\n", n);
			pvmlogerror(pvmtxt);
		}

		/* timeout or interrupted: go around to check the bail times */

		if (n < 1)
			continue;

		/*
		* check for response on stdout or stderr of any slave.
		*/

		for (sp = slact->s_link; sp != slact; sp = sp->s_link) {

			/*
			* stdout ready.  get complete line then scan config info from it.
			*/
			if (sp->s_rfd >= 0 && FD_ISSET(sp->s_rfd, &rfds)) {

				/* always leave room in s_buf for the terminating null */

				n = read(sp->s_rfd, sp->s_buf + sp->s_len,
						(int)sizeof(sp->s_buf) - 1 - sp->s_len);
				if (n > 0) {
					sp->s_len += n;
					sp->s_buf[sp->s_len] = 0;

					/* only the part just read can hold the newline */

					if ((p = CINDEX(sp->s_buf + sp->s_len - n, '\n')) != 0) {
						if (debugmask & PDMSTARTUP) {
							sprintf(pvmtxt, "pvmd@%s: %s",
									sp->s_hst->h_name, sp->s_buf);
							pvmlogerror(pvmtxt);
						}
						*p = 0;
						sp->s_hst->h_result = STRALLOC(sp->s_buf);

					} else if (sp->s_len >= (int)sizeof(sp->s_buf) - 1) {
						/* buffer is full and still no end of line */

						sprintf(pvmtxt, "pl_startup() pvmd@%s: big read\n",
								sp->s_hst->h_name);
						pvmlogerror(pvmtxt);
						sp->s_hst->h_result = STRALLOC("PvmCantStart");
					}

				} else {
					if (n) {
						sprintf(pvmtxt, "pl_startup() pvmd@%s",
								sp->s_hst->h_name);
						pvmlogperror(pvmtxt);
					} else {
						sprintf(pvmtxt, "pl_startup() pvmd@%s: EOF\n",
								sp->s_hst->h_name);
						pvmlogerror(pvmtxt);
					}
					sp->s_hst->h_result = STRALLOC("PvmCantStart");
				}

				/* host is finished one way or the other, give up the slot */

				if (sp->s_hst->h_result) {
					sp2 = sp->s_rlink;
					close_slot(sp);
					sp = sp2;
					continue;
				}
			}

			/*
			* response on stderr.  log prefixed by remote's host name.
			*/
			if (sp->s_efd >= 0 && FD_ISSET(sp->s_efd, &rfds)) {
				if ((n = read(sp->s_efd, ebuf, sizeof(ebuf)-1)) > 0) {
					char *ep = ebuf, *q, c;

					ebuf[n] = 0;
					sprintf(pvmtxt, "pvmd@%s: ", sp->s_hst->h_name);
					q = pvmtxt + strlen(pvmtxt);

					/* strip high bit, show unprintable chars as ^X */

					while ((c = *ep++ & 0x7f) != 0) {
						if (isprint(c))
							*q++ = c;

						else {
							*q++ = '^';
							*q++ = (c + '@') & 0x7f;
						}
					}
					*q++ = '\n';
					*q = 0;
					pvmlogerror(pvmtxt);

				} else {
					/* EOF or error on stderr: stop watching it */

					(void)close(sp->s_efd);
					sp->s_efd = -1;
				}
			}
		}
	}
	return 0;
}


/*	phase1()
*
*	Begin startup of the slave pvmd for the host in slot sp.
*
*	Manual startup (HST_MANUAL): print the command on stderr, read
*	the reply line typed on stdin and store it as the host's h_result.
*	Otherwise run the host's command with rsh or, if HST_PASSWORD is
*	set, with rexec(), and leave the descriptors for the slave's
*	stdin/stdout/stderr in s_wfd/s_rfd/s_efd.  With rexec, s_wfd and
*	s_rfd are the same descriptor.
*
*	Returns 0 if ok (h_result is still null unless startup was manual),
*	else 1 with h_result set to "PvmCantStart" and no descriptors
*	left open in the slot.
*	Bails out if fork() fails or the exec service can't be looked up.
*/

int
phase1(sp)
	struct slot *sp;
{
	struct hst *hp;
	char *hn;
	char *av[16];			/* for rsh args */
	int ac;
	char buf[512];
	int pid = -1;			/* pid of rsh */
	char *p;

#ifndef NOREXEC
	struct servent *se;
	static u_short execport = 0;

	/* look up the exec port the first time through */

	if (!execport) {
		if (!(se = getservbyname("exec", "tcp"))) {
			sprintf(pvmtxt, "phase1() can't getservbyname(): %s\n", "exec");
			pvmlogerror(pvmtxt);
			pvmbailout(0);
		}
		execport = se->s_port;
		endservent();
	}
#endif

	hp = sp->s_hst;
	hn = hp->h_name;
	sp->s_rfd = sp->s_wfd = sp->s_efd = -1;

	/*
	* XXX manual startup hack... this is if we can't use rexec or rsh
	*/

	if (hp->h_flag & HST_MANUAL) {
		fprintf(stderr, "*** Manual startup ***\n");
		fprintf(stderr, "Login to \"%s\" and type:\n", hn);
		fprintf(stderr, "%s\n", hp->h_cmd);

	/* get version */

		fprintf(stderr, "Type response: ");
		fflush(stderr);
		if (!(fgets(buf, sizeof(buf), stdin))) {
			sprintf(pvmtxt, "host %s read error\n", hn);
			pvmlogerror(pvmtxt);
			goto oops;
		}

		/* strip the newline; check length so we never look before buf */

		p = buf + strlen(buf);
		if (p > buf && p[-1] == '\n')
			p[-1] = 0;
		hp->h_result = STRALLOC(buf);
		fprintf(stderr, "Thanks\n");
		fflush(stderr);
		return 0;
	}

	/*
	* XXX end manual startup hack
	*/

	if (!(hp->h_flag & HST_PASSWORD)) {		/* use rsh to start */
		int wpfd[2], rpfd[2], epfd[2];
		int npairs;			/* descriptor pairs made so far */
		int i;

		if (debugmask & PDMSTARTUP) {
			sprintf(pvmtxt, "phase1() trying rsh to %s\n", hn);
			pvmlogerror(pvmtxt);
		}

	/* fork an rsh to startup the slave pvmd */

	/* make the pairs in order, stopping at the first failure */

#ifdef	IMA_TITN
		npairs = (socketpair(AF_UNIX, SOCK_STREAM, 0, wpfd) == -1) ? 0
				: (socketpair(AF_UNIX, SOCK_STREAM, 0, rpfd) == -1) ? 1
				: (socketpair(AF_UNIX, SOCK_STREAM, 0, epfd) == -1) ? 2 : 3;
#else
		npairs = (pipe(wpfd) == -1) ? 0
				: (pipe(rpfd) == -1) ? 1
				: (pipe(epfd) == -1) ? 2 : 3;
#endif
		if (npairs < 3) {
#ifdef	IMA_TITN
			pvmlogperror("phase1() socketpair");
#else
			pvmlogperror("phase1() pipe");
#endif

			/* close the pairs that were made so they don't leak */

			if (npairs > 0) {
				(void)close(wpfd[0]);
				(void)close(wpfd[1]);
			}
			if (npairs > 1) {
				(void)close(rpfd[0]);
				(void)close(rpfd[1]);
			}
			goto oops;
		}

		if (debugmask & PDMSTARTUP) {
			sprintf(pvmtxt, "phase1() pipes: %d %d %d %d %d %d\n",
					wpfd[0], wpfd[1], rpfd[0], rpfd[1], epfd[0], epfd[1]);
			pvmlogerror(pvmtxt);
		}

		if ((pid = fork()) == -1) {
			pvmlogperror("phase1() fork");
			pvmbailout(0);
		}
		if (!pid) {

			/* child: pipes become 0..2, everything else is closed */

			(void)dup2(wpfd[0], 0);
			(void)dup2(rpfd[1], 1);
			(void)dup2(epfd[1], 2);
			for (i = getdtablesize(); --i > 2; )
				(void)close(i);
			ac = 0;
			av[ac++] = RSHCOMMAND;
			av[ac++] = hn;
			if (hp->h_login) {
				av[ac++] = "-l";
				av[ac++] = hp->h_login;
			}

			/* whole remote command goes to rsh as a single arg */

			av[ac++] = hp->h_cmd;
			av[ac++] = 0;
			if (debugmask & PDMSTARTUP) {
				for (ac = 0; av[ac]; ac++)
					fprintf(stderr, "av[%d]=\"%s\" ", ac, av[ac]);
				fputc('\n', stderr);
			}
			execvp(av[0], av);
			fputs("phase1() execvp failed\n", stderr);
			fflush(stderr);
			_exit(1);
		}

		/* parent: keep our ends of the pipes, close the child's */

		(void)close(wpfd[0]);
		(void)close(rpfd[1]);
		(void)close(epfd[1]);
		sp->s_wfd = wpfd[1];
		sp->s_rfd = rpfd[0];
		sp->s_efd = epfd[0];

	} else {		/* use rexec to start */

#ifdef NOREXEC
		sprintf(pvmtxt, "phase1() sorry, no rexec()\n");
		pvmlogerror(pvmtxt);
		goto oops;
#else
		if (debugmask & PDMSTARTUP) {
			sprintf(pvmtxt, "phase1() rexec \"%s\"\n", hp->h_cmd);
			pvmlogerror(pvmtxt);
		}

		/* null password arg; one descriptor serves for both read and write */

		if ((sp->s_wfd = sp->s_rfd = rexec(&hn, execport,
				(hp->h_login ? hp->h_login : username),
				(char*)0, hp->h_cmd, &sp->s_efd))
		== -1) {
			sprintf(pvmtxt, "phase1() rexec failed for host %s\n", hn);
			pvmlogerror(pvmtxt);
			goto oops;
		}
#endif
	}
	return 0;

oops:
	hp->h_result = STRALLOC("PvmCantStart");
	if (sp->s_wfd != -1)
		close(sp->s_wfd);

	/* s_rfd may be the same descriptor as s_wfd; don't close it twice */

	if (sp->s_rfd != -1 && sp->s_rfd != sp->s_wfd)
		close(sp->s_rfd);
	if (sp->s_efd != -1)
		close(sp->s_efd);
	sp->s_wfd = sp->s_rfd = sp->s_efd = -1;
	return 1;
}


