/* This program compares two text files and displays the
   differences. Finding the end of a difference between the
   files requires some arbitrary rules since we don't want to
   generate a series of small difference reports just because
   the old and new text have a few words in common.

   To avoid this problem, comparisons are on a "line" basis
   and the user can specify the line delimiter character and
   how many lines must match to end a difference. The end of 
   file mark is also a line delimiter, but is not considered
   part of the line.  The null character is used as an end of
   buffer mark and is converted to endfile if read from a file.

   Note that the last line in a buffer may be incomplete
   but will be treated as complete.

   Since linefeeds and the delimiter are converted by the output routine to CR-LF pairs,
   to avoid multiple linefeeds or carriage returns, the user may
   specify one character code to be deleted from all file input.

   The CP/M command line has the format:
	DF filea fileb options
   where filea and fileb may include drives as in b:myname.doc and
   where options consist of key letters followed immediately by a decimal number.
   Valid options are:
	D decimal value of delimiter character
	M number of lines which must match to end an area of difference
	I decimal value of character code to be deleted on input.
   Defaults are linefeed, 2, carriage return.

   Although normal, small areas of difference are processed quickly,
   large differences can take several minutes to analyze. Be patient.
CHANGE HISTORY:
1.2	Changed  calling args of form iob[] to &iob[].
	This was caught by the 1.31+ BDS C compiler.
1.3	Recompiled with corrected movmem.
1.3	Strip parity from input files in case built by Wordstar.
1.4	Adapt buffered file i/o to BDS-C 1.4.
1.5	Recompile by BDS C 1.4+.
*/

#include "bdscio.h"

/* Program version and date; main() prints it in the sign-on banner. */
#define VERSION "1.5 10/05/80"

/* End of file mark used throughout this program (hex 1A).
   getc2() returns this value when getc() yields zero or a negative
   value, and main() loops until this byte is at the top of both
   text buffers. */
#define EOF 0x1a	/* end of file mark */
/* Usable size of each text buffer.  The buffers are declared with
   one extra byte (MAXDIFF+1) which holds the NULL end of buffer
   mark when a buffer is completely full. */
#define MAXDIFF 8192	/* difference buffer size */

/* Program entry point.
   Prints the sign-on banner, prints the help text and exits when
   fewer than two file names are given, opens both files, reads the
   option arguments, and then alternates between skipml() (discard
   matching lines) and processmm() (report one area of difference)
   until the end of file mark is at the top of both buffers.
*/
main(argc, argv)
int argc;	/* Number of arguments passed from CP/M. The first is garbage. */
char *argv[];	/* array of pointers to argument strings */
{
	/* Storage declarations */
	struct _buf iob[2];		/* File buffers, one per input file */
	char buff[2][MAXDIFF+1];	/* Text difference buffers with delimiter bytes at ends */
	int arg, file;
	char *p;
	int nm;				/* Number of matches arg */
	char delim;			/* Line delimiter */
	char ignore;			/* Input character to be ignored */
	int topline[2];			/* File line number of the top line of each buffer */
	int n;				/* Miscellaneous */

	/* Set default arguments */
	nm = 2;		/* Number of matches to resynchronize */
	delim = '\n';	/* Line delimiter (linefeed) */
	ignore = 0x0D;	/* Ignore carriage returns in file input */

	/* Sign-on banner */
	puts("DIFFERENCE DISPLAY FOR PRINTABLE FILES - VERSION ");
	puts(VERSION);
	puts("\nBY:\tRICHARD GREENLAW\n\t251 COLONY CT.\n\tGAHANNA, OHIO 43230\n(OMIT ARGUMENTS FOR HELP)\n");

	/* Two file names are mandatory; otherwise show help and quit
	   (error() does not return) */
	if(argc < 3) {
		puts("USEAGE:\nDF FILEA FILEB OPTIONS\n");
		puts("WHERE FILES CAN SPECIFY DRIVES AND\nOPTIONS ARE LETTER CODES FOLLOWED BY DECIMAL NUMBERS:\n");
		puts("\tD\tLINE DELIMITER CHARACTER (DEFAULT=LF)\n");
		puts("\tI\tINPUT CHARACTER TO BE IGNORED (DEFAULT=CR)\n");
		puts("\tM\tNUMBER OF MATCHING LINES TO END DIFFERENCE (2)\n");
		error("EXAMPLE: DF A:XY.C B:XY.BAK M3 I10 D13");
	}

	/* Open the files specified in the CP/M command line */
	for(file = 0; file < 2; ++file) {
		/* Protect against wild card names.  Both CP/M wild card
		   characters are refused: '?' and '*'. */
		for(p = argv[file+1]; *p; ++p)
			if(*p == '?' || *p == '*')
				error("AMBIGUOUS FILE NAME NOT ALLOWED\n");
		/* Try to open it */
		if(fopen(argv[file+1], &iob[file]) < 0) {
			puts(argv[file+1]);
			error(" WON'T OPEN\n");
		}
	}

	/* Process options, if any, from command line.
	   Format is a key letter followed immediately by a decimal
	   number in the range 1 to 128. */
	for(arg = 3; arg < argc; ++arg) {
		n = atoi(argv[arg] + 1);	/* the number follows the key letter */
		if(n <= 0 || n > 128)
			error("BAD OPTION VALUE\n");
		switch(*argv[arg]) {
		case 'M':	/* Number of line matches to require */
			nm = n;
			break;
		case 'D':	/* Line delimiter character code */
			delim = n;
			break;
		case 'I':	/* Input character to be ignored */
			ignore = n;
			break;
		default:
			error("BAD OPTION CODE\n");
		}
	}

	/* Mark the buffers empty (NULL at the top) and put the permanent
	   end of buffer mark in the extra byte after each buffer */
	buff[0][0] = buff[1][0] = buff[0][MAXDIFF] = buff[1][MAXDIFF]= '\0';
	/* Initialize line numbers corresponding to tops of buffers */
	topline[0] = topline[1] = 1;

	/* Process until both end of files are at top of buffers */
	while(buff[0][0] != EOF || buff[1][0] != EOF) {

		/* Stream text through the buffers, skipping matching lines,
		   until a mismatch or endfiles are at the top.
		   This fills or loads endfile into each buffer. */
		n = skipml(buff, iob, delim, ignore);

		/* The same number of lines was skipped in both files */
		topline[0] += n;
		topline[1] += n;

		/* Find where the files agree after the difference,
		   report the differences, and move the agreeing line
		   groups (or the endfiles) to the tops of the buffers */
		if(buff[0][0] != EOF || buff[1][0] != EOF)
			/* There is a real mismatch at the top */
			processmm(buff, delim, nm, topline, argv);
	}
	puts("-------------------END OF COMPARISONS---------------------------\n");
}

/* Skip matching lines.
   Text is streamed through both buffers for as long as the lines at
   the tops of the buffers match, or while the buffers are still
   empty.  On each pass the matching lines are counted, the unmatched
   remainder of each buffer is slid to the top, and the freed space is
   refilled from the file unless that buffer already holds an endfile.

   NULL (0) delimits the end of buffer; when a buffer is completely
   full the NULL is the extra byte just past the buffer.

   Returns the number of lines skipped because they matched.
*/

skipml(buff, iob, delim, ignore)
char buff[2][MAXDIFF+1], delim, ignore;
struct _buf iob[2];
{
	int matched, which, nskipped;
	char *limit, *dst, *src, *top0, *top1;

	nskipped = 0;

	for(;;) {
		/* Done when the top lines differ (or hold endfile),
		   unless the buffers have not been loaded yet */
		if(!cmpl(buff[0], buff[1], delim) && buff[0][0] != '\0')
			break;

		/* Walk down both buffers in step, counting matching lines,
		   until a mismatch, endfile or end of buffer */
		top0 = buff[0];
		top1 = buff[1];
		while((matched = cmpl(top0, top1, delim)) != 0) {
			++nskipped;
			top0 += matched;
			top1 += matched;
		}

		for(which = 0; which < 2; ++which) {
			/* Slide the unmatched text to the top, copying up to
			   and including the endfile or end of buffer mark */
			src = which ? top1 : top0;
			dst = buff[which];
			for(;;) {
				*dst = *src;
				if(*dst == '\0' || *dst == EOF)
					break;
				++dst;
				++src;
			}

			/* dst is at the mark just copied.  An endfile means
			   this file has nothing more to read. */
			if(*dst == EOF)
				continue;

			/* Otherwise refill from the file, overwriting the
			   end of buffer mark, until the buffer is full or
			   an endfile has been stored */
			limit = &buff[which][MAXDIFF-1];
			while(dst <= limit) {
				*dst = getc2(&iob[which], ignore);
				if(*dst++ == EOF)
					break;
			}
		}
	}
	return nskipped;
}

/* Compare one line from each of two buffers.
   A line ends at the delimiter given by delim, or at a NULL or EOF.
   The delimiter counts as part of the line; NULL and EOF do not.

   Returns the number of characters in the line when both lines are
   identical.  Returns zero when the lines differ, and also when the
   lines consist of nothing but a NULL or EOF.
*/

cmpl(pa, pb, delim)
char *pa, *pb, delim;
{
	int count;	/* matching characters seen so far */

	count = 0;
	while(*pa == *pb) {
		/* A shared terminator ends the line but is not counted */
		if(*pa == EOF || *pa == '\0')
			return count;
		++count;
		/* A shared delimiter ends the line and is counted */
		if(*pa == delim)
			return count;
		++pa;
		++pb;
	}
	/* The lines differ before either one ended */
	return 0;
}

/* Get characters from file and handle errors */
getc2(io, ignore)
struct _buf *io;
char ignore;
{
	int ci;
	while(((ci = getc(io))&0x7f) == ignore) ;
	return ci <= 0 ? EOF : ci&0x7f;
}

/* Fatal error exit.
   Writes the message p with puts(), exactly as given, and then
   ends the program by calling exit().
*/
error(p)
char *p;
{
	puts(p);	/* callers supply any newline themselves */
	exit();		/* end the program */
}

/* Process mismatch.
   Called when the top lines of the two buffers mismatch.
   Finds the point in each file where at least nm lines
   match (EOFs can match several times), reports everything
   above these points as a file difference, and then moves the
   matching line groups or endfiles to the tops of the buffers.

   buff     the two text buffers
   delim    line delimiter character
   nm       number of consecutive lines which must match
   topline  file line number of the top line of each buffer;
            advanced here by the number of lines reported
   argv     command line arguments; argv[1] and argv[2] are used
            as the file names in the report headings

   To ensure minimum meaningful differences by matching
   at the minimum depths (in lines) from the tops
   of the buffers the search algorithm is as follows:

   Reference lines are chosen at increasing depths,
   alternating between the two buffers. Each is
   compared to every line in the other buffer from the
   top to the same depth as the reference line. If no
   matches, the next reference line (in the other buffer
   or at the next depth) is tried. If a match is found
   it is checked for nm lines in each file. If all lines
   match, synchronization has been achieved.  EOFs are
   seen as an infinite series of EOF lines to force
   cleanup at the ends of the files.  If a reference line
   pointer reaches the last byte of its buffer the program
   gives up and aborts through error().
*/


processmm(buff, delim, nm, topline, argv)
char buff[2][MAXDIFF+1], delim, *argv[];
int nm, topline[2];
{
	char *refp[2], *refpf;		/* Reference line pointers */
	char *olp;			/* Line pointer in other buffer */
	char match, search;		/* Flags */
	int rdepth;			/* Depth of reference line */
	int odepth;			/* Depth in other file */
	char *pa, *pb, *p[2];		/* p[]: start of matching text in each buffer */
	int len;
	int rfile, ofile;
	char *rbottom;			/* Last byte in reference buffer */
	char *pto, *pfrom;
	int mcnt;			/* Match count */
	int skipct[2];			/* Lines in each area of mismatch */

	/* Begin search as described above */
	match = 0;
	search = 1;
	refp[0] = buff[0];	/* Points to reference line */
	refp[1] = buff[1];

	/* For increasing reference depth while still searching */
	for(rdepth = 0; search; ++rdepth) {
		/* For each buffer at each reference depth */
		for(rfile = 0; rfile <= 1 && search; ++rfile) {
			ofile = rfile ? 0 : 1;		/* other file */
			refpf = refp[rfile];
			rbottom = &buff[rfile][MAXDIFF-1];

			/* Compare reference line to each line in other buffer up to reference depth */
			if(refpf >= rbottom)
				search = 0;	/* buffer too small; match is still 0 here */
			else {
				for(odepth = 0, olp = buff[ofile]; odepth <= rdepth && search; ++odepth, olp += lnlen(olp, delim)) {
					/* Try for match of depth nm starting at refpf and olp */
					for(mcnt = 0, match = 1, pa = refpf, pb = olp; mcnt < nm && match; ++mcnt, pa += len, pb += len) {
						/* Note won't scan past EOF or NULL delimiters because line length will be zero.
						   Aligned EOFs will be forced to match, but NULLs won't */
						len = cmpl(pa, pb, delim);
						/* Test the length itself. Storing it in the
						   char flag would keep only its low byte, so a
						   matching line whose length is a multiple of
						   256 would look like a mismatch. */
						match = len != 0;
						if(*pa == EOF && *pb == EOF)
							match = 1;	/* EOFs match with length zero */
					}
					if(match) {
						/* We found nm matches!  Record, per file, where
						   the matching text starts and how many lines
						   lie above it, while the loop variables still
						   describe the match. */
						search = 0;
						p[rfile] = refpf;
						p[ofile] = olp;
						skipct[rfile] = rdepth;
						skipct[ofile] = odepth;
					}
				}
			}
			/* Advance this file's reference pointer (stays put at EOF or NULL) */
			refp[rfile] += lnlen(refpf, delim);
		}
	}

	/* Search is over.  The match flag indicates a successful match
	   of nm lines or to EOFs; if it is set, p[] and skipct[] were
	   filled in above for both files. */
	if (!match)
		error("DIFFERENCE TOO BIG FOR BUFFER! ABORTING.\n");

	/* Print dividing line */
	for(len = 0; len < 63; ++len)
		putchar('-');
	putchar('\n');

	/* For each file */
	for(rfile = 0; rfile <= 1; ++rfile) {
		/* Display the differing text */
		printf("-------- TEXT FROM %s, %d LINES FROM LINE %d ----------\n",argv[rfile+1], skipct[rfile], topline[rfile]);
		topline[rfile] += skipct[rfile];
		printbuff(buff[rfile], p[rfile] - 1, delim);

		/* Move matching and following lines to top through the end of buffer delimiter */
		for(pto = buff[rfile], pfrom = p[rfile]; *pfrom ; ++pto, ++pfrom)
			*pto = *pfrom;
		*pto = '\0';	/* Copy the null too */
	}
}

/* Measure the line starting at p.
   The line ends at the first delim, EOF or NULL.  The returned
   length includes a terminating delim but never an EOF or NULL.
*/
lnlen(p, delim)
char *p, delim;
{
	char *start;

	start = p;
	/* EOF and NULL stop the scan without being counted */
	while(*p != EOF && *p != '\0') {
		/* The delimiter is counted, then the scan stops */
		if(*p++ == delim)
			break;
	}
	return p - start;
}

/* Display the characters from address ps through address pe
   inclusive.  Every character equal to delim is written as a
   newline; all others are written unchanged.  Nothing is written
   when pe is below ps.
*/
printbuff(ps, pe, delim)
char *ps, *pe, delim;
{
	char *cur;

	for(cur = ps; cur <= pe; ++cur) {
		if(*cur == delim)
			putchar('\n');
		else
			putchar(*cur);
	}
}
