/* RasterOp
 * VAX version
 * Per Bothner, May 1985
 *
 * This program is source for three subroutines, depending on cpp flags:
 * PatternOp(out, pattern, skip, function) (#ifdef FILL) Fill raster w/pattern
 * UnaryOp(out, function) (#ifndef FILL #ifdef UNARY) one-operand rasterop
 * RasterOp(out, in, function) (otherwise) two-operand general rasterop
 *
 * In each case it works by using the input arguments to compile specialized
 * machine code (to the array codeTable) and then executing it.
 */
/* Global debug flag word, defined elsewhere. */
extern short Debug;
/* Non-zero when bit 0x1000 of Debug is set; in this file it gates the
 * printf tracing of the generated code and of each RasterCompile call. */
#define DebugRop (Debug&0x1000)
#include <bitbltops.h>
#include <Vfont.h>

/*
 * Code-emission helpers.
 *
 * Every Comp* macro appends bytes through the variable `code` (a byte
 * pointer that must be in scope where the macro is used) and advances it.
 * CompOp and CompImmedX also read the variable OPSIZE, and CompLoad reads
 * loadOp.  None of these macros checks for buffer overflow.
 *
 * The macros expand to brace blocks, so they are used as statements
 * followed by `;`.  Arguments of CompW, CompL and CompLitL are evaluated
 * more than once and must be free of side effects.
 */

/* Comp{B,W,L}: append a {byte,short,long} to the codeTable,
 * least significant byte first */
#define CompB(x) {*code++ = (x);}
#define CompW(x) {CompB((x)&0xFF); CompB((x)>>8);}
#define CompL(x) {CompB(x); CompB((x)>>8); CompB((x)>>16); CompB((x)>>24);}

/* Usage of registers while executing the codeTable */
#define ColReg 7	/* number of columns (words) remaining */
#define RowReg 6	/* number of rows (scanlines) remaining */
#define SrcReg 9	/* temporary source register */
#define TempReg (SrcReg-1) /* used for quad-word shifting */
#define DstPtrReg 11	/* == out -- points to current destination word*/
#define SrcPtrReg 10	/* == in -- points to current source word*/
#define SaveReg 0	/* save old value of Src for later use */

/* Macros to help construct instructions (for codeTable) */
#define Adjust 0x80 /* either auto-increment or auto-decrement */
#define INCR(reg) ((reg)+Adjust)
#define MOV 0x90	/* MOV? instruction */
#define ROTL 0x9C	/* rotate long */
/* Operand-size offsets, added to a byte-sized opcode such as MOV */
#define B 0	/* byte operation */
#define W 0x20	/* word (short) operation */
#define L 0x40	/* long operations */
/* OPSIZE is one of B, W or L */
/* The argument is parenthesized so that an expression argument (for
 * example a conditional) is added to OPSIZE as a whole. */
#define CompOp(op) CompB((op)+OPSIZE)
#define CompMove CompOp(MOV)
/* Emit the current load opcode (loadOp) sized by `size` (B, W or L). */
#define CompLoad(size) CompB(loadOp+(size))

/* addressing modes - macros to compile effective addresses */
#define REG(reg) (0x50+(reg))	/* register mode */
#define CompReg(reg) CompB(REG(reg))
#define CompLit(literal) CompB(literal) /* small literal: 0..63 */
/* CompImmed{B,W,L}: the operand specifier byte 0x8F followed by the value */
#define CompImmedB(val) {CompB(0x8F); CompB(val); }
#define CompImmedW(val) {CompB(0x8F); CompW(val); }
#define CompImmedL(val) {CompB(0x8F); CompL(val); }
/* CompImmedX: like CompImmed{B,W,L}, with the width chosen by OPSIZE at
 * the time the macro executes; val is evaluated exactly once. */
#define CompImmedX(val) {long _j=(val); CompB(0x8F); if (OPSIZE==L) {CompL(_j);}\
	else if (OPSIZE==W) {CompW(_j);} else {CompB(_j);} }
#define CompLitL(val) { /* optimized version of CompImmedL*/ \
    if ((unsigned)(val) < 64) {CompLit(val);} \
    else {CompImmedL(val);} }
/* Emit an operand specifier for `reg` with the Adjust bit added. */
#define CompIncr(reg) CompB(INCR(reg))

/*
 * Entry point.  Exactly one of three function headers is selected by cpp:
 *   FILL defined              PatternOp(out, pattern, skip, function)
 *   UNARY defined, no FILL    UnaryOp(out, function), plus the wrapper UnaryOpS
 *   neither                   RasterOp(out, in, function), plus the wrapper RasterOpS
 * (FILL also defines UNARY, so the FILL build takes every "#ifdef UNARY" path.)
 *
 * out, in   rasters; width (bBox.h), height (bBox.v), stride and start are
 *           read from element [-1] of each pointer.
 * pattern   (FILL) array of 16-bit pattern words.
 * skip      (FILL) number of rows to skip in pattern.
 * function  raster function; only the low 4 bits are used.
 *
 * The routine returns immediately if the (clipped) width or height is
 * <= 0.  Otherwise it emits machine code for the whole operation into the
 * local CodeTable, loads the registers that code expects, and jumps to it.
 * Functions that canonicalize to 4, 5 or 0xD call abort().
 *
 * NOTE(review): the asm statements and the register comments on the local
 * declarations rely on the compiler assigning register variables in
 * declaration order (r11 downward) and on fieldPos/fieldSize living at
 * -4(fp)/-8(fp) -- confirm before touching any declaration.
 */
#ifdef FILL
#define UNARY
PatternOp(out, pattern, skip, function)
    register VRaster *out;	/* bitmap to fill with pattern */
    unsigned short *pattern;
    int skip;		/* number of rows to skip in pattern */
#else FILL
#ifndef UNARY
/* Wrapper that simply calls RasterOp with the same arguments. */
RasterOpS(out, in, function) { RasterOp(out, in, function); }
RasterOp(out, in, function)
  register VRaster *out, *in;		/* r11, r10 */
#else UNARY
/* Wrapper that simply calls UnaryOp with the same arguments. */
UnaryOpS(out, function) { UnaryOp(out, function); }
UnaryOp(out, function)
  register VRaster *out;		/* r11 */
#endif UNARY
#endif FILL
  int function;	/* a subset of the GXfunction codes */
  {
#ifdef UNARY
    register *dummy;			/* r10 - use up an extra register */
#endif
    /* only change fieldSize bits of destination word, starting at fieldPos */
    int fieldPos, fieldSize;		/* -4(fp), -8(fp) */
    int OPSIZE;				/* operand size (W or L) for the next RasterCompile */
    /* NOTE(review): nothing below checks `code` against the end of this
     * 100-byte buffer -- confirm the longest emitted sequence fits. */
    u_char CodeTable[100/*???*/];	/* "compile" into this buffer */
    register u_char *code = CodeTable;	/* SrcReg */
    register i;				/* TempReg */
    register width = out[-1].bBox.h;	/* ColReg */
    register rows = out[-1].bBox.v;	/* RowReg */
    short dstStride = out[-1].stride;	/* increment of DstPtrReg at new row */
    u_char *outerTop, *innerTop;	/* branch targets of the row and column loops */
    u_char loadOp;			/* opcode used to load the source: MOV or MCOM */
    /* Indexed by function (0..15).  A negative entry means the source is
     * loaded unchanged with MOV; any other entry is the function to
     * compile instead when the source is loaded inverted with MCOM.
     * NOTE(review): entry 15 (GXset) maps to 0 (GXclear), unlike the other
     * remapped entries this is not explained by inverting the source --
     * confirm that this is intended. */
    static char canonicFunc[16] = { /* See below */
	-1, 4, -1, 0xC, -1, -1, -1, 0xD,
	 2, 6, -1, 0xE,	-1, -1, -1, 0};
#ifndef UNARY
    short srcStride = in[-1].stride;	/* increment of SrcPtrReg at new row */
    int shift = BitOffset(out) - BitOffset(in);;

    /* clip the operation to the smaller of the two rasters */
    if (width > (i = in[-1].bBox.h)) width = i;
    if (rows > (i = in[-1].bBox.v)) rows = i;
#endif

    if (width <= 0 || rows <= 0) return;
 /* some of the code (primarily for the leftmost column)
  * assumes that shifts are -15..15, though for a true 32-bit machine it
  * should handle shifts of -31..31 (for alignment reasons)
  */

 /* The source is normally loaded with the MOV* instruction. Sometimes
  * it is simpler to invert the source while loading it (with MCOM*),
  * but we must then adjust the function to take account of this.
  */
    function &= 15;
    if ((i = canonicFunc[function]) < 0)
	loadOp = MOV;
    else
      {
	loadOp = 0x92;	/* MCOM - invert source operand when loading it */
	function = i;	/* fix the function to take account of this */
      }

 /* The compiled code is basically like this:
  *
  * for scanline = dst_top; scanline <= dst_bottom; scanline++
  *	update leftmost column (0..15 bits)
  *	for each whole word (left-to-right)
  *	    update it
  *	update the rightmost column
  * return
  *
  * The rightmost (most significant) 'shift' bits of one column are saved
  * (if needed for the next column) in the most significant bits of SaveReg.
  */
    outerTop = code;	/* the per-row SOBGTR at the bottom branches back here */
#ifdef FILL
    /* Per-row pattern fetch: clear all but the low 4 bits of TempReg, load
     * a word addressed through SrcPtrReg and TempReg into SrcReg, then
     * copy it into both 16-bit halves of SaveReg.
     * (presumably 0x40+reg / 0x60+reg are the VAX index and register-
     * deferred operand specifiers -- confirm against the architecture manual) */
    CompB(0xCA /* BICL2 */); CompImmedL(0xFFFFFFF0); CompReg(TempReg);
    CompLoad(W); CompB(0x40+TempReg); CompB(0x60+SrcPtrReg); CompReg(SrcReg);
    CompB(0x9C /*ROTL*/); CompLit(16); CompReg(SrcReg); CompReg(SaveReg);
    CompB(MOV+W); CompReg(SrcReg); CompReg(SaveReg);
#endif FILL
    if ((i = BitOffset(out)) > 0) /* bitOffsets assumed 0..15 */
      { /* compile left column - currently does it as 16 bits only */
	fieldPos = i; fieldSize = 16 - i;
	if ((width -= fieldSize) <= 0)
	  { /* only one destination column */
	    fieldSize += width; /* fieldSize = total width */
#ifndef UNARY
	    /* test if only one source column */
	    if (width + shift <= 0) shift &= 15; /* make positive */
#endif UNARY
	    width = 0;
	  }
#ifndef UNARY
	if (shift < 0)
          {
	    /* load a whole long word of source and rotate it into place */
	    CompLoad(L); CompIncr(SrcPtrReg); CompReg(SaveReg);
	    srcStride -= 4; /* since we load a whole long-word at once */
/* WARNING: next line is probably bogus (if INSV is used) !!! */
	    CompB(ROTL); CompLit(-shift); CompReg(SaveReg); CompReg(SrcReg);
	    shift += 16; /* fix shift from -15..-1 to 1..15 */
	   }
	else
          {
	    CompLoad(W); CompIncr(SrcPtrReg); CompReg(SrcReg);
	    if (shift != 0)
	      {
		/* keep the just-loaded word in the upper half of SaveReg */
		CompB(ROTL); CompLit(16); CompReg(SrcReg); CompReg(SaveReg);
	/* for GXcopy no shift is emitted here; every other function
	 * gets the source shifted left by 'shift' first */
	if (function != (GXcopy & 15))
	  {
		CompB(0x78/*ASHL*/); CompLit(shift);
		    CompReg(SrcReg); CompReg(SrcReg);
	  }
	      }
	    srcStride -= 2;
          }
#endif UNARY
	OPSIZE = W;
	asm(" jsb RasterCompile");
	dstStride -= 2;	/* the emitted code has advanced DstPtrReg by one word */
      }
#ifndef UNARY
    else if (shift != 0)
      {
	/* destination is word aligned but the source is not: prime SaveReg
	 * with the first source word in its upper half */
	CompLoad(W); CompIncr(SrcPtrReg); CompReg(SaveReg);
	CompB(ROTL); CompLit(16); CompReg(SaveReg); CompReg(SaveReg);
	/* perhaps doing MOVL might have been faster? */
	srcStride -= 2;
	shift &= 15;
      }
#endif UNARY
#ifdef FILL
    /* the replicated pattern in SaveReg is the source for the rest of the row */
    CompB(MOV+L); CompReg(SaveReg); CompReg(SrcReg);
#endif
    if (width >= 32)
      {
	/* compile inner loop: scan along one row */
	fieldPos = 0; fieldSize = 32;
	i = width>>5; /* # of columns */
	CompB(MOV+L); CompLit(i); CompReg(ColReg);
	innerTop = code;
	i <<= 2; /* # of bytes */
	dstStride -= i;
#ifndef UNARY
	srcStride -= i;
	CompLoad(L); CompIncr(SrcPtrReg); CompReg(SrcReg);
	if (shift != 0)
	  {
	    /* The idea is that the residue from the previous source fetch is
	     * kept in the right 'shift' bits of SaveReg, and used next time.
	     */
	    CompB(MOV+L); CompReg(SaveReg); CompReg(TempReg);
	    CompB(MOV+L); CompReg(SrcReg); CompReg(SaveReg);
	    CompB(0x79 /*ASHQ*/); CompLit(shift); /* shift 1..15 */
		CompReg(TempReg); CompReg(TempReg);
	  }
#endif UNARY
	OPSIZE = L; asm(" jsb RasterCompile");
	/* Compile bottom of inner loop */
	CompB(0xF5 /* SOBGTR */);
	CompReg(ColReg);
	i = innerTop - (code+1); CompB(i); /* displacement */

	/* NOTE(review): this value of i is never read; the next statement
	 * assigns i again before any use. */
	i = 4*(width>>5);
      }
    /* right-hand partial column: the remaining 1..31 bits, if any */
    if (i = width & 31)
      {
	fieldPos = 0; fieldSize = i;
	if (i <= 16)
	  {
#ifndef UNARY
	    if (i > shift)
	      {
		/* more bits are needed than SaveReg holds: fetch another word */
		CompLoad(W); CompIncr(SrcPtrReg); CompReg(SrcReg);
		srcStride -= 2;
		if (shift != 0)
		  {
		    CompB(MOV+L); CompReg(SaveReg); CompReg(TempReg);
		    CompB(0x79 /*ASHQ*/); CompLit(shift); /* shift 1..15 */
			CompReg(TempReg); CompReg(TempReg);
		  }
	      }
	    else
	      {
		/* no further source fetch: rotate SaveReg into SrcReg */
		CompB(ROTL); CompLit(shift);
		    CompReg(SaveReg); CompReg(SrcReg);
	      }
#endif UNARY
	    OPSIZE = W; asm(" jsb RasterCompile"); dstStride -= 2;
	  }
	else
	  {
#ifndef UNARY
	    /* fetch a long or a word of source depending on how many new
	     * bits are needed beyond the 'shift' bits already held */
	    if (i - shift > 16)
	      {
		CompLoad(L); CompIncr(SrcPtrReg); CompReg(SrcReg);
	        srcStride -= 4;
	      }
	    else	    
	      {
		CompLoad(W); CompIncr(SrcPtrReg); CompReg(SrcReg);
		srcStride -= 2;	    
	      }
	    if (shift != 0)
	      {
		CompB(MOV+L); CompReg(SaveReg); CompReg(TempReg);
		CompB(0x79 /*ASHQ*/); CompLit(shift); /* shift 1..15 */
		    CompReg(TempReg); CompReg(TempReg);
	      }
#endif UNARY
	    OPSIZE = L; asm(" jsb RasterCompile"); dstStride -= 4;
	  }
      }
    /* srcStride/dstStride now hold what is left of each stride after the
     * bytes stepped over within the row; if positive, emit an add so the
     * pointer moves on to the next row. */
#ifndef UNARY
    if (srcStride > 0)
      { CompB(0xC0 /*ADDL2*/); CompLitL(srcStride); CompReg(SrcPtrReg); }
#endif UNARY
    if (dstStride > 0)
      { CompB(0xC0 /*ADDL2*/); CompLitL(dstStride); CompReg(DstPtrReg); }

#ifdef FILL
    /* step TempReg (the pattern row counter) once per scanline */
    CompB(0xD6 /* INCL */); CompReg(TempReg);
#endif FILL
    /* Compile bottom of outer loop */
    CompB(0xF5 /* SOBGTR */); CompReg(RowReg);
	i = outerTop - (code+1); /* displacement */ CompB(i);

    CompB(0x04); /* RET - return from subroutine (RasterOpS) */

    /* optional dump of the generated bytes */
    if (DebugRop) {
	printf("CodeTable: %x (dstStride:%d) = {\n", CodeTable, dstStride);
	for (i = code - CodeTable,code = CodeTable; --i >= 0; )
	    printf("%x,",*code++);
	printf("}\n");
      }
    /* Load the values the generated code expects.  Each assignment is to a
     * register variable; the register it is meant to occupy is given by
     * the comment on its declaration. */
    out = (VRaster*)out[-1].start;
#ifdef FILL
    i = skip;
    dummy = (int*)pattern;
#else FILL
#ifndef UNARY
    in = (VRaster*)in[-1].start;
#else UNARY
    code = (u_char*)0; /* actually: SrcReg = 0 */
#endif UNARY
#endif FILL
    width = (int)CodeTable; asm(" jmp (r7)"); /* jump to start of Codetable */

/*
 * This pseudo-function compiles code into codeTable to do the equivalent of:
 * 	*DstPtrReg++ (function=) SrcReg
 * 'function' is a two-operand logical operation (as defined in <framebuf.h>).
 * If OPSIZE={W,L}, then OpType is {u_short,u_long}, and
 * we can "declare" register OpType SrcReg, *DstPtrReg.
 *
 * One complication: dst must remain unchanged in bits where mask is zero:
 * dst = (dst & ~ mask) | ((dst function src) & mask),
 * -where the "mask" is fieldPos 0 bits, fieldSize 1 bits, remainder 0 bits.
 *
 * It is entered with "jsb RasterCompile" from the code above and returns
 * with "rsb"; its inputs are function, OPSIZE, fieldPos and fieldSize.
 */

/* InvertSrc: emit an MCOM of SrcReg into itself, sized by OPSIZE. */
#define InvertSrc {CompOp(0x92); CompReg(SrcReg); CompReg(SrcReg);}
/* HasMask: true when the field is narrower than the operand. */
#define HasMask (OPSIZE==L? fieldSize < 32: fieldSize<16)
/* AndWithMask: when HasMask, build in r8 (at compile time) a value of all
 * ones with fieldSize zero bits inserted at fieldPos, then emit a BIC of
 * that immediate into SrcReg.  The immediate is taken from `i`, which the
 * declaration comment places in TempReg, i.e. r8. */
#define AndWithMask     {if (HasMask) \
 { asm("mcoml $0,r8"); asm("insv $0,-4(fp),-8(fp),r8");\
   CompOp(0x8A) /*BIC*/; CompImmedX(i); CompReg(SrcReg); }}

asm("RasterCompile:")
if (DebugRop)
printf("[rComp: %c, pos: %d, len: %d]", OPSIZE==L?'L':'W',fieldPos,fieldSize);
    switch(function)
      {
	case GXclear & 15: /* 0 */
	    if (HasMask)
	      {
		/* insert zeros into the field, then step the pointer by
		 * hand (OPSIZE>>4 is 2 for W, 4 for L) */
		CompB(0xF0); /* INSV */
		    CompLit(0);
		    CompLit(fieldPos); CompLit(fieldSize);
		    CompB(0x60+DstPtrReg);
		CompB(0xC0 /*ADDL2*/);
		    CompLit(OPSIZE>>4); CompReg(DstPtrReg);
	      }
	    else
	      { CompOp(0x94 /*CLR*/); CompIncr(DstPtrReg);}
	    break;
/*	case 0x1: mapped by canonicFunc to 0x4 */
	case GXpaintInverted & 15: /* 2 */
	    AndWithMask;
	    CompOp(0x8A /* BIC2 */); CompReg(SrcReg); CompIncr(DstPtrReg);
	    break;
/*	case GXcopyInverted & 15 (3): mapped by canonicFunc to 0xC (GXcopy) */
	case GXxor & 15: /* 6 */
	    AndWithMask;
	    CompOp(0x8C /* XOR2 */); CompReg(SrcReg); CompIncr(DstPtrReg);
	    break;
/*	case 0x7: mapped by canonicFunc to 0xD */
/*	case GXand & 15 (8): mapped by canonicFunc to 0x2 (GXpaintInverted) */
/*	case 0x9: mapped by canonicFunc to 0x6 (GXxor) */
	case GXnoop & 15: /* A */
	    /* nothing is emitted for this column */
	    break;
/*	case 0xB: mapped by canonicFunc to 0xE (GXpaint) */
	case 0xC: /* GXcopy */
	    if (HasMask)
	      {
#ifdef FILL
#define Temp2Reg 1
		/* masked copy without INSV:
		 * dst = old ^ ((old ^ src) & mask) */
		CompOp(MOV); CompB(0x60+DstPtrReg); CompReg(Temp2Reg);
		CompOp(0x8C /*XOR2*/); CompReg(Temp2Reg); CompReg(SrcReg);
	        AndWithMask;
		CompOp(0x8D /*XOR3*/); CompReg(Temp2Reg); CompReg(SrcReg);
		    CompIncr(DstPtrReg);
#else FILL
		/* insert the source into the field, then step the pointer
		 * by hand */
		CompB(0xF0); /* INSV */
		    CompReg(SrcReg);
		    CompLit(fieldPos); CompLit(fieldSize);
		    CompB(0x60+DstPtrReg);
		CompB(0xC0 /*ADDL2*/);
		    CompLit(OPSIZE>>4); CompReg(DstPtrReg);
#endif FILL
	      }
	    else
	      { CompMove; CompReg(SrcReg); CompIncr(DstPtrReg); }
	    break;
	case GXpaint & 15: /* E */
	    AndWithMask;
	    CompOp(0x88 /*BIS2*/); CompReg(SrcReg); CompIncr(DstPtrReg);
	    break;
/*	case GXset & 15 (F): mapped by canonicFunc to 0 (GXclear) */

	/* Functions 1 and 7 also arrive here, since canonicFunc maps them
	 * to 4 and 0xD. */
	case 0x4:
	case GXinvert & 15: /* 5 */
	case 0xD:
	    abort(); /* not implemented */
      }
    asm(" rsb"); /* return from RasterCompile */
}
