;;; -*- Base: 10; Syntax: Common-Lisp; Package: cmi; Patch-File: Yes; -*- 

(in-package 'cmi)

;;
;;; Another twuffler patch
;;; to twuffle-language.lisp
;;; 11/4/91 12:23:40 nesheim
;;; Use physical-move-always rather than physical-move.

(defun execute-twuffle-program (dest source field-length instruction-array)
  ;; paris-field paris-field fixnum array-of-array-of-fixnums
  ;;
  ;; If the dest is big enough (>= 64 bits), and if it is packed (field-vp-increment = 1)
  ;; then we want to use the dest as a temp.
  ;;
  ;; If the source is too small, we want to copy it into a big enough
  ;; buffer to do the actual local-shuffle-transposes.
  #+lc(declare (type (simple-array twuffle-instruction (*)) instruction-array))
  #-lc (setq *twuffle-last-program-run* instruction-array)
  #-lc (when *twuffle-print-program*
	 (pretty-print-twuffle-program instruction-array)
	 )
  ;; First, check that we're doing the twuffle on the right machine and field-size
  (let* ((header-instruction (aref instruction-array 0))
	 (opcode (twuffle-instruction-opcode header-instruction))
	 (arguments (twuffle-instruction-arguments header-instruction))
	 (number-of-local-bits  (aref (aref arguments 0) 0))
	 (number-of-global-bits (aref (aref arguments 1) 0))
	 (actual-local-size
	  (* field-length (geometry-id-vp-ratio
			   (cm:vp-set-geometry cm:*current-vp-set*))))
	 )
    #-lc
    (when *debug-twuffle*
      (format t "~% Number-of-local-bits: ~a, Number-of-global-bits: ~a"
	      number-of-local-bits number-of-global-bits)
      (format t "~% Actual-local-size ~a" actual-local-size)
      )
    (assert (= opcode *twuffle-opcode-header*))
    (assert (= cm:*physical-processors-length* number-of-global-bits))
    (assert (= number-of-local-bits (log2 actual-local-size)))
    )
  (let* (
	 (geometry-id (cm:vp-set-geometry cm:*current-vp-set*))
	 (source-vp-ratio (geometry-id-vp-ratio geometry-id))
	 ;; this is for the special case of a 64K machine
	 (min-temp-bits (max 6 (- cm:*physical-processors-length* 4 5)))
	 (minimum-temp-length (round (ash 1 min-temp-bits) source-vp-ratio))
	 (temp-length (max field-length minimum-temp-length))
	 (physical-length (* temp-length source-vp-ratio))
	 temp2
	 )
    #-lc
    (when *debug-twuffle*
      (format t "~% minimum-temp-length for execute is: ~d" minimum-temp-length)
      (format t "~& temp-length is: ~d" temp-length))
    (when *twuffle-timing*
      (loop for i from 1 below 20
	    do (cm:timer-clear i))
      (cm:timer-clear 63)
      (cm:timer-start 63))
    (cmi::with-stack-fields ((temp1 temp-length)
			     )
      ;;; This is _really_ hairy
      ;;; Because we add the extra bit (for <6 local-bits cases)
      ;;; at the top of the _local-bits_ (and not the _data-bits_),
      ;;; we need to copy the source data into the lowest part of
      ;;;_physical_ memory of temp1, not to the lowest part of the
      ;;; _vp_ memory.
      ;;;
      ;;; And because the source may be vp-interleaved, we copy it
      ;;; into a _tight_ field, which is not vp-interleaved (really-temp).
      (cm:u-move-zero-1l temp1 temp-length)
;;;      (cm:u-move-1l temp1 source field-length)
      (cmi::with-stack-fields ((really-temp field-length))
	(cmi::with-vp-fields ((temp1       :location temp1-loc)
			      (really-temp :location really-loc))
	  (cm:u-move-always-1l really-temp source field-length)
	  (cmi::physical-move-always temp1-loc really-loc (* source-vp-ratio field-length))
	  ))
      (if (and (my-is-field-contiguous dest field-length)
	       (= temp-length field-length))
	  (progn
	    #-lc (when *debug-twuffle* (format t "~% Using dest as temp"))
	    (setf temp2 dest))
	  (progn
	    #-lc (when *debug-twuffle* (format t "~% NOT using dest as temp"))
	    (setf temp2 (cm:allocate-stack-field temp-length))))
      (cm:u-move-zero-1l temp2 temp-length)
      (cmi::with-vp-fields ((temp1 :location temp1-loc)
			    (temp2 :location temp2-loc)
			    )
	(loop with answer-in-temp2 = nil
	      for instruction-pointer from 1
	      do
	   (let* ((instruction (aref instruction-array instruction-pointer))
		  (opcode (twuffle-instruction-opcode instruction))
		  (arguments (twuffle-instruction-arguments instruction)))
;;;	     (format t "~& executing instruction ~a" instruction-pointer)
;;;	     (format t "~& temp2-loc is ~a, temp1-loc is ~a" temp2-loc temp1-loc)
	     (when *twuffle-timing*
	       (cm:timer-start instruction-pointer))
	     (cond
	       ((= opcode *twuffle-opcode-news-to-send*)
;;;		  (format t "~&global G->B")
		(let* ((number-of-axes (aref (aref arguments 0) 0))
		       (axis-size      (aref arguments 1))
		       (axis-start     (aref arguments 2))
		       (sprint-bit-continuation (aref (aref arguments 3) 0)))
		  (news-to-send
		   temp1-loc
		   number-of-axes
		   axis-size
		   axis-start
		   physical-length
		   )
		  (news-to-send-sprint-bit
		   temp1-loc sprint-bit-continuation physical-length)
		  ))
	       
;;;		 ((= opcode *twuffle-opcode-send-to-news*)
;;;		  (format t "~&global B->G")
;;;		  (let* ((number-of-axes (aref (aref arguments 0) 0))
;;;			 (axis-size      (aref arguments 1))
;;;			 (axis-start     (aref arguments 2))
;;;			 (sprint-bit-continuation (aref (aref arguments 3) 0)))
;;;		    (send-to-news-sprint-bit temp1-loc sprint-bit-continuation physical-length)
;;;		    (send-to-news
;;;		     temp1-loc
;;;		     number-of-axes
;;;		     axis-size
;;;		     axis-start
;;;		     physical-length
;;;   )))
;	       ;; Note that for the send-to-permuted-address, we require that the
;	       ;; vp-set for the send be set before the execute-twuffle-program.
;	       ;; In addition, we want the virtual addresses of the source and
;	       ;; the dest.
;	       ((= opcode *twuffle-opcode-send*)
;		(let* ((source-vp-ratio     (aref (aref arguments 0) 0))
;		       (send-address-length (aref (aref arguments 1) 0))
;		       (index-array         (aref arguments 2))
;		       (length              (aref (aref arguments 3) 0))
;		       )
;		    (send-to-permuted-address
;		      temp2
;		      temp1
;		      source-vp-ratio
;		      send-address-length
;		      index-array
;		      length)
;		    (rotatef temp1 temp2)
;		    (rotatef temp1-loc temp2-loc)
;		    (setf answer-in-temp2 (not answer-in-temp2))
;		    ))
	       ((= opcode *twuffle-opcode-global-local*)
;;;		(format t "~&phase-1")
		(let* ((number-of-axes    (aref (aref arguments 0) 0))
		       (axis-size-array   (aref arguments 1))
		       (local-start-array (aref arguments 2))
		       (node-start-array  (aref arguments 3))
		       (local-size        (aref (aref arguments 4) 0)))
		  (cmi::phase-1-exchange
		   temp1-loc
		   number-of-axes
		   axis-size-array
		   local-start-array
		   node-start-array
		   local-size)))
	       ((= opcode *twuffle-opcode-sprint-local*)
;;;	      (format t "~&sprint-local")
		(ocmi::transpose32-2-1l
		 temp1-loc
		 temp1-loc
		 (aref (aref arguments 0) 0)
		 ))
	       ((= opcode *twuffle-opcode-local-shuffle-transpose*)
;;;		(format t "~&sprint-local-deluxe")
		(let* ((sprint-array (aref arguments 0))
		       (index-array  (aref arguments 1))
		       (local-bits   (aref (aref arguments 2) 0)))
		  (lst
		   temp2-loc
		   temp1-loc
		   sprint-array
		   index-array
		   local-bits)
		  (rotatef temp1 temp2)
		  (rotatef temp1-loc temp2-loc)
		  (setf answer-in-temp2 (not answer-in-temp2))
		  )
		)
	       ((= opcode *twuffle-opcode-new-lst*)
;;;		(format t "~&sprint-local-deluxe")
		(let* ((gather-array (aref arguments 0))
		       (scatter-array  (aref arguments 1))
		       (local-bits   (aref (aref arguments 2) 0)))
		  (cached-lst
		   temp2-loc
		   temp1-loc
		   gather-array
		   scatter-array
		   local-bits)
		  (rotatef temp1 temp2)
		  (rotatef temp1-loc temp2-loc)
		  (setf answer-in-temp2 (not answer-in-temp2))
		  )
		)
	       ((= opcode *twuffle-opcode-local-local*)
;;;		(format t "~&local-local")
		(let* ((index-array (aref arguments 0))
		       (local-bits  (aref (aref arguments 1) 0)))
		  (ls-local-shuffle
		   temp2-loc
		   temp1-loc
		   index-array
		   local-bits)
		  (rotatef temp1 temp2)
		  (rotatef temp1-loc temp2-loc)
		  (setf answer-in-temp2 (not answer-in-temp2))
		  )
		)
	       ((= opcode *twuffle-opcode-move*)
;;;		(format t "~&move")
		(cm:u-move-1l temp2 temp1 temp-length)	      
		(rotatef temp1 temp2)
		(rotatef temp1-loc temp2-loc)
		(setf answer-in-temp2 (not answer-in-temp2))
		)
	       ((= opcode *twuffle-opcode-local-news-to-send*)
;;;		(format t "~&G->B")
		(let* ((bees (aref arguments 0))
		       (number-of-local-bits (aref-number arguments 1))
		       (number-of-binary-bits (aref-number arguments 2)))
		  (gb-local-shuffle
		   temp2-loc temp1-loc bees number-of-local-bits number-of-binary-bits)
		  (rotatef temp1 temp2)
		  (rotatef temp1-loc temp2-loc)
		  (setf answer-in-temp2 (not answer-in-temp2))))
	       ((= opcode *twuffle-opcode-local-send-to-news*)
;;;		(format t "~&B->G")
		(let* ((gees (aref arguments 0))
		       (number-of-local-bits (aref-number arguments 1))
		       (number-of-gray-bits (aref-number arguments 2)))
		  (bg-local-shuffle
		   temp2-loc temp1-loc gees number-of-local-bits number-of-gray-bits)
		  (rotatef temp1 temp2)
		  (rotatef temp1-loc temp2-loc)
		  (setf answer-in-temp2 (not answer-in-temp2))))
	       ((= opcode *twuffle-opcode-end*)
;;;		(format t "~&Finished: encountered an END opcode")
		(when answer-in-temp2
;;;		    (format t "~&Answer in temp: moving it out")
		  ;; by the way -- this is RIGHT, and don't you dare
		  ;; touch it !!!!
		  (cm:u-move-always-1l temp2 temp1 temp-length)
		  (rotatef temp1 temp2)
		  (rotatef temp1-loc temp2-loc))
		(when *twuffle-timing*
		  (cm:timer-stop instruction-pointer)
		  (cm:timer-stop 63)
		  )
		(return))
					;*		 (:otherwise
	       (t
		(ferror "Don't know what the opcode is: ~a" opcode))
	       ))
	   (when *twuffle-timing*
	     (cm:timer-stop instruction-pointer)
	     )
	      )
	)
      (cmi::with-stack-fields ((really-temp field-length))
	(cmi::with-vp-fields ((temp1       :location temp1-loc)
			      (really-temp :location really-loc))
	  (cmi::physical-move-always really-loc temp1-loc
			      (* source-vp-ratio field-length))
	  (cm:u-move-always-1l dest really-temp field-length)
	  ))
;;;      (cm:u-move-1l source temp1 field-length)
    )))

(cmi::increment-patch-level 5)
