;;; -*- SYNTAX: COMMON-LISP; MODE: LISP; BASE: 10; PACKAGE: *LISP; -*-

;;; *Lisp Sampler - A Send In Four Fits
;;; By William R. Swanson and J.P.Massar,
;;; Thinking Machines Corporation,
;;; 4/10/91

;;; (For use with the *Lisp Sampler chapter of the
;;; "Getting Started in *Lisp" guide.)

(in-package '*lisp)

;;; --- Fit The First: Adding Very Large Integers

;;; Given two integer values stored as a single bit in each processor
;;; in send address order (that is, with the least significant bit stored
;;; in processor 0), we want to add these numbers and display the result.

;;; Any processor which contains a 1 in both input pvars
;;; is going to produce a carry that will propagate to all
;;; subsequent processors until it reaches a processor with
;;; a 0 in both input pvars.

;;; Any processor which contains a 0 in both input arguments
;;; thus acts as a "barrier" for carries, preventing a carry
;;; from propagating beyond it.

;;; Thus processors with either two 1's or two 0's define
;;; 'segments' of carry propagation or non-propagation.

;;; We can use the segmented scanning feature of scan!!
;;; to determine which processors will be affected by carry bits.
;;; We can then compute the sum as a parallel logical combination
;;; of the two input pvars and a pvar indicating which processors
;;; are influenced by carry bits.

(defun very-long-add!! (bit-pvar1 bit-pvar2)
  (declare (type (field-pvar 1) bit-pvar1 bit-pvar2))
  (*let* ((zero-zero (=!! 0 bit-pvar1 bit-pvar2))
	  (one-one (=!! 1 bit-pvar1 bit-pvar2))
	  carry-segments will-receive-carry dest)
    (declare (type boolean-pvar zero-zero one-one)
	     (type boolean-pvar carry-segments will-receive-carry)
	     (type (field-pvar 1) dest))

    ; Determine points at which carries start and end
    (*set carry-segments
	  (or!! (zerop!! (self-address!!)) zero-zero one-one))

    ; Determine processors that will be affected by a carry
    (*set will-receive-carry
	  (scan!! one-one 'copy!!
		  :segment-pvar carry-segments :include-self nil))

    ; Exclude processor zero, because it can't receive a carry
    (*setf (pref will-receive-carry 0) nil)

    ; Perform the addition
    (*set dest
	  (if!! (or!! one-one zero-zero)
	        ; Pairs of 1's and 0's produce 1's with carry, else 0's
		(if!! will-receive-carry 1 0)
		; All other values will be 0 with carry, 1's otherwise
		(if!! will-receive-carry 0 1)))
    dest))

;;; Function to display the bit pvars we're using
;;; so that high-order bits are printed first, not
;;; low-order bits, as ppp would produce.
(defun pbp (pvar &key (length 20) title) "Print Bit Pvar"
   (*let ((display-pvar
	    (if!! (<!! (self-address!!) length)
		  (pref!! pvar (-!! length (self-address!!) 1))
		  0)))
     (ppp display-pvar :end length :title title))
   (values))

;;; This function tests the very-long-add!! function:

(defun test-very-long-add ()
  (let ((length1 (+ 12 (random 5)))
	(length2 (+ 12 (random 5))))
    (*let ((bit-pvar1 0) (bit-pvar2 0))
      (declare (type (field-pvar 1) bit-pvar1 bit-pvar2))

      ;;; Store random binary numbers in the bit pvars
      (*when (<!! (self-address!!) length1)
	(*set bit-pvar1 (random!! 2)))
      (*when (<!! (self-address!!) length2)
	(*set bit-pvar2 (random!! 2)))

      ;;; Display the two binary numbers
      (pbp bit-pvar1 :length 20 :title "Bit-pvar 1")
      (pbp bit-pvar2 :length 20 :title "Bit-pvar 2")

      ;;; Display the result of adding them
      (pbp (very-long-add!! bit-pvar1 bit-pvar2)
	   :length 20 :title "Result    "))
    (values)))

;;; Sample output:
;
;> (*defvar bits (if!! (<!! (self-address!!) 12) 1 0))
;BITS
;> (ppp bits :end 20)
;1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 
;> (pbp bits :length 20)
;0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 
;
;> (test-very-long-add)
;Bit-pvar 1: 0 0 0 0 0 1 1 0 1 1 0 1 1 1 1 0 1 0 1 0 
;Bit-pvar 2: 0 0 0 0 0 0 0 0 1 1 1 0 1 1 0 1 1 1 1 0 
;Result    : 0 0 0 0 0 1 1 1 1 1 0 0 1 1 0 0 1 0 0 0 
;> (test-very-long-add)
;Bit-pvar 1: 0 0 0 0 0 0 1 1 1 0 0 1 1 0 1 1 1 0 0 1 
;Bit-pvar 2: 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 1 0 1 1 0 
;Result    : 0 0 0 0 0 1 0 0 0 0 1 1 0 1 1 0 1 1 1 1 

;;; --- Fit The Second: A Segmented news!! Function

;;; This function performs a news!!-like operation on a one-dimensional
;;; grid, using a :segment-pvar argument to define segments much like
;;; the corresponding argument to scan!!.

(defun segmented-news!! (pvar segment-pvar)
  (*let (end-segment-pvar result temp)
    ;;; Define a second segment pvar that has T's at
    ;;; the _end_ of the segments defined by segment-pvar
    (*set end-segment-pvar
       (scan!! segment-pvar 'copy!! :direction :backward
	       :segment-pvar t!! :include-self nil))
    ;;; Last active processor in end-segment-pvar must be T
    (*setf (pref end-segment-pvar (*max (self-address!!))) T)
    ;;; use scan!! to shift pvar values forward one position
    (*set result
      (scan!! pvar 'copy!! :segment-pvar t!! :include-self nil))
    ;;; use a backward scan to copy the last value
    ;;; of each segment back to the start of the segment
    (*set temp
      (scan!! pvar 'copy!! :segment-pvar end-segment-pvar
	      :include-self t :direction :backward))
    ;;; combine the copied last elements from temp pvar
    ;;; with the elements in the result pvar
    (*when segment-pvar (*set result temp))
    result))

;;; Sample Output:
;
;> (*defvar segment-pvar (zerop!! (mod!! (self-address!!) 4)))
;SEGMENT-PVAR
;> (*setf (pref segment-pvar 1) T) ; set processor 1 as well
;NIL
;> (ppp (self-address!!) :end 20 :format "~2D ")
; 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 
;> (ppp segment-pvar :end 20 :format " ~:[.~;T~] ")
; T  T  .  .  T  .  .  .  T  .  .  .  T  .  .  .  T  .  .  . 
;> (ppp (segmented-news!! (self-address!!) segment-pvar) :end 20 :format "~2D ")
; 0  3  1  2  7  4  5  6 11  8  9 10 15 12 13 14 19 16 17 18

;;; --- Fit The Third: Using the Router Efficiently

;;; Using *defstruct pvars, sorts, and scans, you can simulate the
;;; kind of manipulations that pref!! goes through in performing a
;;; "many collisions" get, and also see how you can use these operations
;;; to write code that uses the CM router very efficiently

;;; A *defstruct is used to sort data that needs to be sent via the router.
;;; For each processor, this data structure is used to record both
;;; the self-address of the processor and the address from which the
;;; processor needs to fetch a value.

(*defstruct gmc-data
  (fetch-address nil :type fixnum)
  (originating-address nil :type fixnum))

;;; The pref!!-many-collisions algorithm uses the following strategy:
;;;  - Define a gmc-data pvar that records the self-address of
;;;    each pvar and the location from which it needs to fetch a value.
;;;  - Perform a send to sort the elements of the gmc-data pvar
;;;    by the fetch-address of each structure.
;;;  - Create a segment pvar that indicates the segments formed by
;;;    gmc-data elements with the same value in the fetch-address slot
;;;  - Use the segment pvar to do a no-collisions get for only the
;;;    first element in each segment.
;;;  - Use a copy scan to copy the retrieved value to all processors
;;;    in the segment.
;;;  - Do a no-collisions send to transfer the retrieved values to
;;;    the processors that originally requested them.

(defun pref!!-many-collisions (data address)
  (let ((number-of-active-processors (*sum 1)))
    (*let ((sort-data (make-gmc-data!! :fetch-address address
			 :originating-address (self-address!!))))

      ;; Sort the sort-data pvar by fetch-address
      ;; and pack into low-address processors so it is contiguous
      (*pset :no-collisions sort-data sort-data
	     (rank!! (gmc-data-fetch-address!! sort-data) '<=!!))

      ;; Select only those processors that contain data after the sort
      (*all (*when (<!! (self-address!!) number-of-active-processors)

	 ;; Create a segment pvar that contains T for the first element
         ;; of each group of elements with the same fetch-address
	 (*let ((beginning-of-segment
		  (or!! (zerop!! (self-address!!))
			(/=!! (gmc-data-fetch-address!! sort-data)
			      (scan!! (gmc-data-fetch-address!! sort-data)
				      'max!! :include-self nil))))
		fetched-data)

	   ;; Do a get only for the first element of each segment
	   (*when beginning-of-segment
	     (*set fetched-data
		   (pref!! data (gmc-data-fetch-address!! sort-data))))

	   ;; Use scan!! to copy the fetched data to the other elements
	   (*set fetched-data (scan!! fetched-data 'copy!!
				      :segment-pvar beginning-of-segment))

	   ;; Use the originating-address slots to send the data back
	   ;; to the processors that originally requested it
	   (*pset :no-collisions fetched-data fetched-data
		  (gmc-data-originating-address!! sort-data))
	   fetched-data
	   ))))))

;;; Sample Output:
;
;> (*defvar collision-addresses (floor!! (self-address!!) 6))
;COLLISION-ADDRESSES
;> (ppp collision-addresses :end 30)
;0 0 0 0 0 0 1 1 1 1 1 1 2 2 2 2 2 2 3 3 3 3 3 3 4 4 4 4 4 4 
;> (ppp (pref!!-many-collisions (*!! (self-address!!) 3) collision-addresses) :end 30)
;0 0 0 0 0 0 3 3 3 3 3 3 6 6 6 6 6 6 9 9 9 9 9 9 12 12 12 12 12 12 

;;; --- Fit The Fourth: Using the Router Efficiently

;;; This example uses VP sets, router communication and scanning
;;; to define a parallel data-structure with an arbitrary number
;;; of elements per processor, called a "list" pvar on account of
;;; its flexible nature.

;;; This is accompished by defining a
;;; new VP set on the fly that has sufficient processors to hold
;;; all of the elements of the list pvar, and by defining the
;;; list pvar in this new VP set in such a way that it is divided
;;; into segments of elements, one segment for each processor in
;;; the original VP set. In this way, each processor in the original
;;; VP set is assigned a "list" of elements for its use, of any
;;; required length. 

;;; This form defines a flexible Vp Set that is used to contain
;;; the elements of the list pvar. The actual size of this VP set
;;; is determined at run time.

(def-vp-set list-vp-set nil
  :*defvars ((segment-pvar nil) (elements nil)))

;;; This variable contains, for each processor, the send address of
;;; the first "list" element in the list-vp-set
(*defvar *list-start-processor*)

;;; This variable contains, for each processor, the number of elements
;;; assigned to that processor
(*defvar *number-of-elements*)

;;; This macro allocates a list pvar for the duration of a body of code.
;;; It takes two pvar arguments:
;;;   length - the length of the segment of elements that should
;;;            be allocated for each processor
;;;   value  - the value with which those elements should be initialized.

(defmacro allocate-list-pvar (length value &body body)
  `(let* ((total-processors-required (*sum ,length)))
     ;;; Allocate enough processors in list-vp-set to hold the elements
     (allocate-processors-for-vp-set list-vp-set
	(list (max *minimum-size-for-vp-set*
		   (next-power-of-two->= total-processors-required))))
     (*with-vp-set list-vp-set
       (*set segment-pvar nil elements nil))
     ;;; Get send addresses of elements in that list-vp-set that will
     ;;; contain the first element of each segment
     (*let ((*list-start-processor*
	      (scan!! ,length '+!! :include-self nil))
	    (*number-of-elements* ,length))
       ;;; For processors that have requested a non-zero number of
       ;;; elements, send the initial values to the first element of
       ;;; the corresponding segments.
       (*when (plusp!! ,length)
	 (*pset :no-collisions ,value elements *list-start-processor*
		:notify segment-pvar))
       ;;; Use a scan to copy the initial value to all elements in each
       ;;; segment.
       (*with-vp-set list-vp-set
	 (*set elements
	       (scan!! elements 'copy!! :segment-pvar segment-pvar)))
       ;;; Evaluate body forms with list-vp-set defined
       (progn
	 ,@body
	 )
       ;;; Deallocate the list-vp-set so that it can be reused.
       (deallocate-processors-for-vp-set list-vp-set)
       )))

;;; This function displays the contents of the first n lists
(defun print-lists (n)
  (dotimes (i n)
    (format t "~&( ")
    (let ((start-address (pref *list-start-processor* i))
	  (length (pref *number-of-elements* i)))
      (*with-vp-set list-vp-set
	(dotimes (i length)
	  (format t "~D " (pref elements (+ start-address i))))))
    (format t ")")))
						   
;;; This function tests the list-defining macro:

(defun test-lists (n)
  (*let ((lengths (random!! 8)))
    (ppp lengths :end n)
    (allocate-list-pvar lengths (self-address!!)
      (print-lists n))))

;;; Sample output:
;
;> (test-lists 6)
;4 3 5 4 3 2
;( 0 0 0 0 )
;( 1 1 1 )
;( 2 2 2 2 2 )
;( 3 3 3 3 )
;( 4 4 4 )
;( 5 5 )
;NIL
;> (test-lists 6)
;3 7 3 5 4 6
;( 0 0 0 )
;( 1 1 1 1 1 1 1 )
;( 2 2 2 )
;( 3 3 3 3 3 )
;( 4 4 4 4 )
;( 5 5 5 5 5 5 )
;NIL
;> (test-lists 6)
;2 5 5 1 0 4
;( 0 0 )
;( 1 1 1 1 1 )
;( 2 2 2 2 2 )
;( 3 )
;( )
;( 5 5 5 5 )
;NIL
