! t_AMRmpi.F90
! -----------------------------------------------------------------------
! This is free and unencumbered software released into the public domain.
!
! Anyone is free to copy, modify, publish, use, compile, sell, or 
! distribute this software, either in source code form or as a compiled 
! binary, for any purpose, commercial or non-commercial, and by any 
! means.
!
! In jurisdictions that recognize copyright laws, the author or authors 
! of this software dedicate any and all copyright interest in the 
! software to the public domain. We make this dedication for the benefit 
! of the public at large and to the detriment of our heirs and 
! successors. We intend this dedication to be an overt act of 
! relinquishment in perpetuity of all present and future rights to this 
! software under copyright law.
!
! THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
! EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
! MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
! IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 
! OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
! ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
! OTHER DEALINGS IN THE SOFTWARE.
!
! For more information, please refer to http://unlicense.org/
! ----------------------------------------------------------------------

! -------------------------------------------------------------------
! 1. set these definitions in Srend.F90 exactly like this (with the # in column 1):
#define SREND_AMR
#define SREND_VALPHA 512
#define SREND_TILE_MONTAGE 0
!
! 2. uncomment exactly one just below of USE_PARAMESH_AMR or USE_PATCH_AMR
#define USE_PARAMESH_AMR
!#define USE_PATCH_AMR
!
! 3. compile the tile montage routine (needed to combine tiles to single image)
!    gfortran srend_tile_montage.F90 -O3 -o montage
!
! 4. compile then run:
!    mpif90  Srend.F90 -O3 -c
!    gcc sleep.c -c
!    mpif90 t_AMRmpi.F90 Srend.o sleep.o -O3 -o t_AMRmpi
!    mpirun -np 4 ./t_AMRmpi
! - - - - - - - - - - - - - - - - - - - - - 
!   openmp compile flag: -fopenmp
!   shell command before running with openmp, if needed: 
! $ ulimit -s unlimited
!
!   DEBUG Options for compiling:
!   gfortran: -fbacktrace -fbounds-check -fcheck=all -g -lefence 

! ---- set these -----------------------------------------------------
! define voxel dim for entire volume at coarsest level
#define NNN 64

! levels, must be > 1
#define MAX_LEVEL 6

! -------------------------------------------------------------------

!   these are calculated from above
#ifdef USE_PARAMESH_AMR
#define MAX_BLOCKS (7*(MAX_LEVEL-1)+8)
#endif

#ifdef USE_PATCH_AMR
#define MAX_BLOCKS (6*(MAX_LEVEL-1)+1)
#endif

! ********************************************************************
      program t_AMRmpi
#ifdef _OPENMP
      USE OMP_LIB
#endif
      USE srend
      IMPLICIT NONE
      include 'mpif.h'

! structure to hold AMR geometry and data for each block
      type amr_type ! these are the target arrays for rendering
        integer*4 :: XN,YN,ZN,iNX,iNY,iNZ,Vdim        ! AMR geometry
        character*1,dimension(:,:,:),allocatable :: a ! data with boundary of 1
      end type amr_type
      type(amr_type),dimension(1:MAX_BLOCKS) :: fv
! string to pass for output filenames
      character*200,save :: filenames
! timing
      integer*8 it1, it2, itc, total0, total1
      real*4 rc
! flythrough parameter
      real*4, dimension(3),save :: E, E0, V
! AMR data and loop vars
      integer*4 :: jj,i,j,k,tt
      real*4 :: x,y,z,cr
! white wireframe
      integer*4 :: nx,ny,nz, xoff,yoff,zoff, N
! check AMR data
      real*4 :: volume
! MPI
      integer*4 MYer, MYid, MYn
! ccccccccc end variable declarations cccccccccccccccccccccccccccccccc

! init MPI
      call MPI_init(MYer)
      call MPI_comm_rank(MPI_COMM_WORLD, MYid, MYer)
      call MPI_comm_size(MPI_COMM_WORLD, MYn, MYer)
      call MPI_barrier(MPI_COMM_WORLD,MYer) ! all meet here
      print *,'MYid=',MYid,'Ranks=',MYn

      call system_clock(it1,itc)
      rc = 1.0 / itc

      print *,'max_blocks=', MAX_BLOCKS
      call create_AMR_blocks()
      print *,'done creating AMR blocks'
      
! make data needed for this rank, each piece created at its AMR level
! a pretty ball centered at (.5,.5,.5) in [0,1]**3
      volume = 0.0 ! initial
      do jj=1+MYid,MAX_BLOCKS,MYn
          print *,'jj=',jj,'vdim=',fv(jj)%Vdim,'xyz=',fv(jj)%XN,fv(jj)%YN,fv(jj)%ZN
          volume = volume + fv(jj)%XN * fv(jj)%YN * fv(jj)%ZN / (1.0*fv(jj)%Vdim)**3 
      
          allocate( fv(jj)%a(fv(jj)%iNX:fv(jj)%iNX+fv(jj)%XN+1, &
                             fv(jj)%iNY:fv(jj)%iNY+fv(jj)%YN+1, &
                             fv(jj)%iNZ:fv(jj)%iNZ+fv(jj)%ZN+1) )
          do k=fv(jj)%iNZ,fv(jj)%iNZ+fv(jj)%ZN+1
            z = 1. - (2.*k)/fv(jj)%Vdim
            do j=fv(jj)%iNY,fv(jj)%iNY+fv(jj)%YN+1
              y = 1. - (2.*j)/fv(jj)%Vdim
              do i=fv(jj)%iNX,fv(jj)%iNX+fv(jj)%XN+1
                x = 1. - (2.*i)/fv(jj)%Vdim
                cr = 4.0 + min(1.,1./(x*x+y*y+z*z + .1)**4)* & 
                     (124.+124./3.*(sin(1./(x*y+.1))+cos(1./(x+y*z+.1))+sin(1.0/(z*x+y+.1))**2) )
                fv(jj)%a(i,j,k) = char(floor(cr))
              end do
            end do
          end do
          print *,'allocated block',jj,'size=',size( fv(jj)%a) 
          
          xoff = fv(jj)%iNX; nx = fv(jj)%XN
          yoff = fv(jj)%iNY; ny = fv(jj)%YN
          zoff = fv(jj)%iNZ; nz = fv(jj)%ZN
          N = fv(jj)%Vdim
          
! wireframe white on edges of volume in array fv, tests so this is around entire volume  
      if(yoff==0    .AND. zoff==0   ) fv(jj)%a(:    ,0:1  ,0:1  ) = char(0)
      if(yoff==0    .AND. zoff+nz==N) fv(jj)%a(:    ,0:1  ,N:N+1) = char(0)
      if(yoff+ny==N .AND. zoff==0   ) fv(jj)%a(:    ,N:N+1,0:1  ) = char(0)
      if(yoff+ny==N .AND. zoff+nz==N) fv(jj)%a(:    ,N:N+1,N:N+1) = char(0)
      
      if(xoff==0    .AND. zoff==0   ) fv(jj)%a(0:1  ,:    ,0:1  ) = char(0)
      if(xoff+nx==N .AND. zoff==0   ) fv(jj)%a(N:N+1,:    ,0:1  ) = char(0)
      if(xoff==0    .AND. zoff+nz==N) fv(jj)%a(0:1  ,:    ,N:N+1) = char(0)
      if(xoff+nx==N .AND. zoff+nz==N) fv(jj)%a(N:N+1,:    ,N:N+1) = char(0)
      
      if(xoff==0    .AND. yoff==0   ) fv(jj)%a(0:1  ,0:1  ,:    ) = char(0)
      if(xoff+nx==N .AND. yoff==0   ) fv(jj)%a(N:N+1,0:1  ,:    ) = char(0)
      if(xoff==0    .AND. yoff+ny==N) fv(jj)%a(0:1  ,N:N+1,:    ) = char(0)
      if(xoff+nx==N .AND. yoff+ny==N) fv(jj)%a(N:N+1,N:N+1,:    ) = char(0)
          
      end do
      print *,'total volume covered =',volume,'(should be 1.0)'
      
!#ifdef NOTHING
! parameters for run
      E0 = (/0.5,1.3,1.7/)  ! initial eye position
      V = -E0               ! view vector

      call system_clock(it2)
      print *,'Prepped: span=',(it2-it1)*rc
      it1 = it2
      total0 = it2 ! for total walltime


! PPPPPPPPPPP parallel region PPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPPP
! export OMP_NUM_THREADS=4 , or however many threads should be used here      
!$OMP PARALLEL default(shared) private(i,tt)
      do tt= 1,4
      
!$OMP MASTER
          write(filenames,'("r/",I6.6,".ppm,")') tt
          print *,'pass = ',tt
          call system_clock(it1)
          E = E0 * (5.0-tt)/4.0      !move eye through volume 
!$OMP END MASTER

          do i=1+MYid,MAX_BLOCKS,MYn
!sync threads for entry .. particularly after first loop
!$OMP BARRIER
          call srend_render_load( &! rendering multiple blocks
          1,2,                       &! nV, nV_out
          fv(i)%Vdim,                &! voxels in full volume
          E,                         &! Eye position
          V,                         &! View vector
          (/  0.0,  1.0,  0.0/),     &! Up vector
          120.0, 120.0,              &! Alpha-horz, Beta-vert
          0.0,                       &! stereo: EyeRight
          -10.0,10.0,                &! clipping planes perpendicular to axes
          -10.0,10.0,                & 
          -10.0,10.0,                &! x0,x1,y0,y1,z0,z1
          180.0,                     &! farpolar clip
          (/.5,.5,.5/),.5,           &! center(x,y,z) and radius of sphere to clip about
          0.1, 10.0, 1,              &! near clip0,far clip1,nsh
          1,                         &! 0 spherical, 1 perspective
          0,                         &! 0=norm, 1=npole, 2=equator,3=spole
          0.25,                      &! sampling in cell units,(-)for out>in
          fv(i)%a,                           &! data array
          fv(i)%XN,fv(i)%YN,fv(i)%ZN,        &! XN,YN,ZN data array passed in
          1,                                 &! Bd
          fv(i)%iNX,fv(i)%iNY,fv(i)%iNZ,     &! offset
          1024, 1024,                &! rendering Width, Height in pixels
          1,                         &! nR
          (/48/),                    &! cotab_offset(1:nR),
          filenames,                 &! output file name masks
          2,2,                       &! tiles right, tiles down: usually 1,1
          (/0,1,2,3/))                ! target MPI rank    

         end do
!$OMP BARRIER
              
!$OMP MASTER
         call system_clock(it2)
         print *,'MYid=',MYid,' Rendered pass ',tt,' : span=',(it2-it1)*rc
         it1 = it2

         call srend_render_flush(1)

         call system_clock(it2)
         print *,'MYid=',MYid,' Flushed: span=',(it2-it1)*rc
         it1 = it2
         
         call srend_tile_finish( &
         1,                      & ! nV used for rendering
         2)                        ! nV_out, the view index used here (nV_out as used by render calls)
     
         call system_clock(it2)
         print *,'MYid=',MYid,'finished: tt=',tt,'span=',(it2-it1)*rc   
         it1 = it2

         call MPI_barrier(MPI_COMM_WORLD,MYer) ! all meet here     
!$OMP END MASTER

      end do ! tt pass loop
!$OMP END PARALLEL
! pppppppppp end parallel region ppppppppppppppppppppppppppppppppppppp

      call MPI_barrier(MPI_COMM_WORLD,MYer) ! all meet here
      call MPI_finalize(MYer)
      
      call system_clock(it2)
      total1 = it2
      if(MYid == 0) print *,'total runtime (node 0) =', (total1-total0)*rc
      
      STOP ! the program
! ********************************************************************
      contains
      
! create AMR structure -----------------------------------------
      subroutine create_AMR_blocks()
      integer*4 :: nn,inx,iny,inz,vdim,ilevel,ib
#ifdef USE_PARAMESH_AMR
      integer*4 :: i,j,k ! just for paramesh
      
      ib = 0
      do ilevel=1,MAX_LEVEL
        if(ilevel==1) then
          nn = NNN / 2
          inx = 0; iny = 0; inz = 0
          vdim = NNN
        else
          vdim = vdim * 2
          inx = inx*2; iny = iny*2; inz = inz*2
        end if
        
        do k=0,1
        do j=0,1
        do i=0,1
          if( (ilevel==1 .AND. i+j+k<3) .OR. (ilevel>1 .AND. i+j+k>0) .OR. (ilevel==MAX_LEVEL) ) then
            ib = ib + 1
            fv(ib)%XN = nn; fv(ib)%YN = nn; fv(ib)%ZN = nn
            fv(ib)%iNX = inx + i*nn
            fv(ib)%iNY = iny + j*nn
            fv(ib)%iNZ = inz + k*nn
            fv(ib)%vdim = vdim
          end if
        end do
        end do
        end do
        
        if( ilevel==1) then
          inx = nn; iny = nn; inz = nn
        end if
      end do
#endif
#ifdef USE_PATCH_AMR
      ib = 0
      vdim = NNN
      nn = vdim
      inx = 0; iny = 0; inz = 0
      do ilevel=1,MAX_LEVEL-1
        ib = ib+1
          fv(ib)%XN = nn; fv(ib)%YN = nn; fv(ib)%ZN = nn/4
          fv(ib)%iNX = inx; fv(ib)%iNY = iny; fv(ib)%iNZ = inz
          fv(ib)%vdim = vdim
        ib = ib+1
          fv(ib)%XN = nn; fv(ib)%YN = nn; fv(ib)%ZN = nn/4
          fv(ib)%iNX = inx; fv(ib)%iNY = iny; fv(ib)%iNZ = inz + 3*nn/4
          fv(ib)%vdim = vdim
          
        ib = ib+1
          fv(ib)%XN = nn; fv(ib)%YN = nn/4; fv(ib)%ZN = nn/2
          fv(ib)%iNX = inx; fv(ib)%iNY = iny; fv(ib)%iNZ = inz + nn/4
          fv(ib)%vdim = vdim
        ib = ib+1
          fv(ib)%XN = nn; fv(ib)%YN = nn/4; fv(ib)%ZN = nn/2
          fv(ib)%iNX = inx; fv(ib)%iNY = iny + 3*nn/4; fv(ib)%iNZ = inz + nn/4
          fv(ib)%vdim = vdim
          
        ib = ib+1
          fv(ib)%XN = nn/4; fv(ib)%YN = nn/2; fv(ib)%ZN = nn/2
          fv(ib)%iNX = inx; fv(ib)%iNY = iny + nn/4; fv(ib)%iNZ = inz + nn/4
          fv(ib)%vdim = vdim
        ib = ib+1
          fv(ib)%XN = nn/4; fv(ib)%YN = nn/2; fv(ib)%ZN = nn/2
          fv(ib)%iNX = inx + 3*nn/4; fv(ib)%iNY = iny + nn/4; fv(ib)%iNZ = inz + nn/4
          fv(ib)%vdim = vdim
          
        inx = 2*(inx + nn/4); iny = 2*(iny + nn/4); inz = 2*(inz + nn/4)
        nn = NNN
        vdim = vdim*2
      end do
      ib=ib+1
      fv(ib)%XN = nn; fv(ib)%YN = nn; fv(ib)%ZN = nn
      fv(ib)%iNX = inx; fv(ib)%iNY = iny; fv(ib)%iNZ = inz
      fv(ib)%vdim = vdim
#endif
      end subroutine create_AMR_blocks

      end program t_AMRmpi! program main
! ********************************************************************

