!===============================================================================
! Copyright 2022 Intel Corporation.
!
! This software and the related documents are Intel copyrighted  materials,  and
! your use of  them is  governed by the  express license  under which  they were
! provided to you (License).  Unless the License provides otherwise, you may not
! use, modify, copy, publish, distribute,  disclose or transmit this software or
! the related documents without Intel's prior written permission.
!
! This software and the related documents  are provided as  is,  with no express
! or implied  warranties,  other  than those  that are  expressly stated  in the
! License.
!===============================================================================

!  Content:
!      Intel(R) oneAPI Math Kernel Library (oneMKL)
!      FORTRAN OpenMP offload examples for DIMATCOPY_BATCH_STRIDED
!*******************************************************************************

include "mkl_omp_offload.f90"
include "common_blas.f90"

!> Example driver for mkl_dimatcopy_batch_strided with OpenMP offload.
!!
!! The same batched in-place matrix copy is executed twice on identical input:
!! once through a plain call (reference result, stored in ab_ref) and once
!! through an OpenMP dispatch construct (result stored in ab).  The two results
!! are then compared with dcheck_matrix.
!!
!! Exit status: 0 and "PASSED" on stdout when dcheck_matrix returns 0;
!! 1 when the comparison reports a mismatch or the buffers cannot be allocated.
program dimatcopy_batch_strided_example
#if defined(MKL_ILP64)
use onemkl_blas_omp_offload_ilp64
#else
use onemkl_blas_omp_offload_lp64
#endif
use common_blas

implicit none

character(len=1) :: ordering = 'C', trans = 'T'
integer :: row = 5, col = 3, batch_size = 10
integer :: lda, ldb, stride, passed
integer :: alloc_stat
! Double-precision literal: a bare 1.2 would be rounded to default real first.
double precision :: alpha = 1.2d0
double precision, allocatable :: ab(:,:), ab_ref(:,:)

lda = row
ldb = col
! Per-matrix storage; max(lda,ldb)*max(row,col) is never smaller than either
! lda*col or ldb*row, so one slot can hold the matrix before and after the call.
stride = max(lda, ldb) * max(row, col)

! Request a status code so that an allocation failure is reported here instead
! of terminating the program inside the allocate statement.
allocate(ab(stride, batch_size), stat=alloc_stat)
if (alloc_stat /= 0) then
   print *, 'Error: cannot allocate matrices'
   stop 1
end if

allocate(ab_ref(stride, batch_size), stat=alloc_stat)
if (alloc_stat /= 0) then
   deallocate(ab)
   print *, 'Error: cannot allocate matrices'
   stop 1
end if

! Fill ab, then duplicate it so both calls start from the same data.
call dinit_matrix('N', stride, batch_size, stride, ab)
call dcopy_matrix(stride, batch_size, stride, ab, ab_ref)

! Reference result: plain call outside any offload construct (CPU).
call mkl_dimatcopy_batch_strided(ordering, trans, row, col, alpha, &
                                 ab_ref, lda, ldb, stride, batch_size)

! Offloaded result (GPU): ab is mapped for the duration of the target data
! region and the call is issued through the dispatch construct that matches
! the OpenMP version selected at build time.
!$omp target data map(ab)
#if defined(ONEMKL_USE_OPENMP_VERSION) && (ONEMKL_USE_OPENMP_VERSION >= 202011)
!$omp dispatch
#else
!$omp target variant dispatch device(0) use_device_ptr(ab)
#endif
call mkl_dimatcopy_batch_strided(ordering, trans, row, col, alpha, ab, lda, ldb, stride, batch_size)
#if !defined(ONEMKL_USE_OPENMP_VERSION) || (ONEMKL_USE_OPENMP_VERSION < 202011)
!$omp end target variant dispatch
#endif
!$omp end target data

! Compare the offloaded result against the reference; a nonzero return value
! is treated as a failure below.
passed = dcheck_matrix(stride, batch_size, stride, ab, ab_ref)

deallocate(ab)
deallocate(ab_ref)

if (passed /= 0) stop 1

print *, "PASSED"

end program dimatcopy_batch_strided_example
