/****************************************************************************
 *  Levenberg - Marquardt non-linear minimization algorithm
 *  Copyright (C) 2004  Manolis Lourakis (lourakis@ics.forth.gr)
 *  Institute of Computer Science, Foundation for Research & Technology - Hellas
 *  Heraklion, Crete, Greece.
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 ****************************************************************************/

/***************************************************************************** 
 * Levenberg-Marquardt nonlinear minimization. The same core code is used with
 * appropriate #defines to derive single and double precision versions, see
 * also lm_core.c
 *****************************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <float.h>

#include "lm.h"

/* Numeric constants made available to the code included further down.
 * Both are written as plain floating literals (type double as spelled here).
 * NOTE(review): the single-precision SUBCNST() defined later in this file pastes
 * an `F` suffix onto its macro-expanded argument; if these constants are passed
 * through CNST(), each must remain a single literal token -- a parenthesized
 * expression such as (1.0/3.0) could not be pasted. Usage inside lm_core.c is
 * not visible from this file; confirm before changing the spelling.
 */
#define EPSILON       1E-12
#define ONE_THIRD     0.3333333334 /* decimal approximation of 1.0/3.0 (rounded up in the last digit) */

/* String-building helpers.
 *   RCAT(tok, lit): stringize `tok` exactly as written and place the result in
 *                   front of `lit`; when `lit` is a string literal the two
 *                   adjacent literals are merged by the compiler.
 *   XCAT(tok, lit): same, but the extra macro level lets `tok` be macro-expanded
 *                   before it is stringized.
 */
#define RCAT(tok, lit)    #tok lit
#define XCAT(tok, lit)    RCAT(tok, lit)

#define BLOCKSZ       32 /* block size for cache-friendly matrix-matrix multiply. It should be
                          * such that BLOCKSZ^2*sizeof(LM_REAL) fits in the CPU (L1)
                          * data cache. Notice that a value of 32 when LM_REAL=double assumes
                          * an 8KB L1 data cache (32*32*8=8K). This is a conservative choice since
                          * newer Pentium 4s have a L1 data cache of size 16K, capable of holding
                          * up to 45x45 double blocks.
                          */
/* Number of elements in one BLOCKSZ x BLOCKSZ block. The whole product is
 * parenthesized so that it stays intact when the macro appears inside a larger
 * expression, e.g. n / BLOCKSZ_SQ or n % BLOCKSZ_SQ.
 */
#define BLOCKSZ_SQ    ((BLOCKSZ)*(BLOCKSZ))

/* Portability shim for the `inline` keyword:
 *   - _MSC_VER defined (MSVC): map it to the vendor spelling __inline
 *   - otherwise, __GNUC__ defined: leave the keyword untouched
 *   - anything else: define it away so the keyword disappears
 */
#if defined(_MSC_VER)
#  define inline __inline
#else
#  if !defined(__GNUC__)
#    define inline
#  endif
#endif


/* ---------------------------------------------------------------------------
 * Single precision (float) pass: give every generic name its float-specific
 * meaning before the shared core code is read in.
 * ------------------------------------------------------------------------- */

/* scalar type and its limits (limits come from <float.h>) */
#define LM_REAL float
#define LM_REAL_MAX FLT_MAX
#define LM_REAL_MIN FLT_MIN

/* CNST(lit) appends an F suffix to `lit`, e.g. CNST(1.0) -> 1.0F. The two-level
 * definition makes sure a macro argument is expanded before the suffix is pasted.
 */
#define CNST(lit) SUBCNST(lit)
#define SUBCNST(lit) lit##F

/* s-prefixed names substituted for the generic routine names */
#define LEVMAR_DER slevmar_der
#define LEVMAR_DIF slevmar_dif
#define FDIF_JAC_APPROX sfdif_jac_approx
#define TRANS_MAT_MAT_MULT strans_mat_mat_mult

/* linear system solvers: only an LU variant is named when LAPACK is absent */
#ifndef HAVE_LAPACK
#define AX_EQ_B_LU sAx_eq_b_LU_noLapack
#else
#define AX_EQ_B_LU sAx_eq_b_LU
#define AX_EQ_B_CHOL sAx_eq_b_Chol
#define AX_EQ_B_QR sAx_eq_b_QR
#define AX_EQ_B_QRLS sAx_eq_b_QRLS
#define AX_EQ_B_SVD sAx_eq_b_SVD
#endif /* HAVE_LAPACK */

#include "lm_core.c" /* read in core code */

/* Retire every single-precision definition so the names can be rebound.
 * #undef of a name that was never defined (e.g. the LAPACK-only solvers in a
 * build without HAVE_LAPACK) is harmless.
 */

/* scalar type, limits and literal helper */
#undef LM_REAL
#undef LM_REAL_MAX
#undef LM_REAL_MIN
#undef CNST
#undef SUBCNST

/* routine names */
#undef LEVMAR_DER
#undef LEVMAR_DIF
#undef FDIF_JAC_APPROX
#undef TRANS_MAT_MAT_MULT

/* linear system solvers */
#undef AX_EQ_B_LU
#undef AX_EQ_B_CHOL
#undef AX_EQ_B_QR
#undef AX_EQ_B_QRLS
#undef AX_EQ_B_SVD

/* ---------------------------------------------------------------------------
 * Double precision pass: give every generic name its double-specific meaning
 * before the shared core code is read in.
 * ------------------------------------------------------------------------- */

/* scalar type and its limits (limits come from <float.h>) */
#define LM_REAL double
#define LM_REAL_MAX DBL_MAX
#define LM_REAL_MIN DBL_MIN

/* literals need no suffix in double precision: CNST(lit) is just `lit` */
#define CNST(lit) lit

/* d-prefixed names substituted for the generic routine names */
#define LEVMAR_DER dlevmar_der
#define LEVMAR_DIF dlevmar_dif
#define FDIF_JAC_APPROX dfdif_jac_approx
#define TRANS_MAT_MAT_MULT dtrans_mat_mat_mult

/* linear system solvers: only an LU variant is named when LAPACK is absent */
#ifndef HAVE_LAPACK
#define AX_EQ_B_LU dAx_eq_b_LU_noLapack
#else
#define AX_EQ_B_LU dAx_eq_b_LU
#define AX_EQ_B_CHOL dAx_eq_b_Chol
#define AX_EQ_B_QR dAx_eq_b_QR
#define AX_EQ_B_QRLS dAx_eq_b_QRLS
#define AX_EQ_B_SVD dAx_eq_b_SVD
#endif /* HAVE_LAPACK */

#include "lm_core.c" /* read in core code */

/* Retire every double-precision definition so none of the generic names
 * remains defined past this point. #undef of a name that was never defined
 * (e.g. the LAPACK-only solvers in a build without HAVE_LAPACK) is harmless.
 */

/* scalar type, limits and literal helper */
#undef LM_REAL
#undef LM_REAL_MAX
#undef LM_REAL_MIN
#undef CNST

/* routine names */
#undef LEVMAR_DER
#undef LEVMAR_DIF
#undef FDIF_JAC_APPROX
#undef TRANS_MAT_MAT_MULT

/* linear system solvers */
#undef AX_EQ_B_LU
#undef AX_EQ_B_CHOL
#undef AX_EQ_B_QR
#undef AX_EQ_B_QRLS
#undef AX_EQ_B_SVD
