/* graph_sampler.c

   Written by Frederic Bois
   22 June 2014

   Copyright (c) 2014 Frederic Bois.

   This code is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   See the GNU General Public License at <http://www.gnu.org/licenses/> 

   -- Revisions -----
     Logfile:  %F%
    Revision:  %I%
        Date:  %G%
     Modtime:  %U%
      Author:  @a
   -- SCCS  ---------

   Samples either Bayesian networks or general directed graphs by MCMC, 
   given specified priors and a likelihood

   Priors are Bernoulli on edges, power on degree and
   beta-binomial on the proportion of E loops (A -> B -> C -> A) over the
   sum of E loops and F loops (A -> B <- C -> A)

   Note on indices: for a square matrix (N x N) with i indexing lines and
   j indexing columns, i and j starting at zero,
   positions z (starting at zero) corresponding to the first line are given by:
     z = i + j * N
   positions z (starting at zero) corresponding to the first column are:
     z = j + i * N
   given a position z (starting at 0) in the flattened matrix (flattened as
   (column 1, column 2...), the corresponding row i (starting at 0) is:
     i = z % N
   the corresponding column j (starting from zero) is:
     j = z / N ("/" being an integer division)
*/


/* ----------------------------------------------------------------------------
   Inclusions
*/

#include "graph_sampler.h"


/* ----------------------------------------------------------------------------
   Global definitions, private
*/

// core computational variables
int    *nParents;           // number of parents for each node
int    **index_parents;     // the list of current parents for each node

// stuff for basic Bernoulli prior
double **log_hyper_pB;      // point to prior edge log probability matrix
double **log_hyper_qB;      // point to prior edge 1-log probability matrix
BOOL   *bAllowed_parents;   // Boolean: are parents allowed for each node

// stuff for priors handling
double diff_logprior = 0;

// stuff for likelihood handling and computations
double diff_loglikelihood = 0;
double *current_ll_node = NULL;
double alpha_data    = 1.5;    // prior Gamma shape for data precision
double beta_data     = 1000;   // prior Gamma rate for data precision

// stuff for posterior handling
double diff_logposterior = 0;


/* ----------------------------------------------------------------------------
   AnnounceProgram

   Print a screen explanation of what we are doing.
*/
void AnnounceProgram (void)
{

  printf("\n");
  printf("*******************************\n");
  printf(">>> This is a graph sampler <<<\n");
  printf("*******************************\n");
  printf("\n");
    
} /* AnnounceProgram */


/* ----------------------------------------------------------------------------
   CleanupMemory

   Releases (some!) pointers.
*/
void CleanupMemory (void)
{

  if (current_degrees)   free(current_degrees);
  
  if (current_ll_node)   free(current_ll_node);

  if (pdWorkMatrixSizeN) free(pdWorkMatrixSizeN);

} /* CleanupMemory */


/* ----------------------------------------------------------------------------
   GetCmdLineArgs

   Retrieves filenames from the command line arguments passed to
   the program.

   The command line syntax is:

     graph_sampler [input-file [output-files prefix]]

   Missing names are replaced by defaults .
*/
void GetCmdLineArgs (int cArg, char *const *rgszArg, char **pszFileIn, 
                     char **pszPrefixOut)
{
  *pszFileIn = *pszPrefixOut = (char *) NULL;

  switch (cArg) { // filenames
    case 3: // output and input file specificed
      *pszPrefixOut = rgszArg[2];
      if (strlen(rgszArg[2]) > MAXFILENAME) {
        printf("Error: output file name must be less that %d "
               "caracter long - Exiting.\n\n", MAXFILENAME);
        exit(0);
      }
      // Fall through!

    case 2: // input file specified
      *pszFileIn = rgszArg[1];
      break;

    case 1: // no file names specified
      *pszFileIn = vszFileInDefault;
      *pszPrefixOut = NULL;
      break;

    default:
      /* ShowHelp ("Usage"); */ /* disabled for now */
      exit (-1);
      break;
  } // switch

} /* GetCmdLineArgs */


/* ----------------------------------------------------------------------------
   InitArrays

   Initialize various arrays if they have not been initialized in input.
*/
void InitArrays (void)
{
  int i, j, k;

  // if current_adj is not defined it is initialized here to 0
  if (!current_adj) {
    current_adj = InitiMatrix(nNodes, nNodes); 
    printf ("Setting current_adj to default value.\n");

    for (i = 0; i < nNodes; i++)
      for (j = 0; j < nNodes; j++)
        current_adj[i][j] = 0;
  }

  // for dynamic BN we need extra current adjacencies
  if (bDBN) {

    // init a set of n = nData adjacency matrices
    if (!(adj = (int ***) malloc(nData * sizeof(int **))))
      lexerr ("out of memory in InitArrays");

    for (i = 0; i < nData; i++)
      adj[i] = InitiMatrix(nNodes, nNodes);

    // fill them with the (same) input adjacency matrix
    for (i = 0; i < nData; i++)
      for (j = 0; j < nNodes; j++)
        for (k = 0; k < nNodes; k++)
          adj[i][j][k] = current_adj[j][k];

    // set the current adjacency matrix to adj[0]
    current_adj = adj[0];
  }

  if (bDBN) {
    // dynamic Bayesian networks are not yet fully implemented
    lexerr ("dynamic BNs not yet implemented");
  }

  // initialize space for the best (maximum probability) adjacency matrix
  if (bsave_best_graph) {
    best_adj = InitiMatrix(nNodes, nNodes);
    for (i = 0; i < nNodes; i++)
      memcpy(best_adj[i], current_adj[i], nNodes * sizeof(int));
  }

  // if hyper_pB is not defined it is initialized here to 0.5 for
  // all elements (except the diagonal for BNs)
  if (!hyper_pB) {
    hyper_pB = InitdMatrix(nNodes, nNodes); 
    printf ("Setting hyper_pB to default value.\n");

    for (i = 0; i < nNodes; i++) {
      for (j = 0; j < nNodes; j++) {
        if ((i == j) && (bBN))
          hyper_pB[i][j] = 0.0;
        else
          hyper_pB[i][j] = 0.5;
      }
    }
  }
  log_hyper_pB = InitdMatrix(nNodes, nNodes);
  log_hyper_qB = InitdMatrix(nNodes, nNodes);

  for (i = 0; i < nNodes; i++) { // ith line
    for (j = 0; j < nNodes; j++) { // jth column
      log_hyper_pB[i][j] = log(hyper_pB[i][j]);
      log_hyper_qB[i][j] = log(1 - hyper_pB[i][j]);
    }
  }

  // initialize matrix edge_requirements to zero by default
  if (bPriorConcordance && !edge_requirements) {
    edge_requirements = InitiMatrix(nNodes, nNodes);
    printf ("Setting edge_requirements to default value.\n");

    for (i = 0; i < nNodes; i++) {
      for (j = 0; j < nNodes; j++) {
        if ((i == j) && (bBN))
          edge_requirements[i][j] = -1;
        else
          edge_requirements[i][j] = 0;
      }
    }
  }

  // initialize a table of the degree counts and a table for the
  // cumul of those
  if (bsave_the_degree_counts) {
    degree_count    = InitdVector(nNodes + nNodes);
    cumdegree_count = InitdVector(nNodes + nNodes);
    for (i = 0; i < (nNodes+nNodes); i++) {
      degree_count[i] = 0;
      cumdegree_count[i] = 0;
    }
  }

  // initialize motifs cumulants
  if (bsave_the_motifs_probabilies) {
    cum_nEloops = 0;
    cum_nFloops = 0;
  }

  /* initialize a Boolean array recording which nodes have been assigned
     a zero probability of having parents (a column of zero in the binomial
     prior matrix). Those nodes are typically used as special "control" 
     nodes for which the likelihood will not be computed */
  if (bBN) {
    bAllowed_parents = InitiVector(nNodes);
    for (j = 0; j < nNodes; j++) { // for each column (node) 
      i = 0;
      while ((i < nNodes) && (hyper_pB[i][j] == 0)) {
        i++;
      }
      bAllowed_parents[j] = (i == nNodes ? FALSE : TRUE);
    } // end for
  } // end if

  /* initialize a table of parents for each node
     (this is pricy in storage, would be better with pointers) */
  nParents = InitiVector(nNodes);
  index_parents = InitiMatrix(nNodes, nNodes);

  for (j = 0; j < nNodes; j++) {
    nParents[j] = 0;
    for (i = 0; i < nNodes; i++)
      if (current_adj[i][j]) { // we have a parent, store it
        index_parents[j][nParents[j]] = i;
        nParents[j] += 1;
      }
    if ((bZellner) && (nParents[j] >= nData) && nData > 0) { 
      printf ("Error: node %d has more parents than data: "
              "conflict with Zellner's score - Exiting.\n\n", j);
      exit(0);
    }
  }

  // initialize the list of node for fast topological sorting
  // beware: this assumes that nParents is initialized as above
  if ((bBN) && (!IsDAG_w_topo_list_incremental(nNodes, current_adj)))
    lexerr ("initial graph is not a DAG");
  
  // initialize an edge summation matrix
  if (bsave_the_edge_probabilies) {
    mat_sum = InitdMatrix(nNodes, nNodes);
    if (nBurnin == 0)
      for (i = 0; i < nNodes; i++)  // ith line
        for (j = 0; j < nNodes; j++)  // jth column
          mat_sum[i][j] = current_adj[i][j];
  }

  /* initialize a working matrix of nNodes by nNodes,
     will be used in InvertMatrix for example */
  if (bData)
    pdWorkMatrixSizeN = InitdMatrix(nNodes, nNodes);

} /* InitArrays */


/* ----------------------------------------------------------------------------
   Loglikelihood_diff
   
   Difference in log-likelihood for a node, given a change in its parenthood.
   Inputs:
     parent : the ID # of the parent node
     child: the ID # of the node for which we compute the likelihood
     diff: -1 for deletion, +1 for addition, of the parent-child link
     pData : a data structure
   Output:
     logLdiff: a pointer to the difference of the loglikelihood after and
               before change. 
               
   Beware: this changes globally the number and index of the parents of "child".
*/
void Loglikelihood_diff (int parent, int child, int diff, 
                         double **pData, double *logLdiff)
{
  BOOL bFound;
  int  i, iTmp;
  
  // change parenthood; beware of indices...
  if (diff < 0) { 
    // remove parent
    if (parent == index_parents[child][nParents[child]-1]) { 
      // last, just decrease the count of parents
      nParents[child] -= 1;
    }
    else {
      /* not last: first exchange with the last parent  
         then decrease the count of parents. Note that this shuffles 
         the order of parents in list */
      bFound = FALSE;
      i = -1;
      do {
        i++;
        bFound = (parent == index_parents[child][i]);
      } while (!bFound);
      iTmp = index_parents[child][i];
      index_parents[child][i] = index_parents[child][nParents[child]-1];
      index_parents[child][nParents[child]-1] = iTmp;
      nParents[child] -= 1;
    }
  }
  else {
    // add parent
    index_parents[child][nParents[child]] = parent;
    nParents[child] += 1;
  }

  if (bZellner)
    *logLdiff = ZLoglikelihood_node(child, pData) - current_ll_node[child];
  else {
    if (bDirichlet)
      *logLdiff = DLoglikelihood_node(child, pData) - current_ll_node[child];
    else 
      *logLdiff = GLoglikelihood_node(child, pData) - current_ll_node[child];
  }
  
} /* Loglikelihood_diff */


/* ----------------------------------------------------------------------------
   Loglikelihood_full
   
   Full log-likelihood for a given graph (specified by its adjacency matrix).
   The arrays nParents and index_parents are supposed to be meaningfully
   initialized using the starting or current adjacency matrix.
   The likelihood of "Control" nodes, for which bAllowed_parents is FALSE,
   is nor computed (those nodes are jumped over). However, they are still used
   as parents of their children and then take as values the data they have been
   given in the inut file.
   Inputs:
     N : number of nodes
     adjacency : a pointer to the adjacency matrix
     pData : a data structure
   The adjacency matrix on which it operates is global and actually not
   used here, but below (in the individual nodes likelihood calculations).
*/
double Loglikelihood_full (int N, double **pData)
{
  double cumLL = 0;
  int    i;

  if (!current_ll_node) 
    current_ll_node = InitdVector(N);

  // sum up the log-likelihoods of each node
  for (i = 0; i < N; i++) {
    if (bAllowed_parents[i]) {
     if (bZellner)
       current_ll_node[i] = ZLoglikelihood_node (i, pData);
     else {
       if (bDirichlet)
         current_ll_node[i] = DLoglikelihood_node (i, pData);
       else
         current_ll_node[i] = GLoglikelihood_node (i, pData);
     }
     
     cumLL += current_ll_node[i];
    }
  }

  return (cumLL);

} /* Loglikelihood_full */


/* ----------------------------------------------------------------------------
   GLoglikelihood_node
   
   Computes the log-likelihood of the data for one node, given a normal-gamma
   prior and the values of its parents (taken as regressors X)
   The prior hyperparameter are (see Bernardo's book appendix)
    mean of regression parameters : all null
    precision matrix of the regression params joint distribution: 1 * I
    alpha (shape, named alpha_data) of the gamma distribution for lambda 
     (precision of measurements): 1.5
    beta (rate, beta_data) of the same gamma distribution: 1000
   Inputs:
    node: node number
    pData: data array
*/
double GLoglikelihood_node (int node, double **pData)
{
  int i, j, k;
  static double **pdM1 = NULL;
  static double **pdM2 = NULL;

  double df, LL;

  // the vector mu of prior data expectations is null, so forget it 

  // we need the data precision matrix (1 - x inv(x'x + n0) x') alpha / beta
  // see Bernardo, appendix
  // form t(X) * X + n_zero, for X: the design matrix */
  // X is implicit given the parents of the node considered, otherwise it
  // would have a column of 1 and the data of the parents as other columns.
  // Remember that n_zero is set to the identify matrix */

  if (!pdM1) { // stupidly large arrays
    int dim = (nNodes > nData ? nNodes : nData);
    pdM1 = InitdMatrix(dim, dim); 
    pdM2 = InitdMatrix(dim, dim);  
  }

  // nzero is taken for now to be the identity matrix
  
  // do t(X) * X + n_zero  

  pdM1[0][0] = 1 + nData; // n_zero element + n * 1
  
  for (j = 0; j < nParents[node]; j++) { // take care of the line of t(X) * X
    pdM1[0][j+1] = 0; // set it to n_zero off diagonal element
    for (k = 0; k < nData; k++) {
      pdM1[0][j+1] += pData[index_parents[node][j]][k];
    }
    pdM1[j+1][0] = pdM1[0][j+1];
  }
	
  for (i = 0; i < nParents[node]; i++) {
    for (j = i; j < nParents[node]; j++) {
      if (i == j)
        pdM1[i+1][j+1] = 1; // set it to n_zero diagonal
      else
        pdM1[i+1][j+1] = 0;   // set it to n_zero off diagonal
      for (k = 0; k < nData; k++) {
        pdM1[i+1][j+1] += pData[index_parents[node][i]][k] * 
                          pData[index_parents[node][j]][k];
      }
      pdM1[j+1][i+1] = pdM1[i+1][j+1];
    }
  }

  // invert t(X) * X + n_zero, that is pdM1
  if (nParents[node] == 0) {
    pdM1[0][0] = 1 / pdM1[0][0];
  }
  else 
    InvertMatrix(pdM1, 1+nParents[node]);

  // premultiply by X, store the result in pdM2
  for (i = 0; i < nData; i++) {
    for (j = 0; j < nParents[node]+1; j++) {
      pdM2[i][j] = pdM1[0][j]; // no need to multiply by 1
      for (k = 1; k < nParents[node]+1; k++) {
        pdM2[i][j] += pData[index_parents[node][k-1]][i] *
                      pdM1[k][j];
      }
    }
  }

  /* postmutiply by t(X), subtract from I and multiply by alpha / beta;
     use pdM1 to store the result */
  for (i = 0; i < nData; i++) {
    for (j = i; j < nData; j++) {
      if (i == j) {
        pdM1[i][j] = 1 - pdM2[i][0]; // no need to multiply by 1
      }
      else {
        pdM1[i][j] =   - pdM2[i][0]; // no need to multiply by 1
      }
      for (k = 1; k < nParents[node]+1; k++) {
        pdM1[i][j] -= pdM2[i][k] *
                      pData[index_parents[node][k-1]][j];
      }
      pdM1[i][j] *= alpha_data / beta_data;
      pdM1[j][i] = pdM1[i][j];
    }
  }

  // degrees of freedom
  df = 2 * alpha_data;

  LL = LnMultivariateT(pData[node], nData, pdM1, df);
  
  return(LL);
 
} /* GLoglikelihood_node */


/* ----------------------------------------------------------------------------
   This is the famous heapsort
*/
void sort(long n, double *vect)
{
  int i, j, k, l;
  double temp;

  k = (n >> 1) + 1;
  l = n;
  for (;;) {
    if (k > 1)
      temp = vect[--k - 1];
    else {
      temp = vect[l-1];
      vect[l-1] = vect[0];
      if (--l == 0) {
        vect[0] = temp;
        return;
      }
    }
    i = k;
    j = k << 1;
    while (j <= l) {
      if (j < l && vect[j-1] < vect[j]) ++j;
      if (temp < vect[j-1]) {
        vect[i-1] = vect[j-1];
        j += (i = j);
      }
      else j = l+1;
    }
    vect[i-1] = temp;
  }
} /* sort */


/* ----------------------------------------------------------------------------
   DLoglikelihood_node
   
   Computes the log-likelihood of the data for one node, given a Dirichlet
   prior and multinomial data.
   Inputs:
    node: node number
    pData: data array, with levels coded 0, 1, ...

   For a good explanation see:
   - Laskey and Myers, 2003, Machine Learning, 50:175-196.
   For some more detail see:
   - Heckerman et al., 1994, in Proceedings of Tenth Conference on Uncertainty
     in Artificial Intelligence, Seattle, WA, p. 293-301. Morgan Kanfmann.
   - Heckerman et al., 1995, Machine Learning, 20, 197-243
*/
double DLoglikelihood_node (int node, double **pData)
{
  int    i, j, nConfigs;
  
  double LL, N_prime_ij, N_prime_ijk;
  
  static int    *piCardConfig      = NULL;
  static double *pdCodesP          = NULL;
  static double *pdCodesPE         = NULL;
  static double *pdCumConfigNumber = NULL;
  static double *pdIndexConfig     = NULL;

  if (!pdCodesPE) {
    pdCodesP          = InitdVector(nData);
    pdCodesPE         = InitdVector(nData);
    pdIndexConfig     = InitdVector(nData);
    piCardConfig      = InitiVector(nData);
    pdCumConfigNumber = InitdVector(nNodes);
  }

  // Dirichlet prior sample size of any given configuration of parents values.
  // case no parents or uniform:
  N_prime_ijk = 1.0;

  // a further possibility, if there are parents, is to set it at 
  // 1 / number of configurations of parents = 1 / prod_(pDataLevels[parents]).
  // That should penalize higher number of parents
#ifdef NDEF
  for (i = 0; i < nParents[node]; i++)
    N_prime_ijk /= (double) pDataLevels[index_parents[node][i]]; 
    // in any case, that calculation can be omitted if pdCumConfigNumber
    // is assigned to one, below
#endif
  
  // marginal prior sample size on node: pDataLevels[node] * N_prime_ijk.
  // the actual detailed calculation is the sum from 1 to pDataLevels[node] 
  // of the prior sample sizes for each configuration of parents
  N_prime_ij = N_prime_ijk * pDataLevels[node];
  
  // cumulated products of levels for configurations encoding
  pdCumConfigNumber[0] = pDataLevels[node];
  for (i = 0; i < nParents[node]; i++)
    pdCumConfigNumber[i+1] = pdCumConfigNumber[i] * 
                             pDataLevels[index_parents[node][i]];

  // encoding of node and parents configurations:
  for (i = 0; i < nData; i++) {
    pdCodesPE[i] = pData[node][i];
    for (j = 0; j < nParents[node]; j++)
      pdCodesPE[i] += pData[index_parents[node][j]][i] * pdCumConfigNumber[j];
  }

  // form the codes of just the parents configurations to form the marginals
  // do this before sorting pdCodesPE!
  // sort the parents configurations if needed
  if (nParents[node] > 0) {
    for (i = 0; i < nData; i++)
      pdCodesP[i] = pdCodesPE[i] - pData[node][i];
    sort(nData, pdCodesP);
  }

  // sort the various node and parents configurations
  sort(nData, pdCodesPE);

  // count (tabulate) the nConfigs unique node and parents configurations 
  j = 0;
  pdIndexConfig[j] = pdCodesPE[0];
  piCardConfig[j] = 1;
  for (i = 1; i < nData; i++) {
    if (pdCodesPE[i] == pdIndexConfig[j])
      piCardConfig[j]++;
    else {
      j++;
      pdIndexConfig[j] = pdCodesPE[i];
      piCardConfig[j] = 1;
    }      
  }
  nConfigs = j + 1;
  
  LL = 0;

  // term for updated counts
  for (i = 0; i < nConfigs; i++)
    LL += LnGamma(N_prime_ijk + piCardConfig[i]);

  // term for prior, saving time if LnGamma is zero
  if ((N_prime_ijk != 1) && (N_prime_ijk != 2)) 
    LL -= nConfigs * LnGamma(N_prime_ijk);
  
  // now deal with the marginal terms:
  // count (tabulate) the nConfigs unique parents configurations 
  if (nParents[node] == 0) {
    piCardConfig[0] = nData;
    nConfigs = 1;
  }
  else {
    j = 0;
    pdIndexConfig[j] = pdCodesP[0];
    piCardConfig[j] = 1;
    for (i = 1; i < nData; i++) {
      if (pdCodesP[i] == pdIndexConfig[j])
        piCardConfig[j]++;
      else {
        j++;
        pdIndexConfig[j] = pdCodesP[i];
        piCardConfig[j] = 1;
      }      
    }
    nConfigs = j + 1;
  }

  // term for updated marginal counts
  for (i = 0; i < nConfigs; i++)
    LL -= LnGamma(N_prime_ij + piCardConfig[i]);

  // term for marginal prior, saving time if LnGamma is zero
  if ((N_prime_ij != 1) && (N_prime_ij != 2)) 
    LL += nConfigs * LnGamma(N_prime_ij);

  return(LL);
 
} /* DLoglikelihood_node */


/* ----------------------------------------------------------------------------
   ZLoglikelihood_node
   
   Computes the log-likelihood of the data for one node, given a Zellner
   prior and the values of its parents (taken as regressors X)
   The Zellner prior is improper and cannot be computed if there is the same
   number of or more parents than data points for the node considered.
   Inputs:
    node: node number
    pData: data array
    
   Computation proceeds by forming
   mx = Y' * Y - g_z/(g_z + 1) * (Y' * X) * inv((X' * X)) * (X' * Y)
   where g_z is a tuning parameter
   and then
   Loglikelihood = -(eta + 1)/2 * log(1 + n) - n/2 * log(mx)
   were Y are the data for the node considered and X the design matrix
   (a column of 1 for the constant term and a column of data for each parent,
   the data of the parents being taken as regressors), n is the number of data
   points for node, eta is the number of parents of the node considered.
*/
double ZLoglikelihood_node (int node, double **pData)
{
  int i, j, k;
  static double **pdM1 = NULL;
  static double *pdV1  = NULL;
  static double *pdV2  = NULL;
  double mx; 
  double LL;

  if (!pdM1) { // stupidly large arrays
    int dim = (nNodes > nData ? nNodes : nData); // the largest
    pdM1 = InitdMatrix(dim, dim); 

    dim = (nNodes > nData ? nData : nNodes); // the smallest
    pdV1 = InitdVector(dim);  
    pdV2 = InitdVector(dim);  
  }

  // start with mx = Y' * Y
  mx = 0;
  for (i = 0; i < nData; i++) {
    mx += pow(pData[node][i], 2);
  }

  // do Y' * X
  
  // all elements of the first column of X are at 1
  pdV1[0] = pData[node][0];
  for (i = 1; i < nData; i++) {
    pdV1[0] += pData[node][i];
  }
  for (j = 0; j < nParents[node]; j++) {
    pdV1[j+1] = pData[node][0] * pData[index_parents[node][j]][0];
    for (i = 1; i < nData; i++) {
      pdV1[j+1] += pData[node][i] * pData[index_parents[node][j]][i];
    }
  }

  // do X' * X 

  pdM1[0][0] = nData; // n * 1
  
  for (j = 0; j < nParents[node]; j++) { // take care of the line of t(X) * X
    pdM1[0][j+1] = 0;
    for (k = 0; k < nData; k++) {
      pdM1[0][j+1] += pData[index_parents[node][j]][k];
    }
    pdM1[j+1][0] = pdM1[0][j+1];
  }
	
  for (i = 0; i < nParents[node]; i++) {
    for (j = i; j < nParents[node]; j++) {
      pdM1[i+1][j+1] = 0;
      for (k = 0; k < nData; k++) {
        pdM1[i+1][j+1] += pData[index_parents[node][i]][k] * 
                          pData[index_parents[node][j]][k];
      }
      pdM1[j+1][i+1] = pdM1[i+1][j+1];
    }
  }

  // invert X' * X, that is: invert pdM1
  if (nParents[node] == 0) {
    pdM1[0][0] = 1 / pdM1[0][0];
  }
  else 
    InvertMatrix(pdM1, 1+nParents[node]);

  // do (Y' * X) * inv((X' * X)), that is pdV1 * pdM1 
  
  for (j = 0; j <= nParents[node]; j++) {
    pdV2[j] = pdV1[0] * pdM1[0][j];
    for (i = 1; i <= nParents[node]; i++) {
      pdV2[j] += pdV1[i] * pdM1[j][i];
    }
  }

  // do (Y' * X) * inv((X' * X)) * (X' * Y), that is pdV2 * t(pdV1) 
  
  pdV2[0] = pdV2[0] * pdV1[0];
  for (i = 1; i <= nParents[node]; i++) {
    pdV2[0] += pdV2[i] * pdV1[i];
  }

  // finish mx as mx + pdV2 * t(pdV1) 
  mx = mx - gamma_zellner / (gamma_zellner + 1) * pdV2[0];
  
  LL = 0.5 * (-nParents[node] - 1) * log(1 + nData) - 0.5 * nData * log(mx);

  return(LL);
 
} /* ZLoglikelihood_node */


/* ----------------------------------------------------------------------------
   Logprior_diff
   Computes the difference in density, according to
   2 priors: (eventually) degree and (eventually) motif
   Inputs:
    adjacency_current:  adjacency of the current graph
    parent_node
    child_node
    diff: the edge change proposed
     -1: deletion
     +1: creation
   Outputs:
    *logPdiff: total CHANGE in prior log-density.
*/
void Logprior_diff (int **adjacency_current, int parent_node, int child_node,
                    int diff, double *logPdiff)
{
  
  if (bPriorConcordance)
    *logPdiff = Logprior_diff_concordance(parent_node, child_node, diff);
  else
    *logPdiff = 0;

  if (bPriorDegreeNode)
    *logPdiff += Logprior_diff_degree(parent_node, child_node, diff);

  if (bPriorMotif || bsave_the_motifs_probabilies) {
    UpdateCountTriangles(adjacency_current, parent_node, child_node, diff,
                         &diff_nEloops, &diff_nFloops);
  }
  
  if (bPriorMotif) {
    proposd_motif_prior = LnBB(current_nEloops + diff_nEloops,
                               current_nEloops + current_nFloops +
                               diff_nEloops + diff_nFloops,
                               alpha_motif, beta_motif);

    *logPdiff += proposd_motif_prior - current_motif_prior;
  }

} /* Logprior_diff */


/* ----------------------------------------------------------------------------
   Logprior_diff_bernoulli
   Computes the difference in density, according to
   the beta-binomial (Bernoulli) prior.
   Inputs:
    parent_node
    child_node
    diff: the edge change proposed (DO NOT CALL IT WITH DIFF == 0)
     -1: deletion
     +1: creation
   Returns:
    CHANGE in prior log-density.
*/
double Logprior_diff_bernoulli (int parent_node, int child_node, int diff)
{
  double logPdiff;
  
  // prior change under Bernoulli
  if (diff == 1)
    logPdiff = log_hyper_pB[parent_node][child_node] -
               log_hyper_qB[parent_node][child_node];
  else
    logPdiff = log_hyper_qB[parent_node][child_node] -
               log_hyper_pB[parent_node][child_node];

  return(logPdiff);

} /* Logprior_diff_bernoulli */


/* ----------------------------------------------------------------------------
   Logprior_full
   
   Full log-prior for a given graph (specified by its adjacency matrix)
   Inputs:
     N : number of nodes
     adjacency : a pointer to the adjacency matrix
   This assumes that some globals have been set up:
     hyper_pB
     hyper_qB
   It also sets up useful globals and should be called at only at start, unless
   resetting is wanted.
*/
double Logprior_full (int N, int **adjacency)
{

  int i, j, diff;
  int **budding_adj; // temporary adjacency matrix

  double pr = 0;     // init prior log density
  double cumLD;      // sum of log degrees

  // Bernoulli on edges: always done
  for (i = 0; i < N; i++)
    for (j = 0; j < N; j++)
      if (adjacency[i][j] == 1)
        pr += log_hyper_pB[i][j]; // log(hyper_pB[i][j])
      else
        pr += log_hyper_qB[i][j]; // log(1 - hyper_pB[i][j])

  // now for the concordance prior
  if (bPriorConcordance)
    for (i = 0; i < N; i++)
      for (j = 0; j < N; j++) // count only the disagreements
        if (Logprior_diff_concordance(i, j, (adjacency[i][j] ? 1 : -1)) < 0)
          pr -= lambda_concord;

  // now for degrees
  if (bsave_the_degree_counts || (bPriorDegreeNode == TRUE)) {
    // set up the global table for the number of edges for each node
    current_degrees = InitiVector(N);
    for (i = 0; i < N; i++) {
      current_degrees[i] = 0;
    }

    // Compute the table for the number of edges for each node
    for (i = 0; i < N; i++) {
      for (j = 0; j < N; j++) // sum over the ith line
        current_degrees[i] += adjacency[i][j];

      for (j = 0; j < N; j++) // sum over the ith column
        if (j != i) // do not count the node itself twice
          current_degrees[i] += adjacency[j][i];
    }
  } // end bsave_the_degree

  if (bPriorDegreeNode == TRUE) {
    /* get the log-density of the current degrees under the
       power law */
    cumLD = 0; // cumulate the log degrees
    for (i = 0; i < N; i++) {
      if (current_degrees[i] != 0)
        cumLD += log(current_degrees[i]);
    }
    pr += -gamma_degree * cumLD;
  }

  // now for motifs
  if (bsave_the_motifs_probabilies || (bPriorMotif == TRUE)) {
    /* counters for the two loop types; global initialize !
       E loops are A->B->C->A
       F (frustrated) loops are A->B->C<-A
       they will be initalized in the following at the values for the
       current graph
       other types of motifs could be added */
    current_nEloops = 0;
    current_nFloops = 0;

    /* to count the loops we simply reconstruct the adjacency matrix given
       (starting from an empty matrix) and count the loops as they are
       being formed */
    budding_adj = InitiMatrix(N, N); // start empty
    for (i = 0; i < N; i++)
      for (j = 0; j < N; j++)
        budding_adj[i][j] = 0;

    // note: i is always parent of j

    diff = 1; /* be explicit: we are only looking at nodes linked to
                 each other */
    for (i = 0; i < N; i++) { // for each node
      for (j = 0; j < N; j++) { // for each potential child
        if (adjacency[i][j] == 1) { // skip the zeros...
          UpdateCountTriangles(budding_adj, i, j, diff,
                               &diff_nEloops, &diff_nFloops);
          budding_adj[i][j] = 1;
          current_nEloops += diff_nEloops;
          current_nFloops += diff_nFloops;
        } // end if
      } // end for j
    } // end for i
  } // end bsave_the_motifs_probabilies

  if (bPriorMotif == TRUE) {
    // compute prior component and store it globally 'cause it's expensive
    current_motif_prior = LnBB(current_nEloops,
                               current_nEloops + current_nFloops,
                               alpha_motif, beta_motif);
    pr += current_motif_prior;
  }

  return (pr);

} /* Logprior_full */


/* ----------------------------------------------------------------------------
   ReadScript_Bison

   Read the simulation settings from a script file. The syntax is defined
   using lex and yacc. Meaningful input is then checked and default values
   are specified.
*/
void ReadScript_Bison (char *const filename)
{
  int i, j;
  extern FILE *yyin;

  yyin = fopen(filename, "r");
  if (yyin) {
    printf("Reading from file %s.\n\n", filename);
  }
  else
    lexerr("no input file");

  // set non-zero default values for scalar predefined variables here
  lambda_concord = 1;
  gamma_degree   = 1;
  gamma_zellner  = 1;
  alpha_motif    = 1;
  beta_motif     = 1;
  iter           = 1000000000;

  // printf("starting script reading...\n");
  yyparse();

  fclose (yyin);

  // check nNodes value
  if (nNodes == 0)
    lexerr ("nNodes cannot be zero");

  if (nNodes > sqrt(INT_MAX))
    // if N too large you have to switch to longs
    lexerr (" nNodes too large for 'int' indexing");

  // check inconsistencies
  if (bBN && autocycle)
    lexerr ("autocycles are forbidden in a BN");

  if (bBN) {
    if (bPriorConcordance && edge_requirements)
      for (i = 0; i < nNodes; i++)
        if (edge_requirements[i][i] == 1)
          lexerr ("a concordance prior used with BNs cannot require an "
                   "edge on the diagonal");
          
    if (bPriorMotif)
      lexerr ("the motifs currently implemented in graph_sampler "
              "are incompatible with BNs");
  }

  bData = (nData > 0);

  if (bData && !pData)
    lexerr ("nData > 0 but data values are not provided");

  if (!bBN && !bDBN && bData)
    printf ("Warning: the data provided will not be used (bBN is 0).\n\n");

  // likelihoods cannot conflict
  if ((bZellner) && (bDirichlet))
    lexerr ("Zellner and Dirichlet scores are incompatible");

  // likelihoods cannot be computed without data
  if ((bZellner) && (!bData))
    lexerr ("Zellner score requires data");

  if (bDirichlet) {
    if (!bData)
      lexerr ("Dirichlet score requires data");

    if (!pDataLevels)
      lexerr ("Dirichlet score requires that data levels be specified");
    
    for (i = 0; i < nNodes; i++)
      for (j = 0; j < nNodes; j++)
        if (pData[i][j] != (int) pData[i][j])
          lexerr ("Dirichlet score requires integer data");
  }
  
  SetSeed(seed);

  bsave_some_graphs = (n_saved_adjacency > 0);
 
  // printf ("done reading script.\n");

} /* ReadScript_Bison */


/* ----------------------------------------------------------------------------
   SetPriorHyperParam

   Set the hyper parameters of the priors by looking at some general features 
   of the data.
*/
void SetPriorHyperParam (void)
{
  printf("SetPriorHyperParam to do...\n\n");
  /* for example the lambda should be commensurate or larger than the raw data
     variance . Actually check the precision story.
     The variance (precision) of the reg param should be commensurate or 
     larger with the range of values those reg params can take (???) etc */

} /* SetPriorHyperParam */


/* ----------------------------------------------------------------------------
   UndoDiff
   
   Undoes the change in global number of parents of node "child".
*/
void UndoDiff (int parent, int child, int diff)
{
  if (diff < 0) { 
    // just add parent that was removed by increasing the count of parents
    nParents[child] += 1;
  }
  else {
    // remove parent that was added
    nParents[child] -= 1;
  }
  
} /* UndoDiff */


/* ----------------------------------------------------------------------------
   UpdateBestGraph

   Update the motifs accounting tables.
*/
void UpdateBestGraph (void)
{
  int i;

  if (bsave_best_graph && (iter > nBurnin - 1)) {
    if (dBestPosterior < current_logposterior) {
      dBestPosterior  = current_logposterior;
      dBestPrior      = current_logprior;
      dBestLikelihood = current_loglikelihood;
      for (i = 0; i < nNodes; i++)
        memcpy(best_adj[i], current_adj[i], nNodes * sizeof(int));
    }
  }
  
} /* UpdateBestGraph */


/* ----------------------------------------------------------------------------
   UpdateEdgeP

   Just do that.
*/
void UpdateEdgeP (void)
{
  int i, j;

  if (iter == nBurnin)
    for (i = 0; i < nNodes; i++)
      for (j = 0; j < nNodes; j++)
        mat_sum[i][j] = current_adj[i][j];
  if (iter > nBurnin)
    for (i = 0; i < nNodes; i++)
      for (j = 0; j < nNodes; j++)
        mat_sum[i][j] += current_adj[i][j];

} /* UpdateEdgeP */


/* ----------------------------------------------------------------------------
*/
int main (int nArgs, char *const *rgszArg)
{
  BOOL   bEdge;
  
  double logp;
  int    diff_location;
  char   *szFileIn, *szPrefixOut;
  
  AnnounceProgram();

  GetCmdLineArgs (nArgs, rgszArg, &szFileIn, &szPrefixOut);

  ReadScript_Bison(szFileIn);
 
  InitArrays();

  InitOutputs(szPrefixOut);

  // compute the prior of the initial network, 
  // that initializes also book-keeping for fast computations of priors
  current_logprior = Logprior_full(nNodes, current_adj);

  if (bBN && bData) {
    // SetPriorHyperParam(); disabled
    current_loglikelihood = Loglikelihood_full(nNodes, pData);
  }
  else 
    current_loglikelihood = 0;

  current_logposterior = current_logprior + current_loglikelihood;
  
  if (isnan(current_logprior) || !isfinite(current_logprior))
    lexerr ("initial network has prior with null probability");

  dBestPrior      = current_logprior;
  dBestLikelihood = current_loglikelihood;
  dBestPosterior  = current_logposterior;

  /* -------------------
     The sampler is here
  */
  printf ("\nDoing %ld iterations.\n\n...\n\n", nRuns);

  // initialize parent and child node for systematic scan
  parent = -1;
  child  =  0;
  
  for (iter = 0; iter < nRuns; iter++) {

    logp = log(Randoms());

    label_Redo_it:

    // if BN sought but the proposed graph was not a DAG, come back here

    // to create a proposal graph take 2 nodes, scanning systematically
    if (parent < (nNodes-1)) {
      parent = parent + 1;
    }
    else {
      parent = 0;
      if (child < (nNodes-1))
        child = child + 1;
      else
        child = 0;
    }

    // in case of BN or no autocycle: skip the diagonal
    if ((!autocycle) && (parent == child)) { 
      if (parent < (nNodes-1)) {
        parent = parent + 1;
      }
      else {
        parent = 1;
        child = 0;
      }
    }

    // sample a move from the baseline Bernoulli prior
    bEdge = (Randoms() < hyper_pB[parent][child]); // 0 or 1
    if (bEdge == current_adj[parent][child]) {
      diff = 0;
    }
    else {
      if (bEdge == 1) {
        diff = 1;

        // for BNs we have to check that it's still a DAG
        if ((bBN) && (!Check_DAG_Edge(current_adj, parent, child)))
          goto label_Redo_it; // forget it completely

        // for Zellner likelihood there should not be more parents than data
        if ((bZellner) && (nParents[child] >= nData))
          goto label_Redo_it; // forget it completely
          
      }
      else {
        diff = -1; // removing an edge to a DAG gives a DAG
      }
    }

    // compute log-probabilities proposal - log-probabilities current
    if (diff != 0) {
      Logprior_diff(current_adj, parent, child, diff, &diff_logprior);

      if (bBN && bData)
        Loglikelihood_diff(parent, child, diff, pData, &diff_loglikelihood);

      diff_logposterior = diff_logprior + diff_loglikelihood;
    }
    else { // no change
      diff_logposterior = 0;
    }

    // check acceptation
    if ((diff_logposterior >= 0) || (logp < diff_logposterior)) { // accept

      UpdateDegrees_if_accept();
      UpdateMotifs_if_accept();

      /* find flattened location of the sampled edge;
         go down columns, to be compatible with R;
         must start from 1 to leave 0 as indicator of no change by convention;
         then signed by the difference between adjacency matrices */
      diff_location = (parent + child * nNodes + 1) * diff;

      // eventually write the location to output file
      if (bsave_the_chain)
        SaveChain(diff_location);

      /* now you can update the graph adjacency matrice,
         the parenthood of child has been already changed */
      if (diff != 0) {
        current_adj[parent][child] = !(current_adj[parent][child]);

        // eventually update the total prior
        if (bsave_best_graph || bsave_some_graphs) {
          current_logprior += diff_logprior +
                              Logprior_diff_bernoulli(parent, child, diff);
        }

        // eventually update likelihood and posterior
        if (bBN && bData) {
          current_ll_node[child] += diff_loglikelihood;

          if (bsave_best_graph || bsave_some_graphs) {
            current_loglikelihood +=  diff_loglikelihood;
            current_logposterior   = current_logprior + current_loglikelihood;
          }
        }

        // eventuall update the best graph
        if (bsave_best_graph)
          UpdateBestGraph();

      } // end diff != 0
    } // end of accept

    else { // reject

      // no differences between adjacency matrices: just write out zero
      if (bsave_the_chain)
        SaveChain(0);

      UpdateDegrees_if_reject();
      UpdateMotifs_if_reject();

      /* no update of adjacency is needed,
         but the change in parenthood of child needs to be undone */
      UndoDiff(parent, child, diff);
      
    } // end of reject

    // cumulate the adjacency matrices, i.e. cumulate edge counts
    if (bsave_the_edge_probabilies)
      UpdateEdgeP();

    // save eventually the graph
    SaveGraph();
    
  } // end iter
  // End of sampler

  // final results if asked for:

  // edge probability matrix
  SaveEdgeP(pEdgeFile);

  // best graph
  SaveBestGraph();

  // save cumulated degree counts
  SaveDegreeCounts();
  
  // motifs probabilities
  // SaveMotifsP(stdout); is a screen alternative
  SaveMotifsP(pMotifFile);

  CloseOutputs(szPrefixOut);
  
  CleanupMemory();

  printf ("Done.\n\n");

  return (0);

} /* end */
