#include <map>
#include <vector>
#include <fstream>
#include <cstdlib>
#include <getopt.h>
#include "sentence.hpp"
#include "models.hpp"
#include "unlabeled_edge_feature_extractor_interface.hpp"
#include "parser_learner_interface.hpp"
#include "eda_unlabeled_edge_feature_extractor.hpp"
// #include "liblbfgs_parser_learner.hpp"
#include "lbfgs_hpp_parser_learner.hpp"
#include "log_linear_sgd_parser_learner.hpp"
#include "simple_edge_label_feature_extractor.hpp"

using namespace std;
using namespace maeda;

// Writes the train-eda command-line help text to `out`, one line at a time,
// and then terminates the process with exit status 1. Never returns.
void PrintUsageAndDie(ostream &out) {
  // The help text, one entry per output line.
  static const char *const kUsageLines[] = {
    "Usage: train-eda [Options]",
    "Options:",
    "  -i, --in-format FORMAT   input format of training data(eda/conll)",
    "  -m, --model FILE         output file for a model",
    "  -c, --corpus FILE        training corpus",
    "  -a  ALGORITHM            learning algorithm",
    "      --cutoff INTEGER     feature cut-off parameter",
    "      --labeling           build a model for labeling",
    "      --left-to-right      All dependencies go from left to right",
    "Available learning algorithms",
    "  llvm: variable metric method for log-linear models",
    "  llsgd: stochastic gradient descent method for log-linear models",
  };
  const size_t line_count = sizeof(kUsageLines) / sizeof(kUsageLines[0]);

  for (size_t i = 0; i < line_count; ++i) {
    // endl (rather than '\n') flushes each line, so the text reaches the
    // stream before exit() below ends the process.
    out << kUsageLines[i] << endl;
  }

  exit(1);
}

// Learning algorithms selectable with the -a command-line option.
enum Algorithm {
  ALG_LLVM  = 0,   // "llvm": variable metric method for log-linear models
  ALG_LLSGD = 1    // "llsgd": stochastic gradient descent for log-linear models
};

int main(int argc, char *argv[]) {
  string model_file = "tmp.model";

  int cutoff = 2;   // åȥ

  vector<string> training_corpora;   // ؽѥ

  StreamReader reader = ReadFromEdaDependencyStream;

  bool labeling_p = false;

  bool left_to_right = false;

  Algorithm algorithm = ALG_LLVM;

  while (1) {
    enum Opt {
      OPT_IN_FORMAT = 1,  OPT_MODEL, OPT_CORPUS, OPT_CUTOFF,
      OPT_LABELING, OPT_LEFT_TO_RIGHT,
    };
    static struct option longopts [] = {
      {"in-format"        , required_argument, 0, OPT_IN_FORMAT},
      {"model"            , required_argument, 0, OPT_MODEL},
      {"corpus"           , required_argument, 0, OPT_CORPUS},
      {"cutoff"           , required_argument, 0, OPT_CUTOFF},
      {"labeling"         , no_argument      , 0, OPT_LABELING},
      {"left-to-right"    , no_argument      , 0, OPT_LEFT_TO_RIGHT},
      {0, 0, 0, 0},
    };
    const char *shortopts = "i:m:c:a:";

    int option_index = 0;

    int c = getopt_long_only(argc, argv, shortopts, longopts, &option_index);

    if (c == -1) {
      break;
    } else {
      switch (c) {
      case 'a':
        if (string(optarg) == "llvm") {
          algorithm = ALG_LLVM;
        } else if (string(optarg) == "llsgd") {
          algorithm = ALG_LLSGD;
        } else {
          cerr << "Error: no such algorithm: " << optarg << endl;
          exit(1);
        }
        break;
      case 'i':
      case OPT_IN_FORMAT:
        if (string(optarg) == "eda") {
          reader = ReadFromEdaDependencyStream;
        } else if (string(optarg) == "conll") {
          reader = ReadFromCoNLLDependencyStream;
        } else {
          cerr << "Error: invalid format: " << optarg << endl;
          exit(1);
        }
        break;
      case 'm':
      case OPT_MODEL:
        if (optarg == NULL) {
          cerr << "Error: model filename expected" << endl;
          exit(1);
        } else {
          model_file = string(optarg);
        }
        break;
      case 'c':
      case OPT_CORPUS:
        if (optarg == NULL) {
          cerr << "Error: corpus filename expected" << endl;
          exit(1);
        } else {
          training_corpora.push_back(optarg);
        }
        break;
      case OPT_CUTOFF:
        if (optarg == NULL) {
          cerr << "Error: cutoff value expected" << endl;
          exit(1);
        } else {
          cutoff = atoi(optarg);
        }
        break;
      case OPT_LABELING:
        labeling_p = true;
        break;
      case OPT_LEFT_TO_RIGHT:
        left_to_right = true;
        break;
      default:
        PrintUsageAndDie(cout);
      }
    }
  }

  if (training_corpora.size() == 0) {
    PrintUsageAndDie(cout);
  }

  // ؽѥʸ
  std::vector<Sentence> sentence_list;

  for (vector<string>::const_iterator it = training_corpora.begin();
       it != training_corpora.end(); ++it) {
    ifstream ifs(it->c_str());
    if (ifs) {
      cerr << "Reading: " << *it << endl;
      while (ifs) {
        Sentence sentence;
        if (reader(ifs, &sentence)) {
          sentence_list.push_back(sentence);
        }
      }
    } else {
      cerr << "Warning: Could not open " << *it << ", skipped." << endl;
    }
  }


  if (labeling_p) {
    EdgeLabelModel model(new SimpleEdgeLabelFeatureExtractor);
    cerr << "Start training." << endl;
    if (model.Train(sentence_list)) {
      model.Save(model_file);
    }
  } else {
    cerr << "Start training." << endl;
    cerr << "Cutoff: " << cutoff << endl;
    cerr << "Algorithm: ";
    switch (algorithm) {
    case ALG_LLVM:  cerr << "llvm"  << endl; break;
    case ALG_LLSGD: cerr << "llsgd" << endl; break;
    }

    // SentenceʸФ饹
    UnlabeledEdgeFeatureExtractorInterface *extractor =
        new EdaUnlabeledEdgeFeatureExtractor;

    // ؽ르ꥺ
    ParserLearnerInterface *learner = NULL;
    switch (algorithm) {
    case ALG_LLVM:
      //learner = new LibLBFGSParserLearner(double(1.0));
      learner = new LBFGS_HPP_ParserLearner(double(1.0));
      break;
    case ALG_LLSGD:
      learner = new LogLinearSGDParserLearner();
      break;
    }

    UnlabeledParseModel model(extractor, left_to_right);

    if (model.Train(sentence_list, learner, cutoff)) {
      model.Save(model_file);
      //model.SaveFeatureVectors("tmp.feateure_vector");
    }
  }

  return 0;
}
