/*  GNU Ocrad - Optical Character Recognition program
    Copyright (C) 2003, 2004 Antonio Diaz Diaz.

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

#include <algorithm>
#include <cstdio>
#include <vector>
#include "common.h"
#include "rectangle.h"
#include "vrhomboid.h"
#include "track.h"
#include "block.h"
#include "character.h"
#include "textline.h"
#include "textblock.h"


// Build the vertical composite characters.
void Textblock::join_characters() throw()
  {
  for( int current_line = 0; current_line < lines(); ++current_line )
    {
    Textline & line1 = line( current_line );
    for( int i = 0 ; i < line1.characters() - 1; )
      {
      Character & c1 = line1.character( i );
      bool joined = false;
      for( int j = i + 1 ; j < line1.characters(); ++j )
        {
        Character & c2 = line1.character( j );
        if( !c1.h_overlaps( c2 ) ) continue;
        Character *cup, *cdn;
        if( c1.vcenter() >= c2.vcenter() ) cup = &c2, cdn = &c1;
        else cup = &c1, cdn = &c2;
        if( cdn->includes_hcenter( *cup ) || cup->includes_hcenter( *cdn ) ||
            ( cdn->top() > cup->bottom() && cdn->hcenter() < cup->hcenter() ) )
          {
          if( cdn == &c1 ) { c1.join( c2 ); line1.delete_character( j ); }
          else { c2.join( c1 ); line1.delete_character( i ); }
          joined = true; break;
          }
        }
      if( !joined ) ++i;
      }
    }
  }


// Build a text block covering rectangle 'r' from the blocks in 'block_matrix'
// and recognize its characters using 'charset'.
//
// 'block_matrix' holds one vector of blocks per "cut". The constructor:
//   1. classifies the blocks of every cut as normal, tall or short by
//      comparing their height with a trimmed mean height of the cut,
//   2. distributes the normal blocks into text lines,
//   3. joins vertically stacked characters and initializes the line tracks,
//   4. inserts the tall and then the short blocks into the existing lines,
//   5. merges lines made of small marks into the line below them,
//   6. joins characters again, inserts spaces and blank lines,
//   7. runs both recognition passes on every line.
Textblock::Textblock( const Rectangle & r, const Charset & charset,
           const std::vector< std::vector< Block > > & block_matrix ) throw()
  : Rectangle( r )
  {
  const int cuts = block_matrix.size();
  // Standard containers are used here instead of variable-length arrays:
  // VLAs are not part of ISO C++, arrays of class type with a runtime bound
  // are rejected by some compilers, and 'cuts' may be zero.
  std::vector< int > mean_height( cuts );
  std::vector< std::vector< Block > > pending( cuts ), pending_tall( cuts ),
                                      pending_short( cuts );

  // Classify blocks.
  for( int cut = 0; cut < cuts; ++cut )
    {
    const std::vector< Block > & block_vector = block_matrix[cut];
    if( !block_vector.size() ) continue;
    unsigned int samples = 0;
    std::vector< int > height_distrib;	// histogram indexed by block height
    // Preferred samples: blocks at least 10 high and less than 3 times
    // as wide as they are high.
    for( unsigned int i = 0; i < block_vector.size(); ++i )
      {
      unsigned int h = block_vector[i].height();
      unsigned int w = block_vector[i].width();
      if( h < 10 || w >= 3 * h ) continue;
      if( h >= height_distrib.size() ) height_distrib.resize( h + 1 );
      ++height_distrib[h]; ++samples;
      }
    // No block passed the filter; fall back to sampling every block.
    if( !height_distrib.size() )
      for( unsigned int i = 0; i < block_vector.size(); ++i )
        {
        unsigned int h = block_vector[i].height();
        if( h >= height_distrib.size() ) height_distrib.resize( h + 1 );
        ++height_distrib[h]; ++samples;
        }
    // Trimmed mean: a histogram bin contributes only if the cumulative count
    // including it reaches 10% of the samples and the cumulative count before
    // it is still below 90% of the samples.
    mean_height[cut] = 0;
    int valid_samples = 0;
    for( unsigned int i = 0, count = 0; i < height_distrib.size(); ++i )
      {
      int a = height_distrib[i];
      if( 10 * ( count + a ) >= samples && 10 * count < 9 * samples )
        { mean_height[cut] += a * i; valid_samples += a; }
      count += a;
      }
    if( valid_samples ) mean_height[cut] /= valid_samples;

    // Tall: at least twice the mean height. Short: at most half of it.
    for( unsigned int i = 0; i < block_vector.size(); ++i )
      {
      const Block & b = block_vector[i];
      if( b.height() >= 2 * mean_height[cut] )
        pending_tall[cut].push_back( b );
      else if( 2 * b.height() <= mean_height[cut] )
        pending_short[cut].push_back( b );
      else pending[cut].push_back( b );
      }
    }

  // Assign normal blocks to characters and create lines.
  int min_line = 0;	// first line of current cut
  add_line();
  for( int cut = 0; cut < cuts; ++cut )
    {
    if( pending[cut].size() )
      {
      // Every cut starts on an empty line of its own.
      if( line( lines() - 1 ).characters() ) add_line();
      int current_line = min_line = lines() - 1;
      line( current_line ).shift_character( Character( pending[cut][0] ) );
      for( unsigned int i = 1; i < pending[cut].size(); ++i )
        {
        Character c( pending[cut][i] );
        // Restart the search at most two lines above the previous position,
        // never before the first line of this cut.
        current_line = std::max( min_line, current_line - 2 );
        while( true )
          {
          // Scan the line from its last character backwards, considering only
          // characters that do not share a horizontal center with 'c'.
          // 'cr' ends up as the last visited one that 'c' h_precedes; 'cl' is
          // the first visited one that 'c' does not precede.
          const Character *cl = 0, *cr = 0;
          for( int j = line( current_line ).characters() - 1; j >= 0; --j )
            {
            const Character & cj = line( current_line ).character( j );
            if( !c.includes_hcenter( cj ) && !cj.includes_hcenter( c ) )
              { if( c.h_precedes( cj ) ) cr = &cj; else { cl = &cj; break; } }
            }
          // A neighbor shares a vertical center with 'c': same line.
          if( ( cl && ( cl->includes_vcenter( c ) || c.includes_vcenter( *cl ) ) ) ||
              ( cr && ( cr->includes_vcenter( c ) || c.includes_vcenter( *cr ) ) ) )
            { line( current_line ).shift_character( c ); break; }
          // A neighbor's bottom is less than c's top: try the next line,
          // appending a new one when the end of the block is reached.
          else if( ( cl && cl->bottom() < c.top() ) || ( cr && cr->bottom() < c.top() ) )
            {
            if( ++current_line >= lines() )
              { add_line(); current_line = lines() - 1;
              line( current_line ).shift_character( c ); break; }
            }
          // A neighbor's top is greater than c's bottom: 'c' goes into a new
          // line inserted at the current position.
          else if( ( cl && cl->top() > c.bottom() ) || ( cr && cr->top() > c.bottom() ) )
            {
            insert_line( current_line );
            line( current_line ).shift_character( c ); break;
            }
          else if( ( cl && cl->v_overlaps( c ) ) || ( cr && cr->v_overlaps( c ) ) )
            { line( current_line ).shift_character( c ); break; }
          // NOTE(review): when no neighbor was found (both 'cl' and 'cr' are
          // null) 'c' is not added to any line; confirm this is intended.
          else break;
          }
        }
      }
    }

  join_characters();

  // Create tracks of lines.
  for( int i = 0; i < lines(); ++i ) line( i ).initialize_track();

  // Insert tall blocks.
  // Seek up, then seek down, needed for slanted or curved lines.
  for( int current_line = 0, cut = 0; cut < cuts; ++cut )
    {
    for( unsigned int i = 0; i < pending_tall[cut].size(); ++i )
      {
      Character c( pending_tall[cut][i] );
      while( current_line > 0 &&
             c.bottom() < line( current_line ).vcenter( c.hcenter() ) )
        --current_line;
      while( current_line < lines() &&
             c.top() > line( current_line ).vcenter( c.hcenter() ) )
        ++current_line;
      // The search ran past the last line: skip this block.
      if( current_line >= lines() ) { --current_line; continue; }
      // A block more than twice as high as the line's mean height is kept
      // only if its horizontal center is left of the line's first character,
      // in which case it is handed to big_initial(); otherwise it is dropped.
      if( c.height() > 2 * line( current_line ).mean_height() )
        { if( c.hcenter() < line( current_line ).character(0).left() )
            line( current_line ).big_initial( c ); }
      else line( current_line ).shift_character( c );
      }
    }

  // Insert short blocks.
  // Seek up, then seek down, needed for slanted or curved lines.
  for( int current_line = 0, cut = 0; cut < cuts; ++cut )
    {
    for( unsigned int i = 0; i < pending_short[cut].size(); ++i )
      {
      Character c( pending_short[cut][i] );
      while( current_line > 0 &&
             c.bottom() < line( current_line ).top( c.hcenter() ) )
        --current_line;
      int temp = current_line;		// line reached by the upward seek
      while( current_line < lines() &&
             c.top() > line( current_line ).bottom( c.hcenter() ) )
        ++current_line;
      if( current_line >= lines() )
        {
        // Past the last line: keep the block only if its top is no more than
        // one line height beyond the bottom of the last line.
        const Textline & l = line( --current_line );
        if( c.top() > l.bottom( c.hcenter() ) + l.height() ) continue;
        else temp = current_line;
        }
      // Only the line immediately before 'current_line' competes with it.
      if( current_line - temp > 1 ) temp = current_line - 1;
      // Choose line 'temp' when twice the gap between its bottom and c's top
      // is smaller than the gap between c's bottom and the top of
      // 'current_line'.
      if( current_line != temp &&
          2 * ( c.top() - line( temp ).bottom( c.hcenter() ) ) <
          line( current_line ).top( c.hcenter() ) - c.bottom() )
        current_line = temp;
      line( current_line ).shift_character( c );
      }
    }

  // Join lines of i-dots and tildes with the line that follows them.
  for( int current_line = 0; current_line < lines() - 1; )
    {
    bool joined = false;
    Textline & line1 = line( current_line );
    Textline & line2 = line( current_line + 1 );
    // Candidate: line1 has at most twice as many characters as line2 and
    // less than half its mean height.
    if( line1.characters() <= 2 * line2.characters() &&
        2 * line1.mean_height() < line2.mean_height() )
      {
      for( int i1 = 0; !joined && i1 < line1.characters(); ++i1 )
        {
        Character & c1 = line1.character( i1 );
        if( c1.height() * 2 >= line2.mean_height() ) continue;
        for( int i2 = 0; !joined && i2 < line2.characters(); ++i2 )
          {
          Character & c2 = line2.character( i2 );
          if( c2.right() < c1.left() ) continue;
          if( c2.left() > c1.right() ) break;
          if( ( c2.includes_hcenter( c1 ) || c1.includes_hcenter( c2 ) )
              && c2.top() - c1.bottom() < line2.mean_height() )
            {
            // line1 and line2 must not be used after delete_line(); both
            // loops stop immediately because 'joined' is now true.
            joined = true; line2.join( line1 ); delete_line( current_line );
            }
          }
        }
      }
    if( !joined ) ++current_line;
    }

  join_characters();

  // Add spaces between characters.
  for( int current_line = 0; current_line < lines(); ++current_line )
    {
    Textline & line1 = line( current_line );
    int mw = line1.mean_width();
    int mg = line1.mean_gap_width();
    if( mw < 2 || mg < 1 ) continue;
    for( int i = 1 ; i < line1.characters(); ++i )
      {
      Character & c1 = line1.character( i - 1 );
      Character & c2 = line1.character( i );
      int dist = ( c2.left() > c1.right() ) ? c2.left() - c1.right() - 1 : 0;
      // A gap counts as a space when it is at least one mean width, or when
      // it is more than a third of the mean width and more than twice the
      // mean gap width.
      if( dist >= mw || ( (3 * dist) > mw && dist > (mg * 2) ) )
        if( line1.insert_space( i ) ) ++i;	// step over the new space
      }
    }

  // Add blank lines.
  if( lines() >= 3 )
    {
    int mean_vdistance = (line(lines()-1).mean_vcenter() - line(0).mean_vcenter()) / (lines() - 1);
    if( mean_vdistance > 0 )
      for( int current_line = 0; current_line + 1 < lines(); ++current_line )
        {
        Textline & line1 = line( current_line );
        Textline & line2 = line( current_line + 1 );
        int vdistance = line2.mean_vcenter() - line1.mean_vcenter();
        if( vdistance <= 0 ) continue;
        // Distance between the two lines in units of the mean line distance,
        // rounded to nearest; every unit beyond the first adds an empty line.
        int newlines = (vdistance + (mean_vdistance / 2)) / mean_vdistance;
        for( int i = 1; i < newlines; ++i ) insert_line( ++current_line );
        }
    }

  for( int i = 0; i < lines(); ++i )
    {
    // First pass. Recognize the easy characters.
    line(i).recognize1( charset );
    // Second pass. Use context to clear up ambiguities.
    line(i).recognize2( charset );
    }
  }


// Return the number of characters in the block, summed over all its lines.
int Textblock::characters() const throw()
  {
  int sum = 0;
  int idx = 0;
  while( idx < lines() )
    {
    sum += line( idx ).characters();
    ++idx;
    }
  return sum;
  }


// Insert an empty Textline before position 'i'; 'i' == lines() appends.
// An index outside [0, lines()] is reported through Ocrad::internal_error.
void Textblock::insert_line( int i ) throw()
  {
  const bool in_range = ( i >= 0 && i <= lines() );
  if( !in_range )
    Ocrad::internal_error( "insert_line, index out of bounds" );
  data.insert( data.begin() + i, Textline() );
  }


// Remove the line at position 'i'.
// An index outside [0, lines()) is reported through Ocrad::internal_error.
void Textblock::delete_line( int i ) throw()
  {
  const bool in_range = ( i >= 0 && i < lines() );
  if( !in_range )
    Ocrad::internal_error( "delete_line, index out of bounds" );
  data.erase( data.begin() + i );
  }


// Return a reference to the line at position 'i'.
// An index outside [0, lines()) is reported through Ocrad::internal_error.
Textline & Textblock::line( int i ) const throw()
  {
  const bool in_range = ( i >= 0 && i < lines() );
  if( !in_range )
    Ocrad::internal_error( "line, index out of bounds" );
  return data[i];
  }


// Print every line of the block through Textline::print, then write a
// newline to control.outfile.
void Textblock::print( const Control & control ) const throw()
  {
  int idx = 0;
  while( idx < lines() )
    {
    line( idx ).print( control );
    ++idx;
    }
  std::fputs( "\n", control.outfile );
  }


void Textblock::dprint( const Control & control, bool graph, bool recursive )
								const throw()
  {
  std::fprintf( control.outfile, "%d lines\n\n", lines() );

  for( int i = 0; i < lines(); ++i )
    {
    std::fprintf( control.outfile, "%d characters in line %d\n",
                  line(i).characters(), i+1 );
    line(i).dprint( control, graph, recursive );
    }
  std::fputs( "\n", control.outfile );
  }


void Textblock::xprint( const Control & control ) const throw()
  {
  std::fprintf( control.exportfile, "lines %d\n", lines() );

  for( int i = 0; i < lines(); ++i )
    {
    std::fprintf( control.exportfile, "line %d chars %d height %d\n", i + 1,
                  line(i).characters(), line(i).mean_height() );
    line(i).xprint( control );
    }
  }
