///////////////////////////////////////////////////////////////////////////// 
/* 
  Copyright 2001 Ronald S. Burkey 
 
  This file is part of GutenMark. 
 
  GutenMark is free software; you can redistribute it and/or modify 
  it under the terms of the GNU General Public License as published by 
  the Free Software Foundation; either version 2 of the License, or 
  (at your option) any later version. 
 
  GutenMark is distributed in the hope that it will be useful, 
  but WITHOUT ANY WARRANTY; without even the implied warranty of 
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
  GNU General Public License for more details. 
 
  You should have received a copy of the GNU General Public License 
  along with GutenMark; if not, write to the Free Software 
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 
  Filename:	MarkByLineHeuristic.c 
  Purpose:	This handles just the high-level aspects of the line-level
  		analysis, using data provided from LineAnalysisPass.  In other
		words, it does things like detect headings, paragraph starts,
		paragraph ends, verse, etc., and supplies the appropriate
		markup. 
  Mods:		12/30/01 RSB	Split off from MarkBody.c, where it was
  				formerly a large chunk of the MarkBody function. 
				
  This function is (functionally) equivalent to the MarkByLineNeural function.
  It embodies the original approach I used in GutenMark.  However, while it 
  doesn't do a BAD job, in many ways, I soon came to realize that a different
  approach based on neural nets might do a better job.  Therefore, there are
  two functionally equivalent functions, though hopefully not identical in
  performance, for the same job.  Which one is actually used depends on
  command-line switches.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "AutoMark.h"

//---------------------------------------------------------------------- 
// Decide whether a candidate heading line may still be accepted as a
// heading.
//
// While Dataset->NumPrefatoryMatched has not yet exceeded
// PREFATORY_MATCH_CEILING, every candidate is accepted, and the counter
// is bumped whenever the line matches one found in the prefatory area.
// Once the ceiling has been exceeded, a candidate that does NOT match a
// prefatory line is accepted only if it is convincing in some other way:
// MatchSpecialWords() accepts it, or the line is flagged AllCaps or
// ReallyShort.
//
// Returns 1 if the line may be treated as a heading, 0 otherwise.
static int
NowMatching (AnalysisDataset * Dataset, char *s, LineRecord * Line)
{
  // Always evaluated first, exactly once, regardless of the counter.
  int Matched = MatchesPrefatoryLine (Dataset, s);

  if (Dataset->NumPrefatoryMatched <= PREFATORY_MATCH_CEILING)
    {
      // Still in the "learning" phase:  count matches, accept everything.
      if (Matched)
	Dataset->NumPrefatoryMatched++;
      return 1;
    }

  // Past the ceiling:  the line must justify itself somehow.
  if (Matched)
    return 1;
  if (MatchSpecialWords (Dataset, s, Line))
    return 1;
  if (Line->AllCaps || Line->ReallyShort)
    return 1;
  return 0;
}

//-------------------------------------------------------------------- 
// Detect a separating line, such as a line of all '*' or '-'. 
//
// s         Text of the line to test (NUL-terminated).
// NextLine  Record for the line following s; a separator is only
//           recognized when NextLine->Empty is set.
//
// Returns 1 if NextLine is empty and s consists solely of whitespace
// plus repetitions of ONE of the filler characters '*', '-', '_' or '.'
// (fillers may not be mixed within the line).  Note that a line holding
// nothing but whitespace therefore also returns 1.  Returns 0 otherwise.
static int
IsSeparatorLine (char *s, LineRecord * NextLine)
{
  static const char Fillers[] = "*-_.";
  const char *Filler, *ss;

  if (!NextLine->Empty)
    return (0);

  // Try each filler character in turn; the line qualifies as soon as
  // one of them (together with whitespace) accounts for every character.
  for (Filler = Fillers; *Filler; Filler++)
    {
      for (ss = s; *ss; ss++)
	{
	  // The cast is required:  passing a negative plain char (e.g. an
	  // 8-bit accented character) to isspace() is undefined behavior.
	  if (!isspace ((unsigned char) *ss) && *ss != *Filler)
	    break;
	}
      if (*ss == 0)
	return (1);
    }
  return (0);
}

//--------------------------------------------------------------------- 
// As complex as this function is, at base it's trying to do something
// pretty simple:  Just to locate headings, starts and ends of paragraphs,
// versified areas, etc., and to emit the corresponding markup through
// AddMarkup().
//
// It is called once per input line and works in three stages:
//   1. Check for the Project Gutenberg file-ender (only once the read
//      position is past Dataset->FileEnderRegion).
//   2. Close or continue whatever region is currently open (block quote,
//      subtitle, table, paragraph, header), based on the current line.
//   3. If no region is open and the line is not empty, decide what kind
//      of region the line starts (table, header, subtitle, block quote,
//      or a centered/ragged/justified paragraph) and open it.
//
// Parameters:
//      Dataset Analysis data:  the input file, prefatory-area info and
//              the prefatory-match counter.
//      Status  Running markup state (the In* flags, verse tracking, etc.)
//              together with the LineInfo window around the current line.
//      LineNum Number of the current line; compared against
//              Dataset->LowestNonPrefatoryLine.
//      s       Text of the current line.
// Returns:
//      0       Success
//      5       Disk error (i.e., AddMarkup() returned nonzero)
//      -1      At end of file (the PG file-ender was detected)

int
MarkByLineHeuristic (AnalysisDataset * Dataset, MarkStatus * Status,
		     int LineNum, char *s)
{
  char *ss;
  int j, k, n;

  Status->FirstWordArea = Status->LastWasHeader1;	// 12/09/01 RSB.        
  Status->InPreface = (LineNum < Dataset->LowestNonPrefatoryLine);
  if (Status->LineInfo[0].Empty)	// 12/09/01 RSB
    Status->SentenceStart = 1;	// 12/09/01 RSB

  // Analyze.  In case it isn't obvious, Status->LineInfo[0] contains the 
  // LineRecord for the current line.  Status->LineInfo[-1], ...,  
  // Status->LineInfo[-PRE_OR_POST_LINES] are for the prior lines.   
  // Status->LineInfo[1], ..., Status->LineInfo[PRE_OR_POST_LINES] 
  // are for the succeeding lines. 

  // Locate PG file-ender.  We define this as any line near the file-end
  // (i.e., read once the file position has passed
  // Dataset->FileEnderRegion; the original note says "within 500
  // characters") that contains the words "end", and "project gutenberg"
  // or "PG". 
  if (ftell (Dataset->InputFile) > Dataset->FileEnderRegion)
    {
      int End = 0, PG = 0;
      for (ss = s; *ss; ss++)
	{
	  if (!strncasecmp (ss, "end ", 4))
	    {
	      End = 1;
	      if (PG)
		break;
	    }
	  else if (!strncasecmp (ss, " PG ", 4)
		   || !strncasecmp (ss, "project gutenberg", 17))
	    {
	      PG = 1;
	      if (End)
		break;
	    }
	}
      if (End && PG)
	goto AtEnd;
    }

  // *** Do stuff pertaining to the line properties, such as  
  // paragraph begin/end, headers, and verse *** 
  if (Status->InBlockquote)
    {
      // An empty or unindented line terminates the block quote.
      if (Status->LineInfo[0].Empty || !Status->LineInfo[0].BeginsWhite)
	{
	  Status->InBlockquote = 0;
	  Status->BlockIndentation = 0;
	  if (AddMarkup
	      (Dataset,
	       Status->LastFirstSpace /* Status->LineInfo[0].Offset */ ,
	       MarkEndBlockquote, 0))
	    goto DiskError;
	}
      // 12/16/01 RSB.  The following conditionals handle the first or
      // last lines of block quotes, if indented differently.
      else if (!Status->LineInfo[-1].Empty
	       && Status->LineInfo[-1].Leading > Status->BlockIndentation)
	{
	  if (AddMarkup (Dataset, Status->LastFirstSpace, MarkBreak, 0))
	    goto DiskError;
	}
      else if (Status->LineInfo[0].Leading > Status->BlockIndentation
	       && !Status->LineInfo[-1].Empty)
	{
	  if (AddMarkup (Dataset, Status->LastFirstSpace, MarkBreak, 0))
	    goto DiskError;
	  // One non-breaking space per column of extra indentation.
	  for (j = Status->BlockIndentation; j < Status->LineInfo[0].Leading;
	       j++)
	    if (AddMarkup (Dataset, Status->LineInfo[0].Offset, MarkNbsp, 0))
	      goto DiskError;
	}
    }
  // A subtitle ends at the first empty line.
  if (Status->InSubtitle)
    {
      if (Status->LineInfo[0].Empty)
	{
	  Status->InSubtitle = 0;
	  if (AddMarkup
	      (Dataset,
	       Status->LastFirstSpace /* Status->LineInfo[0].Offset */ ,
	       MarkEndSubtitle, 0))
	    goto DiskError;
	}
    }
  // A table (pre-formatted region) ends at the first empty line.
  if (Status->InTable)
    {
      if (Status->LineInfo[0].Empty)
	{
	  Status->InTable = 0;
	  if (AddMarkup
	      (Dataset,
	       Status->LastFirstSpace /* Status->LineInfo[0].Offset */ ,
	       MarkEndTable, 0))
	    goto DiskError;
	}
    }
  if (Status->InParagraph)
    {
      if (Status->LineInfo[0].Empty /* || Status->LineInfo[0].BeginsWhite */ )
	{

	  // End the paragraph.  For safety's sake, assume that  
	  // italicizing doesn't cross paragraphs, if it's of the  
	  // delimited kind.  An exception for the delimited kind  
	  // is if <I> was used.  We assume in that case 
	  // that the user had enough on the ball to get it right. 
	  if (Status->Italicizing != 0 && Status->Italicizing != 'A'
	      && Status->Italicizing != 'H')
	    {
	      Status->Italicizing = 0;
	      if (AddMarkup
		  (Dataset,
		   Status->LastFirstSpace /* Status->LineInfo[0].Offset */ ,
		   MarkEndItalics, 0))
		goto DiskError;
	    }
	  Status->InParagraph = 0;
	  if (AddMarkup
	      (Dataset,
	       Status->LastFirstSpace /* Status->LineInfo[0].Offset */ ,
	       MarkEndParagraph, 0))
	    goto DiskError;
	}
      else if (Status->Versifying)
	{
	  // In verse, force a line break (plus indentation padding)
	  // before lines that look like the start of a new verse line.
	  if (Status->LineInfo[0].VerseCap
	      || !Status->LineInfo[0].BeginsWhite
	      || (!Status->LineInfo[0].VerseCap
		  && Status->LineInfo[0].Leading > 1))
	    {
	      if (AddMarkup
		  (Dataset,
		   Status->LastFirstSpace /* Status->LineInfo[0].Offset */ ,
		   MarkBreak, 0))
		goto DiskError;
	      for (j = Status->BlockIndentation;
		   j < Status->LineInfo[0].Leading; j++)
		if (AddMarkup
		    (Dataset, Status->LineInfo[0].Offset, MarkNbsp, 0))
		  goto DiskError;
	    }
	}
      else if (Status->LineInfo[-1].Leading > 1
	       || Status->LineInfo[0].Leading > 1
	       || (Status->LineInfo[-1].Leading == 1
		   && Status->LineInfo[0].Leading == 1)
	       || (Status->LineInfo[0].Leading == 1
		   && !Status->LineInfo[-1].Empty
		   && Status->LineInfo[-2].Leading == 1)
	       || (Status->LineInfo[0].Leading == 1
		   && !Status->LineInfo[1].Empty
		   && Status->LineInfo[2].Leading == 1)
	       || (Status->LineInfo[-1].Leading == 1
		   && !Status->LineInfo[-2].Empty
		   && Status->LineInfo[-3].Leading == 1)
	       || (Status->LineInfo[-1].Leading == 1
		   && !Status->LineInfo[0].Empty
		   && Status->LineInfo[1].Leading == 1))
	{

	  // The point of this is to detect intentionally indented lines 
	  // in the middle of a paragraph.  But we assume that a single  
	  // line with a single space at the front -- isolated, without 
	  // other such lines in the vicinity -- is a simple mistake. 
	  // However, some verse has every OTHER line indented, so we  
	  // try to detect that. 
	  if (AddMarkup
	      (Dataset,
	       Status->LastFirstSpace /* Status->LineInfo[0].Offset */ ,
	       MarkBreak, 0))
	    goto DiskError;
	  if (!Status->Centering)
	    {
	      for (j = Status->BlockIndentation;
		   j < Status->LineInfo[0].Leading; j++)
		if (AddMarkup
		    (Dataset, Status->LineInfo[0].Offset, MarkNbsp, 0))
		  goto DiskError;
	    }
	}
      else if (LineNum < Dataset->LowestNonPrefatoryLine)
	{
	  // Within the prefatory area, every line gets its own break.
	  if (AddMarkup
	      (Dataset,
	       Status->LastFirstSpace /* Status->LineInfo[0].Offset */ ,
	       MarkBreak, 0))
	    goto DiskError;
	}
      else if (!Status->LineInfo[-1].Empty && Status->LineInfo[-1].Short
	       && (Status->LineInfo[-2].Empty || Status->LineInfo[-2].Short)
	       && (Status->LineInfo[0].Short
		   || (Status->LineInfo[1].Empty
		       || Status->LineInfo[1].Short))
	       && Status->ParagraphType != MarkBeginJustifiedParagraph)
	{
	  // A run of short lines in a non-justified paragraph:  keep the
	  // line structure by inserting a break.
	  if (AddMarkup
	      (Dataset,
	       Status->LastFirstSpace /* Status->LineInfo[0].Offset */ ,
	       MarkBreak, 0))
	    goto DiskError;
	}
      else if (Status->LineInfo[0].VerseCap)
	{

	  // Hopefully, what with the "Status->Versifying" variable (which  
	  // didn't exist previously), this code shouldn't be needed.   
	  // But heck, it was here!  What we do here is to try and  
	  // detect a line of verse which is nevertheless longer than  
	  // the arbitrary limit of Status->LineInfo[0].Short.  (This is  
	  // actually a pretty rare condition, but heck!) 
	  // We do this by noting that lines of verse are generally  
	  // initially capitalized, so we look for a whole block of  
	  // lines that meet this criterion.  
	  // k counts the VerseCap lines around (and including) this one;
	  // hitting a non-empty, non-VerseCap line disqualifies the block.
	  k = 1;
	  for (j = -1; j >= -PRE_OR_POST_LINES; j--)
	    if (Status->LineInfo[j].VerseCap)
	      k++;
	    else if (!Status->LineInfo[j].Empty)
	      break;
	  if (j >= -PRE_OR_POST_LINES && !Status->LineInfo[j].Empty)
	    k = -1000;
	  for (j = 1; j <= PRE_OR_POST_LINES; j++)
	    if (Status->LineInfo[j].VerseCap)
	      k++;
	    else if (!Status->LineInfo[j].Empty)
	      break;
	  if (j <= PRE_OR_POST_LINES && !Status->LineInfo[j].Empty)
	    k = -1000;
	  if (k >= 4)
	    {
	      if (AddMarkup
		  (Dataset,
		   Status->LastFirstSpace /* Status->LineInfo[0].Offset */ ,
		   MarkBreak, 0))
		goto DiskError;
	    }
	}
    }
  // A header ends on the line that follows an empty line.
  if (Status->InHeader1 && Status->LineInfo[-1].Empty)
    {
      Status->InHeader1 = 0;
      Status->LastWasHeader1 = 1;
      if (AddMarkup
	  (Dataset, Status->LastFirstSpace /* Status->LineInfo[0].Offset */ ,
	   MarkEndHeader1, 0))
	goto DiskError;
    }
  // Nothing is open and the line has content:  decide what it starts.
  if (!Status->InParagraph && !Status->InBlockquote && !Status->InHeader1
      && !Status->InSubtitle && !Status->InTable
      && !Status->LineInfo[0].Empty)
    {

      // Status->LastVersifying is a variable that grows slowly while versifying,  
      // but decays quickly when not.  It allows a sort of persistence of 
      // detecting verse when intervening stuff is thrown in (like 
      // footnotes, dividers "****", and so on). 
      if (Status->Versifying)
	Status->LastVersifying++;
      else if (Status->LastWasHeader1)
	Status->LastVersifying--;
      else
	Status->LastVersifying /= 2;
      Status->Versifying = 0;
      // If we find stuff we know is going to be uninterpretable, go
      // into pre-formatted mode.
      if (Status->LineInfo[0].WeirdSequences
	  || Status->LineInfo[1].WeirdSequences)
	{
	  Status->InTable = 1;
	  if (AddMarkup
	      (Dataset, Status->LineInfo[0].Offset, MarkBeginTable, 0))
	    goto DiskError;
	  goto ParagraphBegun;
	}
      if (Status->LineInfo[0].BeginsWhite)
	{

	  // If the line began with whitespace, we never count it as a  
	  // potential heading, unless it matches certain string patterns. 
	  // (j remembers LastWasHeader1 as it was before IsHeader() gets
	  // a chance to modify it through the pointer.)
	  j = Status->LastWasHeader1;
	  if (LineNum > Dataset->LowestNonPrefatoryLine)
	    if (MatchesPrefatoryLine (Dataset, s)
		|| MatchSpecialWords (Dataset, s, &Status->LineInfo[0]))
	      {
		if (IsHeader
		    (Status->BufferedLines, &Status->LastWasHeader1, LineNum,
		     Dataset, s))
		  {
		    if (NowMatching (Dataset, s, &Status->LineInfo[0]))
		      goto StartHeader1;
		  }
		else if (j)
		  {
		    j = 0;
		    if (MatchesPrefatoryLine (Dataset, s)
			|| IsHeader (Status->BufferedLines, &j, LineNum,
				     Dataset, s))
		      {
			// NOTE:  this label is also jumped to from the
			// unindented-line branch further below.
		      BeginSubtitle:
			Status->InSubtitle = 1;
			// Skip leading whitespace.  The cast avoids the
			// undefined behavior of passing a negative char
			// to isspace().
			for (ss = s; isspace ((unsigned char) *ss); ss++);
			// Offset + (ss - s):  the parentheses keep this
			// integer arithmetic rather than forming an
			// out-of-range pointer (Offset + ss).
			if (AddMarkup
			    (Dataset, Status->LineInfo[0].Offset + (ss - s),
			     MarkBeginSubtitle, 0))
			  goto DiskError;
			goto ParagraphBegun;
		      }
		  }
	      }

	  // Here we try to detect block quotes.  We do this by  
	  // determining that all of the lines are indented the  
	  // same amount, and that they don't begin all capitalized. 
	  k = Status->LineInfo[0].Leading;
	  if (Status->LineInfo[0].BeginsQuote
	      && Status->LineInfo[1].Leading == k + 1)
	    k++;
	  n = Status->LineInfo[0].VerseCap;
	  for (j = 1; j <= PRE_OR_POST_LINES; j++)
	    {
	      if (Status->LineInfo[j].Empty)
		break;
	      if (Status->LineInfo[j].Leading != k)
		break;
	      n += Status->LineInfo[j].VerseCap;
	    }
	  // Need more than 3 uniformly-indented lines, running to an
	  // empty line (or the edge of the window), not all VerseCap.
	  if ((j > PRE_OR_POST_LINES || Status->LineInfo[j].Empty) && j > 3
	      && n != j)
	    {
	      Status->InBlockquote = 1;
	      Status->BlockIndentation = k;
	      if (AddMarkup
		  (Dataset, Status->LineInfo[0].Offset, MarkBlockquote, 0))
		goto DiskError;
	      goto ParagraphBegun;
	    }

	  // Well, not recognized as a block quote.  
	  Status->Type = MarkBeginRaggedParagraph;
	  // NOTE:  also entered from the RegularParagraph analysis below,
	  // with Status->Type and Status->Versifying already decided.
	StartParagraph:
	  if (!Status->Versifying)
	    {

	      // Is it the start of a table?  We'd LIKE to detect  
	      // these simply as paragraphs all of whose lines are  
	      // "PossibleTable" data. However, it's possible that the  
	      // column-headers might occupy multiple rows, and if one 
	      // of the column-headers is one or more lines longer than  
	      // the others, it can't be recognized as tabular data.   
	      // So we have to count these specially.  For tables  
	      // containing blank lines -- well, we're just out of  
	      // luck there. 
	      // (k is the index of the last leading non-table line, so
	      // only an initial run of such lines is tolerated.)
	      for (j = 0, k = -1; j <= PRE_OR_POST_LINES; j++)
		if (Status->LineInfo[j].Empty)
		  break;
		else if (!Status->LineInfo[j].PossibleTable)
		  {
		    if (k == j - 1)
		      k++;
		    else
		      break;
		  }
	      if (j > k + 2
		  && (j > PRE_OR_POST_LINES || Status->LineInfo[j].Empty))
		{
		  Status->InTable = 1;
		  if (AddMarkup
		      (Dataset, Status->LineInfo[0].Offset, MarkBeginTable,
		       0))
		    goto DiskError;
		  goto ParagraphBegun;
		}
	    }

	  // No, just a regular paragraph.   
	  Status->InParagraph = 1;

	  // Try to detect centered text.  There are two conditions we 
	  // apply.  First, at least one line in the current contiguous 
	  // group is ragged.  Second, we count the number of ragged lines 
	  // in the vicinity, and determine that the count is greater than 
	  // a certain number.  An exception to the latter is if every 
	  // line in the contiguous group is ragged.  Lastly, if the  
	  // paragraph contains a single line, the RaggedStart criterion  
	  // is too limiting, so in that case we look for any white space.   
	  // All of this applies only if the sole line that's indented is  
	  // NOT the first line of the paragraph.  Oh, and don't center  
	  // verse: 
	  if (Status->Versifying)
	    {
	      Status->Type = MarkBeginRaggedParagraph;
	      goto TypeKnown;
	    }

	  // Here we detect a simple indented paragraph -- we think! 
	  if (Status->LineInfo[0].BeginsWhite && !Status->LineInfo[1].Empty)
	    {
	      for (j = 1; j <= PRE_OR_POST_LINES; j++)
		if (Status->LineInfo[j].Empty
		    || Status->LineInfo[j].BeginsWhite)
		  break;
	      if (j > PRE_OR_POST_LINES || Status->LineInfo[j].Empty)
		{

		  // Okay, it's just an indented paragraph.   
		  // Go to the regular 
		  // analysis. 
		  Status->LineInfo[0].BeginsWhite = 0;
		  Status->LineInfo[0].Leading = 0;
		  goto RegularParagraph;
		}
	    }

	  // Here we look for a line whose purpose is simply to  
	  // separate blocks of text. 
	  if (IsSeparatorLine (s, &Status->LineInfo[1]))
	    {
	      Status->Type = MarkBeginCenteredParagraph;
	      goto TypeKnown;
	    }

	  // Here we look for a single contiguous group of lines  
	  // that is ragged.     
	  for (k = j = 0; j < PRE_OR_POST_LINES; j++)
	    {
	      if (Status->LineInfo[j].Empty)
		break;
	      if (Status->LineInfo[j].RaggedStart
		  || (j == 0 && Status->LineInfo[j].BeginsWhite))
		k++;
	    }
	  if (k == j)		// Here, EVERY line was ragged. 
	    {

	      // One problem is that the set of contiguous lines  
	      // might have been really short -- maybe just one line.   
	      // Maybe it was just an indented paragraph, if it's only  
	      // one line.  In that case, we check the vicinity for  
	      // additional ragged lines, unless the indentation is  
	      // really big! 
	      if (j == 1)
		{
		  if (Status->LineInfo[0].Leading > 10)
		    Status->Type = MarkBeginCenteredParagraph;
		  else
		    goto MoreRaggedChecking;
		}
	      else
		Status->Type = MarkBeginCenteredParagraph;
	    }
	  else if (k > 0)	// Here, just SOME lines were ragged. 
	    {
	    MoreRaggedChecking:

	      // We check additional lines from the vicinity, but  
	      // possibly outside the contiguous group, to see if  
	      // they're ragged too. 
	      for (k = 0, j = -PRE_OR_POST_LINES; j <= PRE_OR_POST_LINES; j++)
		{
		  if (Status->LineInfo[j].RaggedStart
		      || (j == 0 && Status->LineInfo[j].BeginsWhite))
		    k++;
		}
	      if (k >= 2)
		Status->Type = MarkBeginCenteredParagraph;
	    }
	TypeKnown:
	  // 12/16/01 RSB.  The label above implies that the type 
	  // of paragraph we want to begin is now known.  But I 
	  // discovered a bug in which if you have a block quote
	  // (i.e., all lines indented by the same amount) EXCEPT
	  // with the first/last lines indented differently, then
	  // it would be erroneously treated as a centered region.
	  // We therefore have to intercept this case.
	  if (Status->Type == MarkBeginCenteredParagraph)
	    {
	      // Count the number of lines in this block.
	      for (j = 0; j <= PRE_OR_POST_LINES; j++)
		if (Status->LineInfo[j].Empty)
		  break;
	      // We need a certain number of lines to work with.
	      if (j >= 4)
		{
		  int NewIndentation;
		  // The third line supplies the reference indentation.
		  NewIndentation = Status->LineInfo[2].Leading;
		  // Treat the first two lines and last line differently.
		  // If they meet the criteria, then check the intervening
		  // lines
		  if ((Status->LineInfo[0].Leading >= NewIndentation
		       || (Status->LineInfo[0].BeginsQuote
			   && Status->LineInfo[0].Leading ==
			   NewIndentation - 1))
		      && Status->LineInfo[j - 1].Leading >= NewIndentation
		      && (Status->LineInfo[1].Leading == NewIndentation
			  || (Status->LineInfo[1].BeginsQuote
			      && Status->LineInfo[1].Leading ==
			      NewIndentation - 1)))
		    {
		      for (k = 3; k < j - 1; k++)
			if (Status->LineInfo[k].Leading != NewIndentation)
			  break;
		      // Finally, meets all of the criteria for a 
		      // block quote!    
		      if (k == j - 1)
			{
			  Status->BlockIndentation = NewIndentation;
			  Status->Type = MarkBlockquote;
			  Status->ParagraphType = MarkBlockquote;
			  Status->Centering = 0;
			  if (AddMarkup
			      (Dataset, Status->LineInfo[0].Offset,
			       Status->ParagraphType, 0))
			    goto DiskError;
			  Status->InBlockquote = 1;
			  for (j = Status->BlockIndentation;
			       j < Status->LineInfo[0].Leading; j++)
			    if (AddMarkup
				(Dataset, Status->LineInfo[0].Offset,
				 MarkNbsp, 0))
			      goto DiskError;
			  goto ParagraphBegun;
			}
		    }
		}
	    }
	  // Okay, now we REALLY DO know what type we're using.     
	  Status->Centering = (Status->Type == MarkBeginCenteredParagraph);
	  Status->ParagraphType = Status->Type;
	  if (AddMarkup
	      (Dataset, Status->LineInfo[0].Offset, Status->ParagraphType, 0))
	    goto DiskError;
	  if (!Status->Centering)
	    {
	      //for (ss = s; isspace (*ss); ss++)
	      for (j = Status->BlockIndentation;
		   j < Status->LineInfo[0].Leading; j++)
		if (AddMarkup
		    (Dataset, Status->LineInfo[0].Offset, MarkNbsp, 0))
		  goto DiskError;
	    }
	  Status->InParagraph = 1;
	ParagraphBegun:;
	}
      else
	{
	  // Unindented line:  it may be a header, a subtitle following a
	  // header, or the start of an ordinary paragraph.
	  if (LineNum > 0 && LineNum == Dataset->LowestNonPrefatoryLine)
	    Status->InHeader1 = 1;
	  else if (LineNum < Dataset->LowestNonPrefatoryLine)
	    Status->InHeader1 = 0;
	  else
	    {
	      j = Status->LastWasHeader1;
	      Status->InHeader1 =
		IsHeader (Status->BufferedLines, &Status->LastWasHeader1,
			  LineNum, Dataset, s);
	      if (j && !Status->InHeader1)
		{
		  j = 0;
		  if (MatchesPrefatoryLine (Dataset, s)
		      || IsHeader (Status->BufferedLines, &j, LineNum,
				   Dataset, s))
		    goto BeginSubtitle;
		}
	    }
	  if (Status->InHeader1)
	    Status->InHeader1 =
	      NowMatching (Dataset, s, &Status->LineInfo[0]);
	  if (Status->InHeader1)
	    {
	      // NOTE:  also jumped to from the indented-line branch above.
	    StartHeader1:
	      Status->InHeader1 = 1;

	      // NOTE THAT THE FOLLOWING MARKUP HAS THE POTENTIAL TO BE  
	      // OUT OF ORDER IF SOME TYPES OF CODE CHANGES OCCUR LATER! 
	      // (Cast and parentheses:  see the notes at BeginSubtitle.)
	      for (ss = s; isspace ((unsigned char) *ss); ss++);
	      if (AddMarkup
		  (Dataset, Status->LineInfo[0].Offset + (ss - s),
		   MarkHeader1, 0))
		goto DiskError;
	    }
	  else
	    {
	    RegularParagraph:

	      // Here's where the analysis starts for a plain-Jane  
	      // unindented Gutenberg paragraph. 
	      // We know that we want to start a paragraph, but we don't  
	      // know if it should be justified or ragged.   
	      // Here we look for a line whose purpose is simply to  
	      // separate blocks of text.  
	      if (IsSeparatorLine (s, &Status->LineInfo[1]))
		{
		  Status->Type = MarkBeginCenteredParagraph;
		  goto TypeKnown;
		}

	      // Let's check to see if the lines seem to be shorter  
	      // than expected.  Note that we count some conditions  
	      // twice as much as those that are simply short. 
	      // We do it intentionally. 
	      if (LineNum < Dataset->LowestNonPrefatoryLine)
		Status->Type = MarkBeginRaggedParagraph;
	      else
		{

		  // Here we look for long chunks of obvious verse,  
		  // which we detect as complete paragraphs (to the  
		  // extent we can determine such a thing),  
		  // every line of which begins with caps.  However,  
		  // we reject short paragraphs with lines beginning 
		  // with quotes from the count. 
		  for (j = k = 0; j < PRE_OR_POST_LINES; j++)
		    if (Status->LineInfo[j].Empty)
		      break;
		    else if (Status->LineInfo[j].VerseCap)
		      k++;
		  if (k == j)
		    {
		      // The shorter the block, the more recent verse
		      // (LastVersifying) we demand before believing it.
		      if (j > 5)
			Status->Versifying = 1;
		      else if (j > 3 && Status->LastVersifying)
			Status->Versifying = 1;
		      else if (j > 2 && Status->LastVersifying > 2)
			Status->Versifying = 1;
		    }

		  // Sort of a catch-all heuristic thing to detect verse 
		  // that wasn't so obvious as the above. 
		  // (k is a score:  it starts at 1 if verse was seen
		  // recently, and each line adds to or subtracts from it.)
		  if (!Status->Versifying)
		    for (j = 0, k = (Status->LastVersifying > 0);
			 j < PRE_OR_POST_LINES; j++)
		      {
			if (Status->LineInfo[j].Empty)
			  break;
			if (Status->LineInfo[j].BeginsWhite
			    || (Status->LineInfo[j].ReallyShort
				&& !Status->LineInfo[j + 1].Empty
				&& Status->LineInfo[j].VerseCap))
			  k += 2;
			else if (Status->LineInfo[j].Short
				 && !Status->LineInfo[j + 1].Empty
				 && Status->LineInfo[j].VerseCap)
			  k++;
			if (!Status->LineInfo[j].BeginsWhite
			    && !Status->LineInfo[j].VerseCap)
			  k--;
		      }

		  // The above rules are fine, but OCCASIONALLY 
		  // catch a line of dialog, because when there's  
		  // dialog there are usually a lot of 1- or 2-line 
		  // paragraphs, possibly short, all beginning with 
		  // caps.  The line below just reduces that tendency 
		  // a little.         
		  if (j <= 2 && !Status->LineInfo[0].BeginsWhite &&
		      (Status->LineInfo[0].BeginsQuote
		       || Status->LineInfo[-2].BeginsQuote
		       || Status->LineInfo[2].BeginsQuote
		       || (Status->LineInfo[2].Empty
			   && Status->LineInfo[3].BeginsQuote)
		       || Status->LineInfo[1].Empty))
		    k--;
		  if (Status->Versifying || (k >= 2 && k >= (j - 1) / 2)
		      || (j == 1 && Status->LineInfo[0].Short) || k == j
		      || k == (j - 1))
		    {
		      Status->Versifying = 1;
		      Status->Type = MarkBeginRaggedParagraph;
		    }
		  else
		    Status->Type = MarkBeginJustifiedParagraph;
		}
	      goto StartParagraph;
	    }
	}
    }

  return (0);
DiskError:
  return (5);
AtEnd:
  return (-1);

}
