#!/usr/bin/perl

# $Id: texexpand,v 1.11 1997/06/15 18:26:00 JCL Exp $
#
# texexpand for LaTeX2HTML 96.2

# Based on texexpand by Robert Thau, MIT AI lab, including modifications by
# Franz Vojik <vojik@de.tu-muenchen.informatik>
# Nikos Drakos <nikos@cbl.leeds.ac.uk>
# Sebastian Rahtz <spqr@uk.ac.tex.ftp>
# Maximilian Ott <max@com.nec.nj.ccrl>
# Martin Boyer
# Herbert Swan
# Jens Lippmann

# Recognizes \documentclass, \documentstyle, \usepackage, \RequirePackage,
# \begin{verbatim}...\end{verbatim}, %begin{latexonly}...%end{latexonly},
# \begin{latexonly}...\end{latexonly}, \input, \include, \verb, \latex
# \endinput, \end{document}
# \includecomment, \excludecomment
# \begin{"to exclude"}, \end{"to exclude"}
# %begin{"to exclude"}, %end{"to exclude"}

###############################################################################
# Notes:
#
# General translation mechanism:
#
#
# The main program latex2html calls texexpand with the document name
# in order to expand some of its \input and \include statements, here
# also called 'merging', and to write a list of sensitized style, class,
# input, or include file names.
# When texexpand has finished, all is contained in one file, TMP_foo.
# (assumed foo.tex is the name of the document to translate).
# 
# In this version, texexpand cares for following environments
# that may span include files / section boundaries:
#  a) \begin{comment}
#  b) %begin{comment}
#  c) \begin{any}  introduced with \excludecomment
#  d) %begin{any}
#  e) \begin{verbatim}
#  f) \begin{latexonly}
#  g) %begin{latexonly}
# 
# a)-d) cause texexpand to drop its contents, it will not show up in the
# output file. You can use this to 'comment out' a bunch of files, say.
# 
# e)-g) prevent texexpand from expanding input files, but the environment
# content goes fully into the output file.
# 
# Together with each merging of \input etc. there are so-called %%%texexpand
# markers accompanying the boundary.
# 
# When latex2html reads in the output file, it uses these markers to write
# each part to a separate file, and process them further.
#
#
#
# Detailed technical notes:
#
# 1. %begin{latexonly} and %end{latexonly} have to be on a separate line.
#    Anything between these tags (including the tags) is discarded.
# 2. \begin{latexonly} and \end{latexonly} have to be on a separate line.
#    Anything between these tags (including the tags) is not expanded.
# 3. [%\]begin{"to exclude"} and [%\]end{"to exclude"} have to be on a
#    separate line.
#    Anything between these tags (including the tags) is discarded.
# 4. \begin{verbatim/verbatim*} and \end{verbatim/verbatim*} have to be
#    on a separate line.
#    Anything between these tags (including the tags) is not expanded.
# 5. The scope of any such tags may extend over several files.
#    The opening tag for latexonly may occur on a different include level
#    than the closing tag.
#    The opening tag for verbatim/"to exclude" must occur within the same
#    file as the closing tag.
# 6. Warnings are printed when the document has been parsed and open
#    tags remain.
# 7. When in a "to exclude"/verbatim environment, texexpand won't recognize
#    ANY command except the corresponding closing tag.
#    There cannot be any nested constructions.
#    This behaviour is identical to that of LaTeX.
# 8. \begin{latexonly},\end{latexonly} may be nested, whereas
#    %begin{latexonly},%end{latexonly} may not be nested.
# 9. A "%" tag cannot close a "\" tag, and vice versa.
# 10. Every \document(class|style), \usepackage, \input and \include command
#     has to be on a separate line.
# 11. Everything behind a `%' that isn't preceded by a `\' is regarded as
#     a comment, i.e. it is printed but not interpreted.
# 12. If any command listed in 10. is preceded by an occurrence of `\verb' or
#    `\latex' then it is NOT interpreted. This fails on lines like this:
#        blah blah \verb+foo foo+ \input{bar} % bar won't be loaded!
# 13. Packages provided via \usepackage are handled the same way as
#    `options' in \document(class|style), i.e. they are included when
#    -auto_exclude is off, the package isn't in @dont_include *OR* the
#    package is in @do_include (new). They are added to the style file 
#    together with their options if the file itself hasn't been merged.
#    \documentclass[options]{class} searches for every option.clo,
#    \documentstyle[options]{style} searches for every option.sty.
#    \usepackage[options]{packages} searches for every package.sty.
# 14. Each texinputs directory is searched for input files/styles. If it
#    ends in `//', the whole subdirectory tree is searched.
# 15. \input / \include merge the given file (if found under the given
#    name or with .tex extension) if its basename is in @do_include or if it
#    isn't in @dont_include or if the given filename doesn't end in 
#    .sty/.clo/.cls when -auto_exclude is set.
#
###############################################################################
# History:
#   mro = Marek Rouchal <marek@saftsack.fs.uni-bayreuth.de>
#   jcl = Jens Lippmann <lippmann@cdc.informatik.th-darmstadt.de>
#
# $Log: texexpand,v $
# Revision 1.11  1997/06/15 18:26:00  JCL
# Now texexpand will only merge files that exist *and* are readable.
# (Trying to merge a void link caused it to crash on my site.)
#
# Revision 1.10  1997/06/06 14:13:54  RRM
# This is the texexpand for V97.1.
#
#     only difference is that it is quieter under  -debug .
#     use  -verbosity <num>  as well, to get all the previous messages,
#     when  <num> is at least 2.
#
# Revision 1.9  1997/03/24 12:26:15  RRM
# Implemented a new class of environments: $keepcomments .
# This allows environments of TeX-like code to be preserved verbatim,
# and passed to LaTeX for processing: e.g. picture, makeimage, xy  etc.
# Also, fixed the bug which loses any code on the same line as, but preceding
# an  \input  or  \include  command.
#
# Revision 1.8  1997/03/03 20:35:42  JCL
# added some comments
#
# Revision 1.7  1996/12/21 20:30:00  JCL
# - small changes to get verbatim parsed separately from verbatim*
# - provided expand test for regression suite
# - bound diagnostic status messages to debug level
#
#   texexpand is operational
#
# Revision 1.6  1996/12/20 20:27:08  JCL
# fixed severe bug with my $DS variable :-[
#
# Revision 1.5  1996/12/20 18:51:54  JCL
# *** empty log message ***
#
# Revision 1.4  1996/12/20 01:29:39  JCL
# Moved initialisation tokens for @dont_include to latex2html.config,
# to have a more central place to control them.
#
# Revision 1.3  1996/12/18 04:36:58  JCL
# substantial changes to allow for environments grouping several files
#  o chunked code into more functions
#  o revised documentation
#  o designed new parsing logic
#  o introduced parsing of \includecomment, \excludecomment to care
#    for self-defined comment environments
#  o handles default "comment" environment as known from html.sty
#  o and much more (see comments)
#
#
# V96.2a6 Fixed bug in recursive directory search for texinputs. Thanks to
#         Marcus Harnisch <harnisch@hhi.de> for reporting the bug.
#         Included possibility of adding extensions to $TEXE_DONT_INCLUDE
#         e.g. '.psfig', so that all files ending in .psfig won't be
#         \input or \include 'ed. Same for $TEXE_DO_INCLUDE. Added `o' 
#         option to some regexps.
# -------
# V96.2a5 Followed suggestions by Jens Lippmann regarding file inclusion
#         logic. Added \RequirePackage. Some minor changes.
# -------
# V96.2a4 Fixed severe bugs in comments regexp and usepackage logic.
#         Thanks to Ross Moore <roos@mpce.mq.edu.au> for reporting them.
#         Added support for LaTeX2e .clo filename extension (see 7. above)
#         Cleaned up some code, added more comments
#         Added command line option -do_include
# -------
# V96.2a3 Fixed bugs & typos
# -------
# V96.2a2 Following suggestions made by 
#         Jens Lippmann <lippmann@cdc.Informatik.TH-Darmstadt.DE>
#         Added recursive directory search for include files.
#         Added @do_include: Forces inclusion of packages (when found)
#         Some bug fixes
# -------
# V96.2a1 released Thu Oct 24 16:51:36 MET 1996
# -------
# 21-NOV-96 mro
# Almost complete rewrite by Marek Rouchal <marek@saftsack.fs.uni-bayreuth.de>
#
###############################################################################

# Path separator. Currently hard-wired to the Unix separator.
    $DS = '/';

# Initialize texinputs: the current directory is always searched first,
# followed by every non-empty component of the colon-separated TEXINPUTS.
    @texinputs = (".");

    if(defined $ENV{'TEXINPUTS'}) {
	foreach $dir (split(/:/,$ENV{'TEXINPUTS'})) {
	    push (@texinputs, $dir) if($dir =~ /\S+/); # save only if non-empty
	}
    }

# Expand paths with `~'.
# First pass: a leading `~/' becomes the home directory of the current user.
    $homeDir = (getpwuid($<))[7];
    foreach (@texinputs) {
	s|^~$DS|$homeDir$DS|;
    }
# Second pass: a leading `~user/' becomes that user's home directory.
# Entries naming an unknown user are left untouched.
    foreach (@texinputs) {
	if (m|^~([^$DS]+)$DS| && ($homeDir = (getpwnam($1))[7])) {
	    s|^~[^$DS]+$DS|$homeDir$DS|;
	}
    }

# Initialize styles to be excluded (if any).
# This is a sanity setup in case the \d is garbled during shell
# variable handling.
# The initialisation really comes from latex2html.config.
    @dont_include = ('\d+pt');

# These are the extensions to be auto-excluded
    $dont_include_ext_rx = 'sty|cls|clo';

    if(defined $ENV{'TEXE_DONT_INCLUDE'}) {
	foreach(split(/:/,$ENV{'TEXE_DONT_INCLUDE'})) {
	    &process_dont_include($_);
	}
    }

# Initialize styles to be included (if any). This overrides @dont_include
# These are the extensions to be auto-included
    $do_include_ext_rx = '';

    if(defined $ENV{'TEXE_DO_INCLUDE'}) {
	foreach(split(/:/,$ENV{'TEXE_DO_INCLUDE'})) {
	    &process_do_include($_);
	}
    }

# Parse arguments.
# Without any argument just print the version banner and leave.
    unless (@ARGV) {
	print "texexpand for LaTeX2HTML V97.1\n";
	exit(0);
    }

# Options are consumed until the first non-option word or a literal `--'.
    while ($ARGV[0] =~ /^-/ && $ARGV[0] !~ /^--$/) {
	$_ = shift;
	if (/^-dont_include$/) { &process_dont_include(shift); }
	elsif (/^-do_include$/) { &process_do_include(shift); }
	elsif (/^-w$/) {} ### Compatibility hack.
	elsif (/^-auto_exclude$/) { $auto_exclude++}
	elsif (/^-save_styles$/) { $style_file = shift; }
	elsif (/^-debug$/) { $debug++; }
	elsif (/^-verbose$/) { $debug=2; }
	elsif (/^-o$/) { $outfile = shift; }
	else { print STDERR "%--- WARNING:  Unrecognized option: $_ \n"; }
    }
# The `--' end-of-options marker only terminates option parsing; drop it so
# that it is not mistaken for the name of the input file.
    shift(@ARGV) if (@ARGV && $ARGV[0] eq '--');

    die "texexpand: No input file\n" unless($infile = shift);


&initialise;
&main;
exit(0);


# Set up the generic regular expression fragments used by the parser and,
# when debugging is enabled, report the effective configuration on STDERR.
#
# Reads the globals $debug, $infile, @texinputs, @dont_include, @do_include,
# $dont_include_ext_rx and $do_include_ext_rx.
# Sets $ignore_cmd_rx, $options_rx, $arg_rx, $fakeenv_rx, $keepcomments_rx.
sub initialise {

# Create generic regexp's:
# If this matches before a command, the command is ignored.
# The fragment is interpolated into patterns later on, so the string itself
# must contain an escaped backslash (`\\') to match the literal backslash
# that starts \latex and \verb. In a single-quoted string that takes four
# backslashes.
    $ignore_cmd_rx = '(\\\\latex\W|\\\\verb)';
# This matches a square bracket pair (typically an option list).
    $options_rx = '(\[[^\]]*\])?';
# This matches a single argument.
    $arg_rx = '\{([^\}]*)\}';
# Environments whose content is dropped (extended by \excludecomment).
    $fakeenv_rx = '(comment)';
# Environments inside which `%' comments are not lifted from the line.
    $keepcomments_rx = '(picture|makeimage|xy|diagram)';


# Print environments
    if ($debug) {
	print STDERR "%TeX inputs are in:\n";
	foreach $dir (@texinputs) { print STDERR "%--- $dir\n"; }

	if ($debug>1) {
	    print STDERR "\n%Special names (not to be input or included):\n";
	    foreach $name (@dont_include) { print STDERR "%--- $name\n"; }
	    print STDERR "\n%Extensions of files not to be input or included: "
		. "$dont_include_ext_rx\n";

	    print STDERR "\n%Special names (to *be* input or included):\n";
	    foreach $name (@do_include) { print STDERR "%--- $name\n"; }
	    print STDERR "\n%Extensions of files to *be* input or included: "
		. "$do_include_ext_rx\n";
	}
    }
    print STDERR "\n%--- Expanding $infile\n" if ($debug>1);
}


# Driver: sets up the parser state, opens the output channels, expands
# $infile and finally complains about environments that were never closed.
sub main {
# The state below is dynamically scoped, i.e. it is visible to (and changed
# by) &process_file and everything called from there.
# Note that verbatim/latexonly may split over different files!
#  $includelevel  depth of include/input
#  $verbatim      1 while inside a verbatim environment
#  $latexonly     > 0 while inside latexonly environments
    local($includelevel) = 0;
    local($verbatim,$verbatimname) = (0,"");
    local($latexonly,$latexonlytype) = (0,"");
    local($fakeenv,$fakeenvname,$fakeenvtype) = (0,"","");
    local($keepcomments,$keepcommentsname) = (0,"");
    local($active,$mute) = (1,0);

# Collapse the name lists into alternations for later pattern matching.
    $do_include_rx = join("|",@do_include);
    $dont_include_rx = join("|",@dont_include);

# The style list is optional.
    if ($style_file) {
	die "%--- Cannot open style file\n"
	    unless open(STYLES,">$style_file");
    }

# Write to the requested file, or duplicate STDOUT if none was given.
    if (!$outfile) {
	open(OUT,">&STDOUT");
    }
    else {
	die "%--- Cannot open output file\n"
	    unless open(OUT,">$outfile");
    }

    &process_file($infile); # the workhorse...

    if ($outfile) { close(OUT); }
    if ($style_file) { close(STYLES); }

# Report every environment that is still open at the end of the document.
    if ($latexonly) {
	print STDERR "%--- Warning: No ${latexonlytype}end\{latexonly\} found.\n";
    }
    if ($fakeenv) {
	print STDERR "%--- Warning: No ${fakeenvtype}end\{$fakeenvname\} found.\n";
    }
    if ($keepcomments) {
	print STDERR "%--- Warning: No \\end\{$keepcommentsname\} found.\n";
    }
    if ($verbatim) {
	print STDERR "%--- Warning: No \\end{verbatim} found.\n";
    }
}


# Include and parse a file.
# This routine is recursive, see also &find_and_process_file,
# &process_document_header, and &process_package_cmd.
#
# Two global flags control the modes of texexpand.
#  o $active is true if we should interprete the lines to expand
#    files, check for packages, etc.
#  o $mute is true if we should prevent the lines from going
#    into the out file.
#
# We have three general modes of texexpand:
#  1) interprete the lines and pass them to the out file
#     This is the normal case.
#     Corresponding: $active true, $mute false
#  2) interprete minimal and suppress them
#     This is when parsing inside a comment environment, which
#     also would retain its body from LaTeX.
#     => $active false, $mute true
#  3) interprete minimal and pass the lines to the out file
#     This is inside a verbatim or latexonly environment.
#     The line of course must be at least interpreted to
#     determine the closing tag.
#     => $active false, $mute false
#
# Any environment may extend over several include files.
# Any environment except verbatim and latexonly may have its
# opening or closing tag on different input levels.
# The comment and verbatim environments cannot be nested, as
# is with LaTeX.
# We must at least parse verbatim/comment environments in
# latexonly environments, to catch fake latexonly tags.
#
# Bugs:
# o Since the latexonly environment is not parsed, its contents
#   might introduce environments which are not recognized.
# o The closing tag for latexonly is not found if hidden inside
#   an input file.
# o One environment tag per line, yet!
# o If I had to design test cases for this beast I would
#   immediately disintegrate into a logic cloud.
#
sub process_file {
    # Read $infile line by line, track the environment state and hand every
    # line to &interprete.
    #
    # Argument: the name of the file to read (dies if it cannot be opened).
    # Shared state (dynamically scoped, set up in &main): $includelevel,
    # $active, $mute, $latexonly/$latexonlytype, $fakeenv/$fakeenvname/
    # $fakeenvtype, $keepcomments/$keepcommentsname, $verbatim/$verbatimname.
    #
    # Opening tags are evaluated BEFORE the line is interpreted, closing
    # tags AFTERWARDS, so that both tag lines are treated like the body of
    # the environment they delimit.
    local($infile) = @_;
    local(*IN);
    local($comments,$before,$orig);


    # Keep track of input/include level
    $includelevel++;

    open(IN,"<$infile") || die "%--- Cannot open $infile\n";
    print STDERR "%--- Processing $infile\n" if ($debug > 1);

    # if we don't include this file marker LaTeX2HTML won't split
    # the document at this point
    print OUT "%%% TEXEXPAND: INCLUDED FILE MARKER $infile\n"
	if ($includelevel > 1 && $active);

    while(<IN>) {
	#for debugging
	$orig = $_;

	# lift comments from line (a `%' preceded by an even number of
	# backslashes starts a comment); inside keepcomments environments
	# the line is left untouched
	$comments = "";
	if ($keepcomments) { $comments = '' }
	else {
	    s/(^|[^\\])(\\\\)*(%.*)/$comments = $3; $1.$2/e
	}

	# Deal with latexonly environment(s)
	# begin/end tags must be on single line
	if (!$fakeenv && !$verbatim && !$latexonly &&
	    $comments =~ /%\s*begin\s*\{\s*latexonly\s*\}/) {

	    # A comment latexonly environment. May not be nested.
	    $latexonly = 1;
	    $latexonlytype = "%";
	    $active = 0;
	    $mute=1;
	}
	elsif (!$fakeenv && !$verbatim &&
	       (!$latexonly || $latexonlytype eq "\\") &&
	       /^\s*\\begin\s*\{\s*latexonly\s*\}/) {

	    # A latexonly environment. LaTeX types may be nested,
	    # but discard them as long as we are in a latexonly
	    # comment part.
	    # We definitely don't like to push the "\\", "%" types
	    # onto a stack to keep track of them in alternating types.
	    # On the other hand we won't allow for a comment type
	    # part to close a LaTeX environment, eg.
	    $latexonly++;
	    $latexonlytype = "\\";
	    $active = 0;
	}
	elsif (!$fakeenv && !$verbatim &&
	       $comments =~ /%\s*begin\s*\{\s*$fakeenv_rx\s*\}/) {
	    # Begin of a fake comment part. May not be nested.
	    $fakeenv=1;
	    $fakeenvtype="%";
	    # Remember the part name.
	    $fakeenvname = $1;
	    $active=0;
	    $mute=1 unless $latexonly;
	}
	elsif (!$fakeenv && !$verbatim && /^\s*\\begin\s*\{\s*$fakeenv_rx\s*\}/) {
	    # Begin of a fake environment. May not be nested.
	    $fakeenv="1";
	    $fakeenvtype="\\";
	    # Remember the environment name.
	    $fakeenvname = $1;
	    $active=0;
	    $mute=1 unless $latexonly;
	}
	elsif (!$fakeenv && !$verbatim && /^\s*\\begin\s*\{\s*$keepcomments_rx\s*\}/) {
	    # Begin of a keepcomments environment. May be nested.
	    if (! $keepcomments) {
		$keepcomments = 1;
		# Remember the environment name.
		$keepcommentsname = $1;
	    } elsif ($keepcommentsname eq $1) {
		$keepcomments++;
	    }
	    $active=1;
	    $mute=1 unless $latexonly;
	}
	elsif (!$fakeenv && !$verbatim && /\\begin\s*\{\s*verbatim(\*)?\s*\}/) {
	    # Begin of verbatim or verbatim*; remember which one, so that
	    # only the matching \end closes it. Ignored after \verb/\latex.
	    ($before,$verbatimname) = ($`,$1);
	    ($active,$verbatim) = (0,1)
		unless ($before =~ /$ignore_cmd_rx/o);
	}

	print STDERR "%--line::${orig}%--      active=$active mute=$mute ".
	    "latexonly=$latexonly fakeenv=$fakeenv verbatim=$verbatim ".
	    "keepcomments=$keepcomments\n"
		if ($debug > 1) && $orig =~ /\\begin|%\s*begin/;

	# Interprete the single line, care for file to merge,
	# locate new comment environments, etc.
	# This one does recursive calls.
	# Stop this file if we are told so.
	last
	    unless &interprete($_, $comments);

	last if $end_document;

	# Sorry for that ifs...
	if (!$fakeenv && !$verbatim && $latexonly && $latexonlytype eq "%" &&
	    $comments =~ /%\s*end\s*\{\s*latexonly\s*\}/) {

	    # only %end{latexonly} can close the part
	    $latexonly=0;
	    $active = 1;
	    $mute = 0;
	}
	elsif (!$fakeenv && !$verbatim && $latexonly && $latexonlytype eq "\\" &&
	    /^\s*\\end\s*\{\s*latexonly\s*\}/) {

	    # only \end{latexonly} can close the environment
	    $latexonly--;
	    $active = ($latexonly ? 0 : 1);
	}
	elsif ($fakeenv && $fakeenvtype eq "%" &&
	       $comments =~ /%\s*end\s*\{\s*$fakeenv_rx\s*\}/) {

	    # only a matching %end{name} can close the part
	    if ($1 eq $fakeenvname) {
		$fakeenv=0;
		$active = ($latexonly ? 0 : 1);
		# stay mute while an enclosing %begin{latexonly} part,
		# which is discarded as a whole, is still open
		$mute=0
		    unless $latexonly && $latexonlytype eq "%";
	    }
	}
	elsif ($fakeenv && $fakeenvtype eq "\\" &&
	       /^\s*\\end\s*\{\s*$fakeenv_rx\s*\}/) {

	    # only a matching \end{name} can close the environment
	    if ($1 eq $fakeenvname) {
		$fakeenv=0;
		$active = ($latexonly ? 0 : 1);
		# see above: an enclosing %latexonly part keeps us mute
		$mute=0
		    unless $latexonly && $latexonlytype eq "%";
	    }
	}
	elsif ($keepcomments &&
	       /^\s*\\end\s*\{\s*$keepcomments_rx\s*\}/) {

	    # only a matching \end{name} can close the part
	    if ($1 eq $keepcommentsname) {
		$keepcomments--;
		$keepcommentsname = '' unless ($keepcomments);
		$active = ($latexonly ? 0 : 1);
		$mute=0
		    unless $latexonly && $latexonlytype eq "%";
	    }
	}
	elsif ($verbatim && /\\end\s*\{\s*verbatim(\*)?\s*\}/) {
	    # Only honoured while a verbatim environment is really open;
	    # otherwise a stray \end{verbatim} (e.g. inside a comment
	    # environment) would switch interpretation back on.
	    if ($1 eq $verbatimname) {
		$verbatim=0;
		$active = ($latexonly ? 0 : 1);
	    }
	}
	print STDERR "%--line::${orig}%--      active=$active mute=$mute ".
	    "latexonly=$latexonly fakeenv=$fakeenv verbatim=$verbatim\n"
		if ($debug > 1) && $orig =~ /\\end|%\s*end/;

    }
    print OUT "%%% TEXEXPAND: END FILE $infile\n"
	if ($includelevel > 1 && $active);
    close(IN);
    $includelevel--;
}

# Handle the three states here.
# The state 'interprete minimal and suppress' does not require
# further actions, just do nothing.
#
# Interpret one input line according to the current mode.
#
# Arguments: the line with its comment removed, and the removed comment.
# Returns 0 if reading of the current file has to stop (\endinput in an
# included file), 1 otherwise. Sets $end_document when \end{document} is
# seen. Reads the mode flags $active and $mute.
sub interprete {
    local($_,$comments) = @_;
    # $line is the complete original line (comment re-attached)
    local($line) = $_;
    $line =~ s/\n/$comments\n/;

    if ($active) {
	#looses $comments on successful input/include, document header,
	#or usepackage/RequirePackage

	if (/\\(input|include)\W/) { 
	    local($after) = '';
	    $before = $`;
	    $after = $&.$';
	    if($before =~ /$ignore_cmd_rx/o) {
		print OUT $line;
	    }
	    else {
		if ($before) {
		    # Emit the text preceding the command on its own line
		    # and strip it from the command to be processed.
		    print OUT $before."\%\n";
		    $line = $after;
		    $before =~ s/(\W)/\\$1/g;
		    s/$before//;
		}
		print OUT $line
		    #may re-enter &process_file
		    unless &find_and_process_file($_);
	    }
	}
	elsif (/\\endinput/) {
	    $before=$`;
	    return(0) #stop this file
		if ($includelevel > 1 && $before !~ /$ignore_cmd_rx/o);
	}
	elsif (/\\document(class|style)\s*$options_rx\s*$arg_rx/o) {
	    $before = $`;
	    if ($before =~ /$ignore_cmd_rx/o) {
		print OUT $line;
	    }
	    else {
		&process_document_header($_);
	    }
	}
	elsif (/\\(usepackage|RequirePackage)\s*$options_rx\s*$arg_rx/o) {
	    $before = $`;
	    if($before =~ /$ignore_cmd_rx/o) {
		print OUT $line;
	    }
	    else {
		&process_package_cmd($_);
	    }
	}
	# Print the first /end{document}, only.  Truncate anything after it.
	elsif (/^(.*\\end\{document\})/) {
	    $before = $1;
	    if ($before =~ /$ignore_cmd_rx/o) {
		print OUT $line;
	    }
	    else {
		print OUT "$before\n";
		$end_document++;
	    }
	}
	elsif (/\\(in|ex)cludecomment\s*$arg_rx/o) {
	    # Maintain the list of environments to be dropped, which is
	    # kept as the alternation inside $fakeenv_rx.
	    local($mode,$env) = ($1,$2);

	    $env =~ s/\s//g; #strip space
	    # escape special chars (such as "*"), but reject "|"
	    $env =~ s/(\W)/\\$1/g;
	    unless ($env =~ /\|/) {
		$fakeenv_rx =~ /\((.*)\)/;
		# might also be empty
		local(@envs) = split(/\|/,$1);

		if ($mode eq "ex") {
		    push(@envs,$env);
		}
		else {
		    # Forget the environment if it is redefined. The list
		    # holds the escaped names, so compare the escaped name
		    # literally; a pattern match would also drop every
		    # environment whose name merely contains this one.
		    @envs = grep($_ ne $env, @envs);
		}
		$fakeenv_rx = "\(".join("|",@envs)."\)";
	    }
	}
	else {
	    print OUT $line;
	}
    }
    elsif (! $mute) {
	# print line if in verbatim/comment mode
	print OUT $line;
    }
    return(1);		#continue if not $end_document
}


# Handle a line containing \input or \include.
# Returns 1 if the named file has been merged into the output, 0 if the
# caller has to print the command itself (no file name, file excluded, or
# file not found).
sub find_and_process_file {
    local($_) = @_;
    local($before,$after,$class,$styles);
    local($filename) = "";

    print STDERR "%--- Found include at level $includelevel: $_\n"
	if($debug);

# Get filename: either a braced argument or a blank-delimited word.
    unless (/(\\input|\\include)(\s*$arg_rx|\s+(\S+)\s)/o) {
	print STDERR "%--- COULDN'T FIND FILENAME\n" if($debug);
	return(0); #no merge
    }
    # $class serves as temporary storage
    ($before,$after,$class,$filename) = ($`, $', $1.$2, $3.$4);
    $filename =~ s/\s//g;
    return(0) unless ($filename); #no merge

    # Get base name: strip path, then extension
    ($styles = $filename) =~ s|.*$DS||;
    $styles =~ s/\.[^.]*$//;

    # A file is forced in if its base name or extension is on the
    # do_include side; it is rejected if its base name is in dont_include,
    # or (with -auto_exclude) if its extension is an excluded one.
    local($forced) = ($styles =~ /^($do_include_rx)$/o ||
		      $filename =~ /\.($do_include_ext_rx)$/o);
    local($rejected) = ($styles =~ /^($dont_include_rx)$/o ||
			($auto_exclude &&
			 $filename =~ /\.($dont_include_ext_rx)$/o));

    if (!$forced && $rejected) {
	print STDERR "%--- ignoring $filename\n" if($debug);
	print STYLES "$styles\n" if($style_file);
	return(0); #no merge
    }

    local($fname) = &find_file($filename);
    unless ($fname) {
	print STDERR "%--- include $filename failed. Reinserting $before command\n";
	return(0); #no merge
    }

    # Text before the command, the file itself, then any trailing text.
    print OUT "$before";
    &process_file($fname); # recursive call
    print OUT $after if($after =~ /\S+/);

    print STDERR "%--- successfully included $filename\n"
	if($debug > 1);
    return(1); #merge
}


# Handle a \documentclass or \documentstyle line: options whose file can be
# merged are removed from the command and expanded right after it, all
# others stay in the command and are noted in the style file.
sub process_document_header {
    local($_) = @_;
    local(%style_include,@print_styles,$key);

    /\\document(class|style)\s*$options_rx\s*$arg_rx/o;
    local($before, $latextype, $styles, $class, $after) = ($`, $1, $2, $3, $');

    print STDERR "%--- Found $1: $_\n" if($debug);
    $styles =~ s/\[(.*)\]/$1/; # Strip braces
    $class =~ s/\s//g;	# Strip spaces
    # the class cannot be included, so stuff it in the style file
    if ($style_file) { print STYLES "$class\n"; }

    # Options live in .clo files (LaTeX2e) or .sty files (LaTeX209)
    local($ext) = ($latextype =~ /class/) ? '.clo' : '.sty';

    foreach $key (split(/,/, $styles)) {
	$key =~ s/\s//g; # strip spaces
	push(@print_styles,$key);
	next unless (&should_include($key));
	# mark the style for inclusion; &find_file gives the
	# filename or undef.
	$style_include{$key} = &find_file($key . $ext);
    }

    # Everything that is not merged goes back into the command and is
    # saved to the style file.
    local(@kept) = ();
    foreach $key (@print_styles) {
	next if ($style_include{$key});
	print STYLES "$key\n" if($style_file);
	push(@kept,$key);
    }
    $styles = (@kept ? '[' . join(',',@kept) . ']' : '');

    print OUT $before . "\\document" . $latextype . $styles .
	'{' . $class . '}' . $after;

    # Include styles after the \document(class|style) command
    foreach $key (@print_styles) {
	&process_file($style_include{$key}) if ($style_include{$key});
    }
}


# Handle a \usepackage or \RequirePackage line: packages whose .sty file
# can be merged are expanded in place, the remaining ones are kept in a
# reconstructed command and noted (with their options) in the style file.
sub process_package_cmd {
    local($_) = @_;
    local(%style_include,@print_styles,$key);

    /\\(usepackage|RequirePackage)\s*$options_rx\s*$arg_rx/o;
    local ($before,$class,$options,$styles,$after) = ($`, $1, $2, $3, $');

    print STDERR "%--- Found \\$class: $_\n" if($debug > 1);
    $options =~ s/\[(.*)\]/$1/o; # strip braces

    # Remember each package and check whether to merge it
    foreach $key (split(/,/,$styles)) {
	$key =~ s/\s//g; # strip spaces
	push(@print_styles,$key);
	next unless (&should_include($key));
	$style_include{$key}=&find_file($key . '.sty');
    }

    # Packages that are not merged are printed to the style file and
    # reinserted into the command.
    local(@kept) = ();
    foreach $key (@print_styles) {
	next if ($style_include{$key});
	print STYLES "$key $options\n" if ($style_file); 
	push(@kept,$key);
    }

    if (!@kept) {
	# nothing left of the command itself
	print OUT $before . $after;
    }
    else {
	# Reconstruct command
	$styles = join(',',@kept);
	$options = '[' . $options . ']' if($options =~ /\S+/);
	print OUT $before . '\\' . $class . $options .
	    '{' . $styles . '}' . $after;
    }

    # merge style files
    foreach $key (@print_styles) {
	&process_file($style_include{$key}) if ($style_include{$key});
    }
}


# Register one item that must not be merged. An item with a leading dot is
# an extension and extends $dont_include_ext_rx; anything else is a name
# and is appended to @dont_include.
sub process_dont_include {
    local($item) = @_;
    unless ($item =~ s/^\.//) {
	push(@dont_include,$item);
	return;
    }
    # the dot has been stripped; add the extension as a new alternative
    $dont_include_ext_rx = join('|', $dont_include_ext_rx, $item);
}

# Register one item that is to be merged in any case. An item with a
# leading dot is an extension and extends $do_include_ext_rx; anything
# else is a name and is appended to @do_include.
sub process_do_include {
    local($item) = @_;
    unless ($item =~ s/^\.//) {
	push(@do_include,$item);
	return;
    }
    # the dot has been stripped; the very first extension needs no `|'
    if ($do_include_ext_rx eq '') {
	$do_include_ext_rx .= $item;
    }
    else {
	$do_include_ext_rx .= '|' . $item;
    }
}

# Decide whether a style/package is to be merged. True if
#  1. the style is found in do_include, *or*
#  2. automatic exclusion is disabled and the style is *not* found in
#     dont_include.
sub should_include {
    local($style) = @_;
    # an explicit do_include entry always wins
    return(1) if ($style =~ /^($do_include_rx)$/o);
    # with -auto_exclude nothing else gets merged
    return('') if ($auto_exclude);
    return($style !~ /^($dont_include_rx)$/o);
}

# Locate an input file. A name starting with the path separator is checked
# as it stands, any other name is looked up in every @texinputs directory
# in turn. Returns the name of the file found (possibly with `.tex'
# appended by &file_or_ext), or undef.
sub find_file {
    local($file) = @_;
    local($fname,$dname);
    local($found)=0;
    print STDERR "%--- checking for $file\n" if($debug);      

    if ($file !~ m|^$DS|) {
	# search input directories
	foreach $dir (@texinputs) {
	    ($dname = $dir) =~ s|[$DS]+$||; # Remove slashes at the end
	    unless (-d $dname) {
		print STDERR "%--- Warning: \"$dname\" is no directory\n"
		    if ($debug);
		next;
	    }
	    # pass the unstripped name, a trailing `//' asks for recursion
	    $fname = &dir_search($dir,$file);
	    next unless ($fname);
	    $found = 1;
	    last;
	}
    }
    else {
	# &file_or_ext works on (and may extend) $fname
	$fname=$file;
	$found = 1 if (&file_or_ext);
    }

    unless ($found) {
	print STDERR "%--- file not found\n" if ($debug);      
	return(undef);
    }
    print STDERR "%--- found $fname\n" if ($debug);
    return($fname);
}


# Search a directory for a file, optionally descending into subdirectories.
#
# Arguments: the directory (a trailing `//' requests a recursive search)
# and the file name relative to it.
# Returns the name of the file found (possibly with `.tex' appended by
# &file_or_ext), or 0 if there is none.
sub dir_search {	# search directory recursively
    local($dir,$file) = @_;
    local(*SUBDIR);	# make file pointer local
    local($dname,$found,$recursive,$entry) =('',0,0,'');

    if ($dir =~ m|$DS$DS$|) { # does dir end in `//'?
	$recursive = 1;
    }
    $dir =~ s|[$DS]+$||; # Remove any slashes at the end
    # &file_or_ext works on (and may extend) $fname
    local($fname) = join ($DS, $dir, $file);

    print STDERR "%--- looking for $fname\n" if($debug);
    # Does file exist in this directory?
    if (&file_or_ext) {
	return($fname);
    }
    elsif ($recursive) { # descend into subdirectories?
	# search directory for subdirectories
	if (opendir(SUBDIR,$dir)) {
	    # Test for definedness: an entry named `0' is false but valid.
	    # A private variable is used so that $_ is left alone.
	    while (defined($entry = readdir(SUBDIR))) {
		next if($entry =~ /^\./); # do not check dotfiles
		$dname = join ($DS, $dir, $entry);
		if ((-d $dname) && ($fname = &dir_search($dname.$DS.$DS,$file))) {
		    $found = 1;
		    last;
		}
	    }
	    # directory handles have to be released with closedir
	    closedir(SUBDIR);
	}
	else {
	    print STDERR "%--- Warning: cannot read directory \"$dir\"\n"
		if ($debug);
	}
	if ($found) {
	    return($fname);
	}
    }
    return(0);
}


sub file_or_ext {
    # Works on the caller's (dynamically scoped) $fname and may modify it.
    # Succeeds if $fname names a readable plain file. Otherwise, unless the
    # name already ends in .tex, `.tex' is appended to $fname and the test
    # is repeated. Returns 1 on success, 0 on failure.

    if (!(-f $fname && -r $fname)) {
	# a name that already carries the extension gets no second try
	return 0 if ($fname =~ /\.tex$/);
	$fname .= ".tex";
	return 0 unless (-f $fname && -r $fname);
    }
    return 1;
}

