#!/usr/pkg/bin/perl -w
# $Id: htmlfix.pl,v 1.3 2002/06/19 10:26:13 abs Exp $
#
# Fixup html files
# Depends on HTML::FixEntities - just get p5-HTML-FixEntities from pkgsrc

=head1 NAME

htmlfix - convert latin1 to HTML entities, or fix line endings

=head1 SYNOPSIS

htmlfix will convert latin1 characters to their equivalent HTML
entities, and/or ensure the line endings are in UNIX, DOS or MacOS
format.

If given a directory name or names, htmlfix will recursively search
for *.html or *.shtml files.

=head1 USAGE

    htmlfix [opts] path ...

        -a   Include all files - defaults to just *.html and *.shtml
	-d   Ensure files are in DOS '\r\n' format
	-f   Fix html - replace latin1 characters with html entities
	     (ignores text inside <? and ?> - for php)
	-h   This help
        -m   Ensure files are in MacOS '\r' format
	-n   No action - just list files that would be changed
        -u   Ensure files are in UNIX '\n' format
	-v   List lines that are affected by -f
	-V   Display version number

=cut


use File::Find;
use HTML::FixEntities;
use Getopt::Std;
use strict;

my( @files,	# List of files matching pattern
    %opt,	# Command line option switches
    $version,	# Version number
    );

$version = '1.03';

# Parse the command line.  Every documented switch has to appear in the
# getopts() string: one that is missing from it (as 'm' used to be) is
# rejected as unknown and the user only ever sees the usage text.
if (!getopts('adfhmnuvV', \%opt) || $opt{h} || (!@ARGV && !$opt{V}))
    { usage_and_exit(); }
if ($opt{V})
    { print "$version\n"; exit; }

# -d, -m and -u are mutually exclusive; -f may be combined with one of them.
if (scalar(grep { defined $opt{$_} } qw(d m u)) > 1)
    { usage_and_exit("Cannot specify more than one of -d, -m, or -u"); }
if (!scalar(grep { defined $opt{$_} } qw(d f m u)))
    { usage_and_exit("Must specify at least one of -d, -f, -m, or -u"); }

# Line terminator requested on the command line (undef: leave endings alone).
my $eol = $opt{d} ? "\r\n"
	: $opt{u} ? "\n"
	: $opt{m} ? "\r"
	:           undef;

# Collect the candidate files via wanted(), then handle them in sorted
# order so that the report is stable from run to run.
find(\&wanted, @ARGV);
foreach my $file (sort @files)
    {
    my($orig, $updated);

    $orig = $updated = readfile($file);
    if ($opt{f})
	{
	# -f: let HTML::FixEntities produce the corrected text
	my($fix) = HTML::FixEntities->new($orig);
	$updated = $fix->text;
	}
    # Rewrite every CRLF, LF or CR.  "\r\n" is tried first so that a DOS
    # line ending counts as one terminator rather than two.
    if (defined $eol)
	{ $updated =~ s/\r\n|\n|\r/$eol/g; }

    if ($updated ne $orig)
	{
	if ($opt{n})
	    { print "DIFFER $file\n"; }
	else
	    {
	    print "UPDATE $file\n";
	    # Never carry on silently when the file could not be replaced
	    if (!safe_write($file, $updated))
		{ fail("Unable to update '$file': $!"); }
	    }
	if ($opt{v})
	    {
	    # split() drops the terminators, so a pure line ending change
	    # lists nothing here - only changes to the text of a line do.
	    my(@orig) = split(/[\n\r]+/, $orig);
	    my(@updated) = split(/[\n\r]+/, $updated);
	    while (@orig || @updated)
		{
		# An exhausted side compares as '' instead of undef
		my($before) = @orig ? shift(@orig) : '';
		my($after) = @updated ? shift(@updated) : '';
		if ($before ne $after)
		    { print "< $before\n> $after\n"; }
		}
	    print "\n";
	    }
	}
    }
exit;

# Report a fatal error and stop.
#   @_ - the pieces of the message; they are printed separated by spaces
# Prints "** ABORTING: <message>" on standard output and exits with
# status 1, so this sub never returns to its caller.
sub fail
    {
    my(@message) = @_;

    print "** ABORTING: @message\n";
    exit 1;
    }

# Read a whole file into memory.
#   $file - path of the file to read
# Returns the contents as one string ('' for an empty file).
# Calls fail(), which exits, if the file cannot be opened or read.
sub readfile
    {
    my($file) = @_;
    my($fh, $data);

    # Three-argument open takes the name literally; the two-argument form
    # strips surrounding whitespace and gives mode characters in the name
    # a special meaning.
    if (!open($fh, '<', $file))
	{ fail("Unable to read '$file': $!"); }
    # Line endings are exactly what this script rewrites, so no I/O layer
    # may translate them on the way in.
    binmode($fh);
	{
	# Slurp mode: one readline returns everything up to end of file,
	# and '' (not undef) for an empty file.
	local $/;
	$data = <$fh>;
	}
    if (!defined $data)
	{ fail("Unable to read '$file': $!"); }
    close($fh);
    return $data;
    }

# Replace the contents of a file via a temporary file and rename().
#   $file - path of the file to (re)write
#   @data - strings to write; they are written one after another
# The data is first written to "$file.tmp.<pid>" and that file is then
# renamed over $file, so $file is only replaced once all the data has
# been written and the temporary file closed without error.  If $file
# already exists its permission bits are copied to the replacement.
# Returns 1 on success.  On failure the temporary file is removed and a
# false value is returned, with $! describing the step that failed.
sub safe_write
    {
    my($file, @data)=@_;
    my($tmp) = "$file.tmp.$$";
    my($fh, $ok, $err, @st);

    # Three-argument open: the file name is never parsed for mode characters
    if (! open($fh, '>', $tmp))
        { return; }
    # Write the bytes exactly as given - the caller decides the line endings
    binmode($fh);

    $ok = print {$fh} @data;
    if (! $ok)
        { $err = $! + 0; }
    # Always close, even after a failed print, so the handle is not leaked;
    # buffered write errors may only show up here.
    if (! close($fh) && $ok)
        {
        $ok = 0;
        $err = $! + 0;
        }

    # rename() would otherwise swap in a file with umask default permissions.
    # Best effort only: a failing chmod does not abort the update.
    if ($ok && (@st = stat($file)))
        { chmod($st[2] & 07777, $tmp); }

    if ($ok && ! rename($tmp, $file))
        {
        $ok = 0;
        $err = $! + 0;
        }
    if (! $ok)
        {
        unlink($tmp);
        # Hand back the original error, not whatever unlink left in $!
        $! = $err;
        return;
        }
    return 1;
    }

# Print the usage text on standard output and exit.
#   @_ - optional error message; when given it is printed first,
#        prefixed with "** "
# Exits with status 1 when an error message was passed, so that callers
# of the script can detect bad invocations, and with status 0 otherwise.
# This sub never returns.
sub usage_and_exit
    {
    my(@message) = @_;

    if (@message)
	{ print "** @message\n"; }
    # '\\r' in this double-quoted string prints as a literal backslash-r
    print "Usage: htmlfix [opts] [files]
opts:	-a   Include all files - defaults to just *.html and *.shtml
	-d   Ensure files are in DOS '\\r\\n' format
	-f   Fix html - replace latin1 characters with html entities
	-h   This help
        -m   Ensure files are in MacOS '\\r' format
	-n   No action - just list files that would be changed
        -u   Ensure files are in UNIX '\\n' format
	-v   List lines that are affected by -f
	-V   Display version ($version)

htmlfix updates '.html' and '.shtml' files. If directories are
given then they are recursively searched for '.html' or '.shtml'
files.  Symlinks are ignored.
";
    exit(@message ? 1 : 0);
    }

# File::Find callback, called with the current entry's name in $_.
# Appends $File::Find::name to @files when the entry is a plain file that
# is not a symlink and, unless -a was given, its name ends in .html or
# .shtml.
sub wanted
    {
    my($name_ok) = ($opt{a} || /\.s?html$/);

    return unless ($name_ok);
    return unless (-f $_);
    return if (-l $_);
    push(@files, $File::Find::name);
    }
