#!/usr/bin/perl
#
# bookmarks2csv - Translates Netscape's (Communicator's) bookmarks file to a
# Palm Database file (PDB) suitable for List by Andrew Low <roo@magma.ca>,
# <http://www.magma.ca/~roo>. This code includes modified code of csv2lsdb by
# Darren Dunham <add@netcom.com>. Could not have done without it!
#
# Copyright  2000-2001 J.I. van Hemert
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
# Author: J.I. van Hemert <jvhemert@cs.leidenuniv.nl>
#
# Physical mail:
# LIACS - Leiden University
# Niels Bohrweg 1
# 2333 CA Leiden
# The Netherlands
#

my $version = "0.2";

#use strict; # strict does not work with Darren's code :(

# Select where the bookmarks are read from:
#   - QUERY_STRING set: running as a CGI script; the value of the 'file'
#     parameter is used as the input handle (presumably an uploaded file -
#     see the distribution's documentation).
#   - first command line argument: name of the bookmarks file to read.
#   - otherwise: print usage information on STDERR and exit with status 1.
my $filehandle;

if (defined($ENV{'QUERY_STRING'}))
{
	require CGI;
	$filehandle = CGI::param('file');
}
elsif (defined($ARGV[0]) and length($ARGV[0]))
{
	# Three-argument open: the file name is never interpreted as an open
	# mode or a pipe, whatever characters it contains.
	open($filehandle, '<', $ARGV[0])
		or die "$0: cannot open '$ARGV[0]' for reading: $!\n";
}
else
{
	print STDERR 'Palm-Bookmarks version '.$version.' Copyright  2000-2001 J.I. van Hemert
This piece of software comes with ABSOLUTELY NO WARRANTY
This is free software, and you are welcome to redistribute it
under certain conditions, see the file COPYING for details.
For more information read the documentation that comes with the
distribution.

Usage: '.$0.' bookmarks.html > bookmarks.pdb

Alternatively this script can be used through a webserver, see
the documentation for details.

';
	exit (1);
}


# Read the bookmarks file line by line and collect:
#   @bookmarks        - one CSV line per bookmark: category,title,url[,note...]
#   @all_categories   - every distinct category, "Top" first (at most 15)
#   @current_category - stack of categories; the last element is the
#                       category new bookmarks are filed under
# Only folders indented by exactly four spaces become categories.
# Characters that are special to the CSV parser used further on (quotes,
# commas, backslashes) are backslash-escaped.
my @bookmarks;
my @current_category = "Top";
my @all_categories = "Top";
my %known_category = ("Top" => 1);	# categories already in @all_categories
my $last_entry_is_bookmark = 0;		# may a <DD> be attached to $bookmarks[-1]?
while (<$filehandle>)
{
	my $line = $_;
	chomp($line);

	# Bookmark folder - begin
	if ($line =~ s/^    <DT><H3.*>(.*)<\/H3>/$1/g)
	#1#if ($line =~ s/^.*<DT><H3.*>(.*)<\/H3>/$1/g)
	{
		# A <DD> that follows a folder heading describes the folder and
		# must not end up in the note of the previous bookmark.
		$last_entry_is_bookmark = 0;

		# Truncate first (category names are limited to 15 characters),
		# then escape: the other order can cut an escape sequence in two
		# and leave a dangling backslash that swallows the next comma.
		my $folder = substr($line, 0, 15);
		$folder =~ s/(['",\\])/\\$1/g;

		if ($known_category{$folder})
		{
			# Same name seen before (or identical after truncation):
			# reuse the category instead of listing it twice, which
			# the header check would reject.
			push(@current_category, $folder);
		}
		elsif (@all_categories < 15)
		{
			push(@current_category, $folder);
			push(@all_categories, $folder);
			$known_category{$folder} = 1;
		}
		else
		{
			warn "Too many categories (>15) while reading $line.\n";
			push(@current_category, "Top");
		}
	}

	# Bookmark folder - end
	if ($line =~ s/^    <\/DL><p>//g)
	#1#if ($line =~ s/^.*<\/DL><p>/$1/g)
	{
		# Never pop the initial "Top" entry, so there is always a
		# current category.
		pop(@current_category) if (@current_category > 1);
	}


	# Bookmark entry
	if ($line =~ s/.*<DT><A HREF="([^"]*).*>(.*)<\/A>/$1\t$2/g)
	{
		$line =~ s/(['",&\\])/\\$1/g;
		my ($url, $title) = split ("\t", $line);
		push (@bookmarks, "$current_category[-1],$title,$url");
		$last_entry_is_bookmark = 1;
	}
	# Bookmark description last entry
	if ($line =~ s/.*<DD>(.*)/$1/g)
	{
		if ($last_entry_is_bookmark)
		{
			$line =~ s/(['",&\\])/\\$1/g;
			$bookmarks[-1] .= ",$line";
		}
	}
}


# CSV header line: database name, the two field names, and then every
# category that was collected while reading the bookmarks.
my $csvheader = join(",", "Bookmarks,Title,URL", @all_categories);



# BEGIN code of Darren

# List DB converter.
# Darren Dunham (add@netcom.com)
# Copyright 1999

# Written to duplicate the converter 'listdb', part of
# the 'List' application for the Palm platform, in perl for 
# greater portability.
# 'List' by Andrew Low (roo@magma.ca, http://www.magma.ca/~roo)

# As a derivative work, this script is subject to any and all
# licensing restrictions as 'listdb' from that package.


# 1.0.0 1999/8/8   Beta version complete.
# 0.5.0 1999/7/20  Alpha version complete.


use Text::ParseWords;


# Split the CSV header line into its fields: database name, the names of
# the two record fields, and then the category names.  Dies with a message
# when the header does not satisfy the limits checked below.
@header = quotewords(",",0,$csvheader);

# header must contain title, f1, f2, and 1 to 15 categories.
if (@header < 4 or @header > 18)
{
    die "$0: Header line contains illegal number of fields.\n";
}

$databasename = shift(@header);
die "$0: Database name must be between 1 and 31 characters.\n"
    if (length($databasename) < 1 or length($databasename) > 31);

# Each field name is checked against its own length (not the length of
# the database name).
$field1 = shift(@header);
die "$0: Field name #1 must be between 1 and 15 characters.\n"
    if (length($field1) < 1 or length($field1) > 15);

$field2 = shift(@header);
die "$0: Field name #2 must be between 1 and 15 characters.\n"
    if (length($field2) < 1 or length($field2) > 15);

# now fill in categories, and Unfiled gets '0';
# The remaining header fields are numbered 1, 2, ... in header order.
$category2index{Unfiled} = 0;
$index = 0;
foreach $category (@header)
{
    $index++;
    if (exists $category2index{$category})
    { die "$0: Category $category seen twice on header line.\n"; }

    $category2index{$category} = $index;
}

# Done with header. proceed to parse remaining entries

# Turn every CSV bookmark line into one record, spread over four parallel
# arrays: @category (category index), @f1 and @f2 (the two fields) and
# @note (all remaining CSV fields).
# $line counts lines for the error messages; the header counts as line 1.
$line = 1;
foreach $item (@bookmarks)
{
    $line++;
    @line = quotewords(",",0,$item);
    if (@line < 3)
    {
        die "$0: Too few fields on line $line.\n";
    }
    $category = shift(@line);
    # Test for existence, not truth: index 0 (Unfiled) is a valid category.
    unless (exists $category2index{$category})
    {
        die "$0: Category $category on line $line not recognized from header.\n";
    }
    push @category, $category2index{$category};


    push @f1, substr(shift(@line), 0, 63);  # silently truncate to 63 chars
    push @f2, substr(shift(@line), 0, 63);

    # @line can be empty.
    # The remaining fields are joined with chr(10) (so commas in the note
    # become line breaks) and cut to at most 1023 chars.
    # can't use /n for multi-platform.
    push @note, substr(join(chr(10), @line), 0, 1023);
}



# Reverse lookup table (category index -> category name), filled with a
# hash slice; keys() and values() return their items in matching order.
@index2category{values %category2index} = keys %category2index;

# Totals needed for the database layout: number of records, and number of
# categories (this count includes Unfiled).
$records = scalar(@f1);
$categories = scalar(keys %category2index);





#####    OUTPUT    ####
# Database header consists of the following structure
# Name (32)
# File Attributes (2)  0x0008 implies no conduit (make backup)
# Version (2)
# CreationDate (4) Seconds since 1904/1/1
# ModificationDate (4)
# LastBackupDate (4)  Leave at 0 to install
# Modification Number (4) Set to 0
# AppInfoArea (4) Offset where the AppInfoArea is found
# SortInfoArea (4) Offset for SortInfoArea.  We don't have one.
# Database Type (4)  DATA
# Creator ID (4) LSdb
# Unique ID Seed (4) Unknown.  Set to zero
# NextRecordListID (4) only used in memory, not externally.  zero.
# Number of Records (2) This one's easy.

# After Database header is the RecordList
# Each record has the following format.
# Record Data Offset (4) Offset where the record is found.
# Record Attributes (1) Lower 4 bits represent the category used.
# Unique ID (3) Should we use it?

# After RecordList is the AppInfoArea
# Renamed records (2)

# Build the fixed 72 byte database header in $header, start the record
# list in $recordlist, and work out where the AppInfo area and the first
# record will be placed in the file.
$header = "";
# 32 char for a name
$header .= pack "a32", $databasename;  #name
$header .= pack "H4", "0008";          # Backup bit
$header .= pack "H4", "0000";          # version

# The database dates count seconds since 1904/1/1, which lies 2082844800
# seconds BEFORE the Unix epoch, so the offset has to be added.
$time = time;                          # seconds since 1970/1/1
$time = $time + 2082844800;            # seconds since 1904/1/1
$header .= pack "N", $time;            # creation date
$header .= pack "N", $time;            # modification date
$header .= pack "xxxx";                # last backup date
$header .= pack "xxxx";                # modification number
# AppInfo starts after recordlist.  Recordlist is 6 bytes plus 8 bytes
# for each record, then 2 pad bytes.
# Prior to recordlist is a 72 byte header.
$listappstart = 72 + 6 + (8 * $records) + 2;
$header .= pack "N", $listappstart;
$header .= pack "xxxx";                # no sort area.
$header .= pack "a4", "DATA";          # type
$header .= pack "a4", "LSdb";          # creator
$header .= pack "xxxx";                # unique id seed (not used)

$recordlist = pack "xxxx";                 # next record?
$recordlist .= pack "n", $records;          # number of records


# The record data follows directly after the AppInfo area.
$datastart = $listappstart + 512;      #ListApp size will be 512.
$nextrecord = $datastart;


###
# records
# now we're going to start calculating record sizes.  This is needed
# for the offsets to the record, and to the fields in the records

# One pass over the records: remember the length of every field (the
# lengths are needed again when the record data is written) and append
# the 8 byte record list entry of each record.
for (my $recno = 0; $recno < $records; $recno++)
{
    my ($len1, $len2, $lennote) =
        map { length($_) } ($f1[$recno], $f2[$recno], $note[$recno]);

    $sizef1[$recno] = $len1;
    $sizef2[$recno] = $len2;
    $sizenote[$recno] = $lennote;

    # 6 extra bytes per record: the 3 offset bytes at its start plus the
    # null byte that terminates each of its 3 fields.
    $sizerec[$recno] = $len1 + $len2 + $lennote + 6;

    # Entry: offset of the record data (4), category (1), 3 null bytes.
    $recordlist .= pack("NCx3", $nextrecord, $category[$recno]);

    $nextrecord += $sizerec[$recno];
}
$recordlist .= pack("a2", "XX");    # 2 byte filler.

###
# now time for the AppInfo area.  Should be exactly 512 bytes in length.
# AppInfo area, 512 bytes:
#   2 + 16*16 (names) + 16 (ids) + 4 + 2*16 (field names) + 202 (padding)

# Category names for slots 0..15; a slot without a category is packed
# from the empty string, which gives 16 null bytes.
my @slot_names = map {
    defined($index2category{$_}) ? $index2category{$_} : ""
} (0 .. 15);

$appinfo = pack("n", 14);                         # Renumbered categories
$appinfo .= pack("a16" x 16, @slot_names);

# Category ids: 0 for Unfiled, index + 15 for every other category in
# use, and the plain slot number for the slots that are left over.
my @slot_ids = (
    0,
    (map { $_ + 15 } 1 .. ($categories - 1)),
    ($categories .. 15),
);
$appinfo .= pack("C*", @slot_ids);

# Last ID used, display style (?), write protect (?), last category (?)
$appinfo .= pack("CCxx", ($categories - 1) + 15, 128);
$appinfo .= pack("a16a16", $field1, $field2);
$appinfo .= pack("x202");                         # Null to pos #511.

# Record data: every record consists of three offset bytes (where field 1,
# field 2 and the note start inside the record) followed by the three
# fields themselves, each terminated by a null byte.
$recordinfo = join "", map {
    my ($len1, $len2) = ($sizef1[$_], $sizef2[$_]);
    pack("CCC", 3, 3 + $len1 + 1, 3 + $len1 + $len2 + 2)
        . $f1[$_] . "\0"
        . $f2[$_] . "\0"
        . $note[$_] . "\0";
} 0 .. ($records - 1);

# Write the database to STDOUT; when running as a CGI script (QUERY_STRING
# is set) the HTTP header is written first.
# The output is binary, so STDOUT must not translate line endings.
binmode(STDOUT);
if (defined $ENV{'QUERY_STRING'})
{
	# Content-Disposition needs a disposition type in front of its
	# parameters.
	print "Content-type: application/pdb\nContent-Disposition: attachment; filename=bookmarks.pdb\n\n";
}
print $header, $recordlist, $appinfo, $recordinfo;

# END code of Darren

exit 0;

#############
# ChangeLog #
#############
#
# 0.2
# 
# - Added ability of commandline use
# - Fixed bug of commas in categories (would end up directly in csv)
#
# 0.1
#
# - First public release
#
