#!/usr/bin/env perl
# @(#) extract javadoc style comments from DDL SQL file and generate appropriate "COMMENT ON" commands.
#
# Copyright (c) 2007..2008 Dirk Jagdmann <doj@cubic.org>
#
# This software is provided 'as-is', without any express or implied
# warranty. In no event will the authors be held liable for any damages
# arising from the use of this software.
#
# Permission is granted to anyone to use this software for any purpose,
# including commercial applications, and to alter it and redistribute it
# freely, subject to the following restrictions:
#
#     1. The origin of this software must not be misrepresented; you
#        must not claim that you wrote the original software. If you use
#        this software in a product, an acknowledgment in the product
#        documentation would be appreciated but is not required.
#
#     2. Altered source versions must be plainly marked as such, and
#        must not be misrepresented as being the original software.
#
#     3. This notice may not be removed or altered from any source
#        distribution. */

=head1 NAME

B<pgcomment> - extract javadoc style comments from DDL SQL file and generate appropriate "COMMENT ON" commands.

=head1 SYNOPSIS

B<pgcomment> [B<--html>|B<--latex>] < I<ddl.sql> > I<outputfile>

=head1 OPTIONS

=over

=item B<--html>

generate a stub html file.

=item B<--latex>

generate a stub LaTeX file.

=back

=cut

# Output format selection. The first command line argument may request HTML
# or LaTeX output; without an argument (or with any other argument) the SQL
# "COMMENT ON" commands are generated. The argument is defaulted to the empty
# string first so that running without arguments does not compare an
# undefined value.
my $first_arg = defined $ARGV[0] ? $ARGV[0] : '';
my $html  = $first_arg eq '--html';
my $latex = $first_arg eq '--latex';

=head1 ENVIRONMENT

=over

=item B<LATEX_SEC>

define the LaTeX section/chapter type. Defaults to "section".

=item B<HTML_SEC>

define the HTML headline element. Defaults to "h2".

=back

=cut

# Fall back to the built-in section types when the environment variables are
# unset or hold a false value (empty string or 0).
$ENV{LATEX_SEC} = 'section' unless $ENV{LATEX_SEC};
$ENV{HTML_SEC}  = 'h2'      unless $ENV{HTML_SEC};

# Object type keywords accepted directly after the "/**" comment opener.
# Multi-word keywords are listed before the keyword that is their own first
# word (e.g. "OPERATOR CLASS" before "OPERATOR") so that the alternation
# picks the most specific keyword. USER is accepted as well; extract_comment()
# reports it as ROLE in the generated SQL.
my $object_types = join '|', map { quotemeta } (
    'TABLE', 'COLUMN', 'AGGREGATE', 'CAST', 'CONSTRAINT', 'CONVERSION',
    'DATABASE', 'DOMAIN', 'FUNCTION', 'INDEX', 'LARGE OBJECT',
    'OPERATOR CLASS', 'OPERATOR FAMILY', 'OPERATOR',
    'PROCEDURAL LANGUAGE', 'LANGUAGE', 'RULE', 'ROLE', 'USER', 'SCHEMA',
    'SEQUENCE', 'TEXT SEARCH CONFIGURATION', 'TEXT SEARCH DICTIONARY',
    'TEXT SEARCH PARSER', 'TEXT SEARCH TEMPLATE', 'TRIGGER', 'TYPE', 'VIEW',
);

# quotemeta() turned the blanks inside the keywords into escaped blanks, so
# they stay significant although the pattern is written with the x modifier.
my $comment_start = qr{
    / \* \*                 # javadoc style comment opener
    \s+ ($object_types)     # object type keyword, matched case-insensitively
    \s+ (\S.*?)             # object name or signature
    \s* \z                  # trailing whitespace, like the CR of CRLF input, is dropped
}xi;

# Scan STDIN line by line; every line that opens a typed comment hands over
# to extract_comment(), which consumes the lines up to the comment end.
while (defined(my $line = <STDIN>))
{
    chomp $line;
    if ($line =~ $comment_start)
    {
        extract_comment(uc($1), $2);
    }
}

# Read the remainder of a "/** TYPE signature" comment from STDIN (up to and
# including the line that holds the closing "*/") and print it to the
# currently selected output handle. The format depends on the file level
# flags: HTML when $html is set, LaTeX when $latex is set, otherwise SQL
# "COMMENT ON" commands.
#
# Parameters:
#   $type - upper-cased object type keyword, e.g. 'TABLE' or 'FUNCTION';
#           'USER' is reported as 'ROLE' in the SQL output
#   $sig  - object name or signature that followed the keyword
#
# For TABLE comments every "@column NAME description" tag is reported
# separately (as "COMMENT ON COLUMN" in SQL mode) and removed from the table
# comment itself. Nothing is read or printed if either argument is false.
sub extract_comment
{
    my ($type, $sig) = @_;
    return unless $type;
    return unless $sig;

    # Collect the comment body. A lexical line variable is used so that the
    # caller's $_ is left alone.
    my $t = '';
    while (defined(my $line = <STDIN>))
    {
        chomp $line;
        $t .= "$line\n";
        last if $line =~ /\*\//;
    }

    $t =~ s/\*\//\n\n/;    # remove comment end

    if ($html)
    {
        # The heading parts are escaped together with the body, otherwise a
        # signature such as an operator name would inject raw markup.
        for my $text ($t, $type, $sig)
        {
            $text =~ s/\&/\&amp;/g;    # must run first, the other entities contain '&'
            $text =~ s/</\&lt;/g;
            $text =~ s/>/\&gt;/g;
            $text =~ s/'/\&apos;/g;
            $text =~ s/"/\&quot;/g;
        }
    }
    elsif ($latex)
    {
        $t    = latex_escape($t);
        $type = latex_escape($type);
        $sig  = latex_escape($sig);
    }
    else
    {
        $t =~ s/'/''/g;    # double single quotes for the SQL string literal
    }

    print "<$ENV{HTML_SEC}>$type $sig</$ENV{HTML_SEC}>\n" if $html;
    print "\n\\$ENV{LATEX_SEC}\{$type: $sig\}\n\n" if $latex;

    # Column documentation is printed after the table text in HTML and LaTeX.
    my $html_extra  = '';
    my $latex_extra = '';

    if ($type eq 'TABLE')
    {
        # A column description runs up to the next @column tag or the end of
        # the comment text.
        while ($t =~ /\@column\s+(\S+)\s+(.+?)(?=\@column|$)/gs)
        {
            my ($col, $desc) = ($1, $2);
            if ($html)
            {
                $desc =~ s!\@(\w+)\s+(\S+)\s+!<br/><b>$1 <i>$2</i>:</b> !gs;
                $html_extra .= "<b>Column <i>$col</i>:</b> $desc<br/>\n";
            }
            elsif ($latex)
            {
                $desc =~ s!\@(\w+)\s+(\S+)\s+!\n\n\\quad\\textbf\{$1 \\textit\{$2\}:\}\\quad\\ !gs;
                $latex_extra .= "\n\n\\textbf\{Column $col\:}\\quad\\ $desc\n\n";
            }
            else
            {
                # The captured text still carries the blank lines and the
                # indentation in front of the next tag; keep them out of the
                # SQL string literal.
                $desc =~ s/\A\s+|\s+\z//g;
                print "COMMENT ON COLUMN ${sig}.$col IS '$desc';\n";
            }
        }

        # Drop everything from the first @column tag on, so the table comment
        # holds only the text in front of the column tags.
        $t =~ s/\@column\s+(\S+)\s+(.+)(?=\@column|$)//gs;
    }
    elsif ($type eq 'USER')
    {
        $type = 'ROLE';
    }

    if ($html)
    {
        for my $line (split(/\n/, $t))
        {
            $line =~ s!\@return\s+(.+)!<br/><b>return:</b> $1!;
            $line =~ s!\@(\w+)\s+(\S+)\s+(.+)!<br/><b>$1 <i>$2</i>:</b> $3!;
            print "$line\n";
        }

        print "<br/>$html_extra\n" if $html_extra;
    }
    elsif ($latex)
    {
        for my $line (split(/\n/, $t))
        {
            $line =~ s!\@return\s+(.+)!\n\n\\textbf\{return:\} $1\n!;
            $line =~ s!\@(\w+)\s+(\S+)\s+(.+)!\n\n\\textbf\{$1 \\textit\{$2\}:\}\\quad\\ $3\n!;
            print "$line\n";
        }

        print $latex_extra;
    }
    else
    {
        $t =~ s/^\s+|\s+$//gs;    # strip leading and trailing whitespace
        print "COMMENT ON $type $sig IS '$t';\n";
    }

    return;
}

# Return a copy of the given text with characters that are special to LaTeX
# made safe for running text.
#
#   _ # % &  are prefixed with a backslash; % and & are left alone when the
#            text already escapes them with a backslash
#   "        becomes two single quotes
#   < >      are wrapped in math mode
#
# Parameters:
#   $text - plain text to escape
#
# Returns the escaped text; the argument itself is not modified.
sub latex_escape
{
    my ($text) = @_;
    $text =~ s/_/\\_/g;
    $text =~ s/#/\\#/g;
    # A bare % comments out the rest of a LaTeX line and a bare & is an
    # alignment tab outside of tables.
    $text =~ s/(?<!\\)([%&])/\\$1/g;
    $text =~ s/"/''/g;
    $text =~ s/</\$<\$/g;
    $text =~ s/>/\$>\$/g;
    return $text;
}

=head1 DESCRIPTION

B<pgcomment> expects SQL text on STDIN which should contain DDL commands
along with specially formatted comments. Those comments are written as
"COMMENT ON" commands to STDOUT. You can then execute the created SQL
commands in PostgreSQL to have object descriptions (comments)
available inside the database.

In this pre-release version the comments in the DDL SQL have to start
with the /** start and end with */. The object "CREATE" statement
which the comment describes must immediately follow the comment. After
the comment start characters you must indicate the comment/SQL object
type by repeating the object type of the CREATE statement (this
requirement will be dropped in future releases of B<pgcomment>). Like SQL,
B<pgcomment> is case-insensitive when recognizing keywords.

The "TABLE" object type is special, since it can contain comments for
the columns. These have to be preceded by a "@column" tag and the
column comment must be separated by newlines.

=head1 EXAMPLE

The following input SQL DDL are the contents of the file example.sql:

 /** table example
   The table "example" shows how to set the column comments in a table.

   @column nr the primary key as artifical autoassigned integer

   @column name a person's name

   @column job a person's job description
 */
 create table example (
   nr   serial,
   name text not null unique,
   job  text not null
 );

 /** FUNCTION normalize_name(name text)
 normalizes "name".  Only lowercase characters, digits, dash and underscore survive the normalization.
 @param name an arbitrary string.
 @return name parameter normalized.
 */
 CREATE OR REPLACE FUNCTION normalize_name(name text)
 RETURNS TEXT
 AS $$
 BEGIN
   RETURN regexp_replace(lower(name), E'[^\\w\\d\\-_]', '', 'g');
 END;
 $$ LANGUAGE plpgsql IMMUTABLE STRICT;

An execution of B<pgcomment> will produce the following output:

 COMMENT ON COLUMN example.nr IS 'the primary key as artifical autoassigned integer';
 COMMENT ON COLUMN example.name IS 'a person''s name';
 COMMENT ON COLUMN example.job IS 'a person''s job description';
 COMMENT ON TABLE example IS 'The table "example" shows how to set the column comments in a table.';
 COMMENT ON FUNCTION normalize_name(name text) IS 'normalizes "name".  Only lowercase characters, digits, dash and underscore survive the normalization.
 @param name an arbitrary string.
 @return name parameter normalized.';

(Note that there is currently no support for the javadoc style
function parameter and return value description.)

=head1 SEE ALSO

L<http://www.postgresql.org/docs/8.3/interactive/sql-commands.html>

=head1 AUTHOR

Dirk Jagdmann <doj@cubic.org>
L<http://pgfoundry.org/projects/pg-toolbox/>

=head1 LICENSE

zlib/libpng license: L<http://en.wikipedia.org/wiki/Zlib_License>

=cut
