#!/usr/bin/perl
# BEGIN BPS TAGGED BLOCK {{{
#
# COPYRIGHT:
#
# This software is Copyright (c) 1996-2011 Best Practical Solutions, LLC
#                                          <sales@bestpractical.com>
#
# (Except where explicitly superseded by other copyright notices)
#
#
# LICENSE:
#
# This work is made available to you under the terms of Version 2 of
# the GNU General Public License. A copy of that license should have
# been provided with this software, but in any event can be snarfed
# from www.gnu.org.
#
# This work is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 or visit their web page on the internet at
# http://www.gnu.org/licenses/old-licenses/gpl-2.0.html.
#
#
# CONTRIBUTION SUBMISSION POLICY:
#
# (The following paragraph is not intended to limit the rights granted
# to you to modify and distribute this software under the terms of
# the GNU General Public License and is only of importance to you if
# you choose to contribute your changes and enhancements to the
# community by submitting them to Best Practical Solutions, LLC.)
#
# By intentionally submitting any modifications, corrections or
# derivatives to this work, or any other work intended for use with
# Request Tracker, to Best Practical Solutions, LLC, you confirm that
# you are the copyright holder for those contributions and you grant
# Best Practical Solutions,  LLC a nonexclusive, worldwide, irrevocable,
# royalty-free, perpetual, license to use, copy, create derivative
# works based on those contributions, and sublicense and distribute
# those contributions and any derivatives thereof.
#
# END BPS TAGGED BLOCK }}}
use strict;
use warnings;
no warnings 'once';

# fix lib paths, some may be relative
BEGIN {
    require File::Spec;

    # Library directories to prepend to @INC. Relative entries are resolved
    # against the parent of the directory this script lives in.
    my @lib_dirs = (
        "/usr/local/libdata/perl5/site_perl",
        "/usr/local/libdata/perl5/site_perl",
    );
    my $script_dir;

    foreach my $dir (@lib_dirs) {
        if ( !File::Spec->file_name_is_absolute($dir) ) {
            # Work out the script directory lazily, only when a relative
            # entry actually needs it.
            $script_dir ||= File::Spec->file_name_is_absolute(__FILE__)
                ? ( File::Spec->splitpath(__FILE__) )[1]
                : do {
                    require FindBin;
                    no warnings "once";
                    $FindBin::Bin;
                };
            $dir = File::Spec->catfile( $script_dir, File::Spec->updir, $dir );
        }
        unshift @INC, $dir;
    }
}

# Load RT, read its configuration and initialise it at compile time, so
# that later top-level code can use RT->Config and $RT::Handle.
use RT;
BEGIN {
    RT::LoadConfig();
    RT::Init();
}
use RT::Interface::CLI ();

# Database settings: type and regular user come from the RT configuration,
# the DBA credentials only from the command line (--dba, --dba-password).
my %DB = (
    type           => scalar RT->Config->Get('DatabaseType'),
    user           => scalar RT->Config->Get('DatabaseUser'),
    admin          => undef,
    admin_password => undef,
);

# Command line switches. 'ask' controls whether prompt() asks questions
# (--noask makes it return the defaults silently).
my %OPT = (
    help        => 0,
    ask         => 1,
);

# Per-database defaults for the names of the objects created below.
my %DEFAULT;
if ( $DB{'type'} eq 'Pg' ) {
    %DEFAULT = (
        table  => 'Attachments',
        column => 'ContentIndex',
    );
}
elsif ( $DB{'type'} eq 'mysql' ) {
    %DEFAULT = (
        table => 'AttachmentsIndex',
        column => 'Content',
    );
}
elsif ( $DB{'type'} eq 'Oracle' ) {
    %DEFAULT = (
        prefix => 'rt_fts_',
    );
}

use Getopt::Long qw(GetOptions);

# GetOptions returns false when it meets an unknown or malformed option.
# This script changes the DB schema, so show the usage (as an error)
# instead of silently carrying on with a mistyped switch.
GetOptions(
    'h|help!'        => \$OPT{'help'},
    'ask!'           => \$OPT{'ask'},

    'dba=s'          => \$DB{'admin'},
    'dba-password=s' => \$DB{'admin_password'},
) or show_help(1);

# Help is shown on request, and also (as an error) when Oracle is used
# without a DBA user name.
if ( $OPT{'help'} || (!$DB{'admin'} && $DB{'type'} eq 'Oracle' ) ) {
    show_help( !$OPT{'help'} );
}


# Handle of the regular RT database user. It is a file-level lexical that
# the helper subs below (insert_schema, ora_create_*, ...) use directly.
my $dbh = $RT::Handle->dbh;
$dbh->{'RaiseError'} = 1;
$dbh->{'PrintError'} = 1;

if ( $DB{'type'} eq 'mysql' ) {
    # mysql: create a SphinxSE table and print a sample sphinx config.
    check_sphinx();
    my $table = prompt(
        message => 'Enter name of a DB table that will be used to connect to the sphinx server',
        default => $DEFAULT{'table'},
        silent  => !$OPT{'ask'},
    );
    my $url = prompt(
        message => 'Enter URL of the sphinx search server, it should be sphinx://<server>:<port>/<index name>. Simple config for this sphinx instance will be generated for you.',
        default => 'sphinx://localhost:3312/rt',
        silent  => !$OPT{'ask'},
    );

    my $schema = <<END;
CREATE TABLE $table (
    id     INTEGER NOT NULL,
    weight INTEGER NOT NULL,
    $DEFAULT{'column'}  VARCHAR(3072) NOT NULL,
    INDEX($DEFAULT{'column'})
) ENGINE=SPHINX CONNECTION="$url"
END

    print_rt_config( Table => $table, Column => $DEFAULT{'column'} );
    insert_schema( $schema );

    require URI;
    my $urlo = URI->new( $url );
    my $port = $urlo->port;

    # The index name is the URL path without its leading slash(es).
    ( my $index = $urlo->path ) =~ s{^/+}{};

    # NOTE(review): $RT::VarPath is assumed to be the package variable RT
    # exposes for its var directory -- confirm against the installed RT.pm.
    my $var_path = $RT::VarPath;

    my %sphinx_conf = ();
    $sphinx_conf{'host'} = RT->Config->Get('DatabaseHost');
    $sphinx_conf{'db'}   = RT->Config->Get('DatabaseName');
    $sphinx_conf{'user'} = RT->Config->Get('DatabaseUser');
    $sphinx_conf{'pass'} = RT->Config->Get('DatabasePassword');

    print "Here is simple sphinx config, you can use it to index text/plain attachments in your DB."
        ." This config is not ideal. You should read Sphinx docs to get better ideas.";

    # In this interpolating heredoc "\\" emits the single backslash sphinx
    # needs for line continuation and "\$id" emits sphinx's literal $id macro.
    print <<END;

source rt {
    type            = mysql

    sql_host        = $sphinx_conf{'host'}
    sql_db          = $sphinx_conf{'db'}
    sql_user        = $sphinx_conf{'user'}
    sql_pass        = $sphinx_conf{'pass'}

    sql_query       = \\
        SELECT a.id, a.content FROM Attachments a \\
        JOIN Transactions txn ON a.TransactionId = txn.id AND txn.ObjectType = 'RT::Ticket' \\
        JOIN Tickets t ON txn.ObjectId = t.id \\
        WHERE a.ContentType = 'text/plain' AND t.Status != 'deleted'

    sql_query_info  = SELECT * FROM Attachments WHERE id=\$id
}

index $index {
    source                  = rt
    path                    = $var_path/sphinx/index
    docinfo                 = extern
    charset_type            = utf-8
}

indexer {
    mem_limit               = 32M
}

searchd {
    port                    = $port
    log                     = $var_path/sphinx/searchd.log
    query_log               = $var_path/sphinx/query.log
    read_timeout            = 5
    max_children            = 30
    pid_file                = $var_path/sphinx/searchd.pid
    max_matches             = 1000
    seamless_rotate         = 1
    preopen_indexes         = 0
    unlink_old              = 1
}

END

}
elsif ( $DB{'type'} eq 'Pg' ) {
    # Pg: add (or create a table with) a tsvector column and index it.
    my $table = prompt(
        message =>
            'Enter name of a DB table that will be used to store Pg tsvector.'
            . ' Attachments table can be used.'
        ,
        default => $DEFAULT{'table'},
        silent  => !$OPT{'ask'},
    );
    my $column = prompt(
        message => 'Enter name of a column that will be used to store Pg tsvector',
        default => $DEFAULT{'column'},
        silent  => !$OPT{'ask'},
    );

    # Reuse the Attachments table when asked to, otherwise create a
    # separate table holding the id and the tsvector.
    my $schema;
    if ( lc($table) eq 'attachments' ) {
        $schema = "ALTER TABLE $table ADD COLUMN $column tsvector";
    } else {
        $schema = "CREATE TABLE $table ( "
            ."id INTEGER NOT NULL,"
            ."$column tsvector )";
    }

    print_rt_config( Table => $table, Column => $column );

    insert_schema_as_dba( $schema );

    # Keep asking until one of the two supported index types is given.
    my $index_type;
    do {
        $index_type = lc prompt(
            message =>
                'You have choice between GiST or GIN index,'
                .' the first is times slower to search, but'
                .' it takes less place and faster to update.'
            ,
            default => 'GiST',
            silent  => !$OPT{'ask'},
        );
    } while ( $index_type ne 'gist' && $index_type ne 'gin' );
    insert_schema_as_dba("CREATE INDEX ${column}_idx ON $table USING $index_type($column)");
}
elsif ( $DB{'type'} eq 'Oracle' ) {
    # Oracle: grant the RT user what it needs, create the Oracle Text
    # preferences and then the CONTEXT index built from them.
    {
        # Grant failures are printed but do not abort the script.
        my $dbah = dba_handle();
        do_print_error( $dbah => 'GRANT CTXAPP TO '. $DB{'user'} );
        do_print_error( $dbah => 'GRANT EXECUTE ON CTXSYS.CTX_DDL TO '. $DB{'user'} );
    }

    my %PREFERENCES = (
        datastore => {
            type => 'DIRECT_DATASTORE',
        },
        filter => {
            type => 'AUTO_FILTER',
#        attributes => {
#            timeout => 120, # seconds
#            timeout_type => 'HEURISTIC', # or 'FIXED'
#        },
        },
        lexer => {
            type => 'WORLD_LEXER',
        },
        word_list => {
            type => 'BASIC_WORDLIST',
            attributes => {
                stemmer => 'AUTO',
                fuzzy_match => 'AUTO',
#            fuzzy_score => undef,
#            fuzzy_numresults => undef,
#            substring_index => undef,
#            prefix_index => undef,
#            prefix_length_min => undef,
#            prefix_length_max => undef,
#            wlidcard_maxterms => undef,
            },
        },
        'section_group' => {
            type => 'NULL_SECTION_GROUP',
        },

        storage => {
            type => 'BASIC_STORAGE',
            attributes => {
                R_TABLE_CLAUSE => 'lob (data) store as (cache)',
                I_INDEX_CLAUSE => 'compress 2',
            },
        },
    );

    # Each helper creates its object in the DB and returns the matching
    # clause for the index "parameters(...)" string.
    my @params = ();
    push @params, ora_create_datastore( %{ $PREFERENCES{'datastore'} } );
    push @params, ora_create_filter( %{ $PREFERENCES{'filter'} } );
    push @params, ora_create_lexer( %{ $PREFERENCES{'lexer'} } );
    push @params, ora_create_word_list( %{ $PREFERENCES{'word_list'} } );
    push @params, ora_create_stop_list();
    push @params, ora_create_section_group( %{ $PREFERENCES{'section_group'} } );
    push @params, ora_create_storage( %{ $PREFERENCES{'storage'} } );

    my $index_params = join "\n", @params;
    my $index_name = $DEFAULT{prefix} .'index';

    # Dropping may fail when the index does not exist yet; that is fine.
    do_error_is_ok( $dbh => "DROP INDEX $index_name" );
    $dbh->do(
        "CREATE INDEX $index_name ON Attachments(Content)
        indextype is ctxsys.context parameters('
            $index_params
        ')",
    );

    print_rt_config( IndexName => $index_name );
}
else {
    die "FTS on $DB{type} is not yet supported";
}

# Make sure the mysql server reports the sphinx storage engine as available.
# Returns quietly when it does; otherwise prints an explanation to STDERR
# and exits with status 1.
sub check_sphinx {
    my $dbh = $RT::Handle->dbh;

    # The row is (variable name, value). A server that does not report the
    # variable at all yields no row, so $sphinx may be undefined.
    my ( undef, $sphinx ) = $dbh->selectrow_array("show variables like 'have_sphinx'");
    return if defined $sphinx && lc($sphinx) eq 'yes';

    print STDERR "Mysql server you have compiled without sphinx storage engine (sphinxse).\n";
    print STDERR "Either use system packages with sphinxse, binaries from Sphinx site
                      or compile mysql according to instructions in Sphinx's docs.\n";
    exit 1;
}

# Create the 'datastore' preference from the given arguments and return
# the "datastore <name>" clause for the index parameters.
sub ora_create_datastore {
    my $preference = ora_create_preference( @_, name => 'datastore' );
    return "datastore $preference";
}

# Set up the format column first, then the 'filter' preference, and return
# both clauses ("format column ..." and "filter ...") separated by a newline.
sub ora_create_filter {
    my $format_column = ora_create_format_column();
    my $preference    = ora_create_preference( @_, name => 'filter' );
    return "format column $format_column\n" . "filter $preference";
}

# Create the 'lexer' preference from the given arguments and return the
# "lexer <name>" clause for the index parameters.
sub ora_create_lexer {
    my $preference = ora_create_preference( @_, name => 'lexer' );
    return "lexer $preference";
}

# Create the 'word_list' preference from the given arguments and return
# the "wordlist <name>" clause for the index parameters.
sub ora_create_word_list {
    my $preference = ora_create_preference( @_, name => 'word_list' );
    return "wordlist $preference";
}

# (Re)create the stop list named "<prefix>stop_list" from a file holding
# one stop word per line.
#
# Takes an optional file name (default 'etc/stopwords/en.txt'). Returns
# the "stoplist <name>" clause for the index parameters, or an empty
# string when the file does not exist. Dies if the file cannot be opened.
sub ora_create_stop_list {
    my $file = shift || 'etc/stopwords/en.txt';
    return '' unless -e $file;

    my $name = $DEFAULT{'prefix'} .'stop_list';

    # Dropping may fail when the stop list does not exist yet; ignore that.
    do_error_is_ok( $dbh => 'begin ctx_ddl.drop_stoplist(?); end;', $name );

    $dbh->do(
        'begin ctx_ddl.create_stoplist(?, ?);  end;',
        undef, $name, 'BASIC_STOPLIST'
    );

    open( my $fh, '<:utf8', $file )
        or die "couldn't open file '$file': $!";
    while ( my $word = <$fh> ) {
        chomp $word;

        # Tolerate CRLF line ends, padding and blank lines: an empty stop
        # word would otherwise be sent to add_stopword.
        $word =~ s/^\s+//;
        $word =~ s/\s+$//;
        next unless length $word;

        $dbh->do(
            'begin ctx_ddl.add_stopword(?, ?); end;',
            undef, $name, $word
        );
    }
    close $fh;
    return sprintf 'stoplist %s', $name;
}

# (Re)create the section group named "<prefix>section_group" with the type
# given in the 'type' argument and return the "section group <name>" clause.
sub ora_create_section_group {
    my %spec = @_;
    my $group_name = $DEFAULT{'prefix'} .'section_group';

    # An old group may not exist; a failed drop is not an error.
    do_error_is_ok( $dbh => 'begin ctx_ddl.drop_section_group(?); end;', $group_name );

    my @binds = ( $group_name, $spec{'type'} );
    $dbh->do( 'begin ctx_ddl.create_section_group(?, ?);  end;', undef, @binds );

    return "section group $group_name";
}

# Create the 'storage' preference from the given arguments and return the
# "storage <name>" clause for the index parameters.
sub ora_create_storage {
    my $preference = ora_create_preference( @_, name => 'storage' );
    return "storage $preference";
}

# Prepare the Attachments column that tells the index how to treat each row.
#
# Steps, in order:
#   1. add the ContentOracleFormat column unless column_info finds it;
#   2. create or replace the "<prefix>detect_format_simple" PL/SQL function
#      that maps a content type (and optionally a file name) to 'text',
#      'binary' or 'ignore';
#   3. fill the column for rows where it is still NULL;
#   4. create or replace a BEFORE INSERT trigger that fills it for new rows.
#
# Takes no arguments and returns the column name. Uses the file-level $dbh
# and %DEFAULT.
sub ora_create_format_column {
    my $column_name = 'ContentOracleFormat';

    # The lookup uses upper-cased table and column names; presumably this
    # matches how Oracle reports unquoted identifiers -- TODO confirm.
    unless (
        $dbh->column_info(
            undef, undef, uc('Attachments'), uc( $column_name )
        )->fetchrow_array
    ) {
        $dbh->do(qq{
            ALTER TABLE Attachments ADD $column_name VARCHAR2(10)
        });
    }

    # The function source is assembled in pieces so that the CASE branches
    # can depend on RT settings. The 'parent' and 'encoding' parameters are
    # declared but not referenced by any branch below.
    my $detect_format = qq{
        CREATE OR REPLACE FUNCTION $DEFAULT{prefix}detect_format_simple(
            parent IN NUMBER,
            type IN VARCHAR2,
            encoding IN VARCHAR2,
            fname IN VARCHAR2
        )
        RETURN VARCHAR2
        AS
            format VARCHAR2(10);
        BEGIN
            format := CASE
    };

    # When set, anything with a file name is ignored; this branch comes
    # first in the CASE, so it wins over the type-based branches.
    if ( $RT::DontSearchFileAttachments ) {
        $detect_format .= qq{
                WHEN fname IS NOT NULL THEN 'ignore'
        };
    }

    # Value used for text/rtf and for any type not matched by another
    # branch: 'ignore' when $RT::DontSearchBinaryAttachments is set,
    # 'binary' otherwise.
    my $binary = $RT::DontSearchBinaryAttachments? 'ignore' : 'binary';
    $detect_format .= qq{
                WHEN type = 'text' THEN 'text'
                WHEN type = 'text/rtf' THEN '$binary'
                WHEN type LIKE 'text/%' THEN 'text'
                WHEN type LIKE 'message/%' THEN 'text'
                WHEN type LIKE 'multipart/%' THEN 'ignore'
                WHEN type LIKE 'image/%' THEN 'ignore'
                WHEN type LIKE 'audio/%' THEN 'ignore'
                WHEN type LIKE 'video/%' THEN 'ignore'
                WHEN type LIKE '%signature%' THEN 'ignore'
                WHEN type LIKE '%pkcs7%' THEN 'ignore'
                WHEN type LIKE '%compress%' THEN 'ignore'
                WHEN type LIKE '%zip%' THEN 'ignore'
                WHEN type LIKE '%tar%' THEN 'ignore'
                WHEN type LIKE '%/octet-stream' THEN 'ignore'
                ELSE '$binary'
            END;
            RETURN format;
        END;
    };
    ora_create_procedure( $detect_format );

    # Fill the column for rows that do not have a value yet.
    $dbh->do(qq{
        UPDATE Attachments
        SET $column_name = $DEFAULT{prefix}detect_format_simple(
            Parent,
            ContentType, ContentEncoding,
            Filename
        )
        WHERE $column_name IS NULL
    });

    # Set the column on every newly inserted row. The trigger fires on
    # INSERT only, so later updates of a row do not recompute the value.
    $dbh->do(qq{
        CREATE OR REPLACE TRIGGER $DEFAULT{prefix}set_format
        BEFORE INSERT
        ON Attachments
        FOR EACH ROW
        BEGIN
            :new.$column_name := $DEFAULT{prefix}detect_format_simple(
                :new.Parent,
                :new.ContentType, :new.ContentEncoding,
                :new.Filename
            );
        END;
    });
    return $column_name;
}

# (Re)create one preference through ctx_ddl.
#
# Named arguments: 'name' (appended to the configured prefix), 'type'
# (the preference object type) and optional 'attributes' (hash ref of
# attribute => value pairs set on the new preference).
# Returns the full preference name.
sub ora_create_preference {
    my %spec = @_;
    my $pref_name = $DEFAULT{'prefix'} . $spec{'name'};

    # The preference may not exist yet, so a failed drop is ignored.
    do_error_is_ok( $dbh => 'begin ctx_ddl.drop_preference(?); end;', $pref_name );
    $dbh->do(
        'begin ctx_ddl.create_preference(?, ?);  end;',
        undef, $pref_name, $spec{'type'}
    );

    my $attributes = $spec{'attributes'};
    if ( $attributes ) {
        foreach my $attr ( keys %{ $attributes } ) {
            $dbh->do(
                'begin ctx_ddl.set_attribute(?, ?, ?);  end;',
                undef, $pref_name, $attr, $attributes->{ $attr }
            );
        }
    }

    return $pref_name;
}

# Run a statement that creates a PL/SQL object and die with the most
# useful message available when it fails.
#
# Takes the statement text. Returns nothing on success. On failure it dies
# with the handle's error string, or, for error 6550, with the text
# reported by the driver's 'plsql_errstr' function when that is non-empty.
sub ora_create_procedure {
    my $text = shift;

    # Error raising has to be switched off on the handle itself: the
    # attribute hash given to do() is only for driver-specific hints, so
    # passing RaiseError there would not stop the handle from dying before
    # the diagnostics below are reached.
    my $status = do {
        local $dbh->{'RaiseError'} = 0;
        local $dbh->{'PrintError'} = 0;
        $dbh->do($text);
    };

    # Statement succeeded
    return if $status;

    my $err = $dbh->err || 0;
    if ( 6550 != $err ) {
        # Utter failure
        die $dbh->errstr;
    }
    else {
        my $msg = $dbh->func( 'plsql_errstr' );
        die $dbh->errstr if !defined $msg;
        die $msg if $msg;
    }
}

# Open a new connection to the RT database as the DBA given on the command
# line (--dba / --dba-password) and return the DBI handle.
#
# The DSN is the one RT::Handle builds for the configured database. The
# handle is created with RaiseError and PrintError switched on. Every call
# opens a fresh connection. Dies if the connection cannot be made.
sub dba_handle {
    if ( $DB{'type'} eq 'Oracle' ) {
        # Oracle client character set settings, applied before connecting.
        $ENV{'NLS_LANG'} = "AMERICAN_AMERICA.AL32UTF8";
        $ENV{'NLS_NCHAR'} = "AL32UTF8";
    }

    # Direct method call instead of the ambiguous indirect object syntax
    # ("new RT::Handle").
    my $dsn = do { my $h = RT::Handle->new; $h->BuildDSN; $h->DSN };
    my $dbh = DBI->connect(
        $dsn, $DB{admin}, $DB{admin_password},
        { RaiseError => 1, PrintError => 1 },
    );
    unless ( $dbh ) {
        die "Failed to connect to $dsn as user '$DB{admin}': ". $DBI::errstr;
    }
    return $dbh;
}

# Run a statement whose failure is acceptable: errors are neither raised
# nor printed while it runs. Arguments are the handle, the statement and
# any bind values; returns whatever the handle's do() returns.
sub do_error_is_ok {
    my ( $handle, $statement, @binds ) = @_;

    # Both settings are restored automatically when the sub returns.
    local $handle->{'PrintError'} = 0;
    local $handle->{'RaiseError'} = 0;

    return $handle->do( $statement, undef, @binds );
}

# Run a statement whose failure should be reported but must not abort the
# script: errors are printed, not raised. Arguments are the handle, the
# statement and any bind values; returns whatever the handle's do() returns.
sub do_print_error {
    my ( $handle, $statement, @binds ) = @_;

    # Both settings are restored automatically when the sub returns.
    local $handle->{'PrintError'} = 1;
    local $handle->{'RaiseError'} = 0;

    return $handle->do( $statement, undef, @binds );
}

# Ask the user a question on STDOUT and read the answer from STDIN.
#
# Named arguments:
#   message - text of the question
#   default - value used when the user just presses Enter
#   silent  - when true, nothing is asked and the default is returned
#
# Returns the answer without its line end. The default is returned for an
# empty answer or when STDIN is at end of file.
sub prompt {
    my %args = ( @_ );
    return $args{'default'} if $args{'silent'};

    # Defined-and-non-empty test, so that a default of '0' still counts.
    my $has_default = defined $args{'default'} && length $args{'default'};

    local $| = 1;
    print $args{'message'};
    if ( $has_default ) {
        print "\n[". $args{'default'} .']: ';
    } else {
        print ":\n";
    }

    my $res = <STDIN>;

    # End of input (closed or redirected STDIN): nothing to chomp.
    return $args{'default'} unless defined $res;

    chomp $res;

    # Only a truly empty answer selects the default; '0' is a real answer.
    return $args{'default'} if $res eq '' && $has_default;
    return $res;
}

# Small output helpers; each one returns 1.
#   verbose - print the arguments plus a newline when $OPT{verbose} is set
#   debug   - print the arguments plus a newline when $OPT{debug} is set
#   error   - log through $RT::Logger->error, then pass on to verbose()
#   warning - log through $RT::Logger->warning, then pass on to verbose()
# The option parsing visible in this script sets neither $OPT{verbose}
# nor $OPT{debug}.
sub verbose  { print @_, "\n" if $OPT{verbose}; 1 }
sub debug    { print @_, "\n" if $OPT{debug}; 1 }
sub error    { $RT::Logger->error( @_ ); verbose(@_); 1 }
sub warning  { $RT::Logger->warning( @_ ); verbose(@_); 1 }

# Show the parts of this script's documentation that match the current
# database type. A true argument marks the call as an error: only NAME and
# the database's USAGE are requested and the value is passed on as
# ExitValue. Otherwise NAME, DESCRIPTION and the whole database section
# are requested.
sub show_help {
    my $error = shift;

    my $db_section = uc( $DB{'type'} );
    my $sections;
    if ( $error ) {
        $sections = 'NAME|'. $db_section .'/USAGE';
    }
    else {
        $sections = 'NAME|DESCRIPTION|'. $db_section;
    }

    RT::Interface::CLI->ShowHelp(
        ExitValue => $error,
        Sections  => $sections,
    );
}

# Print the %FullTextSearch snippet the administrator should add to the RT
# site config.
#
# Takes name => value pairs. Every pair with a defined value becomes one
# line of the snippet, after the fixed Enable and Indexed entries. Keys are
# printed in sorted order so the output is the same on every run.
sub print_rt_config {
    my %args = @_;
    my $config = <<END;

Configure your RT via site config:
Set( %FullTextSearch,
    Enable  => 1,
    Indexed => 1,
END

    $config .= "    $_   => '$args{$_}',\n"
        foreach grep { defined $args{$_} } sort keys %args;
    $config .= ");\n";

    print $config;
}

# Announce and run a schema change over the regular RT database handle.
sub insert_schema {
    my @schema = @_;
    my $notice = "Going to do the following change in the DB:";
    return _insert_schema( $dbh, $notice, @schema );
}

# Announce and run a schema change over a new DBA connection.
sub insert_schema_as_dba {
    my @schema = @_;
    my $notice = "Going to do the following change in the DB:";
    return _insert_schema( dba_handle(), $notice, @schema );
}

# Print a notice followed by the statement, then run the statement.
#
# Arguments: database handle, notice text, statement.
# Nothing is run when $OPT{'dryrun'} is true (the option parsing visible
# in this script does not set it). Dies when the statement fails.
sub _insert_schema {
    my $dbh = shift;
    my $message = shift;
    my $schema = shift;
    print "$message\n";
    print $schema;

    # Single-line statements come without a line end; add one so that the
    # output that follows starts on its own line.
    print "\n" unless $schema =~ /\n\z/;

    return if $OPT{'dryrun'};

    my $res = $dbh->do( $schema );
    unless ( $res ) {
        die "Couldn't run DDL query: ". $dbh->errstr;
    }
}

=head1 NAME

rt-setup-fulltext-index - Helps create indexes for full text search

=head1 DESCRIPTION

Full text search is very database specific. This script sets up indexing
for Oracle, Pg and mysql. Below you can find specifics for your current
DB.

At the end, the script prints the snippet that you should put into your RT
config to enable the created index. Don't forget to restart the web server
after changing the config.

=head1 ORACLE

=head2 USAGE

    rt-setup-fulltext-index --help
    rt-setup-fulltext-index --dba sysdba --dba-password 'secret'

=head2 DESCRIPTION

Creates Oracle CONTEXT index on the Content column in the Attachments table.
As well, it creates several preferences, functions and triggers to support
this index.

CONTEXT index needs periodic synchronization after any updates. Either use
F<sbin/rt-fulltext-indexer> or read its documentation for alternatives.

=head1 PG

=head2 USAGE

    rt-setup-fulltext-index --help
    rt-setup-fulltext-index --dba postgres --dba-password 'secret'

=head2 DESCRIPTION

Creates an additional column to store a tsvector and then creates either
a GIN or a GiST index on it. Use F<sbin/rt-fulltext-indexer> to keep the
index in sync.

=head1 MYSQL

=head2 USAGE

    rt-setup-fulltext-index --help

=head2 DESCRIPTION

Full text search in mysql is implemented through Sphinx and its MySQL
storage engine (SphinxSE), so you need mysql compiled with it. Use
F<sbin/rt-fulltext-indexer> to keep the index in sync.

=head1 AUTHOR

Ruslan Zakirov E<lt>ruz@bestpractical.comE<gt>

=cut

