#!/usr/bin/perl

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell
#
# gcap
# Copyright (C) 2010-2011  Toni Gundogdu <legatvs@cpan.org>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#

use 5.010001;
use feature 'say';

use warnings;
use strict;

binmode STDOUT, ":utf8";
binmode STDERR, ":utf8";

use version 0.77 (); our $VERSION = version->declare("0.1.1");

use Getopt::ArgvFile(home => 1, startupFilename => [qw(.gcaprc)]);
use Getopt::Long qw(:config bundling);
use Carp qw(croak);

exit main();

sub treat_argv
{

  # Convert args (of length of 11) to Youtube URLs. Do this
  # before calling Getopt::* as some IDs may start with '-'
  # which confuses the Getopt::*.

  my @argv;
  foreach my $arg (@ARGV)
  {
    if (length($arg) == 11)
    {
      push @argv, "http://youtube.com/v/$arg";
    }
    else
    {
      push @argv, $arg;
    }
  }
  @ARGV = @argv;
}

sub print_version
{
  eval "require Umph::Prompt";
  my $p = $@ ? "" : ", Umph::Prompt version $Umph::Prompt::VERSION";
  say "gcap version $VERSION$p
Copyright (C) 2010-2011  Toni Gundogdu
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.";
  exit 0;
}

sub print_help
{
  require Pod::Usage;
  Pod::Usage::pod2usage(-exitstatus => 0, -verbose => 1);
}

my %config;

sub check_umph_prompt
{
  if ($config{'interactive'} and not eval 'require Umph::Prompt')
  {
    say STDERR
      qq/WARNING Umph::Prompt not found, ignoring --interactive option/;
    $config{interactive} = 0;
  }
}

sub init
{
  treat_argv();

  GetOptions(
             \%config,
             'interactive|i',
             'title|t',
             'regexp|r=s',
             'proxy=s',
             'no_proxy|no-proxy',
             'quiet|q',
             'version' => \&print_version,
             'help'    => \&print_help,
            ) or exit 1;

  print_help if scalar @ARGV == 0;

  $config{regexp} ||= "/(\\w|\\s)/g";

  apply_regexp($config{regexp});    # Check regexp syntax
  check_umph_prompt;
}

sub spew_qe {print STDERR @_ unless $config{quiet}}

my @items;
my $title;

sub main
{
  init;

  my $req_body = "http://video.google.com/timedtext?hl=en&type=list&v=";
  my $url      = $ARGV[0];

  my $q = qr{(?:embed|v)[=/]((?>[-_\w]{11}))};

  if ($url =~ /^https?:/i)
  {
    if ($url =~ /$q/)
    {
      $url = "$req_body$1";
    }
    else
    {
      croak qq/error: "$url" looks nothing like a youtube page url\n/;
    }
  }
  else
  {
    $url = "$req_body$url";
  }

  spew_qe "Checking ...";

  require LWP;
  my $a = new LWP::UserAgent;
  $a->env_proxy;    # http://search.cpan.org/perldoc?LWP::UserAgent
  $a->proxy('http', $config{proxy}) if $config{proxy};
  $a->no_proxy('') if $config{no_proxy};

  require XML::DOM;
  my $p = new XML::DOM::Parser(LWP_UserAgent => $a);
  my $d = $p->parsefile($url);
  my $r = $d->getDocumentElement;
  my $n = 0;

  for my $e ($r->getElementsByTagName("track"))
  {
    my %tmp = (
       name => $e->getAttributeNode("name")->getValue || "",
       lang_code   => $e->getAttributeNode("lang_code")->getValue,
       lang_transl => $e->getAttributeNode("lang_translated")->getValue,
       selected    => 1,
    );
    $tmp{title} = $tmp{lang_transl}; # So that Umph::Prompt works
    push @items, \%tmp;
    spew_qe((++$n % 5 == 0) ? " " : ".");
  }
  $d->dispose;

  spew_qe "done.\n";

  my $v = $1
    if $url =~ /$q/
      or croak "error: $url: no match: video id\n";

  get_title($v, $a) if $config{title};
  open_prompt() if $config{interactive};

  my $t = 0;

  foreach (@items)
  {
    ++$t if $_->{selected};
  }
  croak "error: no input: no captions found\n" unless $t;

  require HTML::Entities;

  $n = 0;

  foreach (@items)
  {
    next unless $_->{selected};

    $url =
        "http://video.google.com/timedtext?"
      . "hl=$_->{lang_code}"
      . "&lang=$_->{lang_code}"
      . "&name=$_->{name}" . "&v=$v";

    my $fname = sprintf "%s_%s.srt", $v, $_->{lang_code};

    if ($title)
    {
      $title = apply_regexp($config{regexp}, $title);
      $fname = sprintf "%s_%s.srt", $title, $_->{lang_code};
    }

    open my $fh, ">", $fname or die "$fname: $!\n";
    binmode $fh, ":utf8";

    spew_qe sprintf "(%02d of %02d) ", ++$n, $t if $t > 0;
    spew_qe "Saving $fname ...";

    $d = $p->parsefile($url);
    $r = $d->getDocumentElement;

    my $i          = 1;
    my $last_start = 0;

    for my $e ($r->getElementsByTagName("text"))
    {

      my $tmp = $e->getFirstChild;
      next unless $tmp;

      my $text = trim($tmp->getNodeValue);
      next unless $text;
      $text = HTML::Entities::decode_entities($text);

      my $start = $e->getAttributeNode("start")->getValue;

      my $start_sec  = 0;
      my $start_msec = 0;

      if ($start =~ /(\d+)/)
      {
        $start_sec  = $1;
        $start_msec = $1
          if $start =~
            /\d+\.(\d+)/;    # should only capture 3 first digits
      }

      my @start = gmtime($start_sec);

      $tmp = $e->getAttributeNode("dur");
      my $dur = $tmp ? $tmp->getValue : $start - $last_start;

      my $end_sec = $start + $dur;

      $dur =~ /\d+\.(\d+)/;    # should only capture 3 first digits
      my $end_msec = $1 || 0;

      my @end = gmtime($end_sec);

      printf $fh
        "%d\r\n%02d:%02d:%02d,%03d --> %02d:%02d:%02d,%03d\r\n%s\r\n\r\n",
        $i++, @start[2, 1, 0], $start_msec, @end[2, 1, 0],
        $end_msec, $text;

      $last_start = $start;
    }
    $d->dispose;
    close $fh;
    spew_qe "done.\n";
  }
  0;
}

sub get_title
{
  my ($v, $a) = @_;

  my $page_url = "http://youtube.com/watch?v=$v";
  my $url      = "http://www.youtube.com/get_video_info?&video_id=$v"
    . "&el=detailpage&ps=default&eurl=&gl=US&hl=en";

  spew_qe ":: Getting video title ...";

  my $r = $a->get($url);

  unless ($r->is_success)
  {
    printf STDERR "\nerror: $page_url: %s\n", $r->status_line;
    return;
  }

  require CGI;
  my $q = CGI->new($r->content);

  if ($q->param('reason'))
  {
    printf STDERR "\nerror: %s: %s (errorcode: %d)\n",
      $page_url, trim($q->param("reason")),
      $q->param("errorcode");
  }
  else
  {
    require Encode;
    $title = trim(Encode::decode_utf8($q->param('title')));
    spew_qe "done.\n";
  }
  $title;
}

sub apply_regexp
{
  my ($re, $s) = @_;
  my ($pat, $flags);

  if ($re =~ /^\/(.*)\/(.*)$/)
  {
    $pat   = $1;
    $flags = $2;
  }
  else
  {
    croak
      qq{error: --regexp: "$re" looks nothing like `/pattern/flags`\n};
  }
  return unless $s;

  my $q = $flags =~ /i/ ? qr/$pat/i : qr/$pat/;
  join '', $flags =~ /g/ ? $s =~ /$q/g : $s =~ /$q/;
}

sub trim
{
  my $s = shift;
  $s =~ s{^[\s]+}//;
  $s =~ s{\s+$}//;
  $s =~ s{\s\s+}/ /g;
  $s;
}

sub open_prompt
{
  my $p = new Umph::Prompt(

    # Commands.
    commands => {
      q => sub {
        my ($p, $args) = @_;
        $p->exit(\@items, $args);
      },
      d => sub {
        my ($p, $args) = @_;
        $p->display(\@items, $args);
      },
      m => sub {
        my ($p, $args) = @_;
        $p->max_shown_items(@{$args});
      },
      s => sub {
        my ($p, $args) = @_;
        $p->select(\@items, $args);
      },
      h => sub {
        my ($p, $args) = @_;
        $p->help;
      },
    },

    # Callbacks. All of these are optional.
    ontoggle => sub {
      my ($p, $args) = @_;
      $p->toggle(\@items, $args);
    },
    onitems  => sub {return \@items},
    onloaded => sub {
      my ($p, $args) = @_;
      $p->display(\@items, $args);
    },

    # Other (required) settings
    total_items     => scalar @items,
    prompt_msg      => 'gcap',
    max_shown_items => 20
  );

  say STDERR qq/Enter prompt. Type "help" to get a list of commands./;
  $p->exec;
}

__END__

=head1 SYNOPSIS

gcap [-i] [-t] [-r E<lt>regexpE<gt>] [--proxy=E<lt>addrE<gt> | --no-proxy]
     [--help] E<lt>urlE<gt> | E<lt>video_idE<gt>

=head2 OPTIONS

     --help                       Print help and exit
     --version                    Print version and exit
 -q, --quiet                      Be quiet
 -i, --interactive                Run in interactive mode
 -t, --title                      Use video title in filename
 -r, --regexp arg (="/(\w|\s)/g") Cleanup title with regexp
     --proxy arg (=http_proxy)    Use proxy for http connections
     --no-proxy                   Disable use of HTTP proxy

=cut

# vim: set ts=2 sw=2 tw=72 expandtab:
