#!/usr/bin/env ruby
# fastri-server: serve RI documentation over DRb 
# Copyright (C) 2006  Mauricio Fernandez <mfp@acm.org>

require 'fastri/version'
require 'fastri/ri_index'
require 'fastri/ri_service'
require 'fastri/util'
require 'fastri/full_text_indexer'
require 'enumerator'

# Version of this server script; printed in the startup banner next to the
# FastRI library version.
FASTRI_SERVER_VERSION = "0.0.1"

# Builds the RI index from the system, site and home RI directories plus the
# directories reported by FastRI::Util.gem_directories_unique, prints a short
# summary, and caches the index on disk.
#
# index_file:: path of the file the marshalled index is written to.
#
# Returns the index object created by FastRI::RiIndex.new_from_paths.
def make_index(index_file)
  # The local environment is trusted --- what we don't trust is what would come
  # from the DRb connection. This way the RiService will be untainted, and we
  # will be able to use it with $SAFE = 1.
  ObjectSpace.each_object{|obj| obj.untaint unless obj.frozen? }

  # Only keep the standard RI locations that actually exist on this machine.
  paths = [ RI::Paths::SYSDIR, RI::Paths::SITEDIR, RI::Paths::HOMEDIR ].find_all do |p|
    p && File.directory?(p)
  end
  FastRI::Util.gem_directories_unique.each do |name, version, path|
    paths << path
    puts "Indexing RI docs for #{name} version #{version || "unknown"}."
  end

  puts "Building index."
  t0 = Time.new
  ri_reader = FastRI::RiIndex.new_from_paths(paths)
  # File.open, not Kernel#open: the latter treats a path beginning with "|"
  # as a command to spawn, and index_file can come from the command line.
  File.open(index_file, "wb"){|io| Marshal.dump ri_reader, io}
  puts <<EOF
Indexed:
* #{ri_reader.num_methods} methods
* #{ri_reader.num_namespaces} classes/modules
Needed #{Time.new - t0} seconds
EOF
  ri_reader
end

# Flattens one comment node into plain text.
#
# comment:: an object indexed with the string keys "body", "contents" and
#           "text" (a Hash in the data loaded by make_full_text_index).
#
# Returns the "body" entry when it is a String; otherwise the recursively
# linearized "contents" entries joined with newlines when "contents" is an
# Array; otherwise the "text" entry, or nil when none of these apply.
def linearize(comment)
  body = comment["body"]
  return body if String === body

  children = comment["contents"]
  if Array === children
    return children.map{|child| linearize(child)}.join("\n")
  end

  comment["text"] || nil
end

# Builds the full-text index over the descriptions found in every *.yaml file
# under the RI documentation directories and writes it into +dir+ as
# "full_text.dat" and "suffixes.dat".
#
# dir:: directory receiving the index files; created when it does not exist
#       (only the last path component is created, as Dir.mkdir does).
#
# Files whose YAML cannot be loaded, or whose top-level document is not a
# mapping, are skipped; the number of load failures is reported on stderr.
def make_full_text_index(dir)
  # Only keep the standard RI locations that actually exist on this machine.
  paths = [ RI::Paths::SYSDIR, RI::Paths::SITEDIR, RI::Paths::HOMEDIR ].find_all do |p|
    p && File.directory?(p)
  end
  FastRI::Util.gem_directories_unique.each do |name, version, path|
    paths << path
    puts "Indexing RI docs for #{name} version #{version || "unknown"}."
  end
  Dir.mkdir(dir) unless File.exist?(dir)

  # NOTE(review): 40 is handed straight to FullTextIndexer.new; its meaning is
  # defined by that class, not here.
  indexer = FastRI::FullTextIndexer.new(40)
  bad = 0
  paths.each do |path|
    Dir["#{path}/**/*.yaml"].each do |yamlfile|
      yaml = File.read(yamlfile)
      begin
        # Drop everything from " !" to the end of each line (the YAML type
        # tags) before loading the document.
        data = YAML.load(yaml.gsub(/ \!.*/, ''))
      rescue StandardError
        # StandardError rather than Exception, so that Ctrl-C and exit
        # requests still stop the indexing run.
        bad += 1
        next
      end
      # An empty or non-mapping document has no 'comment' entry to index;
      # skip it instead of failing on data['comment'].
      next unless data.is_a?(Hash)

      desc = (data['comment']||[]).map{|x| linearize(x)}.join("\n")
      desc.gsub!(/<\/?(em|b|tt|ul|ol|table)>/, "")
      # &amp; is decoded last so that e.g. "&amp;lt;" ends up as "&lt;".
      desc.gsub!(/&quot;/, "'")
      desc.gsub!(/&lt;/, "<")
      desc.gsub!(/&gt;/, ">")
      desc.gsub!(/&amp;/, "&")
      unless desc.empty?
        indexer.add_document(yamlfile, desc) 
      end
    end
  end
  warn "Skipped #{bad} RI file(s) that could not be loaded." if bad > 0

  File.open(File.join(dir, "full_text.dat"), "wb") do |fulltextIO|
    File.open(File.join(dir, "suffixes.dat"), "wb") do |suffixesIO|
      indexer.build_index(fulltextIO, suffixesIO)
    end
  end
end

#{{{ Main program

require 'optparse'

# Command-line handling. Defaults live in +options+; each switch below only
# records what was asked for. Rebuild requests (-b / -B) are carried out after
# parsing has finished, so that --index-file and --full-text-dir are honoured
# wherever they appear on the command line.
home = FastRI::Util.find_home
options = {:allowed_hosts => ["127.0.0.1"], :addr => "127.0.0.1",
  :index_file => File.join(home, ".fastri-index"),
  :do_full_text => false, 
  :full_text_dir => File.join(home, ".fastri-fulltext"),
  :rebuild => nil,   # :index or :full_text once -b / -B has been seen
}
OptionParser.new do |opts|
  opts.version = FastRI::FASTRI_VERSION
  opts.release = FastRI::FASTRI_RELEASE_DATE
  opts.banner = "Usage: fastri-server.rb [options]"

  opts.on("-a", "--allow HOST", "Allow connections from HOST.",
          "(default: 127.0.0.1)") do |host|
    options[:allowed_hosts] << host
  end

  opts.on("-s", "--bind ADDR", "Listen to connections on ADDR.",
          "(default: 127.0.0.1)") do |addr|
    options[:addr] = addr
  end

  opts.on("--index-file=FILE", "Use index file.",
          "(default: #{options[:index_file]})") do |file|
    options[:index_file] = file
  end

  opts.on("-b", "--rebuild-index", "Only rebuild index.") do 
    # The first rebuild switch on the command line wins.
    options[:rebuild] ||= :index
  end

  opts.on("-F", "--full-text-dir DIR", "Place full-text index in DIR",
          "(default: #{options[:full_text_dir]})") do |dir|
    options[:full_text_dir] = dir if dir
    options[:do_full_text]  = true
  end

  opts.on("-B", "--rebuild-full-text", "Rebuild full-text index.") do
    # The first rebuild switch on the command line wins.
    options[:rebuild] ||= :full_text
  end

  opts.on("-h", "--help", "Show this help message") do 
    puts opts
    exit
  end
end.parse!

# Every option has been seen by now, so the rebuild uses the final paths.
case options[:rebuild]
when :index
  make_index(options[:index_file])
  exit 0
when :full_text
  make_full_text_index(options[:full_text_dir])
  exit 0
end

# Reuse the cached index when it exists and can be unmarshalled; a missing or
# unreadable cache falls through to a full rebuild.
ri_reader = nil
if File.exist?(options[:index_file])
  begin
    # File.open, not Kernel#open: the latter would run a path beginning with
    # "|" as a command.
    ri_reader = File.open(options[:index_file], "rb"){|io| Marshal.load io }
  rescue StandardError
    ri_reader = nil
  end
end

ri_reader ||= make_index(options[:index_file])

service = FastRI::RiService.new(ri_reader)
GC.start

require 'rinda/ring'
require 'rinda/tuplespace'
require 'drb/acl'

# Reopen RiService to mix in DRbUndumped, so that DRb hands the service to
# clients as a remote reference instead of marshalling a copy of it.
class FastRI::RiService
  include DRbUndumped
end

#{{{ start DRb service
# ACL rules are a flat list of action/host pairs: refuse everybody first, then
# allow each configured host.
acl_opt = options[:allowed_hosts].inject(["deny", "all"]) do |rules, host|
  rules << "allow" << host.strip
end
DRb.install_acl(ACL.new(acl_opt))

# --bind accepts "HOST" or "HOST:PORT"; port 0 is used when none is given.
bind_host = options[:addr][/^[^:]+/]    || "127.0.0.1"
bind_port = options[:addr][/:(\d+)/, 1] || 0
DRb.start_service("druby://#{bind_host}:#{bind_port}")

# Raise the safe level only after the local setup above is finished.
$SAFE = 1

$stdout.sync = true
# Find an existing Ring server (the lookup is given the argument 2), or start
# a private one when the lookup fails.
begin
  puts "Looking for Ring server..."
  finder = Rinda::RingFinger.new
  service_ts = finder.lookup_ring_any(2)
  puts "Located Ring server at #{service_ts.__drburi}"
rescue StandardError
  # StandardError rather than Exception: an interrupt or exit request must
  # terminate the server, not be mistaken for "no Ring server".
  puts "No Ring server found, starting my own."
  service_ts = Rinda::TupleSpace.new
  ring_server = Rinda::RingServer.new(service_ts)
end

# Advertise the service in the tuple space found or created above.
begin
  service_ts.write([:name, :FastRI, service, 'FastRI documentation'], Rinda::SimpleRenewer.new)
rescue StandardError
  puts <<EOF
The FastRI service could not be registered in the Ring.
This is probably due to the Ring being bound to an unreachable address.
You can specify which address the Ring should be bound to with
  fastri-server --bind ADDRESS --allow ADDRESS
EOF
  # Registration failed, so report failure to the calling process.
  exit 1
end

# {{{ Init message
# Announce both versions and the URI DRb reports, then list the ACL rules one
# action/host pair per line.
banner = "fastri-server #{FASTRI_SERVER_VERSION} (FastRI #{FastRI::FASTRI_VERSION}) " \
         "listening on #{DRb.uri}"
puts banner
puts "ACL:"
acl_opt.each_slice(2) do |action, host|
  puts format("%-5s %s", action, host)
end

# {{{ GC periodically
# keeps the process hot too :)
# A background thread forces a collection, sleeps 300 seconds, and repeats.
Thread.new do
  loop { GC.start; sleep(300) }
end

if $DEBUG
  # trying to see where our memory is going: walk every live object, keep a
  # per-class [instance count, estimated bytes] tally and a histogram of array
  # lengths in buckets of 10, then print the 11 heaviest classes.
  usage_by_class   = Hash.new{|table, klass| table[klass] = [0, 0]}
  arrays_by_bucket = Hash.new{|table, bucket| table[bucket] = 0}
  ObjectSpace.each_object do |obj|
    # rough estimates
    estimate =
      case obj
      when Array
        arrays_by_bucket[obj.size / 10] += 1
        if (0..16) === obj.size
          20 + 64
        else
          20 + 4 * obj.size * 1.5
        end
      when Hash
        40 + 4 * [obj.size / 5, 11].max + 16 * obj.size
      when String
        30 + obj.size
      else
        120 # the iv_tbl, etc
      end
    seen, total = usage_by_class[obj.class]
    usage_by_class[obj.class] = [seen + 1, total + estimate]
  end

  # Largest estimated footprint first.
  heaviest = usage_by_class.sort_by{|_klass, (_seen, total)| total}.reverse[0..10]
  heaviest.each do |klass, (seen, total)|
    puts format("%-20s  %7d  %9d", klass, seen, total)
  end

  puts "Array sizes:"
  arrays_by_bucket.sort.each do |bucket, howmany|
    puts format("%5d  %6d", bucket * 10, howmany)
  end
end

# Block the main thread for as long as the DRb server thread is running.
DRb.thread.join

# vi: set sw=2 expandtab:
