#!/usr/bin/python
#
# Read adblock plus patterns on standard input, write adzapper patterns
# to standard output.
#       - Cameron Simpson <cs@zip.com.au> 12feb2011
#

import sys

def abp2zap(abp):
  ''' Convert one Adblock Plus filter line into an adzapper class and pattern.

      Adblock Plus rule doco here:
        http://adblockplus.org/en/filters

      Adzapper syntax doco here:
        http://adzapper.sf.net/#syntax

      Parameters:
        abp: a single line from an Adblock Plus filter list; surrounding
             whitespace (including the trailing newline) is ignored.

      Returns a 2-tuple (adclass, ptn):
        (None, None) for comments ('!...'), the ini style header ('[...]')
          and empty lines;
        otherwise adclass is 'PASS' for exception ('@@') rules, 'ADJS' for
          rules carrying the 'script' option and 'AD' for everything else,
          and ptn is the adzapper pattern string.
  '''
  ptn = abp.strip()

  # skip comments, ini file header, empty lines
  if not ptn or ptn.startswith(('!', '[')):
    return (None, None)

  # collect ptn$options,...
  ptn, _, opttext = ptn.partition('$')
  options = [ o for o in ( s.strip() for s in opttext.split(',') ) if o ]

  # this isn't a direct match for the adblock script option
  # in adblock 'script' means only apply to URLs fetched as external
  # scripts, whereas all the zapper can do is hope this pattern will
  # only match external scripts
  if 'script' in options:
    adclass = 'ADJS'
  else:
    adclass = 'AD'

  # replace '^' by regexp for "separator chars"
  # NOTE(review): the Adblock Plus doco describes '^' as matching anything
  # that is NOT a letter, digit or one of "_-.%"; this class looks like the
  # complement of that - confirm against the adzapper syntax before changing.
  ptn = ptn.replace('^', '[-a-zA-Z0-9.%]')

  # exception rules: always PASS, whatever the options said
  explicit_url = False
  if ptn.startswith('@@'):
    ptn = ptn[2:]
    adclass = 'PASS'
    # an exception that already spells out its scheme is used as is
    # instead of being wrapped in "http://**"
    explicit_url = ptn.startswith(
        ('http://', '|http://', 'https://', '|https://'))

  if explicit_url:
    if ptn.startswith('|'):
      ptn = ptn[1:]
  # domain component
  elif ptn.startswith('||'):
    ptn = 'http://(*.|)' + ptn[2:]
  # start of URL
  elif ptn.startswith('|'):
    ptn = ptn[1:]
  # anywhere in URL
  else:
    ptn = 'http://**' + ptn

  # end of URL
  if ptn.endswith('|'):
    ptn = ptn[:-1]
  # anywhere in URL
  else:
    ptn = ptn + '**'

  return adclass, ptn

def main(argv, input=None, output=None):
  ''' Convert Adblock Plus filter lines from `input` to adzapper lines on `output`.

      Parameters:
        argv:   command line; only argv[0] is used, as the prefix of
                error messages.
        input:  iterable of text lines, default sys.stdin.
        output: object with a write() method, default sys.stdout.

      Lines for which abp2zap() returns no class (comments, headers,
      blank lines) are copied through prefixed with '# '; every other line
      is written as "CLASS PATTERN".

      Returns the exit status: 0 on success, 1 if a line without a
      trailing newline was seen (reported on standard error as an
      unexpected EOF; the line is still converted).
  '''
  if input is None:
    input = sys.stdin
  if output is None:
    output = sys.stdout
  cmd = argv[0]

  ok = True
  for lineno, abp in enumerate(input, 1):
    if not abp.endswith('\n'):
      # Truncated final line: complain, but do not lose the rule and do
      # not rely on assert, which vanishes under "python -O".
      sys.stderr.write("%s: %s, %d: unexpected EOF\n" % (cmd, input, lineno))
      ok = False
      abp += '\n'
    adclass, ptn = abp2zap(abp.strip())
    if adclass is None:
      # keep skipped lines visible in the output as comments
      output.write('# ')
      output.write(abp)
    else:
      output.write(adclass)
      output.write(' ')
      output.write(ptn)
      output.write('\n')

  return 0 if ok else 1

if __name__ == '__main__':
  # Run as a script: the process exit status is whatever main() returns.
  exit_status = main(sys.argv)
  sys.exit(exit_status)
