#! /usr/bin/python2

# Copyright (c) 2012 Olivier Esser ( Firstname.Lastname@gmail.com )

# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


import sys,os,re,urllib,argparse

# Name of the ICANN top-level-domain list used to recognise host names.
file_domain_names="tlds-alpha-by-domain.txt"

# Look for the list in the current directory first, then next to this script.
# The file is read inside a "with" block so the handle is always closed.
for candidate_path in (
        file_domain_names,
        os.path.join(os.path.dirname(os.path.realpath(sys.argv[0])),file_domain_names)):
    try:
        with open(candidate_path) as tld_file:
            domain_names=tld_file.readlines()
    except (IOError,OSError):
        continue
    break
else:
    # Neither location could be read: explain where to get the file and give up.
    print>>sys.stderr, "Cannot open %s" % file_domain_names
    print>>sys.stderr, "You can download it from: http://www.icann.org/en/resources/registries/tlds"
    sys.exit(1)

# Command-line interface: one positional input (file name or URL), an optional
# output file (standard output by default) and an optional privoxy section name.
parser=argparse.ArgumentParser(description="""
Convert an urlfilter list originally destinated to Opera into a privoxy list.
You have to save the output in /etc/privoxy/fanboy.action and enable it /etc/privoxy/config
(after the default.action but before the user.action).
Note that the paths mentioned may vary according to your installation.
This script has only been tested with the fanboy adblock list for Opera (available at: http://www.fanboy.co.nz/adblock/opera/).
You need tlds-alpha-by-domain.txt (to be placed in the same directory as this script)
for this script to run; download it from: http://www.icann.org/en/resources/registries/tlds
""")
parser.add_argument(
    'infile',
    help='The file to convert (can be an URL)')
parser.add_argument(
    '-o', '--outfile',
    type=argparse.FileType('w'),
    default=sys.stdout,
    help="Output file (default standard output)")
parser.add_argument(
    '-s', '--sectionname',
    default="opera-export",
    help="Name of the section to be used in the privoxy file (optional)")
args=parser.parse_args()

# Turn the raw lines of the TLD file into dotted, lower-case suffixes
# (".com", ".org", ...), dropping blank lines and lines starting with "#".
domain_names=[
    "."+entry.strip().lower()
    for entry in domain_names
    if entry.strip() and not entry.startswith("#")
]

def split_domain_path(pattern,tlds=None):
    """Split an Opera urlfilter pattern into a (domain, path) tuple.

    This is a heuristic; it cannot be guaranteed to be 100% accurate.

    Arguments:
      pattern -- the urlfilter pattern; a leading "http://", "https://" or
                 "ftp://" is removed before splitting.
      tlds    -- optional sequence of dotted suffixes (e.g. ".com") used to
                 recognise a host name; defaults to the module-level
                 domain_names list.

    Returns one of:
      (host, path)           if the text before the first "/" ends with one of
                             the suffixes (path is "" when there is no "/");
      (head+"*", "*"+tail)   if the pattern does not start with "*" but has a
                             "*" before the first "/" (split at the first "*");
      ("", pattern)          otherwise, i.e. no domain part could be identified.
    """
    if tlds is None:
        tlds=domain_names
    for protocol in ("http://", "https://", "ftp://"):
        if pattern.startswith(protocol):
            # Slice the prefix off. str.strip(protocol) would treat its argument
            # as a *set of characters* and also eat leading/trailing letters of
            # the host and path (e.g. "http://photo.example.com" -> "oto.example.com").
            pattern=pattern[len(protocol):]
            break
    host,_,path=pattern.partition("/")
    # A known top-level domain at the end of the first component marks it as a host.
    if any(host.endswith(tld) for tld in tlds):
        return (host,path)
    # Wildcard inside the first component: cut at the first "*" and keep a "*"
    # on both sides so neither half loses the wildcard.
    if "*" in host and not pattern.startswith("*"):
        head,tail=pattern.split("*",1)
        return (head+"*","*"+tail)
    return ("",pattern)

def translate_pattern_domain_path(domain,path):
    """Build a privoxy pattern from a domain and a path in Opera urlfilter syntax.

    The domain is copied unchanged and followed by "/". In the path every "*"
    becomes the regular expression ".*" and all other text is regex-escaped.
    An empty path, or one ending in "*", is left open-ended (trailing "*" are
    dropped); any other path is anchored with a final "$".
    """
    open_ended=(path=="" or path.endswith("*"))
    if open_ended:
        path=path.rstrip("*")
    # Escape the literal pieces between wildcards and glue them with ".*".
    path_regex=r".*".join(re.escape(piece) for piece in path.split("*"))
    anchor="" if open_ended else r"$"
    return domain+"/"+path_regex+anchor

def translate_pattern(pattern):
    """Translate one Opera urlfilter pattern into a privoxy pattern."""
    domain,path=split_domain_path(pattern)
    return translate_pattern_domain_path(domain,path)

# Read the whole Opera list (local file name or URL) into memory.
try:
    opera_source=urllib.urlopen(args.infile)
    try:
        # urllib.urlopen does not raise on HTTP error statuses; without this
        # check an error page would be converted as if it were a filter list.
        # getcode() returns None for sources that are not HTTP.
        http_status=opera_source.getcode()
        if http_status is not None and http_status>=400:
            raise IOError("HTTP status %d" % http_status)
        opera_list=opera_source.readlines()
    finally:
        # Always release the file or connection, even if reading fails.
        opera_source.close()
except (IOError,OSError):
    print>>sys.stderr, "Cannot open %s " % args.infile
    sys.exit(1)

# A single iterator is shared by both loops below, so the second loop resumes
# right after the line on which the first one stopped.
iter_pattern=iter(opera_list)

# Skip everything up to and including the "[exclude]" section header.
for pattern in iter_pattern:
    pattern=pattern.strip()
    if pattern.startswith("[exclude]"):
        break

# Header of the privoxy section that blocks every pattern listed after it.
print>>args.outfile, """{+block{%s}}""" % args.sectionname

# Translate every remaining entry, ignoring blank lines and ";" comment lines.
for pattern in iter_pattern:
    pattern=pattern.strip()
    if pattern.startswith(";") or pattern=="": continue
    print>>args.outfile, translate_pattern(pattern)