# guberquote.py
#
# written by Karl Ramm 12/7/2003
# this code is in the public domain
#
# Turn free-form text into basic html.
#
# The basic scheme is to operate on a list of (boolean, string) pairs.
# True means the string might still need sanitizing; False means that
# it's ready to be passed on.

def urlquote(s):
    """Percent-quote a url and wrap it in an HTML anchor.

    s -- the raw url text found in the input

    Returns the string '<a href="URL">URL</a>' where URL is s with every
    character outside the unreserved set and ':/%' percent-encoded.  The
    same quoted text is used for both the href and the link label.
    """
    # urllib.quote moved to urllib.parse.quote in Python 3; support both.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote
    # NOTE(review): only ':', '/' and '%' are left unquoted, so '?', '=',
    # '&' and '#' are percent-encoded too.  That keeps the href free of
    # HTML-special characters, but it also alters urls that carry a query
    # string or fragment -- confirm that this is intended.
    t = quote(s, ':/%')
    return '<a href="%s">%s</a>' % (t, t)

def maybefilt(p, f):
    """Apply filter f to pair p only if p is still flagged as unprocessed.

    p -- a (boolean, string) pair
    f -- a function taking a string and returning a list of pairs

    Returns f(p[1]) when p[0] is true; otherwise returns [p], i.e. the
    untouched pair wrapped in a one-element list.
    """
    # Already-finished pairs pass through unchanged.
    if not p[0]:
        return [p]
    return f(p[1])

# I can write incomprehensible code in any language
def brfilt(s):
    """Put <br>s at linebreaks and return a list of pairs.

    s -- the text to split at newline characters

    Returns a list of (boolean, string) pairs: each line of s as an
    unprocessed (True) pair, with a finished (False) pair holding '<br>'
    plus a newline between consecutive lines.  An empty s yields a single
    (True, '') pair.
    """
    # Built with a plain loop rather than reduce(): reduce is not a builtin
    # on Python 3, and folding with list '+' copies the list on every step.
    pairs = []
    for line in s.split('\n'):
        if pairs:
            pairs.append((False, '<br>\n'))
        pairs.append((True, line))
    return pairs

def pfilt(s):
    """Turn blocks of text separated by double newlines into paragraphs.

    s -- the text to split into paragraphs

    Returns a flat list of (boolean, string) pairs.  Each non-blank block
    contributes three pairs: a finished '<p>', the stripped block text
    (still unprocessed), and a finished '</p>' followed by a newline.
    Blocks that are empty or contain only whitespace are skipped, so an
    empty s yields an empty list.
    """
    pairs = []
    for block in s.split('\n\n'):
        # Strip before testing, so a whitespace-only block does not turn
        # into an empty <p></p>.
        text = block.strip()
        if not text:
            continue
        pairs += [(False, '<p>'), (True, text), (False, '</p>\n')]
    return pairs

def refilt(s, regex, proc=lambda x: [(False, x)]):
    """Run re.split on s, running proc on the odd numbered results.

    s     -- the string to split
    regex -- the pattern to split on; it is expected to contain exactly
             one capturing group, so that re.split returns unmatched text
             at even positions and the matched text at odd positions
    proc  -- called with each matched piece; must return a list of
             (boolean, string) pairs.  The default marks the match as
             finished: [(False, match)].

    Returns a list of (boolean, string) pairs.  Unmatched text is tagged
    True (it may still need sanitizing); empty unmatched pieces are
    dropped.
    """
    # Imported locally, as the other helpers in this file do; no top-level
    # import of re is visible in this module.
    import re

    pairs = []
    for index, piece in enumerate(re.split(regex, s)):
        if index % 2:
            # Odd position: text captured by the group.
            pairs += proc(piece)
        elif piece:
            pairs.append((True, piece))
    return pairs

def entfilt(s):
    """Split &entities; out of s so that they are not escaped later.

    Recognises decimal (&#123;) and hexadecimal (&#x1F;) character
    references, and named references made up of ASCII letters only.
    Returns the list of pairs produced by refilt, with each entity marked
    as finished by refilt's default proc.
    """
    entity = r'(&#[0-9]+;|&#x[0-9a-fA-F]+;|&[a-zA-Z]+;)'
    return refilt(s, entity)

def brekurlfilt(s):
    """Use refilt to find urls enclosed in <>, (), [] or {}.

    A url here is text starting with 'http:', 'ftp:', 'www.' or 'web.'
    that sits directly after an opening bracket and runs, without
    whitespace, up to the first matching closing bracket.  The brackets
    themselves are not part of the match (they are lookbehind/lookahead
    assertions) and so stay in the surrounding unprocessed text.

    Returns the list of pairs from refilt, with each url replaced by a
    finished pair holding the link built by urlquote.
    """
    # Raw string: the pattern text is unchanged, but backslashes such as
    # \. and \S are no longer invalid string escape sequences.
    bracketed_url = r'''(?x)(
    (?<=<)(?:(?:http|ftp):|www\.|web\.)\S*?(?=>)|
    (?<=\()(?:(?:http|ftp):|www\.|web\.)\S*?(?=\))|
    (?<=\[)(?:(?:http|ftp):|www\.|web\.)\S*?(?=\])|
    (?<=\{)(?:(?:http|ftp):|www\.|web\.)\S*?(?=\})
    )'''
    return refilt(s, bracketed_url, lambda x: [(False, urlquote(x))])
            
def urlfiltproc(s):
    """Proc used by urlfilt to strip a trailing punctuation mark.

    s -- the url text matched by urlfilt

    If s ends in one of . , ! ; : " ' that character is assumed to be
    sentence punctuation rather than part of the url: the result is a
    finished pair with the link for s minus its last character, followed
    by an unprocessed pair holding the punctuation mark.  Otherwise the
    result is a single finished pair with the link for all of s.
    """
    # Test the last character directly.  re.match anchors at the start of
    # the string, so a '[...]$' pattern passed to it can never match a url
    # that merely ends in punctuation.
    if s and s[-1] in '.,!;:"\'':
        return [(False, urlquote(s[:-1])), (True, s[-1])]
    return [(False, urlquote(s))]

def urlfilt(s):
    """Split out urls in text.

    A url is any run of non-whitespace characters starting with 'http:',
    'ftp:', 'www.' or 'web.'.  Each match is handed to urlfiltproc, which
    turns it into a link; the remaining text stays unprocessed.

    Returns the list of pairs produced by refilt.
    """
    # Raw string: same pattern, without the invalid \. and \S string
    # escape sequences.
    bare_url = r'((?:(?:http|ftp):|www\.|web\.)\S*)'
    return refilt(s, bare_url, urlfiltproc)

def escaperest(s):
    """HTML-escape s and return it as a one-element list of pairs.

    s -- text that no earlier filter has claimed

    Returns [(False, escaped)], where '&', '<' and '>' in s have been
    replaced by their HTML entities.  Quote characters are left alone,
    matching the default behaviour of the old cgi.escape.
    """
    # cgi.escape was removed in Python 3.8; html.escape is its replacement.
    # Fall back to cgi on interpreters that have no html module.
    try:
        from html import escape
    except ImportError:
        from cgi import escape
    # quote=False keeps the historical cgi.escape(s) behaviour; html.escape
    # would otherwise also escape " and '.
    return [(False, escape(s, quote=False))]

def collapse(l):
    """Turn [[a,b],[c,d],[e,f]] into [a,b,c,d,e,f].

    l -- a list of lists

    Returns a new flat list holding the elements of each sublist in
    order.  An empty l yields an empty list.  The sublists themselves are
    not modified.
    """
    # A plain loop instead of reduce(): reduce is not a builtin on
    # Python 3, it raises on an empty list when given no initial value,
    # and folding with '+' copies the accumulated list on every step.
    flat = []
    for sub in l:
        flat.extend(sub)
    return flat

def tighten_lambda(x, y):
    """Folding step used internally by tighten.

    x -- a non-empty list of (boolean, string) pairs built so far
    y -- the next (boolean, string) pair

    Returns a new list: if y has the same flag as the last pair of x, the
    two strings are joined into a single pair that replaces it; otherwise
    y is simply added at the end.  x itself is left unmodified.
    """
    prev = x[-1]
    same = prev[0] == y[0]
    if not same:
        return x + [y]
    merged = (y[0], prev[1] + y[1])
    return x[:-1] + [merged]
    
def tighten(l):
    """Collapse adjacent pairs of the same quotedness.

    l -- a list of (boolean, string) pairs

    Returns a new list in which every run of neighbouring pairs sharing
    the same flag has been merged into one pair whose string is the
    concatenation of the run.  An empty l yields an empty list; l itself
    is not modified.

    This gives the same result as folding tighten_lambda over the list,
    but in a single pass, without relying on the builtin reduce (absent
    on Python 3) and without failing on an empty list.
    """
    merged = []
    for pair in l:
        if merged and merged[-1][0] == pair[0]:
            # Same flag as the previous pair: extend it in place.
            merged[-1] = (pair[0], merged[-1][1] + pair[1])
        else:
            merged.append(pair)
    return merged

def guberquote(s):
    """Turn free-form text into basic html.

    s -- the text to convert

    The stripped text is run, in order, through pfilt, brfilt,
    brekurlfilt, urlfilt, entfilt and escaperest.  Each filter is applied
    only to the pieces still flagged as unprocessed, and after each stage
    the result is flattened with collapse and merged with tighten.

    Returns the resulting html string; empty or whitespace-only input
    returns the empty string.

    Raises RuntimeError if the pipeline does not end with exactly one
    pair.
    """
    text = s.strip()
    # Nothing to quote: return early rather than feed an empty string
    # through the filters.
    if not text:
        return ''
    l = [(True, text)]
    for filt in (pfilt, brfilt, brekurlfilt, urlfilt, entfilt, escaperest):
        l = tighten(collapse([maybefilt(p, filt) for p in l]))
    if len(l) != 1:
        # A bare string is not a valid exception object; raise a real one.
        raise RuntimeError('quoting failed to return entirely processed string')
    return l[0][1]
                             
