#!/usr/bin/python2.4 -O
# Author: James Y Knight <foom@fuhm.net>
# License: GPLv3+.

import optparse, sys, locale, codecs

def _parse_range(pair):
    """Convert one LIST element into a (start, end) Python slice pair.

    Accepted forms are N, N-M, N-, -M, where each position is either a
    1-based number or 'eK' meaning "K-th from the end" (e1 is the last).
    Raises ValueError if the element is empty or a position is not numeric.
    """
    if not pair:
        raise ValueError("empty list element")

    if '-' in pair:
        start, end = pair.split('-', 1)
        if not start:
            start = 0
        elif start[0] == 'e':
            start = -int(start[1:])
        else:
            start = int(start) - 1 # Correct for 1-based counting

        if not end:
            end = None
        elif end[0] == 'e':
            # The end position is inclusive, so step one past it; a slice
            # end of 0 would select nothing, so "through the last" is None.
            end = -int(end[1:]) + 1
            if end == 0:
                end = None
        else:
            end = int(end) # End pos is inclusive
    elif pair[0] == 'e':
        start = -int(pair[1:])
        if start == -1:
            end = None
        else:
            end = start + 1
    else:
        start = int(pair) - 1
        end = start + 1

    return (start, end)

def parse_LIST(option, opt_str, value, parser, action_type, inter_str):
    """optparse callback: turn a comma-separated LIST into queued actions.

    For every element of VALUE an (action_type, (start, end), delimiter)
    tuple is appended to parser.values.actions, using the delimiter that is
    in effect on parser.values when this option is parsed.  If inter_str is
    true, a TEXT action holding the delimiter (or a single space when no
    delimiter is set) is queued between consecutive elements.

    Raises optparse.OptionValueError when an element is malformed, so that
    optparse reports a usage error instead of a traceback.  Nothing is
    queued unless the whole list parses.
    """
    delimiter = parser.values.delimiter
    queued = []
    for pair in value.split(','):
        if queued and inter_str:
            queued.append(('TEXT', delimiter or ' ', ''))
        try:
            val_range = _parse_range(pair)
        except ValueError:
            raise optparse.OptionValueError(
                "option %s: invalid list element %r in %r"
                % (opt_str, pair, value))
        queued.append((action_type, val_range, delimiter))

    parser.values.actions.extend(queued)

def parse_TEXT(option, opt_str, value, parser):
    """optparse callback: queue VALUE as literal text to emit on every line."""
    literal_action = ('TEXT', value, '')
    parser.values.actions.append(literal_action)

def parse_KEY(option, opt_str, value, parser):
    """optparse callback: queue a KEY action for fields starting with VALUE.

    The delimiter currently stored on parser.values is recorded with the
    action.
    """
    settings = parser.values
    key_action = ('KEY', value, settings.delimiter)
    settings.actions.append(key_action)

def generate_code(fun_name, actions, is_ascii):
    """Return Python source defining a line-processing function.

    fun_name  -- name given to the generated function.
    actions   -- sequence of (action_type, val, delim) tuples, where
                 action_type is 'TEXT', 'BYTES', 'CHARS', 'FIELDS' or 'KEY'.
                 For BYTES/CHARS/FIELDS, val is a (start, end) slice pair;
                 for TEXT it is the literal text; for KEY the field prefix.
    is_ascii  -- when true, CHARS actions slice the raw line directly
                 instead of decoding it first.

    The generated function has the signature
    (instream, write, locale_encoder, locale_decoder).  For each line of
    instream it strips trailing newlines, concatenates the result of every
    action, appends a newline and passes the string to write().  Action
    types other than the five above are ignored.
    """
    need_decode = False
    need_split = {}  # delimiter -> N, the suffix of its split_line_N variable
    code = []

    for action_type, val, delim in actions:
        if action_type == 'TEXT':
            code.append("%r" % val)
        elif action_type == 'BYTES' or (action_type == 'CHARS' and is_ascii):
            code.append("line[%r:%r]" % val)
        elif action_type == 'CHARS':
            need_decode = True
            code.append("locale_encoder(decoded_line[%r:%r])[0]" % val)
        elif action_type == 'FIELDS':
            # Each distinct delimiter is split once per line and shared.
            split_num = need_split.setdefault(delim, len(need_split))
            start, end = val
            if start >= 0 and start + 1 == end:
                # Single field counted from the front: a guarded index
                # avoids building a one-element slice.
                code.append("len(split_line_%r) > %r and split_line_%r[%r] or ''" % (split_num, start, split_num, start))
            else:
                # Ranges, and single fields counted from the end.  The
                # length guard above cannot protect a negative index, so
                # use a slice, which is simply empty on short lines.
                code.append("%r.join(split_line_%r[%r:%r])" % (delim or ' ', split_num, start, end))
        elif action_type == 'KEY':
            split_num = need_split.setdefault(delim, len(need_split))
            code.append("%r.join([x for x in split_line_%r if x.startswith(%r)])" % (delim or ' ', split_num, val))

    lines = [
        "def %s(instream, write, locale_encoder, locale_decoder):\n" % fun_name,
        " for line in instream:\n",
        "  line = line.rstrip('\\n')\n",
    ]
    if need_decode:
        lines.append("  decoded_line = locale_decoder(line)[0]\n")

    for delim, num in need_split.items():
        # A delimiter of None means split on runs of whitespace.
        lines.append("  split_line_%r = line.split(%s)\n" % (num, delim is not None and repr(delim) or ''))

    if not code:
        lines.append("  pass\n")
    elif len(code) == 1:
        lines.append("  write((%s)+'\\n')\n" % (code[0],))
    else:
        lines.append("  output = %s\n" % (code[0],))
        for c in code[1:]:
            lines.append("  output += %s\n" % (c,))
        lines.append("  output += '\\n'\n")
        lines.append("  write(output)\n")

    fun_code = ''.join(lines)
#    print >>sys.stderr, fun_code
    return fun_code
    
def main():
    """Parse the command line and run the generated filter over each input.

    The -b/-c/-f/-k/-t options each queue output actions in command-line
    order; generate_code() turns them into a function which is then applied
    to every FILE argument ('-' or no argument means standard input),
    writing to standard output.
    """
    parser = optparse.OptionParser(usage="%prog [OPTION]... [FILE]...\n"
                                   "Print selected parts of lines from each FILE to standard output")
    parser.add_option("-b", "--bytes", metavar="LIST", action="callback", callback=parse_LIST, type="string", callback_args=("BYTES",0), help="Output these bytes")
    parser.add_option("-c", "--characters", metavar="LIST", action="callback", callback=parse_LIST, type="string", callback_args=("CHARS",0), help="Output these characters")
    parser.add_option("-d", "--delimiter", metavar="DELIM", help="use DELIM instead of whitespace for field delimiter (must appear before -f)")
    parser.add_option("-f", "--fields", metavar="LIST", action="callback", callback=parse_LIST, type="string", callback_args=("FIELDS",1), help="output only these fields")
    parser.add_option("-k", "--key", metavar="KEY", action="callback", callback=parse_KEY, type="string", help="output only fields starting with KEY")
    parser.add_option("-t", "--text", metavar="TEXT", action="callback", callback=parse_TEXT, type="string", help="output this text")

    parser.set_defaults(delimiter=None, actions=[])

    (options, args) = parser.parse_args()

    locale_encoding = locale.getpreferredencoding()

    # Normalise any alias of ASCII to the canonical codec name.
    if codecs.getdecoder(locale_encoding) == codecs.ascii_decode:
        locale_encoding = "ascii"

    encoding_funs = codecs.lookup(locale_encoding)
    locale_encoder = encoding_funs[0]
    locale_decoder = encoding_funs[1]

#    print >> sys.stderr, "ENCODING:", locale_encoding
    # Run the generated source in its own namespace and fetch the function
    # from it explicitly, rather than relying on exec populating locals.
    namespace = {}
    exec(generate_code('process_stream', options.actions, locale_decoder == codecs.ascii_decode), namespace)
    process_stream = namespace['process_stream']

    if not args:
        args = ("-",)
    for fname in args:
        if fname == '-':
            process_stream(sys.stdin, sys.stdout.write, locale_encoder, locale_decoder)
        else:
            stream = open(fname, 'r')
            try:
                process_stream(stream, sys.stdout.write, locale_encoder, locale_decoder)
            finally:
                # Close each named file as soon as it has been processed.
                stream.close()


# Run the command-line tool only when executed as a script, not on import.
if "__main__" == __name__:
    main()
