batchquery.py
author Radek Brich <brich.radek@ifortuna.cz>
Tue, 06 May 2014 18:37:41 +0200
changeset 100 d6088dba8fea
parent 93 b72591087495
child 101 2a2d0d5df03b
permissions -rwxr-xr-x
Add pgtool wrapper for all tools. Only this script will be installed into system bin.

#!/usr/bin/env python3

import csv

from pgtoolkit import toolbase


class BatchQueryTool(toolbase.SimpleTool):
    """Run one query once per CSV row, using the row's columns as arguments.

    The query text, the optional session-initialisation query and the
    optional output formatter are all read from files named on the command
    line.  Results are collected in memory and, when ``--output`` is given,
    written to that file after the transaction has been committed.
    """

    def __init__(self):
        """Register the command-line options and run the base-class ``init()``."""
        super().__init__(name='batchquery', desc='Run a query using columns from CSV file as arguments.')
        # --query, --init and --outputfunc are all opened as files by main(),
        # so the help texts describe them as file names.
        self.parser.add_argument('--query', dest='query', type=str, help='File with the query to run. Use %%s for arguments, or %%(name)s for named arguments (see --header).')
        self.parser.add_argument('--file', dest='file', type=str, help='CSV file with data to use as arguments.')
        self.parser.add_argument('--init', dest='init', type=str, help='File with a query which initializes the database session (eg. creates a temporary function).')
        self.parser.add_argument('--output', dest='output', type=str, help='File name for results.')
        self.parser.add_argument('--outputfunc', dest='outputfunc', type=str, help='File with Python code defining format_row(args, rows), which formats one result as a string.')
        self.parser.add_argument('--header', dest='header', action='store_true', help='First line of CSV is header with names for columns. These names can be used in query.')
        self.init()

    def _split_line(self, line):
        """Split one CSV line into a list of whitespace-stripped fields.

        Parsing is delegated to the ``csv`` module, so a double-quoted field
        may contain commas and its surrounding quotes are removed.  A blank
        line yields ``['']``.
        """
        # skipinitialspace lets a quoted field follow ", " (comma + space).
        row = next(csv.reader([line], skipinitialspace=True), None)
        if not row:
            # csv reports a blank line as an empty row; keep the single
            # empty field that a plain split(',') would produce.
            return ['']
        return [field.strip() for field in row]

    def _load_format_row(self):
        """Return ``format_row`` from the --outputfunc file.

        Returns None when no formatter will be needed (no --output or no
        --outputfunc).  Exits through the parser's error handler when the
        file does not define ``format_row``.
        """
        if not (self.args.output and self.args.outputfunc):
            return None
        with open(self.args.outputfunc, 'r', encoding='utf8') as f:
            source = f.read()
        namespace = {}
        # NOTE(security): this executes arbitrary Python code from the given
        # file; only point --outputfunc at files you trust.
        exec(source, namespace)
        if 'format_row' not in namespace:
            self.parser.error('--outputfunc file does not define format_row(args, rows)')
        return namespace['format_row']

    def _write_results(self, results, format_row):
        """Write one line per ``(args, rows)`` result to the --output file."""
        with open(self.args.output, 'w', encoding='utf8') as f:
            for args, rows in results:
                if format_row:
                    f.write(format_row(args, rows))
                else:
                    f.write(repr(args))
                    f.write(' -> ')
                    f.write(repr(rows))
                f.write('\n')

    def main(self):
        """Execute the query for every CSV row and optionally save the results.

        With ``--header`` the first CSV line supplies column names and each
        row is passed to the query as a dict; otherwise each row is passed
        as a list.  Blank CSV lines are skipped.
        """
        # Fail with a usage message instead of an opaque open(None) TypeError.
        if not self.args.query:
            self.parser.error('--query is required')
        if not self.args.file:
            self.parser.error('--file is required')
        # load query from file
        with open(self.args.query, 'r', encoding='utf8') as f:
            query = f.read()
        # Load the formatter before touching the database, so a broken
        # formatter file is reported before any query is run and committed.
        format_row = self._load_format_row()
        results = []
        # connect DB
        with self.pgm.cursor('target') as curs:
            # run init query
            if self.args.init:
                with open(self.args.init, 'r', encoding='utf8') as f:
                    curs.execute(f.read(), [])
            # read CSV file
            with open(self.args.file, 'r', encoding='utf8') as f:
                # read header
                names = None
                if self.args.header:
                    names = self._split_line(f.readline())
                # read and process lines
                for line in f:
                    if not line.strip():
                        # e.g. trailing empty line at the end of the file
                        continue
                    args = self._split_line(line)
                    if names:
                        args = dict(zip(names, args))
                    curs.execute(query, args)
                    # DB-API cursors set description to None when the
                    # statement produced no result set; fetchall() would
                    # raise in that case.
                    rows = curs.fetchall() if curs.description is not None else []
                    results.append((args, rows))
            curs.connection.commit()
        # write results to output file
        if self.args.output:
            self._write_results(results, format_row)


# Script entry point: build the tool and run it immediately.
# NOTE(review): there is no `if __name__ == '__main__'` guard, so these
# statements also execute whenever this module is imported.
tool = BatchQueryTool()
tool.main()