# HG changeset patch # User Radek Brich # Date 1397507292 -7200 # Node ID a4af93e72e2b66caebf8d68384de39f40088774f # Parent acf63df539b76a99ec6713bfb7ea02011b01611f Add batchcopy tool. diff -r acf63df539b7 -r a4af93e72e2b batchcopy.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/batchcopy.py Mon Apr 14 22:28:12 2014 +0200 @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 + +from pgtoolkit import toolbase + + +class BatchCopyTool(toolbase.SrcDstTablesTool): + def __init__(self): + toolbase.SrcDstTablesTool.__init__(self, name='batchcopy', desc='Copy data from one table to another.') + + self.parser.add_argument('--table-name', type=str, help='Table to be copied.') + self.parser.add_argument('--src-filter', type=str, help='WHERE condition for source query.') + self.parser.add_argument('--file-with-ids', type=str, help='Read source IDs from file (each ID on new line). Use these in --src-filter as {ids}') + #TODO: duplicates=rollback|ignore|update + + self.init() + + def main(self): + # read list of IDs from file + ids = '' + if self.args.file_with_ids: + with open(self.args.file_with_ids, 'r') as f: + ids = ','.join(ln.rstrip() for ln in f.readlines()) + + # read source data + with self.pgm.cursor('src') as src_curs: + condition = self.args.src_filter.format(ids=ids) or 'true' + src_curs.execute('SELECT * FROM {} WHERE {}'.format(self.args.table_name, condition)) + #TODO: ORDER BY id OFFSET 0 LIMIT 100 + data = src_curs.fetchall_dict() + src_curs.connection.commit() + + with self.pgm.cursor('dst') as dst_curs: + for row in data: + keys = ', '.join(row.keys()) + values_mask = ', '.join(['%s'] * len(row)) + query = 'INSERT INTO {} ({}) VALUES ({})'.format(self.args.table_name, keys, values_mask) + dst_curs.execute(query, list(row.values())) + dst_curs.connection.commit() + + self.log.info('Copied {} rows.' % len(rows)) + + +tool = BatchCopyTool() +tool.main()