batchcopy.py
author Radek Brich <brich.radek@ifortuna.cz>
Mon, 14 Apr 2014 22:28:12 +0200
changeset 97 a4af93e72e2b
child 98 024299702087
permissions -rwxr-xr-x
Add batchcopy tool.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
97
a4af93e72e2b Add batchcopy tool.
Radek Brich <brich.radek@ifortuna.cz>
parents:
diff changeset
     1
#!/usr/bin/env python3
a4af93e72e2b Add batchcopy tool.
Radek Brich <brich.radek@ifortuna.cz>
parents:
diff changeset
     2
a4af93e72e2b Add batchcopy tool.
Radek Brich <brich.radek@ifortuna.cz>
parents:
diff changeset
     3
from pgtoolkit import toolbase
a4af93e72e2b Add batchcopy tool.
Radek Brich <brich.radek@ifortuna.cz>
parents:
diff changeset
     4
a4af93e72e2b Add batchcopy tool.
Radek Brich <brich.radek@ifortuna.cz>
parents:
diff changeset
     5
a4af93e72e2b Add batchcopy tool.
Radek Brich <brich.radek@ifortuna.cz>
parents:
diff changeset
     6
class BatchCopyTool(toolbase.SrcDstTablesTool):
    """Copy rows of a single table from the 'src' connection to the 'dst' connection.

    Rows are selected with ``SELECT * FROM <table> WHERE <condition>`` on the
    source and re-inserted one by one into the table of the same name on the
    destination. The condition comes from ``--src-filter``; it may contain the
    placeholder ``{ids}``, which is replaced by a comma-separated list of IDs
    read from ``--file-with-ids``.
    """

    def __init__(self):
        """Register the tool's command-line arguments and run base-class setup."""
        toolbase.SrcDstTablesTool.__init__(self, name='batchcopy', desc='Copy data from one table to another.')

        self.parser.add_argument('--table-name', type=str, help='Table to be copied.')
        self.parser.add_argument('--src-filter', type=str, help='WHERE condition for source query.')
        self.parser.add_argument('--file-with-ids', type=str, help='Read source IDs from file (each ID on new line). Use these in --src-filter as {ids}')
        #TODO: duplicates=rollback|ignore|update

        # NOTE(review): init() is provided by the base class; presumably it
        # parses the arguments into self.args and sets up self.pgm / self.log.
        self.init()

    def _read_ids(self):
        """Return the IDs from --file-with-ids joined by commas.

        Blank lines are skipped so that a trailing newline or an empty line in
        the file does not produce an empty item (``1,,2``) in the SQL list.
        When no file was given, a marker string is returned instead so that an
        accidental use of ``{ids}`` is recognizable in the resulting query.
        """
        if not self.args.file_with_ids:
            return '<no IDs read>'
        with open(self.args.file_with_ids, 'r') as f:
            stripped = (ln.strip() for ln in f)
            return ','.join(ln for ln in stripped if ln)

    def main(self):
        """Read the selected rows from the source table and insert them into the destination."""
        # read list of IDs from file
        ids = self._read_ids()

        # read source data
        with self.pgm.cursor('src') as src_curs:
            # Fall back to 'true' *before* formatting: --src-filter is optional
            # and is None when omitted, so calling .format() on it would fail.
            # The filter is raw SQL supplied by the operator on purpose.
            condition = (self.args.src_filter or 'true').format(ids=ids)
            src_curs.execute('SELECT * FROM {} WHERE {}'.format(self.args.table_name, condition))
            #TODO:  ORDER BY id OFFSET 0 LIMIT 100
            data = src_curs.fetchall_dict()
            src_curs.connection.commit()

        with self.pgm.cursor('dst') as dst_curs:
            for row in data:
                keys = ', '.join(row.keys())
                values_mask = ', '.join(['%s'] * len(row))
                query = 'INSERT INTO {} ({}) VALUES ({})'.format(self.args.table_name, keys, values_mask)
                # Values are passed as query parameters, not interpolated.
                dst_curs.execute(query, list(row.values()))
            # Single commit after the loop: all rows are inserted in one transaction.
            dst_curs.connection.commit()

        # Report the number of rows fetched from the source (and inserted above).
        self.log.info('Copied {} rows.'.format(len(data)))
a4af93e72e2b Add batchcopy tool.
Radek Brich <brich.radek@ifortuna.cz>
parents:
diff changeset
    41
a4af93e72e2b Add batchcopy tool.
Radek Brich <brich.radek@ifortuna.cz>
parents:
diff changeset
    42
a4af93e72e2b Add batchcopy tool.
Radek Brich <brich.radek@ifortuna.cz>
parents:
diff changeset
    43
# Script entry point: build the tool instance. The constructor registers the
# command-line arguments and calls the base-class init().
tool = BatchCopyTool()
a4af93e72e2b Add batchcopy tool.
Radek Brich <brich.radek@ifortuna.cz>
parents:
diff changeset
    44
# Run the copy: read rows from the source table and insert them into the destination.
tool.main()