# HG changeset patch # User Radek Brich # Date 1312994094 -7200 # Node ID 4ab077c93b2d144dacd3b34577146a6bfbe17b73 # Parent 57cf8fdff5ed7fd6f6f0e79d3ff62077f08fc539 Add table diff tool. diff -r 57cf8fdff5ed -r 4ab077c93b2d pgtoolkit.conf.example --- a/pgtoolkit.conf.example Wed Jul 20 10:54:07 2011 +0200 +++ b/pgtoolkit.conf.example Wed Aug 10 18:34:54 2011 +0200 @@ -6,5 +6,5 @@ ### meta database # connection string (dsn) to meta db meta_db = 'host=10.8.0.1 dbname=central' -# query in meta db, input is database name (will be placed instead of %s), output is values for create_conn +# query from meta db, input is database name (will be placed instead of %s), output is values for create_conn meta_query = '''SELECT host, port, dbname, user, password FROM config.databases WHERE name = %s LIMIT 1''' diff -r 57cf8fdff5ed -r 4ab077c93b2d pgtools/pgdatadiff.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pgtools/pgdatadiff.py Wed Aug 10 18:34:54 2011 +0200 @@ -0,0 +1,209 @@ +# -*- coding: utf-8 -*- +# +# PgDataDiff - compare tables, print data differences +# +# Copyright (c) 2011 Radek Brich +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.


from psycopg2.extensions import adapt

from common.highlight import *
from collections import OrderedDict


def _sql_literal(value):
    '''Return `value` rendered as SQL literal text via psycopg2 adaptation.'''
    # NOTE(review): the adapter is not prepare()d with a connection, so
    # strings are quoted using psycopg2's default encoding -- confirm this
    # is acceptable for non-ASCII data.
    return adapt(value).getquoted().decode()


class DiffData:
    '''A single difference between a source row and a destination row.

    Attributes:
        table    -- table name, inserted verbatim into generated SQL
        change   -- '+' (row only in destination), '-' (row only in source)
                    or '*' (row in both, some columns differ)
        src_cols -- mapping column name -> source value, or None for '+'
        dst_cols -- mapping column name -> destination value, or None for '-'
        id       -- (key column name, key value) pair used to build the WHERE
                    clause; required by format_patch() for '-' and '*'

    '''

    # Highlight attributes per change symbol; 'V' is used for new values.
    COLORS = {
        '+' : BOLD | GREEN,
        '-' : BOLD | RED,
        '*' : BOLD | YELLOW,
        'V' : BOLD | WHITE}

    def __init__(self, table, change, src_cols, dst_cols, id=None):
        self.table = table
        self.change = change
        self.src_cols = src_cols
        self.dst_cols = dst_cols
        self.id = id

    def format(self):
        '''Return the difference as one human readable, highlighted line.'''
        out = []

        out.append(highlight(1, self.COLORS[self.change]))
        out.extend([self.change, ' '])

        out.extend(self._format_changes())

        out.append(highlight(0))

        return ''.join(out)

    def format_patch(self):
        '''Return an SQL statement applying this difference.

        '+' yields INSERT, '-' yields DELETE and '*' yields UPDATE.
        Any other change symbol raises KeyError.

        '''
        method = {
            '+' : self._format_insert,
            '-' : self._format_delete,
            '*' : self._format_update}

        return method[self.change]()

    def _format_changes(self):
        '''Return a one-item list holding the formatted column changes.'''
        if self.src_cols and not self.dst_cols:
            return [', '.join(self._format_value_del(name, value)
                              for name, value in self.src_cols.items())]
        if self.dst_cols and not self.src_cols:
            return [', '.join(self._format_value_add(name, value)
                              for name, value in self.dst_cols.items())]

        # Both mappings are filled in the same column order, so they can be
        # walked in lockstep instead of re-listing keys/values per column.
        pairs = zip(self.src_cols.items(), self.dst_cols.values())
        return [', '.join(self._format_value_change(name, old, new)
                          for (name, old), new in pairs)]

    def _format_value_del(self, k, v):
        '''Format column `k` with value `v` of a deleted row.'''
        fs = (highlight(1, self.COLORS['-']) + '{}: ' + highlight(0) + '{}')
        return fs.format(k, _sql_literal(v))

    def _format_value_add(self, k, v):
        '''Format column `k` with value `v` of an inserted row.'''
        fs = (highlight(1, self.COLORS['+']) + '{}: ' + highlight(0) +
            highlight(1, self.COLORS['V']) + '{}' + highlight(0))
        return fs.format(k, _sql_literal(v))

    def _format_value_change(self, k, v1, v2):
        '''Format column `k` changing from value `v1` to value `v2`.'''
        fs = (highlight(1, self.COLORS['*']) + '{}: ' + highlight(0) +
            '{} ▶ ' +
            highlight(1, self.COLORS['V']) + '{}' + highlight(0))
        return fs.format(k, _sql_literal(v1), _sql_literal(v2))

    def _format_insert(self):
        '''Return an INSERT statement built from the destination columns.'''
        # NOTE(review): column names are emitted unquoted; names needing
        # quotes (mixed case, reserved words) would produce invalid SQL.
        out = ['INSERT INTO ', self.table, ' (']
        out.append(', '.join(self.dst_cols.keys()))
        out.append(') VALUES (')
        out.append(', '.join(_sql_literal(v) for v in self.dst_cols.values()))
        out.append(');')
        return ''.join(out)

    def _format_delete(self):
        '''Return a DELETE statement for the row identified by self.id.'''
        out = ['DELETE FROM ', self.table]
        out.extend(self._format_where())
        return ''.join(out)

    def _format_update(self):
        '''Return an UPDATE statement setting the changed columns.'''
        out = ['UPDATE ', self.table, ' SET ']
        out.append(', '.join(self._format_set(name, value)
                             for name, value in self.dst_cols.items()))
        out.extend(self._format_where())
        return ''.join(out)

    def _format_set(self, k, v):
        '''Return a single "column = value" assignment.'''
        return '{} = {}'.format(k, _sql_literal(v))

    def _format_where(self):
        '''Return the WHERE clause pieces (with final ';') built from self.id.'''
        out = [' WHERE ']
        out.extend([self.id[0], ' = '])
        out.append(_sql_literal(self.id[1]))
        out.append(';')
        return out


class PgDataDiff:
    '''Compare rows of two tables and report the differences.

    The first column of each row is used as the row key. The comparison
    walks both row lists in parallel, comparing keys with < and >, so both
    lists must be sorted ascending by the first column.

    The row lists are consumed (emptied) while the diff is iterated.

    '''

    def __init__(self, table=None, src_rows=None, dst_rows=None, col_names=None):
        # NOTE(review): nothing in this module reads allowcolor; whether it
        # influences highlight() cannot be determined from here.
        self.allowcolor = False
        self.table = table
        self.src_rows = src_rows
        self.dst_rows = dst_rows
        self.col_names = col_names

    def iter_diff(self):
        '''Return differences between data of two tables.

        Yields one DiffData object per differing row.

        '''
        # Stop explicitly once both lists are drained. The loop used to end
        # by catching IndexError, which also hid genuine indexing errors
        # (e.g. col_names shorter than a row) and silently cut the diff short.
        while self.src_rows or self.dst_rows:
            diff = self._compare_row(self.src_rows, self.dst_rows)
            if diff:
                yield diff

    def print_diff(self):
        '''Print differences between data of two tables.

        The output is in human readable form.

        Set allowcolor=True of PgDataDiff instance to get colored output.

        '''
        for ln in self.iter_diff():
            print(ln.format())

    def print_patch(self):
        '''Print SQL script usable as patch for destination table.

        Supports INSERT, DELETE and UPDATE operations.

        '''
        for ln in self.iter_diff():
            print(ln.format_patch())

    def _row_deleted(self, src):
        '''Return a '-' diff for a row present only in the source.'''
        src_cols = OrderedDict(zip(self.col_names, src))
        # The key is required so format_patch() can build the WHERE clause.
        row_id = (self.col_names[0], src[0])
        return DiffData(self.table, '-', src_cols, None, id=row_id)

    def _row_inserted(self, dst):
        '''Return a '+' diff for a row present only in the destination.'''
        dst_cols = OrderedDict(zip(self.col_names, dst))
        return DiffData(self.table, '+', None, dst_cols)

    def _compare_data(self, src, dst):
        '''Compare two rows with equal key, column by column.

        Returns a '*' DiffData holding only the differing columns, or None
        when the rows are identical.

        '''
        src_cols = OrderedDict()
        dst_cols = OrderedDict()
        for i, (old, new) in enumerate(zip(src, dst)):
            if old != new:
                name = self.col_names[i]
                src_cols[name] = old
                dst_cols[name] = new
        if src_cols:
            row_id = (self.col_names[0], src[0])
            return DiffData(self.table, '*', src_cols, dst_cols, id=row_id)

        return None

    def _compare_row(self, src_rows, dst_rows):
        '''Consume the next row(s) from the lists and return their diff.

        Returns a DiffData, or None when the two rows are identical.
        Raises IndexError when both lists are empty.

        '''
        if src_rows and not dst_rows:
            # Destination exhausted: every remaining source row was deleted.
            return self._row_deleted(src_rows.pop(0))
        if dst_rows and not src_rows:
            # Source exhausted: every remaining destination row was inserted.
            return self._row_inserted(dst_rows.pop(0))

        src = src_rows[0]
        dst = dst_rows[0]

        if src[0] < dst[0]:
            del src_rows[0]
            return self._row_deleted(src)
        if src[0] > dst[0]:
            del dst_rows[0]
            return self._row_inserted(dst)

        del src_rows[0]
        del dst_rows[0]
        return self._compare_data(src, dst)
#!/usr/bin/env python3.2
#
# Print differences between data in two tables of same schema.
#
# Requirements:
#  * First column of both tables must be numerical primary key.
#  * Destination table must contain all columns from source table.
#    Order is not important.
#

from pgtools import pgmanager, pgbrowser, pgdatadiff
from toolbase import SrcDstTool


def _quote_ident(name):
    '''Return `name` as a double-quoted SQL identifier.'''
    # An embedded double quote is escaped by doubling it.
    return '"' + name.replace('"', '""') + '"'


class TableDiffTool(SrcDstTool):
    '''Command line tool printing an SQL patch between two tables.'''

    def __init__(self):
        SrcDstTool.__init__(self, name='tablediff', desc='Table diff.')

        self.parser.add_argument('table', metavar='table', type=str, help='Table name')

        self.init()

    def main(self):
        '''Fetch the table from 'src' and 'dst' and print the SQL patch.

        The table argument may be given as "schema.table" or just "table".

        '''
        if '.' in self.args.table:
            schema, table = self.args.table.split('.', 1)
        else:
            # Previously `schema` stayed unbound here and the lookup below
            # raised UnboundLocalError. Fall back to PostgreSQL's standard
            # schema. NOTE(review): assumes the table lives in "public" --
            # confirm this default suits the deployments.
            schema = 'public'
            table = self.args.table

        srcbrowser = pgbrowser.PgBrowser(self.pgm.get_conn('src'))

        # The column list of the source table drives the query on both
        # sides, so both result sets share the same column order.
        columns = srcbrowser.list_columns(schema=schema, table=table)
        col_names = [x['name'] for x in columns]
        columns_sel = ', '.join(_quote_ident(name) for name in col_names)

        table_fullname = _quote_ident(schema) + '.' + _quote_ident(table)
        # Rows are ordered by the first column, the key the diff compares on.
        query = 'SELECT ' + columns_sel + ' FROM ' + table_fullname + ' ORDER BY 1;'

        with self.pgm.cursor('src') as curs:
            curs.execute(query)
            src_rows = curs.fetchall()

        with self.pgm.cursor('dst') as curs:
            curs.execute(query)
            dst_rows = curs.fetchall()

        pgdd = pgdatadiff.PgDataDiff(table_fullname,
            src_rows, dst_rows, col_names)
        # print_diff() is the human readable alternative to the SQL patch.
        pgdd.print_patch()


if __name__ == '__main__':
    tool = TableDiffTool()
    tool.main()