diff -r 4ab077c93b2d -r 685b20d2d3ab tools/pgdatadiff.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/pgdatadiff.py Fri Aug 12 14:39:49 2011 +0200 @@ -0,0 +1,238 @@ +# -*- coding: utf-8 -*- +# +# PgDataDiff - compare tables, print data differencies +# +# Copyright (c) 2011 Radek Brich +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + + +from collections import OrderedDict + +from common.highlight import * +from tools import pgbrowser + + +class DiffData: + COLORS = { + '+' : BOLD | GREEN, + '-' : BOLD | RED, + '*' : BOLD | YELLOW, + 'V' : BOLD | WHITE} + + def __init__(self, change, cols1, cols2, id=None): + self.change = change + self.cols1 = cols1 + self.cols2 = cols2 + self.id = id + + def format(self): + out = [] + + out.append(highlight(1, self.COLORS[self.change])) + out.extend([self.change, ' ']) + + out.extend(self._format_changes()) + + out.append(highlight(0)) + + return ''.join(out) + + def format_patch(self, table): + method = { + '+' : self._format_insert, + '-' : self._format_delete, + '*' : self._format_update} + + return method[self.change](table) + + def _format_changes(self): + if self.cols1 and not self.cols2: + return [', '.join([self._format_value_del(*x) for x in self.cols1.items()])] + if not self.cols1 and self.cols2: + return [', '.join([self._format_value_add(*x) for x in self.cols2.items()])] + + items = [] + for i in range(len(self.cols1)): + items.append(( + list(self.cols1.keys())[i], + list(self.cols1.values())[i], + list(self.cols2.values())[i])) + + return [', '.join([self._format_value_change(*x) for x in items])] + + def _format_value_del(self, k, v): + fs = (highlight(1, self.COLORS['-']) + '{}: ' + highlight(0) + '{}') + return fs.format(k, v) + + def _format_value_add(self, k, v): + fs = (highlight(1, self.COLORS['+']) + '{}: ' + highlight(0) + + highlight(1, self.COLORS['V']) + '{}' + highlight(0)) + return fs.format(k, v) + + def _format_value_change(self, k, v1, v2): + fs = (highlight(1, self.COLORS['*']) + '{}: ' + highlight(0) + + '{} ▶ ' + + highlight(1, self.COLORS['V']) + '{}' + highlight(0)) + return fs.format(k, v1, v2) + + def _format_insert(self, table): + out = ['INSERT INTO ', table, ' ('] + out.append(', '.join(self.cols2.keys())) + out.append(') VALUES (') + out.append(', '.join(self.cols2.values())) + out.append(');') + return ''.join(out) + + def _format_delete(self, table): + out = ['DELETE FROM ', table] + out.extend(self._format_where()) + return ''.join(out) + + def _format_update(self, table): + out = ['UPDATE ', table, ' SET '] + out.append(', '.join([self._format_set(*x) for x in self.cols2.items()])) + out.extend(self._format_where()) + return ''.join(out) + + def _format_set(self, k, v): + return '{} = {}'.format(k, v) + + def _format_where(self): + out = [' WHERE '] + out.extend([self.id[0], ' = ']) + out.append(self.id[1]) + out.append(';') + return out + +class PgDataDiff: + def __init__(self, conn1, conn2): + self.allowcolor = False + self.conn1 = conn1 + self.conn2 = conn2 + self.fulltable1 = None + self.fulltable2 = None + + def settable1(self, table, schema='public'): + self.schema1 = schema + self.table1 = table + self.fulltable1 = '"' + schema + '"."'+ table + '"' + + def settable2(self, table, schema='public'): + self.schema2 = schema + self.table2 = table + self.fulltable2 = '"' + schema + '"."'+ table + '"' + + def iter_diff(self): + '''Return differencies between data of two tables. + + Yields one line at the time. + + ''' + curs1, curs2 = self._select() + + row1 = curs1.fetchone_adapted() + row2 = curs2.fetchone_adapted() + + while True: + if row1 is None and row2 is None: + break + diff = self._compare_row(row1, row2) + + if diff: + yield diff + + if diff.change == '-': + row1 = curs1.fetchone_adapted() + continue + if diff.change == '+': + row2 = curs2.fetchone_adapted() + continue + # change == '*' or not diff + row1 = curs1.fetchone_adapted() + row2 = curs2.fetchone_adapted() + + def print_diff(self): + '''Print differencies between data of two tables. + + The output is in human readable form. + + Set allowcolor=True of PgDataDiff instance to get colored output. + + ''' + for ln in self.iter_diff(): + print(ln.format()) + + def print_patch(self): + '''Print SQL script usable as patch for destination table. + + Supports INSERT, DELETE and UPDATE operations. + + ''' + for ln in self.iter_diff(): + print(ln.format_patch(self.fulltable2)) + + def _select(self): + browser = pgbrowser.PgBrowser(self.conn1) + columns = browser.list_columns(schema=self.schema1, table=self.table1) + columns_sel = ', '.join(['"' + x['name'] + '"' for x in columns]) + self.colnames = [x['name'] for x in columns] + + query1 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable1 + ' ORDER BY 1;' + query2 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable2 + ' ORDER BY 1;' + + curs1 = self.conn1.cursor() + curs2 = self.conn2.cursor() + + curs1.execute(query1) + curs2.execute(query2) + + return curs1, curs2 + + def _compare_data(self, row1, row2): + cols1 = OrderedDict() + cols2 = OrderedDict() + for i in range(len(row1)): + if row1[i] != row2[i]: + cols1[self.colnames[i]] = row1[i] + cols2[self.colnames[i]] = row2[i] + if cols1: + id = (self.colnames[0], row1[0]) + return DiffData('*', cols1, cols2, id=id) + + return None + + def _compare_row(self, row1, row2): + if row2 is None: + cols1 = OrderedDict(zip(self.colnames, row1)) + return DiffData('-', cols1, None) + if row1 is None: + cols2 = OrderedDict(zip(self.colnames, row2)) + return DiffData('+', None, cols2) + + if row1[0] < row2[0]: + cols1 = OrderedDict(zip(self.colnames, row1)) + id = (self.colnames[0], row1[0]) + return DiffData('-', cols1, None, id=id) + if row1[0] > row2[0]: + cols2 = OrderedDict(zip(self.colnames, row2)) + return DiffData('+', None, cols2) + + return self._compare_data(row1, row2) +