--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pgtoolkit/pgdatadiff.py Tue Aug 16 16:03:46 2011 +0200
@@ -0,0 +1,238 @@
+# -*- coding: utf-8 -*-
+#
+# PgDataDiff - compare tables, print data differencies
+#
+# Copyright (c) 2011 Radek Brich <radek.brich@devl.cz>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+
+from collections import OrderedDict
+
+from pgtoolkit import pgbrowser
+from pgtoolkit.highlight import *
+
+
+class DiffData:
+ COLORS = {
+ '+' : BOLD | GREEN,
+ '-' : BOLD | RED,
+ '*' : BOLD | YELLOW,
+ 'V' : BOLD | WHITE}
+
+ def __init__(self, change, cols1, cols2, id=None):
+ self.change = change
+ self.cols1 = cols1
+ self.cols2 = cols2
+ self.id = id
+
+ def format(self):
+ out = []
+
+ out.append(highlight(1, self.COLORS[self.change]))
+ out.extend([self.change, ' '])
+
+ out.extend(self._format_changes())
+
+ out.append(highlight(0))
+
+ return ''.join(out)
+
+ def format_patch(self, table):
+ method = {
+ '+' : self._format_insert,
+ '-' : self._format_delete,
+ '*' : self._format_update}
+
+ return method[self.change](table)
+
+ def _format_changes(self):
+ if self.cols1 and not self.cols2:
+ return [', '.join([self._format_value_del(*x) for x in self.cols1.items()])]
+ if not self.cols1 and self.cols2:
+ return [', '.join([self._format_value_add(*x) for x in self.cols2.items()])]
+
+ items = []
+ for i in range(len(self.cols1)):
+ items.append((
+ list(self.cols1.keys())[i],
+ list(self.cols1.values())[i],
+ list(self.cols2.values())[i]))
+
+ return [', '.join([self._format_value_change(*x) for x in items])]
+
+ def _format_value_del(self, k, v):
+ fs = (highlight(1, self.COLORS['-']) + '{}: ' + highlight(0) + '{}')
+ return fs.format(k, v)
+
+ def _format_value_add(self, k, v):
+ fs = (highlight(1, self.COLORS['+']) + '{}: ' + highlight(0) +
+ highlight(1, self.COLORS['V']) + '{}' + highlight(0))
+ return fs.format(k, v)
+
+ def _format_value_change(self, k, v1, v2):
+ fs = (highlight(1, self.COLORS['*']) + '{}: ' + highlight(0) +
+ '{} ▶ ' +
+ highlight(1, self.COLORS['V']) + '{}' + highlight(0))
+ return fs.format(k, v1, v2)
+
+ def _format_insert(self, table):
+ out = ['INSERT INTO ', table, ' (']
+ out.append(', '.join(self.cols2.keys()))
+ out.append(') VALUES (')
+ out.append(', '.join(self.cols2.values()))
+ out.append(');')
+ return ''.join(out)
+
+ def _format_delete(self, table):
+ out = ['DELETE FROM ', table]
+ out.extend(self._format_where())
+ return ''.join(out)
+
+ def _format_update(self, table):
+ out = ['UPDATE ', table, ' SET ']
+ out.append(', '.join([self._format_set(*x) for x in self.cols2.items()]))
+ out.extend(self._format_where())
+ return ''.join(out)
+
+ def _format_set(self, k, v):
+ return '{} = {}'.format(k, v)
+
+ def _format_where(self):
+ out = [' WHERE ']
+ out.extend([self.id[0], ' = '])
+ out.append(self.id[1])
+ out.append(';')
+ return out
+
+class PgDataDiff:
+ def __init__(self, conn1, conn2):
+ self.allowcolor = False
+ self.conn1 = conn1
+ self.conn2 = conn2
+ self.fulltable1 = None
+ self.fulltable2 = None
+
+ def settable1(self, table, schema='public'):
+ self.schema1 = schema
+ self.table1 = table
+ self.fulltable1 = '"' + schema + '"."'+ table + '"'
+
+ def settable2(self, table, schema='public'):
+ self.schema2 = schema
+ self.table2 = table
+ self.fulltable2 = '"' + schema + '"."'+ table + '"'
+
+ def iter_diff(self):
+ '''Return differencies between data of two tables.
+
+ Yields one line at the time.
+
+ '''
+ curs1, curs2 = self._select()
+
+ row1 = curs1.fetchone_adapted()
+ row2 = curs2.fetchone_adapted()
+
+ while True:
+ if row1 is None and row2 is None:
+ break
+ diff = self._compare_row(row1, row2)
+
+ if diff:
+ yield diff
+
+ if diff.change == '-':
+ row1 = curs1.fetchone_adapted()
+ continue
+ if diff.change == '+':
+ row2 = curs2.fetchone_adapted()
+ continue
+ # change == '*' or not diff
+ row1 = curs1.fetchone_adapted()
+ row2 = curs2.fetchone_adapted()
+
+ def print_diff(self):
+ '''Print differencies between data of two tables.
+
+ The output is in human readable form.
+
+ Set allowcolor=True of PgDataDiff instance to get colored output.
+
+ '''
+ for ln in self.iter_diff():
+ print(ln.format())
+
+ def print_patch(self):
+ '''Print SQL script usable as patch for destination table.
+
+ Supports INSERT, DELETE and UPDATE operations.
+
+ '''
+ for ln in self.iter_diff():
+ print(ln.format_patch(self.fulltable2))
+
+ def _select(self):
+ browser = pgbrowser.PgBrowser(self.conn1)
+ columns = browser.list_columns(schema=self.schema1, table=self.table1)
+ columns_sel = ', '.join(['"' + x['name'] + '"' for x in columns])
+ self.colnames = [x['name'] for x in columns]
+
+ query1 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable1 + ' ORDER BY 1;'
+ query2 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable2 + ' ORDER BY 1;'
+
+ curs1 = self.conn1.cursor()
+ curs2 = self.conn2.cursor()
+
+ curs1.execute(query1)
+ curs2.execute(query2)
+
+ return curs1, curs2
+
+ def _compare_data(self, row1, row2):
+ cols1 = OrderedDict()
+ cols2 = OrderedDict()
+ for i in range(len(row1)):
+ if row1[i] != row2[i]:
+ cols1[self.colnames[i]] = row1[i]
+ cols2[self.colnames[i]] = row2[i]
+ if cols1:
+ id = (self.colnames[0], row1[0])
+ return DiffData('*', cols1, cols2, id=id)
+
+ return None
+
+ def _compare_row(self, row1, row2):
+ if row2 is None:
+ cols1 = OrderedDict(zip(self.colnames, row1))
+ return DiffData('-', cols1, None)
+ if row1 is None:
+ cols2 = OrderedDict(zip(self.colnames, row2))
+ return DiffData('+', None, cols2)
+
+ if row1[0] < row2[0]:
+ cols1 = OrderedDict(zip(self.colnames, row1))
+ id = (self.colnames[0], row1[0])
+ return DiffData('-', cols1, None, id=id)
+ if row1[0] > row2[0]:
+ cols2 = OrderedDict(zip(self.colnames, row2))
+ return DiffData('+', None, cols2)
+
+ return self._compare_data(row1, row2)
+