pgtoolkit/pgdatadiff.py
changeset 104 d8ff52a0390f
parent 103 24e94a3da209
child 105 10551741f61f
--- a/pgtoolkit/pgdatadiff.py	Mon May 26 18:18:21 2014 +0200
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,273 +0,0 @@
-# -*- coding: utf-8 -*-
-#
-# PgDataDiff - compare tables, print data differencies
-#
-# Copyright (c) 2011  Radek Brich <radek.brich@devl.cz>
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-
-
-from collections import OrderedDict
-
-from pgtoolkit import pgbrowser
-from pycolib.ansicolor import *
-
-import sys
-
-
-class DiffData:
-    COLORS = {
-        '+' : BOLD | GREEN,
-        '-' : BOLD | RED,
-        '*' : BOLD | YELLOW,
-        'V' : BOLD | WHITE,
-        'K' : BOLD | BLUE}
-
-    def __init__(self, change, cols1, cols2, key=None):
-        """
-
-        change - one of '+', '-', '*' (add, remove, update)
-        cols1 - original column values (OrderedDict)
-        cols2 - new column values (OrderedDict)
-        key - primary key columns (OrderedDict)
-
-        """
-        self.change = change
-        self.cols1 = cols1
-        self.cols2 = cols2
-        self.key = key
-
-    def format(self):
-        out = []
-
-        out.append(highlight(1, self.COLORS[self.change]))
-        out.extend([self.change, ' '])
-
-        out.extend(self._format_changes())
-
-        out.append(highlight(0))
-
-        return ''.join(out)
-
-    def format_patch(self, table):
-        method = {
-            '+' : self._format_insert,
-            '-' : self._format_delete,
-            '*' : self._format_update}
-
-        return method[self.change](table)
-
-    def _format_changes(self):
-        if self.cols1 and not self.cols2:
-            return [', '.join([self._format_value_del(*x) for x in self.cols1.items()])]
-        if not self.cols1 and self.cols2:
-            return [', '.join([self._format_value_add(*x) for x in self.cols2.items()])]
-
-        out = []
-        if self.key:
-            for colname in self.key:
-                out.extend([highlight(1, self.COLORS['*']), colname, ': ', highlight(0), self.key[colname], ', '])
-
-        items = []
-        for i in range(len(self.cols1)):
-            items.append((
-                list(self.cols1.keys())[i],
-                list(self.cols1.values())[i],
-                list(self.cols2.values())[i]))
-        out.extend([', '.join([self._format_value_change(*x) for x in items])])
-
-        return out
-
-    def _format_value_del(self, k, v):
-        fs = (highlight(1, self.COLORS['-']) + '{}: ' + highlight(0) + '{}')
-        return fs.format(k, v)
-
-    def _format_value_add(self, k, v):
-        fs = (highlight(1, self.COLORS['+']) + '{}: ' + highlight(0) +
-            highlight(1, self.COLORS['V']) + '{}' + highlight(0))
-        return fs.format(k, v)
-
-    def _format_value_change(self, k, v1, v2):
-        fs = (highlight(1, self.COLORS['*']) + '{}: ' + highlight(0) +
-            '{} ▶ ' +
-            highlight(1, self.COLORS['V']) + '{}' + highlight(0))
-        return fs.format(k, v1, v2)
-
-    def _format_insert(self, table):
-        out = ['INSERT INTO ', table, ' (']
-        out.append(', '.join(self.cols2.keys()))
-        out.append(') VALUES (')
-        out.append(', '.join(self.cols2.values()))
-        out.append(');')
-        return ''.join(out)
-
-    def _format_delete(self, table):
-        out = ['DELETE FROM ', table]
-        out.extend(self._format_where())
-        return ''.join(out)
-
-    def _format_update(self, table):
-        out = ['UPDATE ', table, ' SET ']
-        out.append(', '.join([self._format_set(*x) for x in self.cols2.items()]))
-        out.extend(self._format_where())
-        return ''.join(out)
-
-    def _format_set(self, k, v):
-        return '{} = {}'.format(k, v)
-
-    def _format_where(self):
-        out = [' WHERE ']
-        for colname in self.key:
-            out.extend([colname, ' = ', self.key[colname], ' AND '])
-        out[-1] = ';'
-        return out
-
-class PgDataDiff:
-    def __init__(self, conn1, conn2):
-        self.allowcolor = False
-        self.conn1 = conn1
-        self.conn2 = conn2
-        self.fulltable1 = None
-        self.fulltable2 = None
-
-    def settable1(self, table, schema='public'):
-        self.schema1 = schema
-        self.table1 = table
-        self.fulltable1 = '"' + schema + '"."'+ table + '"'
-
-    def settable2(self, table, schema='public'):
-        self.schema2 = schema
-        self.table2 = table
-        self.fulltable2 = '"' + schema + '"."'+ table + '"'
-
-    def iter_diff(self):
-        """Return differencies between data of two tables.
-
-        Yields one line at the time.
-
-        """
-        curs1, curs2 = self._select()
-
-        row1 = curs1.fetchone_dict()
-        row2 = curs2.fetchone_dict()
-
-        while True:
-            if row1 is None and row2 is None:
-                break
-            diff = self._compare_row(row1, row2, curs1.adapt, curs2.adapt)
-
-            if diff:
-                yield diff
-
-                if diff.change == '-':
-                    row1 = curs1.fetchone_dict()
-                    continue
-                if diff.change == '+':
-                    row2 = curs2.fetchone_dict()
-                    continue
-            # change == '*' or not diff
-            row1 = curs1.fetchone_dict()
-            row2 = curs2.fetchone_dict()
-
-        curs1.close()
-        curs2.close()
-
-    def print_diff(self, file=sys.stdout):
-        """Print differencies between data of two tables.
-
-        The output is in human readable form.
-
-        Set allowcolor=True of PgDataDiff instance to get colored output.
-
-        """
-        for ln in self.iter_diff():
-            print(ln.format(), file=file)
-
-    def print_patch(self, file=sys.stdout):
-        """Print SQL script usable as patch for destination table.
-
-        Supports INSERT, DELETE and UPDATE operations.
-
-        """
-        for ln in self.iter_diff():
-            print(ln.format_patch(self.fulltable1), file=file)
-
-    def _select(self):
-        browser = pgbrowser.PgBrowser(self.conn1)
-
-        columns = browser.list_columns(schema=self.schema1, table=self.table1, order=1)
-        if not columns:
-            raise Exception('Table %s.%s not found.' % (self.schema1, self.table1))
-        columns_sel = ', '.join(['"' + x['name'] + '"' for x in columns])
-        self.colnames = [x['name'] for x in columns]
-
-        pkey = [ind for ind in browser.list_indexes(schema=self.schema1, table=self.table1) if ind['primary']]
-        if not pkey:
-            raise Exception('Table %s.%s has no primary key.' % (self.schema1, self.table1))
-        pkey = pkey[0]
-        pkey_sel = ', '.join(['"' + x + '"' for x in pkey['columns']])
-        self.pkeycolnames = pkey['columns']
-
-        query1 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable1 + ' ORDER BY ' + pkey_sel
-        query2 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable2 + ' ORDER BY ' + pkey_sel
-
-        curs1 = self.conn1.cursor('curs1')
-        curs2 = self.conn2.cursor('curs2')
-
-        curs1.execute(query1)
-        curs2.execute(query2)
-
-        return curs1, curs2
-
-    def _compare_data(self, row1, row2):
-        cols1 = OrderedDict()
-        cols2 = OrderedDict()
-        for name in row1.keys():
-            if row1[name] != row2[name]:
-                cols1[name] = row1[name]
-                cols2[name] = row2[name]
-        if cols1:
-            key = OrderedDict(zip(self.pkeycolnames, [row1[colname] for colname in self.pkeycolnames]))
-            return DiffData('*', cols1, cols2, key=key)
-
-        return None
-
-    def _compare_row(self, row1, row2, adapt1, adapt2):
-        if row2 is None:
-            row1 = adapt1(row1)
-            key = OrderedDict(zip(self.pkeycolnames, [row1[colname] for colname in self.pkeycolnames]))
-            return DiffData('-', row1, None, key=key)
-        if row1 is None:
-            row2 = adapt2(row2)
-            return DiffData('+', None, row2)
-
-        for keyname in self.pkeycolnames:
-            if row1[keyname] < row2[keyname]:
-                row1 = adapt1(row1)
-                key = OrderedDict(zip(self.pkeycolnames, [row1[colname] for colname in self.pkeycolnames]))
-                return DiffData('-', row1, None, key=key)
-        for keyname in self.pkeycolnames:
-            if row1[keyname] > row2[keyname]:
-                row2 = adapt2(row2)
-                return DiffData('+', None, row2)
-
-        row1 = adapt1(row1)
-        row2 = adapt2(row2)
-        return self._compare_data(row1, row2)
-