pgtoolkit/pgdatadiff.py
author Radek Brich <radek.brich@devl.cz>
Fri, 11 May 2012 14:16:36 +0200
changeset 35 e7f79c4a27ce
parent 31 c2e6e24b83d9
child 41 6aad5e35efe8
permissions -rw-r--r--
TableCopyTool: order tables with respect to references.

# -*- coding: utf-8 -*-
#
# PgDataDiff - compare tables, print data differences
#
# Copyright (c) 2011  Radek Brich <radek.brich@devl.cz>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.


from collections import OrderedDict

from pgtoolkit import pgbrowser
from pgtoolkit.highlight import *


class DiffData:
    '''A single difference between two rows.

    The difference can be rendered as one highlighted, human readable line
    (format) or as an SQL statement (format_patch).

    '''

    # Highlight attributes per marker: the three change kinds ('+', '-', '*')
    # and 'V' for new values. 'K' is defined but not referenced in this class.
    COLORS = {
        '+' : BOLD | GREEN,
        '-' : BOLD | RED,
        '*' : BOLD | YELLOW,
        'V' : BOLD | WHITE,
        'K' : BOLD | BLUE}

    def __init__(self, change, cols1, cols2, key=None):
        '''Store the description of one changed row.

        change - one of '+', '-', '*' (add, remove, update)
        cols1 - original column values (OrderedDict)
        cols2 - new column values (OrderedDict)
        key - primary key columns (OrderedDict)

        Values are joined into the output verbatim, so they have to be
        strings already.

        '''
        self.change = change
        self.cols1 = cols1
        self.cols2 = cols2
        self.key = key

    def format(self):
        '''Return the change as one highlighted, human readable line.

        The line starts with the change marker, followed by the affected
        columns as produced by _format_changes().

        '''
        out = [highlight(1, self.COLORS[self.change]), self.change, ' ']
        out.extend(self._format_changes())
        out.append(highlight(0))
        return ''.join(out)

    def format_patch(self, table):
        '''Return SQL statement which applies this change to `table`.

        table - table name, inserted into the statement verbatim

        Produces INSERT for '+', DELETE for '-' and UPDATE for '*'.
        Raises KeyError for any other change marker.

        '''
        method = {
            '+' : self._format_insert,
            '-' : self._format_delete,
            '*' : self._format_update}

        return method[self.change](table)

    def _format_changes(self):
        '''Return list of string fragments describing the changed columns.'''
        # Only original values present: the row was removed.
        if self.cols1 and not self.cols2:
            return [', '.join([self._format_value_del(*x) for x in self.cols1.items()])]
        # Only new values present: the row was added.
        if not self.cols1 and self.cols2:
            return [', '.join([self._format_value_add(*x) for x in self.cols2.items()])]

        out = []
        if self.key:
            # Prefix an update with the primary key, so the row can be identified.
            for colname in self.key:
                out.extend([highlight(1, self.COLORS['*']), colname, ': ', highlight(0), self.key[colname], ', '])

        # Old and new values are paired by position; column names are taken
        # from cols1.
        items = zip(self.cols1.keys(), self.cols1.values(), self.cols2.values())
        out.append(', '.join([self._format_value_change(*x) for x in items]))

        return out

    def _format_value_del(self, k, v):
        '''Format column `k` of removed row: highlighted name, plain value.'''
        fs = (highlight(1, self.COLORS['-']) + '{}: ' + highlight(0) + '{}')
        return fs.format(k, v)

    def _format_value_add(self, k, v):
        '''Format column `k` of added row: highlighted name and value.'''
        fs = (highlight(1, self.COLORS['+']) + '{}: ' + highlight(0) +
            highlight(1, self.COLORS['V']) + '{}' + highlight(0))
        return fs.format(k, v)

    def _format_value_change(self, k, v1, v2):
        '''Format column `k` of updated row as "name: old ▶ new".'''
        fs = (highlight(1, self.COLORS['*']) + '{}: ' + highlight(0) +
            '{} ▶ ' +
            highlight(1, self.COLORS['V']) + '{}' + highlight(0))
        return fs.format(k, v1, v2)

    def _format_insert(self, table):
        '''Return INSERT statement built from the new column values (cols2).'''
        out = ['INSERT INTO ', table, ' (']
        out.append(', '.join(self.cols2.keys()))
        out.append(') VALUES (')
        out.append(', '.join(self.cols2.values()))
        out.append(');')
        return ''.join(out)

    def _format_delete(self, table):
        '''Return DELETE statement restricted to the primary key.'''
        out = ['DELETE FROM ', table]
        out.extend(self._format_where())
        return ''.join(out)

    def _format_update(self, table):
        '''Return UPDATE statement which sets the new values (cols2).'''
        out = ['UPDATE ', table, ' SET ']
        out.append(', '.join([self._format_set(*x) for x in self.cols2.items()]))
        out.extend(self._format_where())
        return ''.join(out)

    def _format_set(self, k, v):
        '''Return one "column = value" assignment for the SET clause.'''
        return '{} = {}'.format(k, v)

    def _format_where(self):
        '''Return fragments of WHERE clause matching the primary key.

        The fragments include the terminating semicolon.

        Raises ValueError when no key is set. Without this check, the
        statement would be emitted with no condition at all and a DELETE
        or UPDATE would then affect every row of the table.

        '''
        if not self.key:
            raise ValueError('Cannot build WHERE clause without primary key values.')
        conditions = [colname + ' = ' + value for colname, value in self.key.items()]
        return [' WHERE ', ' AND '.join(conditions), ';']

class PgDataDiff:
    '''Compare data of two tables and report the differences.

    Both tables are selected ordered by primary key and walked in parallel.
    Column list and primary key are read from table 1 (using connection 1)
    and used for both queries.

    Usage: create the instance with two connections, call settable1()
    and settable2(), then iter_diff(), print_diff() or print_patch().

    '''

    def __init__(self, conn1, conn2):
        '''Remember the connections. Tables have to be set separately.

        conn1 - connection used for table 1 and for reading its metadata
        conn2 - connection used for table 2

        '''
        # NOTE(review): nothing in this class reads allowcolor, although
        # the print_diff() docstring refers to it - confirm where it is used.
        self.allowcolor = False
        self.conn1 = conn1
        self.conn2 = conn2
        self.fulltable1 = None
        self.fulltable2 = None

    def settable1(self, table, schema='public'):
        '''Set table 1 (the original side of the comparison).'''
        self.schema1 = schema
        self.table1 = table
        self.fulltable1 = '"' + schema + '"."'+ table + '"'

    def settable2(self, table, schema='public'):
        '''Set table 2 (the new side of the comparison).'''
        self.schema2 = schema
        self.table2 = table
        self.fulltable2 = '"' + schema + '"."'+ table + '"'

    def iter_diff(self):
        '''Return differences between data of two tables.

        Yields one DiffData object per differing row:
        '-' for row found only in table 1,
        '+' for row found only in table 2,
        '*' for row found in both tables with different values.

        '''
        curs1, curs2 = self._select()

        try:
            row1 = curs1.fetchone_adapted()
            row2 = curs2.fetchone_adapted()

            while row1 is not None or row2 is not None:
                diff = self._compare_row(row1, row2)

                if diff is not None:
                    yield diff

                    # Advance only the side which has the extra row,
                    # the other row is compared again in next iteration.
                    if diff.change == '-':
                        row1 = curs1.fetchone_adapted()
                        continue
                    if diff.change == '+':
                        row2 = curs2.fetchone_adapted()
                        continue
                # change == '*' or not diff
                row1 = curs1.fetchone_adapted()
                row2 = curs2.fetchone_adapted()
        finally:
            # Release the cursors even when the consumer stops iterating early.
            curs1.close()
            curs2.close()

    def print_diff(self):
        '''Print differences between data of two tables.

        The output is in human readable form, one line per differing row.

        '''
        for ln in self.iter_diff():
            print(ln.format())

    def print_patch(self):
        '''Print SQL script which changes data of table 1 to match table 2.

        The statements are written against table 1.
        Supports INSERT, DELETE and UPDATE operations.

        '''
        for ln in self.iter_diff():
            print(ln.format_patch(self.fulltable1))

    def _select(self):
        '''Execute the ordered selects and return both cursors.

        Sets self.colnames and self.pkeycolnames as a side effect.

        Raises Exception when table 1 is not found or has no primary key.

        '''
        browser = pgbrowser.PgBrowser(self.conn1)

        columns = browser.list_columns(schema=self.schema1, table=self.table1, order=1)
        if not columns:
            raise Exception('Table %s.%s not found.' % (self.schema1, self.table1))
        columns_sel = ', '.join(['"' + x['name'] + '"' for x in columns])
        self.colnames = [x['name'] for x in columns]

        pkey = [ind for ind in browser.list_indexes(schema=self.schema1, table=self.table1) if ind['primary']]
        if not pkey:
            raise Exception('Table %s.%s has no primary key.' % (self.schema1, self.table1))
        pkey = pkey[0]
        pkey_sel = ', '.join(['"' + x + '"' for x in pkey['columns']])
        self.pkeycolnames = pkey['columns']

        # Same columns and same ordering for both tables, so the rows
        # can be walked in parallel.
        query1 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable1 + ' ORDER BY ' + pkey_sel
        query2 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable2 + ' ORDER BY ' + pkey_sel

        curs1 = self.conn1.cursor()
        curs2 = self.conn2.cursor()

        curs1.execute(query1)
        curs2.execute(query2)

        return curs1, curs2

    def _row_key(self, row):
        '''Return primary key columns of the row as OrderedDict.'''
        return OrderedDict((colname, row[colname]) for colname in self.pkeycolnames)

    def _compare_keys(self, row1, row2):
        '''Compare primary keys of two rows lexicographically.

        Returns -1 when key of row1 sorts before key of row2,
        1 when it sorts after, 0 when the keys are equal.

        The first key column which differs decides; later columns are
        consulted only when all previous ones are equal.

        '''
        # NOTE(review): rows come from fetchone_adapted(); if the values are
        # SQL literal strings, Python ordering may disagree with the ORDER BY
        # of the server (e.g. '10' < '9') - confirm.
        for keyname in self.pkeycolnames:
            if row1[keyname] < row2[keyname]:
                return -1
            if row1[keyname] > row2[keyname]:
                return 1
        return 0

    def _compare_data(self, row1, row2):
        '''Compare values of two rows which have the same primary key.

        Returns DiffData('*') holding only the columns which differ,
        or None when the rows are equal.

        '''
        cols1 = OrderedDict()
        cols2 = OrderedDict()
        for name in row1:
            if row1[name] != row2[name]:
                cols1[name] = row1[name]
                cols2[name] = row2[name]
        if cols1:
            return DiffData('*', cols1, cols2, key=self._row_key(row1))

        return None

    def _compare_row(self, row1, row2):
        '''Compare current rows of both tables.

        row1, row2 - rows of table 1 and table 2, None when the table
        is exhausted (at most one of them may be None)

        Returns DiffData, or None when the rows are equal.

        '''
        # Table 2 exhausted: everything left in table 1 was removed.
        if row2 is None:
            return DiffData('-', row1, None, key=self._row_key(row1))
        # Table 1 exhausted: everything left in table 2 was added.
        if row1 is None:
            return DiffData('+', None, row2)

        order = self._compare_keys(row1, row2)
        if order < 0:
            # row1 sorts first, thus it has no counterpart in table 2.
            return DiffData('-', row1, None, key=self._row_key(row1))
        if order > 0:
            # row2 sorts first, thus it has no counterpart in table 1.
            return DiffData('+', None, row2)

        return self._compare_data(row1, row2)