pgtools/pgdatadiff.py
author Radek Brich <radek.brich@devl.cz>
Wed, 10 Aug 2011 18:34:54 +0200
changeset 6 4ab077c93b2d
permissions -rw-r--r--
Add table diff tool.

# -*- coding: utf-8 -*-
#
# PgDataDiff - compare tables, print data differences
#
# Copyright (c) 2011  Radek Brich <radek.brich@devl.cz>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.


from psycopg2.extensions import adapt

from common.highlight import *
from collections import OrderedDict


class DiffData:
    '''One difference between a source row and a destination row.

    Attributes (stored unchanged from the constructor arguments):

    table    -- table name; inserted verbatim (unquoted) into generated SQL
    change   -- '+' (insert), '-' (delete) or '*' (update); these are the
                keys of the format_patch() dispatch table
    src_cols -- mapping of column name to source value, or None
    dst_cols -- mapping of column name to destination value, or None
    id       -- (column name, value) pair used for the WHERE clause of
                DELETE and UPDATE statements, or None

    '''

    # Highlight attributes per change marker. 'V' is not a change marker:
    # it is the colour used for new values in the human readable output.
    COLORS = {
        '+' : BOLD | GREEN,
        '-' : BOLD | RED,
        '*' : BOLD | YELLOW,
        'V' : BOLD | WHITE}

    def __init__(self, table, change, src_cols, dst_cols, id=None):
        self.table = table
        self.change = change
        self.src_cols = src_cols
        self.dst_cols = dst_cols
        self.id = id

    def format(self):
        '''Return the difference as one human readable line.

        The line is the change marker followed by the formatted columns,
        wrapped between highlight(1, ...) and highlight(0) from
        common.highlight (presumably start/reset sequences -- confirm there).

        Raises KeyError when self.change is not a key of COLORS.

        '''
        out = [highlight(1, self.COLORS[self.change]), self.change, ' ']
        out.extend(self._format_changes())
        out.append(highlight(0))
        return ''.join(out)

    def format_patch(self):
        '''Return one SQL statement applying this change to the table.

        Raises KeyError when self.change is not '+', '-' or '*'.

        '''
        method = {
            '+' : self._format_insert,
            '-' : self._format_delete,
            '*' : self._format_update}
        return method[self.change]()

    @staticmethod
    def _quoted(value):
        '''Return value as SQL literal text, quoted by psycopg2.'''
        # NOTE(review): the adapter is used without prepare(connection), so
        # text encoding falls back to the adapter's default -- confirm that
        # non-ASCII strings survive getquoted().decode().
        return adapt(value).getquoted().decode()

    def _format_changes(self):
        '''Return a one-item list holding the formatted column changes.'''
        # Missing mappings are treated like empty ones.
        src_cols = self.src_cols or {}
        dst_cols = self.dst_cols or {}

        if src_cols and not dst_cols:
            # Only source data: the row was removed.
            return [', '.join(
                [self._format_value_del(k, v) for k, v in src_cols.items()])]
        if dst_cols and not src_cols:
            # Only destination data: the row was added.
            return [', '.join(
                [self._format_value_add(k, v) for k, v in dst_cols.items()])]

        # Both sides present: pair the values by position, names come from
        # the source mapping. A single zip avoids rebuilding key/value lists
        # for every column.
        pairs = zip(src_cols.items(), dst_cols.values())
        return [', '.join(
            [self._format_value_change(k, v1, v2) for (k, v1), v2 in pairs])]

    def _format_value_del(self, k, v):
        '''Format a removed column: highlighted name, plain value.'''
        fs = (highlight(1, self.COLORS['-']) + '{}: ' + highlight(0) + '{}')
        return fs.format(k, self._quoted(v))

    def _format_value_add(self, k, v):
        '''Format an added column: highlighted name, highlighted value.'''
        fs = (highlight(1, self.COLORS['+']) + '{}: ' + highlight(0) +
            highlight(1, self.COLORS['V']) + '{}' + highlight(0))
        return fs.format(k, self._quoted(v))

    def _format_value_change(self, k, v1, v2):
        '''Format a changed column as "name: old ▶ new".'''
        fs = (highlight(1, self.COLORS['*']) + '{}: ' + highlight(0) +
            '{} ▶ ' +
            highlight(1, self.COLORS['V']) + '{}' + highlight(0))
        return fs.format(k, self._quoted(v1), self._quoted(v2))

    def _format_insert(self):
        '''Return an INSERT statement built from dst_cols.'''
        names = ', '.join(self.dst_cols)
        values = ', '.join([self._quoted(v) for v in self.dst_cols.values()])
        return ''.join(
            ['INSERT INTO ', self.table, ' (', names, ') VALUES (', values, ');'])

    def _format_delete(self):
        '''Return a DELETE statement for the row identified by self.id.'''
        return ''.join(['DELETE FROM ', self.table] + self._format_where())

    def _format_update(self):
        '''Return an UPDATE statement setting dst_cols on the row self.id.'''
        assignments = ', '.join(
            [self._format_set(k, v) for k, v in self.dst_cols.items()])
        out = ['UPDATE ', self.table, ' SET ', assignments]
        out.extend(self._format_where())
        return ''.join(out)

    def _format_set(self, k, v):
        '''Return a single "column = value" assignment.'''
        return '{} = {}'.format(k, self._quoted(v))

    def _format_where(self):
        '''Return the pieces of " WHERE column = value;" as a list.

        Raises ValueError when no id was given: without it the statement
        could not be restricted to a single row.

        '''
        if self.id is None:
            raise ValueError(
                "cannot build WHERE clause for '{}' change on table {}: "
                "row id is not set".format(self.change, self.table))
        column, value = self.id[0], self.id[1]
        return [' WHERE ', column, ' = ', self._quoted(value), ';']

class PgDataDiff:
    '''Compare rows of two tables and report the differences.

    src_rows and dst_rows are lists of rows (indexable sequences of column
    values); col_names gives the column name for each position. The first
    column is used as the row identifier: rows are matched by comparing
    their first values, which pairs rows correctly only when both lists are
    ordered by that column.

    The row lists are consumed (emptied in place) while the diff is
    produced, so one instance can be iterated only once.

    '''

    def __init__(self, table=None, src_rows=None, dst_rows=None, col_names=None):
        # NOTE(review): allowcolor is not read anywhere in this class.
        self.allowcolor = False
        self.table = table
        self.src_rows = src_rows
        self.dst_rows = dst_rows
        self.col_names = col_names

    def iter_diff(self):
        '''Return differences between data of two tables.

        Yields one DiffData object per differing row. Row lists that are
        empty or None yield nothing.

        An IndexError caused by malformed input (a row or col_names that is
        too short) propagates to the caller instead of silently ending the
        diff early.

        '''
        # Explicit termination test: relying on IndexError from indexing an
        # empty list would also hide IndexErrors raised by bad row data.
        while self.src_rows or self.dst_rows:
            diff = self._compare_row(self.src_rows, self.dst_rows)
            if diff is not None:
                yield diff

    def print_diff(self):
        '''Print differences between data of two tables.

        The output is in human readable form.

        Set allowcolor=True of PgDataDiff instance to get colored output.

        '''
        for ln in self.iter_diff():
            print(ln.format())

    def print_patch(self):
        '''Print SQL script usable as patch for destination table.

        Supports INSERT, DELETE and UPDATE operations.

        '''
        for ln in self.iter_diff():
            print(ln.format_patch())

    def _deleted(self, src):
        '''Return the '-' DiffData for a row present only in the source.'''
        src_cols = OrderedDict(zip(self.col_names, src))
        # The id is always supplied so a DELETE statement can be generated.
        row_id = (self.col_names[0], src[0])
        return DiffData(self.table, '-', src_cols, None, id=row_id)

    def _inserted(self, dst):
        '''Return the '+' DiffData for a row present only in the destination.'''
        dst_cols = OrderedDict(zip(self.col_names, dst))
        return DiffData(self.table, '+', None, dst_cols)

    def _compare_data(self, src, dst):
        '''Compare two rows having the same identifier.

        Returns a '*' DiffData holding only the columns whose values differ,
        or None when every column of src equals its counterpart in dst.

        Raises IndexError when dst is shorter than src, or when col_names
        has no entry for a differing column.

        '''
        src_cols = OrderedDict()
        dst_cols = OrderedDict()
        for i, src_val in enumerate(src):
            dst_val = dst[i]
            if src_val != dst_val:
                name = self.col_names[i]
                src_cols[name] = src_val
                dst_cols[name] = dst_val
        if not src_cols:
            return None

        row_id = (self.col_names[0], src[0])
        return DiffData(self.table, '*', src_cols, dst_cols, id=row_id)

    def _compare_row(self, src_rows, dst_rows):
        '''Consume the next row(s) from the lists and return their difference.

        Removes the head of one or both lists. Returns a DiffData, or None
        when the two head rows are identical. At least one list must be
        non-empty.

        '''
        # One side exhausted: everything left on the other side is a
        # deletion (source only) or an insertion (destination only).
        if src_rows and not dst_rows:
            return self._deleted(src_rows.pop(0))
        if dst_rows and not src_rows:
            return self._inserted(dst_rows.pop(0))

        src = src_rows[0]
        dst = dst_rows[0]

        if src[0] < dst[0]:
            # Source identifier has no counterpart in the destination.
            del src_rows[0]
            return self._deleted(src)
        if src[0] > dst[0]:
            # Destination identifier has no counterpart in the source.
            del dst_rows[0]
            return self._inserted(dst)

        # Same identifier on both sides: compare the remaining columns.
        del src_rows[0]
        del dst_rows[0]
        return self._compare_data(src, dst)