Add table diff tool.
authorRadek Brich <radek.brich@devl.cz>
Wed, 10 Aug 2011 18:34:54 +0200
changeset 6 4ab077c93b2d
parent 5 57cf8fdff5ed
child 7 685b20d2d3ab
Add table diff tool.
pgtoolkit.conf.example
pgtools/pgdatadiff.py
pgtools/pgdiff.py
tablediff.py
--- a/pgtoolkit.conf.example	Wed Jul 20 10:54:07 2011 +0200
+++ b/pgtoolkit.conf.example	Wed Aug 10 18:34:54 2011 +0200
@@ -6,5 +6,5 @@
 ### meta database
 # connection string (dsn) to meta db
 meta_db = 'host=10.8.0.1 dbname=central'
-# query in meta db, input is database name (will be placed instead of %s), output is values for create_conn
+# query from meta db, input is database name (will be placed instead of %s), output is values for create_conn
 meta_query = '''SELECT host, port, dbname, user, password FROM config.databases WHERE name = %s LIMIT 1'''
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pgtools/pgdatadiff.py	Wed Aug 10 18:34:54 2011 +0200
@@ -0,0 +1,209 @@
+# -*- coding: utf-8 -*-
+#
+# PgDataDiff - compare tables, print data differences
+#
+# Copyright (c) 2011  Radek Brich <radek.brich@devl.cz>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+
+from psycopg2.extensions import adapt
+
+from common.highlight import *
+from collections import OrderedDict
+
+
+class DiffData:  # one row-level difference: '+' added, '-' removed, '*' changed
+    COLORS = {  # change marker -> highlight flags; 'V' highlights new values
+        '+' : BOLD | GREEN,
+        '-' : BOLD | RED,
+        '*' : BOLD | YELLOW,
+        'V' : BOLD | WHITE}
+
+    def __init__(self, table, change, src_cols, dst_cols, id=None):
+        self.table = table        # table name, placed verbatim into generated SQL
+        self.change = change      # one of '+', '-', '*' (see COLORS, format_patch)
+        self.src_cols = src_cols  # mapping column name -> source value, or None
+        self.dst_cols = dst_cols  # mapping column name -> destination value, or None
+        self.id = id              # (key column, key value) for WHERE clauses, or None
+
+    def format(self):
+        '''Return the difference as one highlighted, human readable string.'''
+        out = []
+        out.append(highlight(1, self.COLORS[self.change]))
+        out.extend([self.change, ' '])
+
+        out.extend(self._format_changes())
+
+        out.append(highlight(0))
+
+        return ''.join(out)
+
+    def format_patch(self):
+        '''Return the difference as one SQL statement (INSERT, DELETE or UPDATE).'''
+        method = {
+            '+' : self._format_insert,
+            '-' : self._format_delete,
+            '*' : self._format_update}
+        return method[self.change]()
+
+    def _format_changes(self):
+        '''Return a one-item list with the column changes joined by commas.'''
+        if self.src_cols and not self.dst_cols:
+            return [', '.join([self._format_value_del(*x) for x in self.src_cols.items()])]
+        if not self.src_cols and self.dst_cols:
+            return [', '.join([self._format_value_add(*x) for x in self.dst_cols.items()])]
+
+        # Both sides are present: walk them in parallel instead of rebuilding
+        # key/value lists for every index. Assumes src_cols and dst_cols hold
+        # the same columns in the same order (as PgDataDiff._compare_data does).
+        items = zip(self.src_cols.keys(),
+                    self.src_cols.values(),
+                    self.dst_cols.values())
+        return [', '.join([self._format_value_change(*x) for x in items])]
+
+    def _format_value_del(self, k, v):  # 'name: value' with the name in '-' color
+        fs = (highlight(1, self.COLORS['-']) + '{}: ' + highlight(0) + '{}')
+        return fs.format(k, adapt(v).getquoted().decode())
+
+    def _format_value_add(self, k, v):  # 'name: value', name in '+' and value in 'V' color
+        fs = (highlight(1, self.COLORS['+']) + '{}: ' + highlight(0) +
+            highlight(1, self.COLORS['V']) + '{}' + highlight(0))
+        return fs.format(k, adapt(v).getquoted().decode())
+
+    def _format_value_change(self, k, v1, v2):  # 'name: old ▶ new', new in 'V' color
+        fs = (highlight(1, self.COLORS['*']) + '{}: ' + highlight(0) +
+            '{} ▶ ' +
+            highlight(1, self.COLORS['V']) + '{}' + highlight(0))
+        return fs.format(k,
+            adapt(v1).getquoted().decode(),
+            adapt(v2).getquoted().decode())
+
+    def _format_insert(self):  # INSERT of all dst_cols; values are quoted via adapt()
+        out = ['INSERT INTO ', self.table, ' (']
+        out.append(', '.join(self.dst_cols.keys()))
+        out.append(') VALUES (')
+        out.append(', '.join([adapt(v).getquoted().decode() for v in self.dst_cols.values()]))
+        out.append(');')
+        return ''.join(out)
+
+    def _format_delete(self):  # DELETE of the row identified by self.id
+        out = ['DELETE FROM ', self.table]
+        out.extend(self._format_where())
+        return ''.join(out)
+
+    def _format_update(self):  # UPDATE setting dst_cols on the row identified by self.id
+        out = ['UPDATE ', self.table, ' SET ']
+        out.append(', '.join([self._format_set(*x) for x in self.dst_cols.items()]))
+        out.extend(self._format_where())
+        return ''.join(out)
+
+    def _format_set(self, k, v):  # 'column = quoted value'
+        return '{} = {}'.format(k, adapt(v).getquoted().decode())
+
+    def _format_where(self):  # needs self.id to be set; returns a list of string parts
+        out = [' WHERE ']
+        out.extend([self.id[0], ' = '])
+        out.append(adapt(self.id[1]).getquoted().decode())
+        out.append(';')
+        return out
+
+class PgDataDiff:  # merge-compares two row lists that are sorted by their first column
+    def __init__(self, table=None, src_rows=None, dst_rows=None, col_names=None):
+        self.allowcolor = False     # NOTE(review): never read inside this class
+        self.table = table          # table name, passed on to every DiffData
+        self.src_rows = src_rows    # list of source rows; consumed while diffing
+        self.dst_rows = dst_rows    # list of destination rows; consumed while diffing
+        self.col_names = col_names  # column names; the first one is the key column
+
+    def iter_diff(self):
+        '''Return differences between data of two tables.
+
+        Yields one DiffData object at a time. Both row lists are emptied
+        in the process, so the diff can be iterated only once.
+        '''
+        # Loop on an explicit condition instead of catching IndexError from
+        # _compare_row, so that an unrelated IndexError (e.g. rows narrower
+        # than expected) is reported instead of silently ending the diff.
+        while self.src_rows or self.dst_rows:
+            diff = self._compare_row(self.src_rows, self.dst_rows)
+
+            if diff:
+                yield diff
+
+    def print_diff(self):
+        '''Print differences between data of two tables.
+
+        The output is in human readable form.
+
+        Set allowcolor=True of PgDataDiff instance to get colored output
+        (NOTE(review): allowcolor is never read by this class - confirm).
+        '''
+        for ln in self.iter_diff():
+            print(ln.format())
+
+    def print_patch(self):
+        '''Print the differences as an SQL script, one statement per difference.
+
+        Rows only in src_rows give DELETE, rows only in dst_rows give INSERT,
+        rows whose values differ give UPDATE to the dst_rows values.
+        '''
+        for ln in self.iter_diff():
+            print(ln.format_patch())
+
+    def _compare_data(self, src, dst):
+        # Collect only the columns whose values differ; None when rows are equal.
+        src_cols = OrderedDict()
+        dst_cols = OrderedDict()
+        for i, old in enumerate(src):
+            if old != dst[i]:
+                src_cols[self.col_names[i]] = old
+                dst_cols[self.col_names[i]] = dst[i]
+        if src_cols:
+            key = (self.col_names[0], src[0])
+            return DiffData(self.table, '*', src_cols, dst_cols, id=key)
+        return None
+
+    def _compare_row(self, src_rows, dst_rows):  # one merge step; pops what it consumes
+        if src_rows and not dst_rows:
+            src = src_rows.pop(0)
+            src_cols = OrderedDict(zip(self.col_names, src))
+            # id is needed here too: the DELETE statement's WHERE is built from it.
+            key = (self.col_names[0], src[0])
+            return DiffData(self.table, '-', src_cols, None, id=key)
+        if dst_rows and not src_rows:
+            dst = dst_rows.pop(0)
+            dst_cols = OrderedDict(zip(self.col_names, dst))
+            return DiffData(self.table, '+', None, dst_cols)
+
+        src = src_rows[0]  # raises IndexError when both lists are empty
+        dst = dst_rows[0]
+        if src[0] < dst[0]:
+            del src_rows[0]
+            src_cols = OrderedDict(zip(self.col_names, src))
+            key = (self.col_names[0], src[0])
+            return DiffData(self.table, '-', src_cols, None, id=key)
+        if src[0] > dst[0]:
+            del dst_rows[0]
+            dst_cols = OrderedDict(zip(self.col_names, dst))
+            return DiffData(self.table, '+', None, dst_cols)
+        del src_rows[0]
+        del dst_rows[0]
+        return self._compare_data(src, dst)
+
--- a/pgtools/pgdiff.py	Wed Jul 20 10:54:07 2011 +0200
+++ b/pgtools/pgdiff.py	Wed Aug 10 18:34:54 2011 +0200
@@ -29,18 +29,18 @@
 
 
 class DiffBase:
+    COLORS = {
+        '+' : BOLD | GREEN,
+        '-' : BOLD | RED,
+        '*' : BOLD | YELLOW}
+    
     def __init__(self):
         self.changes = None
 
     def format(self):
         out = ['  ' * self.level]
 
-        if self.change == '+':
-            out.append(highlight(1, BOLD | GREEN))
-        elif self.change == '-':
-            out.append(highlight(1, BOLD | RED))
-        else:
-            out.append(highlight(1, BOLD | YELLOW))
+        out.append(highlight(1, self.COLORS[self.change]))
         out.append(self.change)
         
         out += [' ', self.type, ' ', self.name, highlight(0)]
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tablediff.py	Wed Aug 10 18:34:54 2011 +0200
@@ -0,0 +1,52 @@
+#!/usr/bin/env python3.2
+#
+# Print differences between data in two tables of the same schema.
+#
+# Requirements:
+#  * The first column of both tables must be a numeric primary key.
+#  * Destination table must contain all columns from source table.
+#    Order is not important.
+#
+
+from pgtools import pgmanager, pgbrowser, pgdatadiff
+from toolbase import SrcDstTool
+
+
+class TableDiffTool(SrcDstTool):  # CLI tool: print data differences of one table as SQL
+    def __init__(self):
+        SrcDstTool.__init__(self, name='tablediff', desc='Table diff.')
+
+        self.parser.add_argument('table', metavar='table', type=str, help='Table name')
+
+        self.init()
+
+    def main(self):
+        if '.' in self.args.table:
+            schema, table = self.args.table.split('.', 1)
+        else:
+            # A bare table name used to leave schema unbound (NameError below);
+            # fall back to PostgreSQL's default schema.
+            schema, table = 'public', self.args.table
+
+        # The column list is taken from the source table only.
+        srcbrowser = pgbrowser.PgBrowser(self.pgm.get_conn('src'))
+        columns = srcbrowser.list_columns(schema=schema, table=table)
+        columns_sel = ', '.join(['"' + x['name'] + '"' for x in columns])
+
+        # Same query on both sides; PgDataDiff needs rows sorted by the first column.
+        table_fullname = '"' + schema + '"."' + table + '"'
+        query = 'SELECT ' + columns_sel + ' FROM ' + table_fullname + ' ORDER BY 1;'
+        with self.pgm.cursor('src') as curs:
+            curs.execute(query)
+            src_rows = curs.fetchall()
+        with self.pgm.cursor('dst') as curs:
+            curs.execute(query)
+            dst_rows = curs.fetchall()
+
+        pgdd = pgdatadiff.PgDataDiff(table_fullname,
+            src_rows, dst_rows, [x['name'] for x in columns])
+        pgdd.print_patch()
+
+
+tool = TableDiffTool()  # registers the 'table' argument and calls self.init()
+tool.main()  # no __main__ guard: executed whenever this file is run or imported