pgtoolkit/pgdatadiff.py
changeset 104 d8ff52a0390f
parent 103 24e94a3da209
child 105 10551741f61f
equal deleted inserted replaced
103:24e94a3da209 104:d8ff52a0390f
     1 # -*- coding: utf-8 -*-
       
     2 #
       
     3 # PgDataDiff - compare tables, print data differencies
       
     4 #
       
     5 # Copyright (c) 2011  Radek Brich <radek.brich@devl.cz>
       
     6 #
       
     7 # Permission is hereby granted, free of charge, to any person obtaining a copy
       
     8 # of this software and associated documentation files (the "Software"), to deal
       
     9 # in the Software without restriction, including without limitation the rights
       
    10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
       
    11 # copies of the Software, and to permit persons to whom the Software is
       
    12 # furnished to do so, subject to the following conditions:
       
    13 #
       
    14 # The above copyright notice and this permission notice shall be included in
       
    15 # all copies or substantial portions of the Software.
       
    16 #
       
    17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
       
    18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
       
    19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
       
    20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
       
    21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
       
    22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
       
    23 # THE SOFTWARE.
       
    24 
       
    25 
       
    26 from collections import OrderedDict
       
    27 
       
    28 from pgtoolkit import pgbrowser
       
    29 from pycolib.ansicolor import *
       
    30 
       
    31 import sys
       
    32 
       
    33 
       
    34 class DiffData:
       
    35     COLORS = {
       
    36         '+' : BOLD | GREEN,
       
    37         '-' : BOLD | RED,
       
    38         '*' : BOLD | YELLOW,
       
    39         'V' : BOLD | WHITE,
       
    40         'K' : BOLD | BLUE}
       
    41 
       
    42     def __init__(self, change, cols1, cols2, key=None):
       
    43         """
       
    44 
       
    45         change - one of '+', '-', '*' (add, remove, update)
       
    46         cols1 - original column values (OrderedDict)
       
    47         cols2 - new column values (OrderedDict)
       
    48         key - primary key columns (OrderedDict)
       
    49 
       
    50         """
       
    51         self.change = change
       
    52         self.cols1 = cols1
       
    53         self.cols2 = cols2
       
    54         self.key = key
       
    55 
       
    56     def format(self):
       
    57         out = []
       
    58 
       
    59         out.append(highlight(1, self.COLORS[self.change]))
       
    60         out.extend([self.change, ' '])
       
    61 
       
    62         out.extend(self._format_changes())
       
    63 
       
    64         out.append(highlight(0))
       
    65 
       
    66         return ''.join(out)
       
    67 
       
    68     def format_patch(self, table):
       
    69         method = {
       
    70             '+' : self._format_insert,
       
    71             '-' : self._format_delete,
       
    72             '*' : self._format_update}
       
    73 
       
    74         return method[self.change](table)
       
    75 
       
    76     def _format_changes(self):
       
    77         if self.cols1 and not self.cols2:
       
    78             return [', '.join([self._format_value_del(*x) for x in self.cols1.items()])]
       
    79         if not self.cols1 and self.cols2:
       
    80             return [', '.join([self._format_value_add(*x) for x in self.cols2.items()])]
       
    81 
       
    82         out = []
       
    83         if self.key:
       
    84             for colname in self.key:
       
    85                 out.extend([highlight(1, self.COLORS['*']), colname, ': ', highlight(0), self.key[colname], ', '])
       
    86 
       
    87         items = []
       
    88         for i in range(len(self.cols1)):
       
    89             items.append((
       
    90                 list(self.cols1.keys())[i],
       
    91                 list(self.cols1.values())[i],
       
    92                 list(self.cols2.values())[i]))
       
    93         out.extend([', '.join([self._format_value_change(*x) for x in items])])
       
    94 
       
    95         return out
       
    96 
       
    97     def _format_value_del(self, k, v):
       
    98         fs = (highlight(1, self.COLORS['-']) + '{}: ' + highlight(0) + '{}')
       
    99         return fs.format(k, v)
       
   100 
       
   101     def _format_value_add(self, k, v):
       
   102         fs = (highlight(1, self.COLORS['+']) + '{}: ' + highlight(0) +
       
   103             highlight(1, self.COLORS['V']) + '{}' + highlight(0))
       
   104         return fs.format(k, v)
       
   105 
       
   106     def _format_value_change(self, k, v1, v2):
       
   107         fs = (highlight(1, self.COLORS['*']) + '{}: ' + highlight(0) +
       
   108             '{} ▶ ' +
       
   109             highlight(1, self.COLORS['V']) + '{}' + highlight(0))
       
   110         return fs.format(k, v1, v2)
       
   111 
       
   112     def _format_insert(self, table):
       
   113         out = ['INSERT INTO ', table, ' (']
       
   114         out.append(', '.join(self.cols2.keys()))
       
   115         out.append(') VALUES (')
       
   116         out.append(', '.join(self.cols2.values()))
       
   117         out.append(');')
       
   118         return ''.join(out)
       
   119 
       
   120     def _format_delete(self, table):
       
   121         out = ['DELETE FROM ', table]
       
   122         out.extend(self._format_where())
       
   123         return ''.join(out)
       
   124 
       
   125     def _format_update(self, table):
       
   126         out = ['UPDATE ', table, ' SET ']
       
   127         out.append(', '.join([self._format_set(*x) for x in self.cols2.items()]))
       
   128         out.extend(self._format_where())
       
   129         return ''.join(out)
       
   130 
       
   131     def _format_set(self, k, v):
       
   132         return '{} = {}'.format(k, v)
       
   133 
       
   134     def _format_where(self):
       
   135         out = [' WHERE ']
       
   136         for colname in self.key:
       
   137             out.extend([colname, ' = ', self.key[colname], ' AND '])
       
   138         out[-1] = ';'
       
   139         return out
       
   140 
       
   141 class PgDataDiff:
       
   142     def __init__(self, conn1, conn2):
       
   143         self.allowcolor = False
       
   144         self.conn1 = conn1
       
   145         self.conn2 = conn2
       
   146         self.fulltable1 = None
       
   147         self.fulltable2 = None
       
   148 
       
   149     def settable1(self, table, schema='public'):
       
   150         self.schema1 = schema
       
   151         self.table1 = table
       
   152         self.fulltable1 = '"' + schema + '"."'+ table + '"'
       
   153 
       
   154     def settable2(self, table, schema='public'):
       
   155         self.schema2 = schema
       
   156         self.table2 = table
       
   157         self.fulltable2 = '"' + schema + '"."'+ table + '"'
       
   158 
       
   159     def iter_diff(self):
       
   160         """Return differencies between data of two tables.
       
   161 
       
   162         Yields one line at the time.
       
   163 
       
   164         """
       
   165         curs1, curs2 = self._select()
       
   166 
       
   167         row1 = curs1.fetchone_dict()
       
   168         row2 = curs2.fetchone_dict()
       
   169 
       
   170         while True:
       
   171             if row1 is None and row2 is None:
       
   172                 break
       
   173             diff = self._compare_row(row1, row2, curs1.adapt, curs2.adapt)
       
   174 
       
   175             if diff:
       
   176                 yield diff
       
   177 
       
   178                 if diff.change == '-':
       
   179                     row1 = curs1.fetchone_dict()
       
   180                     continue
       
   181                 if diff.change == '+':
       
   182                     row2 = curs2.fetchone_dict()
       
   183                     continue
       
   184             # change == '*' or not diff
       
   185             row1 = curs1.fetchone_dict()
       
   186             row2 = curs2.fetchone_dict()
       
   187 
       
   188         curs1.close()
       
   189         curs2.close()
       
   190 
       
   191     def print_diff(self, file=sys.stdout):
       
   192         """Print differencies between data of two tables.
       
   193 
       
   194         The output is in human readable form.
       
   195 
       
   196         Set allowcolor=True of PgDataDiff instance to get colored output.
       
   197 
       
   198         """
       
   199         for ln in self.iter_diff():
       
   200             print(ln.format(), file=file)
       
   201 
       
   202     def print_patch(self, file=sys.stdout):
       
   203         """Print SQL script usable as patch for destination table.
       
   204 
       
   205         Supports INSERT, DELETE and UPDATE operations.
       
   206 
       
   207         """
       
   208         for ln in self.iter_diff():
       
   209             print(ln.format_patch(self.fulltable1), file=file)
       
   210 
       
   211     def _select(self):
       
   212         browser = pgbrowser.PgBrowser(self.conn1)
       
   213 
       
   214         columns = browser.list_columns(schema=self.schema1, table=self.table1, order=1)
       
   215         if not columns:
       
   216             raise Exception('Table %s.%s not found.' % (self.schema1, self.table1))
       
   217         columns_sel = ', '.join(['"' + x['name'] + '"' for x in columns])
       
   218         self.colnames = [x['name'] for x in columns]
       
   219 
       
   220         pkey = [ind for ind in browser.list_indexes(schema=self.schema1, table=self.table1) if ind['primary']]
       
   221         if not pkey:
       
   222             raise Exception('Table %s.%s has no primary key.' % (self.schema1, self.table1))
       
   223         pkey = pkey[0]
       
   224         pkey_sel = ', '.join(['"' + x + '"' for x in pkey['columns']])
       
   225         self.pkeycolnames = pkey['columns']
       
   226 
       
   227         query1 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable1 + ' ORDER BY ' + pkey_sel
       
   228         query2 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable2 + ' ORDER BY ' + pkey_sel
       
   229 
       
   230         curs1 = self.conn1.cursor('curs1')
       
   231         curs2 = self.conn2.cursor('curs2')
       
   232 
       
   233         curs1.execute(query1)
       
   234         curs2.execute(query2)
       
   235 
       
   236         return curs1, curs2
       
   237 
       
   238     def _compare_data(self, row1, row2):
       
   239         cols1 = OrderedDict()
       
   240         cols2 = OrderedDict()
       
   241         for name in row1.keys():
       
   242             if row1[name] != row2[name]:
       
   243                 cols1[name] = row1[name]
       
   244                 cols2[name] = row2[name]
       
   245         if cols1:
       
   246             key = OrderedDict(zip(self.pkeycolnames, [row1[colname] for colname in self.pkeycolnames]))
       
   247             return DiffData('*', cols1, cols2, key=key)
       
   248 
       
   249         return None
       
   250 
       
   251     def _compare_row(self, row1, row2, adapt1, adapt2):
       
   252         if row2 is None:
       
   253             row1 = adapt1(row1)
       
   254             key = OrderedDict(zip(self.pkeycolnames, [row1[colname] for colname in self.pkeycolnames]))
       
   255             return DiffData('-', row1, None, key=key)
       
   256         if row1 is None:
       
   257             row2 = adapt2(row2)
       
   258             return DiffData('+', None, row2)
       
   259 
       
   260         for keyname in self.pkeycolnames:
       
   261             if row1[keyname] < row2[keyname]:
       
   262                 row1 = adapt1(row1)
       
   263                 key = OrderedDict(zip(self.pkeycolnames, [row1[colname] for colname in self.pkeycolnames]))
       
   264                 return DiffData('-', row1, None, key=key)
       
   265         for keyname in self.pkeycolnames:
       
   266             if row1[keyname] > row2[keyname]:
       
   267                 row2 = adapt2(row2)
       
   268                 return DiffData('+', None, row2)
       
   269 
       
   270         row1 = adapt1(row1)
       
   271         row2 = adapt2(row2)
       
   272         return self._compare_data(row1, row2)
       
   273