pgtoolkit/pgdatadiff.py
changeset 9 2fcc8ef0b97d
parent 7 685b20d2d3ab
child 12 203be9022b46
equal deleted inserted replaced
8:2911935c524d 9:2fcc8ef0b97d
       
     1 # -*- coding: utf-8 -*-
       
     2 #
       
     3 # PgDataDiff - compare tables, print data differences
       
     4 #
       
     5 # Copyright (c) 2011  Radek Brich <radek.brich@devl.cz>
       
     6 #
       
     7 # Permission is hereby granted, free of charge, to any person obtaining a copy
       
     8 # of this software and associated documentation files (the "Software"), to deal
       
     9 # in the Software without restriction, including without limitation the rights
       
    10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
       
    11 # copies of the Software, and to permit persons to whom the Software is
       
    12 # furnished to do so, subject to the following conditions:
       
    13 #
       
    14 # The above copyright notice and this permission notice shall be included in
       
    15 # all copies or substantial portions of the Software.
       
    16 #
       
    17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
       
    18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
       
    19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
       
    20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
       
    21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
       
    22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
       
    23 # THE SOFTWARE.
       
    24 
       
    25 
       
    26 from collections import OrderedDict
       
    27 
       
    28 from pgtoolkit import pgbrowser
       
    29 from pgtoolkit.highlight import *
       
    30 
       
    31 
       
    32 class DiffData:
       
    33     COLORS = {
       
    34         '+' : BOLD | GREEN,
       
    35         '-' : BOLD | RED,
       
    36         '*' : BOLD | YELLOW,
       
    37         'V' : BOLD | WHITE}
       
    38     
       
    39     def __init__(self, change, cols1, cols2, id=None):
       
    40         self.change = change
       
    41         self.cols1 = cols1
       
    42         self.cols2 = cols2
       
    43         self.id = id
       
    44     
       
    45     def format(self):
       
    46         out = []
       
    47         
       
    48         out.append(highlight(1, self.COLORS[self.change]))
       
    49         out.extend([self.change, ' '])
       
    50         
       
    51         out.extend(self._format_changes())
       
    52         
       
    53         out.append(highlight(0))
       
    54         
       
    55         return ''.join(out)
       
    56 
       
    57     def format_patch(self, table):
       
    58         method = {
       
    59             '+' : self._format_insert,
       
    60             '-' : self._format_delete,
       
    61             '*' : self._format_update}
       
    62         
       
    63         return method[self.change](table)
       
    64 
       
    65     def _format_changes(self):
       
    66         if self.cols1 and not self.cols2:
       
    67             return [', '.join([self._format_value_del(*x) for x in self.cols1.items()])]
       
    68         if not self.cols1 and self.cols2:
       
    69             return [', '.join([self._format_value_add(*x) for x in self.cols2.items()])]
       
    70         
       
    71         items = []
       
    72         for i in range(len(self.cols1)):
       
    73             items.append((
       
    74                 list(self.cols1.keys())[i],
       
    75                 list(self.cols1.values())[i],
       
    76                 list(self.cols2.values())[i]))
       
    77             
       
    78         return [', '.join([self._format_value_change(*x) for x in items])]
       
    79 
       
    80     def _format_value_del(self, k, v):
       
    81         fs = (highlight(1, self.COLORS['-']) + '{}: ' + highlight(0) + '{}')
       
    82         return fs.format(k, v)
       
    83 
       
    84     def _format_value_add(self, k, v):
       
    85         fs = (highlight(1, self.COLORS['+']) + '{}: ' + highlight(0) + 
       
    86             highlight(1, self.COLORS['V']) + '{}' + highlight(0))
       
    87         return fs.format(k, v)
       
    88 
       
    89     def _format_value_change(self, k, v1, v2):
       
    90         fs = (highlight(1, self.COLORS['*']) + '{}: ' + highlight(0) + 
       
    91             '{} ▶ ' +
       
    92             highlight(1, self.COLORS['V']) + '{}' + highlight(0))
       
    93         return fs.format(k, v1, v2)
       
    94 
       
    95     def _format_insert(self, table):
       
    96         out = ['INSERT INTO ', table, ' (']
       
    97         out.append(', '.join(self.cols2.keys()))
       
    98         out.append(') VALUES (')
       
    99         out.append(', '.join(self.cols2.values()))
       
   100         out.append(');')
       
   101         return ''.join(out)
       
   102     
       
   103     def _format_delete(self, table):
       
   104         out = ['DELETE FROM ', table]
       
   105         out.extend(self._format_where()) 
       
   106         return ''.join(out)
       
   107     
       
   108     def _format_update(self, table):
       
   109         out = ['UPDATE ', table, ' SET ']
       
   110         out.append(', '.join([self._format_set(*x) for x in self.cols2.items()]))
       
   111         out.extend(self._format_where())
       
   112         return ''.join(out)
       
   113 
       
   114     def _format_set(self, k, v):
       
   115         return '{} = {}'.format(k, v)
       
   116 
       
   117     def _format_where(self):
       
   118         out = [' WHERE ']
       
   119         out.extend([self.id[0], ' = '])
       
   120         out.append(self.id[1])
       
   121         out.append(';')
       
   122         return out
       
   123 
       
   124 class PgDataDiff:
       
   125     def __init__(self, conn1, conn2):
       
   126         self.allowcolor = False
       
   127         self.conn1 = conn1
       
   128         self.conn2 = conn2
       
   129         self.fulltable1 = None
       
   130         self.fulltable2 = None
       
   131     
       
   132     def settable1(self, table, schema='public'):
       
   133         self.schema1 = schema
       
   134         self.table1 = table
       
   135         self.fulltable1 = '"' + schema + '"."'+ table + '"'
       
   136         
       
   137     def settable2(self, table, schema='public'):
       
   138         self.schema2 = schema
       
   139         self.table2 = table
       
   140         self.fulltable2 = '"' + schema + '"."'+ table + '"'
       
   141     
       
   142     def iter_diff(self):
       
   143         '''Return differencies between data of two tables.
       
   144         
       
   145         Yields one line at the time.
       
   146         
       
   147         '''
       
   148         curs1, curs2 = self._select()
       
   149         
       
   150         row1 = curs1.fetchone_adapted()
       
   151         row2 = curs2.fetchone_adapted()
       
   152         
       
   153         while True:
       
   154             if row1 is None and row2 is None:
       
   155                 break
       
   156             diff = self._compare_row(row1, row2)
       
   157             
       
   158             if diff:
       
   159                 yield diff
       
   160             
       
   161                 if diff.change == '-':
       
   162                     row1 = curs1.fetchone_adapted()
       
   163                     continue
       
   164                 if diff.change == '+':
       
   165                     row2 = curs2.fetchone_adapted()
       
   166                     continue
       
   167             # change == '*' or not diff
       
   168             row1 = curs1.fetchone_adapted()
       
   169             row2 = curs2.fetchone_adapted()
       
   170     
       
   171     def print_diff(self):
       
   172         '''Print differencies between data of two tables.
       
   173         
       
   174         The output is in human readable form.
       
   175         
       
   176         Set allowcolor=True of PgDataDiff instance to get colored output.
       
   177         
       
   178         '''
       
   179         for ln in self.iter_diff():
       
   180             print(ln.format())
       
   181     
       
   182     def print_patch(self):
       
   183         '''Print SQL script usable as patch for destination table.
       
   184         
       
   185         Supports INSERT, DELETE and UPDATE operations.
       
   186         
       
   187         '''
       
   188         for ln in self.iter_diff():
       
   189             print(ln.format_patch(self.fulltable2))
       
   190 
       
   191     def _select(self):
       
   192         browser = pgbrowser.PgBrowser(self.conn1)
       
   193         columns = browser.list_columns(schema=self.schema1, table=self.table1)
       
   194         columns_sel = ', '.join(['"' + x['name'] + '"' for x in columns])
       
   195         self.colnames = [x['name'] for x in columns]
       
   196         
       
   197         query1 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable1 + ' ORDER BY 1;'
       
   198         query2 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable2 + ' ORDER BY 1;'
       
   199         
       
   200         curs1 = self.conn1.cursor()
       
   201         curs2 = self.conn2.cursor()
       
   202         
       
   203         curs1.execute(query1)
       
   204         curs2.execute(query2)
       
   205         
       
   206         return curs1, curs2
       
   207 
       
   208     def _compare_data(self, row1, row2):
       
   209         cols1 = OrderedDict()
       
   210         cols2 = OrderedDict()
       
   211         for i in range(len(row1)):
       
   212             if row1[i] != row2[i]:
       
   213                 cols1[self.colnames[i]] = row1[i]
       
   214                 cols2[self.colnames[i]] = row2[i]
       
   215         if cols1:
       
   216             id = (self.colnames[0], row1[0])
       
   217             return DiffData('*', cols1, cols2, id=id)
       
   218         
       
   219         return None
       
   220     
       
   221     def _compare_row(self, row1, row2):
       
   222         if row2 is None:
       
   223             cols1 = OrderedDict(zip(self.colnames, row1))
       
   224             return DiffData('-', cols1, None)
       
   225         if row1 is None:
       
   226             cols2 = OrderedDict(zip(self.colnames, row2))
       
   227             return DiffData('+', None, cols2)
       
   228         
       
   229         if row1[0] < row2[0]:
       
   230             cols1 = OrderedDict(zip(self.colnames, row1))
       
   231             id = (self.colnames[0], row1[0])
       
   232             return DiffData('-', cols1, None, id=id)
       
   233         if row1[0] > row2[0]:
       
   234             cols2 = OrderedDict(zip(self.colnames, row2))
       
   235             return DiffData('+', None, cols2)
       
   236         
       
   237         return self._compare_data(row1, row2)
       
   238