1 # -*- coding: utf-8 -*- |
|
2 # |
|
3 # PgDataDiff - compare tables, print data differencies |
|
4 # |
|
5 # Copyright (c) 2011 Radek Brich <radek.brich@devl.cz> |
|
6 # |
|
7 # Permission is hereby granted, free of charge, to any person obtaining a copy |
|
8 # of this software and associated documentation files (the "Software"), to deal |
|
9 # in the Software without restriction, including without limitation the rights |
|
10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
11 # copies of the Software, and to permit persons to whom the Software is |
|
12 # furnished to do so, subject to the following conditions: |
|
13 # |
|
14 # The above copyright notice and this permission notice shall be included in |
|
15 # all copies or substantial portions of the Software. |
|
16 # |
|
17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
|
23 # THE SOFTWARE. |
|
24 |
|
25 |
|
26 from collections import OrderedDict |
|
27 |
|
28 from pgtoolkit import pgbrowser |
|
29 from pycolib.ansicolor import * |
|
30 |
|
31 import sys |
|
32 |
|
33 |
|
34 class DiffData: |
|
35 COLORS = { |
|
36 '+' : BOLD | GREEN, |
|
37 '-' : BOLD | RED, |
|
38 '*' : BOLD | YELLOW, |
|
39 'V' : BOLD | WHITE, |
|
40 'K' : BOLD | BLUE} |
|
41 |
|
42 def __init__(self, change, cols1, cols2, key=None): |
|
43 """ |
|
44 |
|
45 change - one of '+', '-', '*' (add, remove, update) |
|
46 cols1 - original column values (OrderedDict) |
|
47 cols2 - new column values (OrderedDict) |
|
48 key - primary key columns (OrderedDict) |
|
49 |
|
50 """ |
|
51 self.change = change |
|
52 self.cols1 = cols1 |
|
53 self.cols2 = cols2 |
|
54 self.key = key |
|
55 |
|
56 def format(self): |
|
57 out = [] |
|
58 |
|
59 out.append(highlight(1, self.COLORS[self.change])) |
|
60 out.extend([self.change, ' ']) |
|
61 |
|
62 out.extend(self._format_changes()) |
|
63 |
|
64 out.append(highlight(0)) |
|
65 |
|
66 return ''.join(out) |
|
67 |
|
68 def format_patch(self, table): |
|
69 method = { |
|
70 '+' : self._format_insert, |
|
71 '-' : self._format_delete, |
|
72 '*' : self._format_update} |
|
73 |
|
74 return method[self.change](table) |
|
75 |
|
76 def _format_changes(self): |
|
77 if self.cols1 and not self.cols2: |
|
78 return [', '.join([self._format_value_del(*x) for x in self.cols1.items()])] |
|
79 if not self.cols1 and self.cols2: |
|
80 return [', '.join([self._format_value_add(*x) for x in self.cols2.items()])] |
|
81 |
|
82 out = [] |
|
83 if self.key: |
|
84 for colname in self.key: |
|
85 out.extend([highlight(1, self.COLORS['*']), colname, ': ', highlight(0), self.key[colname], ', ']) |
|
86 |
|
87 items = [] |
|
88 for i in range(len(self.cols1)): |
|
89 items.append(( |
|
90 list(self.cols1.keys())[i], |
|
91 list(self.cols1.values())[i], |
|
92 list(self.cols2.values())[i])) |
|
93 out.extend([', '.join([self._format_value_change(*x) for x in items])]) |
|
94 |
|
95 return out |
|
96 |
|
97 def _format_value_del(self, k, v): |
|
98 fs = (highlight(1, self.COLORS['-']) + '{}: ' + highlight(0) + '{}') |
|
99 return fs.format(k, v) |
|
100 |
|
101 def _format_value_add(self, k, v): |
|
102 fs = (highlight(1, self.COLORS['+']) + '{}: ' + highlight(0) + |
|
103 highlight(1, self.COLORS['V']) + '{}' + highlight(0)) |
|
104 return fs.format(k, v) |
|
105 |
|
106 def _format_value_change(self, k, v1, v2): |
|
107 fs = (highlight(1, self.COLORS['*']) + '{}: ' + highlight(0) + |
|
108 '{} ▶ ' + |
|
109 highlight(1, self.COLORS['V']) + '{}' + highlight(0)) |
|
110 return fs.format(k, v1, v2) |
|
111 |
|
112 def _format_insert(self, table): |
|
113 out = ['INSERT INTO ', table, ' ('] |
|
114 out.append(', '.join(self.cols2.keys())) |
|
115 out.append(') VALUES (') |
|
116 out.append(', '.join(self.cols2.values())) |
|
117 out.append(');') |
|
118 return ''.join(out) |
|
119 |
|
120 def _format_delete(self, table): |
|
121 out = ['DELETE FROM ', table] |
|
122 out.extend(self._format_where()) |
|
123 return ''.join(out) |
|
124 |
|
125 def _format_update(self, table): |
|
126 out = ['UPDATE ', table, ' SET '] |
|
127 out.append(', '.join([self._format_set(*x) for x in self.cols2.items()])) |
|
128 out.extend(self._format_where()) |
|
129 return ''.join(out) |
|
130 |
|
131 def _format_set(self, k, v): |
|
132 return '{} = {}'.format(k, v) |
|
133 |
|
134 def _format_where(self): |
|
135 out = [' WHERE '] |
|
136 for colname in self.key: |
|
137 out.extend([colname, ' = ', self.key[colname], ' AND ']) |
|
138 out[-1] = ';' |
|
139 return out |
|
140 |
|
141 class PgDataDiff: |
|
142 def __init__(self, conn1, conn2): |
|
143 self.allowcolor = False |
|
144 self.conn1 = conn1 |
|
145 self.conn2 = conn2 |
|
146 self.fulltable1 = None |
|
147 self.fulltable2 = None |
|
148 |
|
149 def settable1(self, table, schema='public'): |
|
150 self.schema1 = schema |
|
151 self.table1 = table |
|
152 self.fulltable1 = '"' + schema + '"."'+ table + '"' |
|
153 |
|
154 def settable2(self, table, schema='public'): |
|
155 self.schema2 = schema |
|
156 self.table2 = table |
|
157 self.fulltable2 = '"' + schema + '"."'+ table + '"' |
|
158 |
|
159 def iter_diff(self): |
|
160 """Return differencies between data of two tables. |
|
161 |
|
162 Yields one line at the time. |
|
163 |
|
164 """ |
|
165 curs1, curs2 = self._select() |
|
166 |
|
167 row1 = curs1.fetchone_dict() |
|
168 row2 = curs2.fetchone_dict() |
|
169 |
|
170 while True: |
|
171 if row1 is None and row2 is None: |
|
172 break |
|
173 diff = self._compare_row(row1, row2, curs1.adapt, curs2.adapt) |
|
174 |
|
175 if diff: |
|
176 yield diff |
|
177 |
|
178 if diff.change == '-': |
|
179 row1 = curs1.fetchone_dict() |
|
180 continue |
|
181 if diff.change == '+': |
|
182 row2 = curs2.fetchone_dict() |
|
183 continue |
|
184 # change == '*' or not diff |
|
185 row1 = curs1.fetchone_dict() |
|
186 row2 = curs2.fetchone_dict() |
|
187 |
|
188 curs1.close() |
|
189 curs2.close() |
|
190 |
|
191 def print_diff(self, file=sys.stdout): |
|
192 """Print differencies between data of two tables. |
|
193 |
|
194 The output is in human readable form. |
|
195 |
|
196 Set allowcolor=True of PgDataDiff instance to get colored output. |
|
197 |
|
198 """ |
|
199 for ln in self.iter_diff(): |
|
200 print(ln.format(), file=file) |
|
201 |
|
202 def print_patch(self, file=sys.stdout): |
|
203 """Print SQL script usable as patch for destination table. |
|
204 |
|
205 Supports INSERT, DELETE and UPDATE operations. |
|
206 |
|
207 """ |
|
208 for ln in self.iter_diff(): |
|
209 print(ln.format_patch(self.fulltable1), file=file) |
|
210 |
|
211 def _select(self): |
|
212 browser = pgbrowser.PgBrowser(self.conn1) |
|
213 |
|
214 columns = browser.list_columns(schema=self.schema1, table=self.table1, order=1) |
|
215 if not columns: |
|
216 raise Exception('Table %s.%s not found.' % (self.schema1, self.table1)) |
|
217 columns_sel = ', '.join(['"' + x['name'] + '"' for x in columns]) |
|
218 self.colnames = [x['name'] for x in columns] |
|
219 |
|
220 pkey = [ind for ind in browser.list_indexes(schema=self.schema1, table=self.table1) if ind['primary']] |
|
221 if not pkey: |
|
222 raise Exception('Table %s.%s has no primary key.' % (self.schema1, self.table1)) |
|
223 pkey = pkey[0] |
|
224 pkey_sel = ', '.join(['"' + x + '"' for x in pkey['columns']]) |
|
225 self.pkeycolnames = pkey['columns'] |
|
226 |
|
227 query1 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable1 + ' ORDER BY ' + pkey_sel |
|
228 query2 = 'SELECT ' + columns_sel + ' FROM ' + self.fulltable2 + ' ORDER BY ' + pkey_sel |
|
229 |
|
230 curs1 = self.conn1.cursor('curs1') |
|
231 curs2 = self.conn2.cursor('curs2') |
|
232 |
|
233 curs1.execute(query1) |
|
234 curs2.execute(query2) |
|
235 |
|
236 return curs1, curs2 |
|
237 |
|
238 def _compare_data(self, row1, row2): |
|
239 cols1 = OrderedDict() |
|
240 cols2 = OrderedDict() |
|
241 for name in row1.keys(): |
|
242 if row1[name] != row2[name]: |
|
243 cols1[name] = row1[name] |
|
244 cols2[name] = row2[name] |
|
245 if cols1: |
|
246 key = OrderedDict(zip(self.pkeycolnames, [row1[colname] for colname in self.pkeycolnames])) |
|
247 return DiffData('*', cols1, cols2, key=key) |
|
248 |
|
249 return None |
|
250 |
|
251 def _compare_row(self, row1, row2, adapt1, adapt2): |
|
252 if row2 is None: |
|
253 row1 = adapt1(row1) |
|
254 key = OrderedDict(zip(self.pkeycolnames, [row1[colname] for colname in self.pkeycolnames])) |
|
255 return DiffData('-', row1, None, key=key) |
|
256 if row1 is None: |
|
257 row2 = adapt2(row2) |
|
258 return DiffData('+', None, row2) |
|
259 |
|
260 for keyname in self.pkeycolnames: |
|
261 if row1[keyname] < row2[keyname]: |
|
262 row1 = adapt1(row1) |
|
263 key = OrderedDict(zip(self.pkeycolnames, [row1[colname] for colname in self.pkeycolnames])) |
|
264 return DiffData('-', row1, None, key=key) |
|
265 for keyname in self.pkeycolnames: |
|
266 if row1[keyname] > row2[keyname]: |
|
267 row2 = adapt2(row2) |
|
268 return DiffData('+', None, row2) |
|
269 |
|
270 row1 = adapt1(row1) |
|
271 row2 = adapt2(row2) |
|
272 return self._compare_data(row1, row2) |
|
273 |
|