pgtools/pgdiff.py
author Radek Brich <radek.brich@devl.cz>
Wed, 10 Aug 2011 18:34:54 +0200
changeset 6 4ab077c93b2d
parent 0 eaae9539e910
permissions -rw-r--r--
Add table diff tool.

# -*- coding: utf-8 -*-
#
# PgDiff - capture differences of database metadata
#
# Depends on PgBrowser
#
# Copyright (c) 2011  Radek Brich <radek.brich@devl.cz>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.


from common.highlight import *


class DiffBase:
    COLORS = {
        '+' : BOLD | GREEN,
        '-' : BOLD | RED,
        '*' : BOLD | YELLOW}
    
    def __init__(self):
        self.changes = None

    def format(self):
        out = ['  ' * self.level]

        out.append(highlight(1, self.COLORS[self.change]))
        out.append(self.change)
        
        out += [' ', self.type, ' ', self.name, highlight(0)]
        
        if self.changes:
            out += [highlight(1, WHITE), ' (', self.formatchanges(), ')', highlight(0)]

        return ''.join(out)

    def formatnotnull(self, notnull):
        if notnull:
            return 'NOT NULL'
        else:
            return None
    
    def formatchanges(self):
        res = []
        for x in self.changes:
            type, a, b = x
            if type == 'notnull':
                type = ''
                a = self.formatnotnull(a)
                b = self.formatnotnull(b)
                
            if a and b:
                s = ''.join(['Changed ', type, ' from ',
                    highlight(1,15), a, highlight(0), ' to ',
                    highlight(1,15), b, highlight(0), '.'])
            elif a and not b:
                l = ['Removed ']
                if type:
                    l += [type, ' ']
                l += [highlight(1,15), a, highlight(0), '.']
                s = ''.join(l)
            elif b and not a:
                l = ['Added ']
                if type:
                    l += [type, ' ']
                l += [highlight(1,15), b, highlight(0), '.']
                s = ''.join(l)
            res.append(s)
        return ' '.join(res)


class DiffSchema(DiffBase):
    def __init__(self, change, schema):
        DiffBase.__init__(self)
        self.level = 0
        self.type = 'schema'
        self.change = change
        self.schema = schema
        self.name = schema
        

class DiffTable(DiffBase):
    def __init__(self, change, schema, table):
        DiffBase.__init__(self)
        self.level = 1
        self.type = 'table'
        self.change = change
        self.schema = schema
        self.table = table
        self.name = table


class DiffColumn(DiffBase):
    def __init__(self, change, schema, table, column, changes=None):
        DiffBase.__init__(self)
        self.level = 2
        self.type = 'column'
        self.change = change
        self.schema = schema
        self.table = table
        self.column = column
        self.name = column
        self.changes = changes


class DiffConstraint(DiffBase):
    def __init__(self, change, schema, table, constraint, changes=None):
        DiffBase.__init__(self)
        self.level = 2
        self.type = 'constraint'
        self.change = change
        self.schema = schema
        self.table = table
        self.constraint = constraint
        self.name = constraint
        self.changes = changes


class PgDiff:
    def __init__(self, srcbrowser=None, dstbrowser=None):
        self.allowcolor = False
        self.src = srcbrowser
        self.dst = dstbrowser
        self.include_schemas = set()  # if not empty, consider only these schemas for diff
        self.exclude_schemas = set()  # exclude these schemas from diff
        self.include_tables = set()
        self.exclude_tables = set()
    
    def _test_filter(self, schema):
        if self.include_schemas and schema not in self.include_schemas:
            return False
        if schema in self.exclude_schemas:
            return False
        return True
    
    def _test_table(self, schema, table):
        name = schema + '.' + table
        if self.include_tables and name not in self.include_tables:
            return False
        if name in self.exclude_tables:
            return False
        return True 
    
    def _diffnames(self, src, dst):
        for x in src:
            if x in dst:
                yield ('*', x)
            else:
                yield ('-', x)
        for x in dst:
            if x not in src:
                yield ('+', x)

    def _compare_columns(self, a, b):
        diff = []
        if a.type != b.type:
            diff.append(('type', a.type, b.type))
        if a.notnull != b.notnull:
            diff.append(('notnull', a.notnull, b.notnull))
        if a.default != b.default:
            diff.append(('default', a.default, b.default))
        return diff
    
    def _compare_constraints(self, a, b):
        diff = []
        if a.type != b.type:
            diff.append(('type', a.type, b.type))
        if a.definition != b.definition:
            diff.append(('definition', a.definition, b.definition))
        return diff
                
    def _diff_columns(self, schema, table, src_columns, dst_columns):
        for nd in self._diffnames(src_columns, dst_columns):
            cdo = DiffColumn(change=nd[0], schema=schema, table=table, column=nd[1])
            if nd[0] == '*':
                a = src_columns[nd[1]]
                b = dst_columns[nd[1]]
                cdo.changes = self._compare_columns(a, b)
                if cdo.changes:
                    yield cdo
            else:
                yield cdo

    def _diff_constraints(self, schema, table, src_constraints, dst_constraints):
        for nd in self._diffnames(src_constraints, dst_constraints):
            cdo = DiffConstraint(change=nd[0], schema=schema, table=table, constraint=nd[1])
            if nd[0] == '*':
                a = src_constraints[nd[1]]
                b = dst_constraints[nd[1]]
                cdo.changes = self._compare_constraints(a, b)
                if cdo.changes:
                    yield cdo
            else:
                yield cdo

    def _difftables(self, schema, src_tables, dst_tables):
        for nd in self._diffnames(src_tables, dst_tables):
            if not self._test_table(schema, nd[1]):
                continue
            tdo = DiffTable(change=nd[0], schema=schema, table=nd[1])
            if nd[0] == '*':
                # columns
                src_columns = src_tables[nd[1]].columns
                dst_columns = dst_tables[nd[1]].columns
                for cdo in self._diff_columns(schema, nd[1], src_columns, dst_columns):
                    if tdo:
                        yield tdo
                        tdo = None
                    yield cdo
                # constraints
                src_constraints = src_tables[nd[1]].constraints
                dst_constraints = dst_tables[nd[1]].constraints
                for cdo in self._diff_constraints(schema, nd[1], src_constraints, dst_constraints):
                    if tdo:
                        yield tdo
                        tdo = None
                    yield cdo
            else:
                yield tdo

    def iter_diff(self):
        '''Return diff between src and dst database schema.

        Yields one line at the time. Each line is in form of object
        iherited from DiffBase. This object contains all information
        about changes. See format() method.
        
        '''
        src_schemas = self.src.schemas
        dst_schemas = self.dst.schemas
        src = [x.name for x in src_schemas.values() if not x.system and self._test_filter(x.name)]
        dst = [x.name for x in dst_schemas.values() if not x.system and self._test_filter(x.name)]
        for nd in self._diffnames(src, dst):
            sdo = DiffSchema(change=nd[0], schema=nd[1])
            if nd[0] == '*':
                src_tables = src_schemas[nd[1]].tables
                dst_tables = dst_schemas[nd[1]].tables
                for tdo in self._difftables(nd[1], src_tables, dst_tables):
                    if sdo:
                        yield sdo
                        sdo = None
                    yield tdo
            else:
                yield sdo

    def print_diff(self):
        '''Print diff between src and dst database schema.
        
        The output is in human readable form.
        
        Set allowcolor=True of PgDiff instance to get colored output. 
        
        '''
        for ln in self.iter_diff():
            print(ln.format())
        
    def print_patch(self):
        '''Print patch for updating from src schema to dst schema.
        
        Supports table drop, add, column drop, add and following
        changes of columns:
          - type
          - set/remove not null
          - default value
        
        This is experimental, not tested very much.
        Do not use without checking the commands.
        Even if it works as intended, it can cause table lock ups
        and/or loss of data. You have been warned.
        
        '''
        for ln in self.iter_diff():
            print(ln.format_patch())

    def filter_schemas(self, include=[], exclude=[]):
        '''Modify list of schemas which are used for computing diff.
        
        include (list) -- if not empty, consider only these schemas for diff
        exclude (list) -- exclude these schemas from diff
        
        Order: include, exclude
        include=[] means include everything
        '''
        self.include_schemas.clear()
        self.include_schemas.update(include)
        self.exclude_schemas.clear()
        self.exclude_schemas.update(exclude)


    def filter_tables(self, include=[], exclude=[]):
        self.include_tables.clear()
        self.include_tables.update(include)
        self.exclude_tables.clear()
        self.exclude_tables.update(exclude)