pgtoolkit/pgdiff.py
author Radek Brich <radek.brich@devl.cz>
Thu, 24 Jan 2013 17:11:17 +0100
changeset 58 0bcc13460dae
parent 53 4a049a5af657
child 59 65efd0c6919f
permissions -rw-r--r--
PgBrowser: Add functions. PgDiff: Compare functions.

# -*- coding: utf-8 -*-
#
# PgDiff - capture differences of database metadata
#
# Depends on PgBrowser
#
# Copyright (c) 2011  Radek Brich <radek.brich@devl.cz>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.


from pgtoolkit.highlight import *


class DiffBase:
    COLORS = {
        '+' : BOLD | GREEN,
        '-' : BOLD | RED,
        '*' : BOLD | YELLOW,
    }

    COMMANDS = {
        '+' : 'CREATE',
        '-' : 'DROP',
        '*' : 'ALTER',
    }

    def __init__(self):
        self.changes = None

    def format(self):
        out = ['  ' * self.level]

        out.append(highlight(1, self.COLORS[self.change]))
        out.append(self.change)

        out += [' ', self.type, ' ', self.name, highlight(0)]

        if self.changes:
            out += [highlight(1, WHITE), ' (', self._formatchanges(), ')', highlight(0)]

        return ''.join(out)

    def _formatnotnull(self, notnull):
        if notnull:
            return 'NOT NULL'
        else:
            return None

    def _formatchanges(self):
        res = []
        for x in self.changes:
            type, a, b = x
            if type == 'notnull':
                type = ''
                a = self._formatnotnull(a)
                b = self._formatnotnull(b)

            if a and b:
                s = ''.join(['Changed ', type, ' from ',
                    highlight(1,15), a, highlight(0), ' to ',
                    highlight(1,15), b, highlight(0), '.'])
            elif a and not b:
                l = ['Removed ']
                if type:
                    l += [type, ' ']
                l += [highlight(1,15), a, highlight(0), '.']
                s = ''.join(l)
            elif b and not a:
                l = ['Added ']
                if type:
                    l += [type, ' ']
                l += [highlight(1,15), b, highlight(0), '.']
                s = ''.join(l)
            res.append(s)
        return ' '.join(res)

    def format_patch(self):
        if self.change == '*' and self.type in ('schema', 'table'):
            return None
        return ['%s %s %s;' % (self.COMMANDS[self.change], self.type.upper(), self.name)]


class DiffSchema(DiffBase):
    def __init__(self, change, schema):
        DiffBase.__init__(self)
        self.level = 0
        self.type = 'schema'
        self.change = change
        self.schema = schema
        self.name = schema


class DiffTable(DiffBase):
    def __init__(self, change, schema, table):
        DiffBase.__init__(self)
        self.level = 1
        self.type = 'table'
        self.change = change
        self.schema = schema
        self.table = table
        self.name = table


class DiffFunction(DiffBase):
    def __init__(self, change, schema, function):
        DiffBase.__init__(self)
        self.level = 1
        self.type = 'function'
        self.change = change
        self.schema = schema
        self.function = function
        self.name = function

    def _formatchanges(self):
        res = []
        for x in self.changes:
            type, a, b = x
            if type == 'source':
                s = 'Changed source.'
            else:
                s = ''.join(['Changed ', type, ' from ',
                    highlight(1,15), a, highlight(0), ' to ',
                    highlight(1,15), b, highlight(0), '.'])
            res.append(s)
        return ' '.join(res)


class DiffColumn(DiffBase):
    ALTER_COMMANDS = {
        '+' : 'ADD',
        '-' : 'DROP',
        '*' : 'ALTER',
    }

    def __init__(self, change, schema, table, column, columntype, columndefault, changes=None):
        DiffBase.__init__(self)
        self.level = 2
        self.type = 'column'
        self.change = change
        self.schema = schema
        self.table = table
        self.column = column
        self.columntype = columntype
        self.columndefault = columndefault
        self.name = column
        self.changes = changes

    def format_patch(self):
        if self.change == '*':
            type_statement = ' TYPE'
        else:
            type_statement = ''
        if self.columntype is not None:
            type_statement += ' ' + self.columntype;
        out = []
        out += ['ALTER TABLE %s.%s %s COLUMN %s%s;' % (
            self.schema,
            self.table,
            self.ALTER_COMMANDS[self.change],
            self.name,
            type_statement
        )]
        if self.columndefault:
            out += ['ALTER TABLE %s.%s ALTER COLUMN %s SET DEFAULT %s;' % (
                self.schema,
                self.table,
                self.name,
                self.columndefault
                )]
        return out


class DiffConstraint(DiffBase):
    def __init__(self, change, schema, table, constraint, definition, changes=None):
        DiffBase.__init__(self)
        self.level = 2
        self.type = 'constraint'
        self.change = change
        self.schema = schema
        self.table = table
        self.constraint = constraint
        self.name = constraint
        self.definition = definition
        self.changes = changes

    def format_patch(self):
        q_alter = 'ALTER TABLE %s.%s' % (self.schema, self.table)
        q_drop = '%s DROP CONSTRAINT %s;' % (q_alter, self.constraint)
        q_add = '%s ADD CONSTRAINT %s %s;' % (q_alter, self.constraint, self.definition)
        if self.change == '*':
            out = [q_drop, q_add]
        if self.change == '+':
            out = [q_add]
        if self.change == '-':
            out = [q_drop]
        return out


class PgDiff:
    def __init__(self, srcbrowser=None, dstbrowser=None):
        self.allowcolor = False
        self.src = srcbrowser
        self.dst = dstbrowser
        self.include_schemas = set()  # if not empty, consider only these schemas for diff
        self.exclude_schemas = set()  # exclude these schemas from diff
        self.include_tables = set()
        self.exclude_tables = set()

    def _test_schema(self, schema):
        if self.include_schemas and schema not in self.include_schemas:
            return False
        if schema in self.exclude_schemas:
            return False
        return True

    def _test_table(self, table):
        if self.include_tables and table not in self.include_tables:
            return False
        if table in self.exclude_tables:
            return False
        return True

    def _diff_names(self, src, dst):
        for x in src:
            if x in dst:
                yield ('*', x)
            else:
                yield ('-', x)
        for x in dst:
            if x not in src:
                yield ('+', x)

    def _compare_functions(self, a, b):
        diff = []
        if a.arguments != b.arguments:
            diff.append(('args', a.arguments, b.arguments))
        if a.result != b.result:
            diff.append(('result', a.result, b.result))
        if a.source != b.source:
            diff.append(('source', a.source, b.source))
        return diff

    def _compare_columns(self, a, b):
        diff = []
        if a.type != b.type:
            diff.append(('type', a.type, b.type))
        if a.notnull != b.notnull:
            diff.append(('notnull', a.notnull, b.notnull))
        if a.default != b.default:
            diff.append(('default', a.default, b.default))
        return diff

    def _compare_constraints(self, a, b):
        diff = []
        if a.type != b.type:
            diff.append(('type', a.type, b.type))
        if a.definition != b.definition:
            diff.append(('definition', a.definition, b.definition))
        return diff

    def _diff_columns(self, schema, table, src_columns, dst_columns):
        for nd in self._diff_names(src_columns, dst_columns):
            if nd[1] in dst_columns:
                dst_type = dst_columns[nd[1]].type
                dst_default = dst_columns[nd[1]].default
            else:
                dst_type = None
                dst_default = None
            cdo = DiffColumn(change=nd[0], schema=schema, table=table, column=nd[1],
                columntype=dst_type, columndefault=dst_default)
            if nd[0] == '*':
                a = src_columns[nd[1]]
                b = dst_columns[nd[1]]
                cdo.changes = self._compare_columns(a, b)
                if cdo.changes:
                    yield cdo
            else:
                yield cdo

    def _diff_constraints(self, schema, table, src_constraints, dst_constraints):
        for nd in self._diff_names(src_constraints, dst_constraints):
            if nd[1] in dst_constraints:
                dst_definition = dst_constraints[nd[1]].definition
            else:
                dst_definition = None
            cdo = DiffConstraint(change=nd[0], schema=schema, table=table, constraint=nd[1],
                definition=dst_definition)
            if nd[0] == '*':
                a = src_constraints[nd[1]]
                b = dst_constraints[nd[1]]
                cdo.changes = self._compare_constraints(a, b)
                if cdo.changes:
                    yield cdo
            else:
                yield cdo

    def _diff_tables(self, schema, src_tables, dst_tables):
        for nd in self._diff_names(src_tables, dst_tables):
            if not self._test_table(nd[1]):
                continue
            tdo = DiffTable(change=nd[0], schema=schema, table=nd[1])
            if nd[0] == '*':
                # columns
                src_columns = src_tables[nd[1]].columns
                dst_columns = dst_tables[nd[1]].columns
                for cdo in self._diff_columns(schema, nd[1], src_columns, dst_columns):
                    if tdo:
                        yield tdo
                        tdo = None
                    yield cdo
                # constraints
                src_constraints = src_tables[nd[1]].constraints
                dst_constraints = dst_tables[nd[1]].constraints
                for cdo in self._diff_constraints(schema, nd[1], src_constraints, dst_constraints):
                    if tdo:
                        yield tdo
                        tdo = None
                    yield cdo
            else:
                yield tdo

    def _diff_functions(self, schema, src_functions, dst_functions):
        for nd in self._diff_names(src_functions, dst_functions):
            fdo = DiffFunction(change=nd[0], schema=schema, function=nd[1])
            if nd[0] == '*':
                # compare function body and arguments
                a = src_functions[nd[1]]
                b = dst_functions[nd[1]]
                fdo.changes = self._compare_functions(a, b)
                if fdo.changes:
                    yield fdo
            else:
                yield fdo

    def iter_diff(self):
        '''Return diff between src and dst database schema.

        Yields one line at the time. Each line is in form of object
        iherited from DiffBase. This object contains all information
        about changes. See format() method.

        '''
        src_schemas = self.src.schemas
        dst_schemas = self.dst.schemas
        src = [x.name for x in src_schemas.values() if not x.system and self._test_schema(x.name)]
        dst = [x.name for x in dst_schemas.values() if not x.system and self._test_schema(x.name)]
        for nd in self._diff_names(src, dst):
            sdo = DiffSchema(change=nd[0], schema=nd[1])
            if nd[0] == '*':
                # tables
                src_tables = src_schemas[nd[1]].tables
                dst_tables = dst_schemas[nd[1]].tables
                for tdo in self._diff_tables(nd[1], src_tables, dst_tables):
                    if sdo:
                        yield sdo
                        sdo = None
                    yield tdo
                # functions
                src_functions = src_schemas[nd[1]].functions
                dst_functions = dst_schemas[nd[1]].functions
                for fdo in self._diff_functions(nd[1], src_functions, dst_functions):
                    if sdo:
                        yield sdo
                        sdo = None
                    yield fdo
            else:
                yield sdo

    def print_diff(self):
        '''Print diff between src and dst database schema.

        The output is in human readable form.

        Set allowcolor=True of PgDiff instance to get colored output.

        '''
        for ln in self.iter_diff():
            print(ln.format())

    def print_patch(self):
        '''Print patch for updating from src schema to dst schema.

        Supports table drop, add, column drop, add and following
        changes of columns:
          - type
          - set/remove not null
          - default value

        This is experimental, not tested very much.
        Do not use without checking the commands.
        Even if it works as intended, it can cause table lock ups
        and/or loss of data. You have been warned.

        '''
        for ln in self.iter_diff():
            patch = ln.format_patch()
            if patch:
                print('\n'.join(patch))

    def filter_schemas(self, include=[], exclude=[]):
        '''Modify list of schemas which are used for computing diff.

        include (list) -- if not empty, consider only these schemas for diff
        exclude (list) -- exclude these schemas from diff

        Order: include, exclude
        include=[] means include everything
        '''
        self.include_schemas.clear()
        self.include_schemas.update(include)
        self.exclude_schemas.clear()
        self.exclude_schemas.update(exclude)


    def filter_tables(self, include=[], exclude=[]):
        self.include_tables.clear()
        self.include_tables.update(include)
        self.exclude_tables.clear()
        self.exclude_tables.update(exclude)