Source code for deriva.utils.catalog.manage.graph_catalog

from __future__ import print_function

import os
from urllib.parse import urlparse

from graphviz import Digraph

class DerivaCatalogToGraph:
    """
    Build a Graphviz directed graph from the model of a catalog.

    Every table becomes a node, grouped into one cluster per schema, and every
    foreign key becomes an edge from the referring table to the referenced
    table.  Vocabulary (term) tables are drawn as ellipses, all other tables
    as boxes.  Each node carries a URL pointing at the Chaise recordset page
    of its table.
    """

    # Schemas left out when the caller does not say which schemas to graph.
    _DEFAULT_SKIPPED_SCHEMAS = ('_acl_admin', 'public', 'WWW')
    # Lower-cased schema names whose tables are always treated as vocabularies.
    _VOCABULARY_SCHEMAS = ('vocab', 'vocabulary')
    # Columns whose joint presence marks a table as a vocabulary table.
    _VOCABULARY_COLUMNS = ('ID', 'Name', 'URI', 'Synonyms')

    def __init__(self, catalog, engine='dot'):
        """
        :param catalog: Catalog to graph.  It must provide getCatalogModel(),
                        get_server_uri() and a catalog_id attribute.
        :param engine: Graphviz layout engine handed to Digraph.
        """
        self.graph = Digraph(
            engine=engine,
            format='pdf',
            edge_attr=None,
            strict=True,
        )

        self.catalog = catalog
        self._model = catalog.getCatalogModel()
        # Prefix shared by all node URLs; only the host part of the server URI is used.
        self._chaise_base = "https://{}/chaise/recordset/#{}/".format(
            urlparse(catalog.get_server_uri()).netloc, self.catalog.catalog_id)

        self.graph.attr('graph', rankdir='LR')
        self.graph.attr('graph', overlap='false', splines='true')

    def clear(self):
        """Clear the underlying Graphviz graph."""
        self.graph.clear()

    def view(self):
        """Render the graph and open it in a viewer (delegates to Digraph.view)."""
        self.graph.view()

    def catalog_to_graph(self, schemas=None, skip_terms=False, skip_association_tables=False):
        """
        Convert a catalog to a DOT based graph.

        :param schemas:  List of schemas that should be included.  If None, every schema of the
                         catalog model except '_acl_admin', 'public' and 'WWW' is used.
        :param skip_terms: Do not include term tables in the graph
        :param skip_association_tables: Collapse association tables so that only edges between endpoints are used
        :return: None
        """
        if schemas is None:
            schemas = [s.name for s in self._model.schemas.values()
                       if s.name not in self._DEFAULT_SKIPPED_SCHEMAS]

        for schema in schemas:
            self.schema_to_graph(schema, skip_terms=skip_terms, schemas=schemas,
                                 skip_association_tables=skip_association_tables)

    def schema_to_graph(self, schema_name, schemas=None, skip_terms=False, skip_association_tables=False):
        """
        Create a graph for the specified schema.

        :param schema_name: Name of the schema in the model to be used.
        :param schemas: List of additional schemas to include in the graph.  Edges are only drawn
                        between tables of schema_name and of these schemas.
        :param skip_terms: Do not include term tables in the graph
        :param skip_association_tables: Leave out association tables and connect their endpoints directly
        :return: None
        """
        schema = self._model.schemas[schema_name]

        # The schema being drawn always counts as included; otherwise a direct call without
        # `schemas` would filter out every edge, including those inside this schema.
        included_schemas = [] if schemas is None else list(schemas)
        if schema_name not in included_schemas:
            included_schemas.append(schema_name)

        # Put nodes for each schema in a separate subgraph.
        with self.graph.subgraph(name='cluster_' + schema_name, node_attr={'shape': 'box'}) as schema_graph:
            schema_graph.attr(style='invis')
            for table in schema.tables.values():
                node_name = '{}_{}'.format(schema_name, table.name)
                if DerivaCatalogToGraph._is_vocabulary_table(table):
                    if skip_terms:
                        continue
                    shape = 'ellipse'
                elif table.is_association() and skip_association_tables:
                    # Skip over current table if it is an association table and option is set.
                    print('Skipping node', node_name)
                    continue
                else:
                    shape = 'box'
                schema_graph.node(node_name, label='{}:{}'.format(schema_name, table.name),
                                  shape=shape,
                                  URL=self._chaise_uri(table))

        # We have all the nodes out now, so run over and add edges.
        for table in schema.tables.values():
            self.foreign_key_defs_to_graph(table,
                                           skip_terms=skip_terms,
                                           schemas=included_schemas,
                                           skip_association_tables=skip_association_tables)

    def foreign_key_defs_to_graph(self, table, skip_terms=False, skip_association_tables=False, schemas=None):
        """
        Add edges for each foreign key relationship in the specified table.

        An edge is only added when both of its endpoints pass the schema and term filters, so
        that no edge refers to a table that was left out of the graph.

        :param table: Table whose foreign keys are turned into edges.
        :param skip_terms: Do not add edges that touch a term table.
        :param skip_association_tables: If table is a binary association table, add a single
                                        bidirectional edge between the two tables it relates
                                        instead of one edge per foreign key.
        :param schemas: Names of the schemas included in the graph.  None disables the schema filter.
        :return: None
        """
        # If table is an association table, put in an edge between the two endpoints in the relation.
        if skip_association_tables and table.is_association() == 2:
            t1 = table.foreign_keys[0].referenced_columns[0].table
            t2 = table.foreign_keys[1].referenced_columns[0].table
            if (DerivaCatalogToGraph._is_drawn(t1, schemas, skip_terms)
                    and DerivaCatalogToGraph._is_drawn(t2, schemas, skip_terms)):
                self.graph.edge(DerivaCatalogToGraph._node_name(t1),
                                DerivaCatalogToGraph._node_name(t2),
                                dir='both', color='gray')
            return

        # Nothing to add when the referring table itself is not part of the graph.
        if not DerivaCatalogToGraph._is_drawn(table, schemas, skip_terms):
            return

        source_name = DerivaCatalogToGraph._node_name(table)
        for fkey in table.foreign_keys:
            # All columns of a foreign key refer to the same table, so the first one is enough.
            referenced_table = next(iter(fkey.column_map.values())).table

            # If the target is in a schema we are skipping, or is a term table and we are not
            # including terms, do not add an edge.
            if not DerivaCatalogToGraph._is_drawn(referenced_table, schemas, skip_terms):
                continue

            # Add an edge from the current node to the target table.
            self.graph.edge(source_name, DerivaCatalogToGraph._node_name(referenced_table))

    def save(self, filename=None, format='pdf', view=False):
        """
        Write the graph to disk.

        :param filename: Path of the output file.  If None, the default filename of the
                         underlying Digraph is used.
        :param format: 'gv' saves the DOT source; any other value is rendered by Graphviz
                       in that format.
        :param view: Open the rendered file in a viewer (ignored for 'gv').
        :return: None
        """
        if filename is None:
            directory, file = None, None
        else:
            directory, file = os.path.split(os.path.abspath(filename))

        if 'gv' in format:
            self.graph.save(filename=file, directory=directory)
        else:
            print('dumping graph in file', file, format)
            self.graph.render(filename=file, directory=directory, view=view, cleanup=True, format=format)

    def _repr_svg_(self):
        """Return the SVG representation of the graph, as produced by the underlying Digraph."""
        return self.graph._repr_svg_()

    @staticmethod
    def _node_name(table):
        """Return the graph node identifier of a table: '<schema name>_<table name>'."""
        return '{}_{}'.format(table.schema.name, table.name)

    @staticmethod
    def _is_drawn(table, schemas, skip_terms):
        """
        Tell whether a table passes the schema and term filters.

        :param table: Table to check.
        :param schemas: Names of the included schemas, or None for no schema filter.
        :param skip_terms: True if term tables are left out of the graph.
        :return: True if the table may be an endpoint of an edge.
        """
        if schemas is not None and table.schema.name not in schemas:
            return False
        return not (skip_terms and DerivaCatalogToGraph._is_vocabulary_table(table))

    @staticmethod
    def _is_vocabulary_table(t):
        """
        Tell whether a table is a vocabulary (term) table.

        A table qualifies if its schema is named 'vocab' or 'vocabulary' (case-insensitive),
        or if it has all of the columns ID, Name, URI and Synonyms.

        :param t: Table to check.
        :return: True or False.
        """
        # Compare against whole names: a substring test would also accept schema names
        # such as 'a' or 'ary' that merely occur inside the word 'vocabulary'.
        if t.schema.name.lower() in DerivaCatalogToGraph._VOCABULARY_SCHEMAS:
            return True
        try:
            for column_name in DerivaCatalogToGraph._VOCABULARY_COLUMNS:
                t.columns[column_name]
        except KeyError:
            return False
        return True

    def _chaise_uri(self, table):
        """Return the Chaise recordset URL of a table."""
        return self._chaise_base + "{}:{}".format(table.schema.name, table.name)