Source code for deriva.utils.catalog.manage.graph_catalog

from __future__ import print_function

import os
from urllib.parse import urlparse

from graphviz import Digraph

class DerivaCatalogToGraph:
    """Render the tables and foreign keys of a catalog as a Graphviz digraph.

    Each schema becomes an (invisible) cluster subgraph, each table a node
    linked to its Chaise recordset page, and each foreign key an edge.
    """

    # Lower-cased schema names whose tables are always treated as vocabularies.
    _VOCABULARY_SCHEMA_NAMES = ('vocab', 'vocabulary')

    # Columns that, when all present, mark a table as a vocabulary (term) table.
    _VOCABULARY_COLUMNS = ('ID', 'Name', 'URI', 'Synonyms')

    def __init__(self, catalog, engine='dot'):
        """
        Create an empty graph bound to a catalog.

        :param catalog: Catalog object; must provide getCatalogModel(), get_server_uri() and catalog_id.
        :param engine: Graphviz layout engine used to render the graph.
        """
        self.graph = Digraph(
            engine=engine,
            format='pdf',
            edge_attr=None,
            strict=True,
        )

        self.catalog = catalog
        self._model = catalog.getCatalogModel()
        # Prefix of the Chaise recordset URL; '<schema>:<table>' is appended per node.
        self._chaise_base = "https://{}/chaise/recordset/#{}/".format(
            urlparse(catalog.get_server_uri()).netloc, self.catalog.catalog_id)

        self.graph.attr('graph', rankdir='LR')
        self.graph.attr('graph', overlap='false', splines='true')
        # Optional: merge parallel edges.
        # self.graph.attr('graph', concentrate=True)

    def clear(self):
        """Discard everything that has been added to the graph so far."""
        self.graph.clear()

    def view(self):
        """Render the graph and open it in the default viewer."""
        self.graph.view()

    def catalog_to_graph(self, schemas=None, skip_terms=False, skip_association_tables=False):
        """
        Convert a catalog to a DOT based graph.

        :param schemas: List of schemas that should be included. Use whole catalog
            (minus '_acl_admin', 'public' and 'WWW') if None.
        :param skip_terms: Do not include term tables in the graph
        :param skip_association_tables: Collapse association tables so that only edges between endpoints are used
        :return: None
        """
        if schemas is None:
            schemas = [s.name for s in self._model.schemas.values()
                       if s.name not in ['_acl_admin', 'public', 'WWW']]

        for schema in schemas:
            self.schema_to_graph(schema,
                                 skip_terms=skip_terms,
                                 schemas=schemas,
                                 skip_association_tables=skip_association_tables)

    def schema_to_graph(self, schema_name, schemas=None, skip_terms=False, skip_association_tables=False):
        """
        Create a graph for the specified schema.

        :param schema_name: Name of the schema in the model to be used.
        :param schemas: List of additional schemas to include in the graph. Edges are only
            drawn between tables whose schemas are both in this list; schema_name itself is
            always included.
        :param skip_terms: Do not include term tables in the graph
        :param skip_association_tables: Do not emit nodes for association tables; their
            endpoints are connected directly instead.
        :return: None
        """
        # Copy so the caller's list is never mutated, and make sure the schema being
        # drawn passes the edge filter (otherwise no edge would ever be emitted).
        included_schemas = list(schemas) if schemas else []
        if schema_name not in included_schemas:
            included_schemas.append(schema_name)

        schema = self._model.schemas[schema_name]

        # Put nodes for each schema in a separate subgraph.
        with self.graph.subgraph(name='cluster_' + schema_name, node_attr={'shape': 'box'}) as schema_graph:
            schema_graph.attr(style='invis')
            for table in schema.tables.values():
                node_name = '{}_{}'.format(schema_name, table.name)
                label = '{}:{}'.format(schema_name, table.name)
                if DerivaCatalogToGraph._is_vocabulary_table(table):
                    if not skip_terms:
                        schema_graph.node(node_name, label=label, shape='ellipse',
                                          URL=self._chaise_uri(table))
                elif table.is_association() and skip_association_tables:
                    # Skip over current table if it is an association table and option is set.
                    print('Skipping node', node_name)
                else:
                    schema_graph.node(node_name, label=label, shape='box',
                                      URL=self._chaise_uri(table))

        # We have all the nodes out now, so run over and add edges.
        for table in schema.tables.values():
            self.foreign_key_defs_to_graph(table,
                                           skip_terms=skip_terms,
                                           schemas=included_schemas,
                                           skip_association_tables=skip_association_tables)

    def foreign_key_defs_to_graph(self, table, skip_terms=False, skip_association_tables=False, schemas=None):
        """
        Add edges for each foreign key relationship in the specified table.

        :param table: Table whose foreign keys are turned into edges.
        :param skip_terms: Do not add edges that point at vocabulary tables.
        :param skip_association_tables: Replace a binary association table by a single
            bidirectional edge between the two tables it references.
        :param schemas: Names of the schemas included in the graph; an edge is only added when
            both ends are in one of them. None disables this filtering.
        :return: None
        """
        # If table is an association table, put in an edge between the two endpoints in the relation.
        # NOTE(review): this branch applies neither the schema filter nor skip_terms -- confirm intended.
        if table.is_association() == 2 and skip_association_tables:
            t1 = table.foreign_keys[0].referenced_columns[0].table
            t2 = table.foreign_keys[1].referenced_columns[0].table
            t1_name = '{}_{}'.format(t1.schema.name, t1.name)
            t2_name = '{}_{}'.format(t2.schema.name, t2.name)
            self.graph.edge(t1_name, t2_name, dir='both', color='gray')
            return

        source_name = '{}_{}'.format(table.schema.name, table.name)
        for fkey in table.foreign_keys:
            referenced_table = next(iter(fkey.column_map.values())).table
            target_name = '{}_{}'.format(referenced_table.schema.name, referenced_table.name)

            # If either end is in a schema we are skipping, do not add an edge.
            if schemas is not None and (referenced_table.schema.name not in schemas
                                        or table.schema.name not in schemas):
                continue
            # If the target is a term table, and we are not including terms, do not add an edge.
            if DerivaCatalogToGraph._is_vocabulary_table(referenced_table) and skip_terms:
                continue

            # Add an edge from the current node to the target table.
            self.graph.edge(source_name, target_name)

    def save(self, filename=None, format='pdf', view=False):
        """
        Write the graph to disk.

        :param filename: Output path. If None, the filename and directory are left to graphviz.
        :param format: Output format. Any value containing 'gv' saves the DOT source
            instead of rendering it.
        :param view: Open the rendered result in the default viewer (render only).
        :return: None
        """
        if filename is None:
            directory = basename = None
        else:
            directory, basename = os.path.split(os.path.abspath(filename))

        if 'gv' in format:
            self.graph.save(filename=basename, directory=directory)
        else:
            print('dumping graph in file', basename, format)
            self.graph.render(filename=basename, directory=directory, view=view, cleanup=True, format=format)

    def _repr_svg_(self):
        """Return the graph's own SVG representation (used by notebook front ends)."""
        return self.graph._repr_svg_()

    @staticmethod
    def _is_vocabulary_table(t):
        """
        Decide whether a table is a vocabulary (term) table.

        A table qualifies if its schema is named 'vocab' or 'vocabulary' (case-insensitive),
        or if it has all of the columns ID, Name, URI and Synonyms.

        :param t: Table to check; must provide schema.name and columns.
        :return: True if the table is considered a vocabulary table, else False.
        """
        # Exact name match: a substring test would also accept names such as '' or 'a'.
        if t.schema.name.lower() in DerivaCatalogToGraph._VOCABULARY_SCHEMA_NAMES:
            return True
        try:
            return all(t.columns[name] for name in DerivaCatalogToGraph._VOCABULARY_COLUMNS)
        except KeyError:
            return False

    def _chaise_uri(self, table):
        """Return the Chaise recordset URL for the given table."""
        return self._chaise_base + "{}:{}".format(table.schema.name, table.name)