Source code for deriva.utils.catalog.components.configure_catalog
import logging
from requests import exceptions
from urllib.parse import urlparse
import deriva.core.ermrest_model as em
from deriva.core import tag as chaise_tags
from deriva.utils.catalog.components.deriva_model import DerivaCatalog, DerivaSchema, DerivaColumn, \
DerivaTable, DerivaContext, DerivaKey, DerivaForeignKey, DerivaCatalogError, DerivaModel
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Register an additional annotation tag on the shared chaise_tags object. configure_baseline_catalog stores the
# catalog name and the core group IDs under this tag, and the table-level configuration code checks for it.
chaise_tags.catalog_config = 'tag:isrd.isi.edu,2019:catalog-config'
class DerivaConfigError(Exception):
    """
    Raised when a catalog or table cannot be put into its baseline configuration.

    :param msg: Human readable description of the problem. Also available as the ``msg`` attribute.
    """

    def __init__(self, msg):
        # Hand the message to Exception so that str(), repr(), logging and tracebacks show it. Without this,
        # an error raised in the keyword form DerivaConfigError(msg=...) has empty args and prints as ''.
        super().__init__(msg)
        self.msg = msg
[docs]class DerivaCatalogConfigure(DerivaCatalog):
"""
DerivaCatalogConfigure extends DerivaCatalog by providing methods for creating a default configuration for a
catalog.
The only method of interest in this class is configure_baseline_catalog.
"""
def __init__(self, host, scheme='https', catalog_id=1, validate=True):
    """
    Create the catalog object by delegating to the DerivaCatalog initializer.

    :param host: Passed to DerivaCatalog as its positional argument.
    :param scheme: Passed to DerivaCatalog as the ``scheme`` keyword.
    :param catalog_id: Passed to DerivaCatalog as the ``catalog_id`` keyword.
    :param validate: Passed to DerivaCatalog as the ``validate`` keyword.
    """
    base_options = {'scheme': scheme, 'catalog_id': catalog_id, 'validate': validate}
    super().__init__(host, **base_options)
def _make_schema_instance(self, schema_name):
    """
    Build the schema object for this catalog as a DerivaSchemaConfigure.

    :param schema_name: Name of the schema to wrap.
    :return: A DerivaSchemaConfigure bound to this catalog and schema_name.
    """
    schema = DerivaSchemaConfigure(self, schema_name)
    return schema
def _configure_ermrest_client(self, groups):
    """
    Give the public.ERMrest_Client table readable names and use the user's full name as the row name.

    :param groups: Core catalog groups. Not read by this method.
    :return: None
    """
    client_table = self.catalog['public']['ERMrest_Client']

    # Anyone may read the table. Tighten this ACL if the GUID or member name is considered sensitive.
    client_table.acls['select'] = ['*']

    # Table display name, compact column list and row name.
    table_annotations = {
        chaise_tags.display: {'name': 'Users'},
        chaise_tags.visible_columns: {'compact': ['ID', 'Full_Name', 'Display_Name', 'Email']},
        chaise_tags.table_display: {'row_name': {'row_markdown_pattern': '{{{Full_Name}}}'}},
    }
    client_table.annotations.update(table_annotations)

    # Readable display names for the system columns.
    system_column_labels = (
        ('RCT', 'Creation Time'),
        ('RMT', 'Modified Time'),
        ('RCB', 'Created By'),
        ('RMB', 'Modified By'),
    )
    for column_name, label in system_column_labels:
        client_table.column(column_name).annotations.update({chaise_tags.display: {'name': label}})
def _configure_www_schema(self):
    """
    Create (or reuse) the WWW schema, its Page table and the asset table attached to Page.

    Page holds a unique Title and markdown Content. Each creation step tolerates an error whose first
    argument contains 'already exists'; any other error is re-raised.

    :return: self
    """
    self.logger.info('Configuring WWW schema')

    # Schema: create it, or pick up the existing one.
    try:
        schema = self.create_schema('WWW', comment='Schema for tables that will be displayed as web content')
    except DerivaCatalogError as err:
        if 'already exists' in err.args[0]:
            schema = self['WWW']
        else:
            raise

    # Page table: only the Content column is shown in the detailed view, without column headers.
    try:
        page_columns = [
            DerivaColumn.define('Title', 'text', nullok=False, comment='Unique title for the page'),
            DerivaColumn.define('Content', 'markdown', comment='Content of the page in markdown'),
        ]
        page_keys = [DerivaKey.define(['Title'])]
        page_annotations = {
            chaise_tags.table_display: {
                'detailed': {'hide_column_headers': True, 'collapse_toc_panel': True},
            },
            chaise_tags.visible_foreign_keys: {'detailed': {}},
            chaise_tags.visible_columns: {'detailed': ['Content']},
        }
        schema.create_table('Page', column_defs=page_columns, key_defs=page_keys, annotations=page_annotations)
    except DerivaCatalogError as err:
        if 'already exists' not in err.args[0]:
            raise

    page_table = DerivaTableConfigure(self, 'WWW', 'Page')
    page_table.configure_table_defaults()

    # Asset table for files (such as images) referenced from a page.
    try:
        page_table.create_asset_table('RID')
    except ValueError as err:
        if 'already exists' not in err.args[0]:
            raise
    return self
def _set_core_groups(self, catalog_name=None, admin=None, curator=None, writer=None, reader=None,
                     replace=False):
    """
    Determine the group IDs of the four core groups (admin, curator, writer, reader).

    If the catalog already carries a catalog_config annotation and replace is false, the IDs stored there
    are returned and all other arguments are ignored. Otherwise each group name is either given explicitly
    or defaults to '<catalog_name>-<role>' (e.g. foo-admin, foo-writer), and is looked up by Display_Name
    in public.ERMrest_Group.

    :param catalog_name: Prefix used to form the default group names.
    :param admin: Group name to use in place of the default.
    :param curator: Group name to use in place of the default.
    :param writer: Group name to use in place of the default.
    :param reader: Either '*' for anonymous read access, or the group name to use in place of the default.
    :param replace: Ignore any existing catalog configuration and use the provided arguments.
    :return: Dictionary with keys 'admin', 'curator', 'writer' and 'reader' holding the group IDs
             (reader is '*' when anonymous read access was requested).
    :raises DerivaConfigError: If a non-reader group is '*', if a default name is needed but no catalog
             name was given, or if a group cannot be found.
    """
    roles = ('admin', 'curator', 'writer', 'reader')

    # Reuse the previously recorded configuration when there is one.
    if chaise_tags.catalog_config in self.annotations and not replace:
        return {role: self.annotations[chaise_tags.catalog_config]['groups'][role] for role in roles}

    if any(name == '*' for name in (admin, curator, writer)):
        raise DerivaConfigError(msg='Only reader may be anonymous when setting core catalog groups')

    requested = {'admin': admin, 'curator': curator, 'writer': writer, 'reader': reader}
    if not catalog_name and any(name is None for name in requested.values()):
        raise DerivaConfigError(msg='Catalog name required to look up group')

    # Fill in default names for the roles that were not given explicitly.
    for role in roles:
        if requested[role] is None:
            requested[role] = catalog_name + '-' + role

    # Index the known groups by display name.
    group_rows = self.getPathBuilder().public.ERMrest_Group.entities()
    groups_by_name = {row['Display_Name']: row for row in group_rows}

    resolved = {}
    try:
        for role in roles:
            name = requested[role]
            if role == 'reader' and name == '*':
                resolved[role] = '*'
            else:
                resolved[role] = groups_by_name[name]['ID']
    except KeyError as err:
        raise DerivaConfigError(msg='Group {} not defined'.format(err.args[0]))
    return resolved
def _configure_group_table(self, groups):
    """
    Configure public.ERMrest_Group and create the public.Catalog_Group table that references it.

    ERMrest_Group is made readable by writers, curators and admins and gets display annotations and a
    compound key. Catalog_Group mirrors the ID, URL, Display_Name and Description columns and points back
    to ERMrest_Group through a compound foreign key with ON UPDATE CASCADE.

    :param groups: Dictionary of core group IDs; the 'reader', 'writer', 'curator' and 'admin' entries are
                   read.
    :return: None
    """
    # Log through the catalog's logger, as _configure_www_schema does. The module-level logging.info()
    # writes to the root logger instead.
    self.logger.info('Configuring groups')
    ermrest_group = self['public']['ERMrest_Group']

    # Columns shared by the compound key on ERMrest_Group and the key/foreign key on Catalog_Group.
    group_columns = ['ID', 'URL', 'Display_Name', 'Description']

    # Make ERMrest_Group table visible to writers, curators, and admins.
    ermrest_group.acls['select'] = [groups['writer'], groups['curator'], groups['admin']]

    # Set table and row name.
    ermrest_group.annotations.update({
        chaise_tags.display: {'name': 'Globus Group'},
        chaise_tags.visible_columns: {'*': ['Display_Name', 'Description', 'URL', 'ID']},
        chaise_tags.table_display: {'row_name': {'row_markdown_pattern': '{{{Display_Name}}}'}}
    })

    # Set compound key so that we can link up with the Catalog_Group table.
    try:
        ermrest_group.create_key(list(group_columns), comment='Group ID is unique.')
    except DerivaCatalogError as e:
        # Best effort (presumably the key is left over from an earlier run): keep going, but leave a trace
        # instead of swallowing the error silently.
        self.logger.debug('Compound key on ERMrest_Group was not created: %s', e)

    # Columns of the catalog groups table.
    column_defs = [
        DerivaColumn.define('Display_Name', em.builtin_types['text']),
        DerivaColumn.define('URL', em.builtin_types['text'],
                            annotations={
                                chaise_tags.column_display: {
                                    '*': {'markdown_pattern': '[**{{Display_Name}}**]({{{URL}}})'}},
                                chaise_tags.display: {'name': 'Group Management Page'}
                            }
                            ),
        DerivaColumn.define('Description', em.builtin_types['text']),
        DerivaColumn.define('ID', em.builtin_types['text'], nullok=False)
    ]
    key_defs = [
        DerivaKey.define(['ID']),
        DerivaKey.define(list(group_columns),
                         comment='Key to ensure that group only is entered once.'
                         ),
    ]

    # ACL binding on the foreign key to the group table: a reference to a group can be inserted only by a
    # member of the group being referenced.
    fkey_group_policy = {
        'set_owner': {"types": ["insert"],
                      "projection": ["ID"],
                      "projection_type": "acl"}
    }
    # Allow curators to also insert and update the foreign key.
    fkey_group_acls = {"insert": [groups['curator']], "update": [groups['curator']]}

    # Foreign key to the group table. ON UPDATE CASCADE keeps the mirrored columns in sync.
    fkey_defs = [
        DerivaForeignKey.define(list(group_columns),
                                ermrest_group, list(group_columns),
                                on_update='CASCADE',
                                acls=fkey_group_acls,
                                acl_bindings=fkey_group_policy,
                                )
    ]

    # Get or create the Catalog_Group table. No update ACL is set on the table; the mirrored columns are
    # refreshed through the CASCADE on the foreign key.
    try:
        self['public'].create_table(
            'Catalog_Group', column_defs=column_defs,
            key_defs=key_defs, fkey_defs=fkey_defs,
            comment=None,
            acls={
                # Catalog_Group is readable by the reader group; writers and curators can add entries.
                'select': [groups['reader']],
                'insert': [groups['writer'],
                           groups['curator']]
            },
            acl_bindings={},
            annotations={
                chaise_tags.table_display: {
                    'row_name': {'row_markdown_pattern': '{{{Display_Name}}}'}}},
        )
    except ValueError as e:
        if 'already exists' not in e.args[0]:
            raise

    table = DerivaTableConfigure(self, 'public', 'Catalog_Group')
    table.configure_table_defaults(set_policy=False)
    return
def configure_baseline_catalog(self,
                               catalog_name=None,
                               admin=None, curator=None, writer=None, reader=None,
                               set_policy=True,
                               public=False):
    """
    Put the catalog into the standard baseline configuration:

    1. Set the default display mode of every schema so that underscores are shown as spaces.
    2. Set access control assuming admin, curator, writer and reader groups.
    3. Configure ERMrest_Client to have readable names, and configure the group tables.
    4. Create a schema called *WWW* with a *Page* table configured to display web-page like content.

    :param catalog_name: Name to use when looking up catalog groups. Defaults to the first label of the
                         server host name if not provided.
    :param admin: Name of the admin group. Defaults to catalog-admin, where catalog is the catalog_name.
    :param curator: Name of the curator group. Defaults to catalog-curator.
    :param writer: Name of the writer group. Defaults to catalog-writer.
    :param reader: Name of the reader group. Defaults to catalog-reader.
    :param set_policy: Set the catalog ACLs to support reader/writer/curator/admin groups.
    :param public: Set to true if anonymous read access should be allowed.
    """
    with DerivaModel(self.catalog):
        if not catalog_name:
            # No name given: fall back to the first label of the server's host name.
            server_uri = self.ermrest_catalog.get_server_uri()
            host_name = urlparse(server_uri).hostname
            catalog_name = host_name.split('.')[0]

        groups = self._set_core_groups(
            catalog_name=catalog_name, admin=admin, curator=curator, writer=writer, reader=reader
        )

        # Remember the configuration so that later table-level configuration can retrieve it.
        self.annotations[chaise_tags.catalog_config] = {'name': catalog_name, 'groups': groups}

        # Default name style for all schemas.
        for schema in self.schemas:
            schema.annotations[chaise_tags.display] = {'name_style': {'underline_space': True}}

        # Catalog ACLs for basic admin/curator/writer/reader access.
        if set_policy:
            if public:
                select_acl = ['*']
            else:
                select_acl = [groups['writer'], groups['reader']]
            catalog_acls = {
                "owner": [groups['admin']],
                "insert": [groups['curator'], groups['writer']],
                "update": [groups['curator']],
                "delete": [groups['curator']],
                "select": select_acl,
                "enumerate": ["*"],
            }
            self.acls.update(catalog_acls)

        self._configure_ermrest_client(groups)
        self._configure_group_table(groups)
        self._configure_www_schema()
def update_group_table(catalog):
    """
    Set the URL column of every row in public.ERMrest_Group.

    The URL is 'https://app.globus.org/groups/' followed by the last '/'-separated segment of the row's ID.
    This is a stop-gap that can go away once the URL is entered by ERMrest.

    :param catalog: Object whose getPathBuilder() gives access to public.ERMrest_Group.
    :return: None
    """
    group_path = catalog.getPathBuilder().public.ERMrest_Group
    url_rows = []
    for entity in group_path.entities():
        guid = entity['ID'].rsplit('/', 1)[-1]
        url_rows.append({'RID': entity['RID'], 'URL': 'https://app.globus.org/groups/' + guid})
    group_path.update(url_rows)
class DerivaSchemaConfigure(DerivaSchema):
    """
    Shim schema class whose table factory hands out DerivaTableConfigure objects, so that tables reached
    through a DerivaCatalogConfigure carry the configuration methods.
    """

    def __init__(self, catalog, schema_name):
        """
        :param catalog: Catalog the schema belongs to.
        :param schema_name: Name of the schema.
        """
        super().__init__(catalog, schema_name)

    def _make_table_instance(self, schema_name, table_name):
        """
        :param schema_name: Name of the schema containing the table.
        :param table_name: Name of the table.
        :return: A DerivaTableConfigure for the named table in this schema's catalog.
        """
        table = DerivaTableConfigure(self.catalog, schema_name, table_name)
        return table
[docs]class DerivaTableConfigure(DerivaTable):
"""
This class is used to create a default configuration for a Deriva Table.
"""
def __init__(self, catalog, table, *args, **kwargs):
    """
    Create the table object by delegating to the DerivaTable initializer.

    Every construction site in this module passes three arguments (catalog, schema name, table name), which
    the previous two-parameter signature rejected with a TypeError. Extra arguments are now forwarded
    unchanged so that the base class decides what it accepts.
    NOTE(review): confirm the DerivaTable initializer signature; it is not visible from this module.

    :param catalog: Catalog the table belongs to.
    :param table: First table-identifying argument, forwarded as-is to DerivaTable.
    :param args: Any further positional arguments, forwarded as-is to DerivaTable.
    :param kwargs: Any keyword arguments, forwarded as-is to DerivaTable.
    """
    super().__init__(catalog, table, *args, **kwargs)
def configure_self_serve_policy(self, groups):
    """
    Set up a table so it has a self service policy.

    An Owner column is added if one is not present, ACL bindings are installed so that the creator of a row
    (RCB) and the group named in Owner can update and delete it, and a foreign key from Owner to
    public.Catalog_Group(ID) is (re)created.

    :param groups: Dictionary of core catalog groups; the 'curator' entry is read.
    :return: None
    """
    # Access is delegated to a group through an Owner column that references the group table.
    if 'Owner' not in self.columns:
        self.create_columns(DerivaColumn.define('Owner', 'text', comment='Group that can update the record.'))

    # Table policy: creators and owners can update and delete.
    self_service_policy = {
        # The creator of the record may update and delete it.
        "self_service_creator": {
            "types": ["update", 'delete'],
            "projection": ["RCB"],
            "projection_type": "acl"
        },
        # Members of the group referenced by the Owner column may update and delete the record.
        'self_service_group': {
            "types": ["update", "delete"],
            "projection": ["Owner"],
            "projection_type": "acl"
        }
    }
    self.acl_bindings.update(self_service_policy)

    # ACL binding on the Owner foreign key: a group can be referenced only by one of its members.
    fkey_group_policy = {
        'set_owner': {"types": ["update", "insert"],
                      "projection": ["ID"],
                      "projection_type": "acl"}
    }
    # Allow curators to also insert and update the foreign key.
    fkey_group_acls = {"insert": [groups['curator']], "update": [groups['curator']]}

    # Delete any old single-column foreign key on Owner. Iterate over a snapshot, because deleting a key
    # while walking the live collection can skip entries or fail.
    for fk in list(self.foreign_keys):
        if len(fk.columns) == 1 and next(iter(fk.columns)).name == 'Owner':
            fk.delete()

    # Now create the foreign key to the group table.
    self.create_foreign_key(['Owner'],
                            self.catalog['public']['Catalog_Group'], ['ID'],
                            acls=fkey_group_acls, acl_bindings=fkey_group_policy,
                            )
    return
def create_default_visible_columns(self, really=False):
    """
    Install default visible_columns entries for the '*' and 'entry' contexts.

    A context is filled in only when it is absent from the visible_columns annotation, unless really is
    true, in which case it is filled in regardless. The column sources come from
    self.sources(merge_outbound=True).

    :param really: Fill in the '*' and 'entry' contexts even when they are already present.
    """
    column_sources, _outbound_sources, _inbound_sources = self.sources(merge_outbound=True)

    # Position hint handed to reorder_visible_source; presumably it places Owner next to RCB.
    if 'Owner' in self.columns:
        location = {'RCB': ['Owner']}
    else:
        location = {}

    # Make sure there is an annotation to inspect without discarding one that is already in place.
    if chaise_tags.visible_columns not in self.annotations:
        self.annotations[chaise_tags.visible_columns] = {}

    positions = {}
    for context_name in ('*', 'entry'):
        if context_name not in self.annotations[chaise_tags.visible_columns] or really:
            context = DerivaContext(context_name)
            positions[context] = location
            self.visible_columns.insert_context(context, column_sources)

    self.visible_columns.reorder_visible_source(positions)
def create_default_visible_foreign_keys(self, really=False):
    """
    Install a default visible_foreign_keys entry for the '*' context from the table's inbound sources.

    The context is filled in only when it is absent from the visible_foreign_keys annotation, unless
    really is true.

    :param really: Fill in the '*' context even when it is already present.
    """
    _column_sources, _outbound_sources, inbound_sources = self.sources()
    self.logger.debug('visible_fkeys {}'.format(inbound_sources))

    # Make sure there is an annotation to inspect without discarding one that is already in place.
    if chaise_tags.visible_foreign_keys not in self.annotations:
        self.annotations[chaise_tags.visible_foreign_keys] = {}

    if '*' not in self.annotations[chaise_tags.visible_foreign_keys] or really:
        all_contexts = DerivaContext('*')
        self.visible_foreign_keys.insert_context(all_contexts, inbound_sources)
[docs] def configure_table_defaults(self, set_policy=True, public=False, reset_visible_columns=True):
"""
This function adds the following basic configuration details to an existing table:
1. Creates a self service modification policy in which creators can update update any row they create.
Optionally, an Owner column can be provided, which allows the creater of a row to delegate row ownership to
a specific group.
2. Adds display annotations and foreign key declarations so that system columns RCB, RMB display in a user
friendly way.
3. Adds a visible_foreign_keys and visible_columns annotation.
:param set_policy: If true, then configure the table to have a self service policy
:param public: Make table acessible without logging in.
:param reset_visible_columns: Overwrite any existing visible_columns annotation.
"""
if chaise_tags.catalog_config not in self.catalog.annotations:
raise DerivaConfigError(msg='Attempting to configure table before catalog is configured')
# Hack to update description and URL until we get these passed through ermrest....
update_group_table(self.catalog)
if public:
# First copy over any inherited ACLS.
self.acls.update({**self.schema.acls, **self.acls})
self.acls.pop("create", None)
# Now add permision for anyone to read.
self.acls['select'] = ['*']
if set_policy:
self.configure_self_serve_policy(self.catalog.get_groups())
# Configure schema if not already done so.
if chaise_tags.display not in self.schema.annotations:
self.schema.annotations[chaise_tags.display] = {}
if 'name_style' not in self.schema.annotations[chaise_tags.display]:
self.schema.annotations[chaise_tags.display].update({'name_style': {'underline_space': True}})
# Set up foreign key to ermrest_client on RCB, RMB and Owner. If ermrest_client is configured, the
# full name of the user will be used for the FK value.
for col, display in [('RCB', 'Created By'), ('RMB', 'Modified By')]:
# Delete old fkey if there is one laying around....
for fk in self.foreign_keys:
if len(fk.columns) == 1 and next(iter(fk.columns)).name == col:
fk.delete()
self.create_foreign_key([col], self.catalog['public']['ERMrest_Client'], ['ID'])
# Add a display annotation so that we have sensible name for RCB and RMB.
self.columns[col].annotations[chaise_tags.display] = {'name': display}
self.columns['RCT'].annotations.update({chaise_tags.display: {'name': 'Creation Time'}})
self.columns['RMT'].annotations.update({chaise_tags.display: {'name': 'Modified Time'}})
self.create_default_visible_columns(really=reset_visible_columns)