Source code for deriva.core.polling_ermrest_catalog

import sys
import json
import pika
import time
from . import NotModified, ConcurrentUpdate
from .ermrest_catalog import ErmrestCatalog

class PollingErmrestCatalog(ErmrestCatalog):
    """Persistent handle for an ERMrest catalog.

    Provides a higher-level state_change_once() idiom to efficiently
    find candidate rows, transform them, and apply updates.

    Provides a higher-level blocking_poll() idiom to efficiently poll
    a catalog, using AMQP to optimize polling where possible.
    (AMQP is currently limited to clients on localhost of catalog
    in practice.)

    These features can be composed to implement condition-action
    agents with domain-specific logic, e.g.

        catalog = PollingErmrestCatalog(...)
        idle_etag = None

        def look_for_work():
            global idle_etag
            idle_etag, batch = catalog.state_change_once(
                # claim up to 5 items per batch
                '/entity/Foo/state=actionable?limit=5',
                '/attributegroup/Foo/id;state',
                lambda row: {'id': row['id'], 'state': 'claimed'},
                idle_etag
            )
            for candidate, update in batch:
                # assume we have free rein on claimed candidates
                # using state=claimed as a semaphore
                revision = candidate.copy()
                revision['state'] = update['state']
                ...  # do agent work
                revision['state'] = 'complete'
                catalog.put('/entity/Foo', [revision])

        catalog.blocking_poll(look_for_work)

    """

    def __init__(self, scheme, server, catalog_id, credentials=None, caching=True,
                 session_config=None, amqp_server=None):
        """Create ERMrest catalog binding.

        Arguments:
          scheme: 'http' or 'https'
          server: server FQDN string
          catalog_id: e.g. '1'
          credentials: credential secrets, e.g. cookie
             (omitted or None means an empty credential dict)
          caching: whether to retain a GET response cache
          session_config: passed through unchanged to ErmrestCatalog
          amqp_server: host name of the AMQP broker; falls back to
             `server` when not given

        """
        # A fresh dict per call avoids sharing one mutable default object
        # between every instance created without explicit credentials.
        if credentials is None:
            credentials = {}
        ErmrestCatalog.__init__(self, scheme, server, catalog_id, credentials, caching, session_config)
        self.amqp_server = amqp_server if amqp_server else server
        self.amqp_connection = None
        # populated by _amqp_bind()
        self.notice_channel = None
        self.notice_queue_name = None
        self.notice_exchange = "ermrest_changes"

    def _amqp_bind(self):
        """Bind or rebind to AMQP for change notice monitoring."""
        if self.amqp_connection is not None:
            try:
                self.amqp_connection.close()
            except Exception:
                # best-effort close: the old connection may already be dead
                pass

        self.amqp_connection = pika.BlockingConnection(
            pika.ConnectionParameters(
                host=self.amqp_server
            )
        )

        # listening channel for ermrest change notifications
        self.notice_channel = self.amqp_connection.channel()
        try:
            # newer pika API
            self.notice_channel.exchange_declare(self.notice_exchange, exchange_type='fanout')
            self.notice_queue_name = self.notice_channel.queue_declare('', exclusive=True).method.queue
            self.notice_channel.queue_bind(self.notice_queue_name, self.notice_exchange)
        except TypeError:
            # try older API as fallback
            self.notice_channel.exchange_declare(exchange=self.notice_exchange, type='fanout')
            self.notice_queue_name = self.notice_channel.queue_declare(exclusive=True).method.queue
            self.notice_channel.queue_bind(exchange=self.notice_exchange, queue=self.notice_queue_name)
        sys.stderr.write('ERMrest change-notice channel open.\n')

    @staticmethod
    def _is_change_notice(result):
        """Return True if a consume() result carries a message rather than a timeout.

        An inactivity timeout is reported either as a bare None or as a
        tuple whose first element is None, depending on the pika
        release; both are treated as "no message".
        """
        return result is not None and result[0] is not None

    @staticmethod
    def _run_notice_event(look_for_work):
        """Consume all available work before returning.

        Calls look_for_work() repeatedly until it returns a false value.
        A ConcurrentUpdate raised by look_for_work() is logged and the
        call is retried.
        """
        while True:
            try:
                found = look_for_work()
                if not found:
                    break
            except ConcurrentUpdate:
                # retry if we had a race-condition while claiming work
                sys.stderr.write('Handling ErmrestConcurrentUpdate exception...\n')

    def blocking_poll(self, look_for_work, polling_seconds=600, coalesce_seconds=0.1):
        """Use ERMrest change-notice monitoring to optimize polled work processing.

        Client-provided look_for_work function finds actual work in
        ERMrest and processes it. We only optimize the scheduling of
        this work.

        Run look_for_work() whenever there *might* be more work in
        ERMrest.

        If look_for_work() returns True, assume there is more work.

        If look_for_work() returns non-True, wait for ERMrest
        change-notice or polling_seconds timeout before looking
        again (whichever comes first).

        On any change-monitoring communication error, assume there
        might be more work and restart the monitoring process.

        Other exceptions abort the blocking_poll() call.

        Arguments:
          look_for_work: callable taking no arguments
          polling_seconds: maximum wait between look_for_work() runs
          coalesce_seconds: quiet period used to merge a burst of
             change notices into a single wakeup

        This method only returns by raising an exception.
        """
        amqp_failed_at = None
        amqp_retry_count = 0
        last_notice_event = 0

        def next_poll_time():
            # seconds until the next poll deadline, never less than 1
            return max(
                1,
                polling_seconds - (time.time() - last_notice_event)
            )

        def next_amqp_time():
            # seconds until AMQP may be retried; 0 when not in cool-down
            if amqp_failed_at is None:
                return 0
            return max(
                0,
                5**amqp_retry_count  # exponential backoff 5s ... ~4.3h (exponent capped at 6 below)
                - (time.time() - amqp_failed_at)
            )

        while True:
            try:
                if (self.amqp_connection is None or not self.amqp_connection.is_open) \
                   and next_amqp_time() <= next_poll_time():
                    # initialize AMQP (unless we're in a cool-down period)
                    time.sleep(next_amqp_time())
                    self._amqp_bind()
                    polling_gen = self.notice_channel.consume(
                        self.notice_queue_name,
                        exclusive=True,
                        inactivity_timeout=polling_seconds
                    )
                    coalesce_gen = self.notice_channel.consume(
                        self.notice_queue_name,
                        exclusive=True,
                        inactivity_timeout=coalesce_seconds
                    )
                    amqp_failed_at = None
                    amqp_retry_count = 0
                    sys.stderr.write('Using AMQP hybrid polling.\n')
                    # drain any pre-existing work that won't fire an AMQP event for us
                    self._run_notice_event(look_for_work)
                    last_notice_event = time.time()

                if self.amqp_connection and self.amqp_connection.is_open:
                    # wait for AMQP event or timeout to wake us
                    for result in polling_gen:
                        sys.stderr.write(
                            'Woke up on %s.\n'
                            % ('change-notice' if self._is_change_notice(result) else 'poll timeout')
                        )
                        # ... and delay for up to coalesce_seconds to combine multiple notices into one wakeup
                        while self._is_change_notice(next(coalesce_gen)):
                            pass
                        # run once per wakeup
                        self._run_notice_event(look_for_work)
                        last_notice_event = time.time()
                else:
                    # wait for next poll deadline and run once
                    time.sleep(next_poll_time())
                    self._run_notice_event(look_for_work)
                    last_notice_event = time.time()

            except pika.exceptions.AMQPConnectionError:
                if amqp_failed_at is None:
                    sys.stderr.write('Using basic polling due to AMQP communication problems.\n')
                self.amqp_connection = None
                amqp_failed_at = time.time()
                if amqp_retry_count < 6:
                    # don't let retry exponent get bigger than 6...
                    amqp_retry_count += 1
            except Exception as e:
                sys.stderr.write('Got error %s in main event loop.' % e)
                raise

    def state_change_once(self, query_datapath, update_datapath, row_transform_func, idle_etag=None):
        """Perform generic conditional state update via GET-PUT sequence.

        Arguments:
          query_datapath: a query for candidate rows
          update_datapath: an update to consume update rows
          row_transform_func: maps candidate to update rows
          idle_etag: no-op if table is still in this state

        Returns: (idle_etag, [(candidate, update)...])
          idle_etag: value to thread to future calls
          [(candidate, update)...]: each row that was updated

        Exceptions from the transform or update process will abort
        without returning results.

        1. GET query_datapath to get candidate row(s)
        2. apply row_transform_func(row) to get updated content
        3. PUT update_datapath to deliver transformed content
           -- discards rows transformed to None

        Uses opportunistic concurrency control with ETag, If-Match,
        etc. for safety.

        """
        try:
            before = self.get(query_datapath, raise_not_modified=True)
        except NotModified:
            # fall back to the cached copy of the unchanged response
            before = self._cache[self._server_uri + query_datapath]

        before_etag = before.headers.get('etag')

        # A response without an etag header can never match idle_etag,
        # so it is always treated as new state.
        if idle_etag is not None and before_etag == idle_etag:
            sys.stderr.write('No new state to process.\n')
            return idle_etag, []

        rows_before = before.json()
        if not rows_before:
            sys.stderr.write('No candidate rows found.\n')

        # Transform every candidate, then drop those the transform rejected.
        plan = [
            (candidate, update)
            for candidate, update in (
                (row, row_transform_func(row)) for row in rows_before
            )
            if update is not None
        ]
        if not plan:
            sys.stderr.write('No row updates requested.\n')
            return before_etag, []

        # guard_response makes the PUT conditional on the state we just read
        after = self.put(
            update_datapath,
            json=[update for _candidate, update in plan],
            guard_response=before
        )
        sys.stderr.write('Updated %d rows in catalog:\n' % len(after.json()))
        json.dump(plan, sys.stderr, indent=2)
        sys.stderr.write('\n')
        return after.headers.get('etag'), plan