Source code for bionic.cache_api

"""
Contains the Cache class, which provides a user-facing API for directly interacting
with Bionic's cache.
"""

from functools import total_ordering

from .descriptors.parsing import entity_dnode_from_descriptor
from .utils.misc import oneline
from .utils.urls import path_from_url, is_file_url


class Cache:
    """
    Programmatic access to Bionic's persistent cache.

    An instance is exposed as the ``cache`` attribute of a
    :class:`Flow <bionic.Flow>` object. Call ``get_entries`` to walk over the
    entries currently held in the cache.
    """

    def __init__(self, deriver):
        # Strictly speaking only the persistent cache is needed today. The whole
        # deriver is kept anyway: it lets us postpone get_ready() until entries
        # are actually requested, and it is expected to be useful later for
        # working out which artifacts are relevant to the current flow.
        self._deriver = deriver

    def get_entries(self):
        """
        Returns a lazy iterable of
        :class:`CacheEntry <bionic.cache_api.CacheEntry>` objects, one per
        artifact in Bionic's persistent cache.

        Cached artifacts are stored by flow name, so the result covers every
        artifact produced by a flow sharing this flow's name; that typically
        means the current Flow object plus any older or modified versions of it.

        Every cache tier enabled for the current flow contributes entries. For
        example, with GCS caching enabled, entries come from both the "local"
        (on-disk) and the "cloud" (GCS) tiers, local entries first.

        The deriver is made ready as soon as this method is called, but each
        store's inventory is only listed once the returned iterable is consumed.
        """

        # Reaching into private attributes is not pretty, but adding further API
        # layers just to avoid it does not seem worthwhile.
        self._deriver.get_ready()
        persistent_cache = self._deriver._core.persistent_cache

        # A tier that is not enabled shows up as a None store; skip those.
        enabled_stores = []
        for candidate in (
            persistent_cache._local_store,
            persistent_cache._cloud_store,
        ):
            if candidate is not None:
                enabled_stores.append(candidate)

        return (
            CacheEntry(cache=self, inv_item=item)
            for enabled_store in enabled_stores
            for item in enabled_store.inventory.list_items()
        )
def path_from_url_if_file(url):
    """
    Returns the file path for ``url`` when it is a file URL, and None for any
    other kind of URL.
    """
    return path_from_url(url) if is_file_url(url) else None
@total_ordering
class CacheEntry:
    """
    Represents an artifact in Bionic's persistent cache.

    Has the following fields:

    - ``tier``: "local" or "cloud", depending on which tier of the cache the
      artifact is in.
    - ``entity``: the name of the cached entity, or ``None`` if the artifact
      does not correspond to an entity
    - ``artifact_url``: a URL to the cached artifact file or blob
    - ``metadata_url``: a URL to the metadata file or blob describing the artifact
    - ``artifact_path``: a Path object locating the artifact file (if it's a
      local file) or None (if it's a cloud blob)
    - ``metadata_path``: a Path object locating the metadata file (if it's a
      local file) or None (if it's a cloud blob)

    Entries are hashed, compared for equality, and ordered by their metadata
    URL. Ordering an entry against a non-``CacheEntry`` raises ``TypeError``.
    """

    def __init__(self, cache, inv_item):
        self._cache = cache
        # The metadata URL serves as the key for hashing, equality and ordering.
        self._comparison_key = inv_item.abs_metadata_url

        self.tier = inv_item.inventory.tier
        self.artifact_url = inv_item.abs_artifact_url
        self.metadata_url = inv_item.abs_metadata_url

        self._descriptor = inv_item.descriptor
        self._inventory = inv_item.inventory

    @property
    def entity(self):
        """The name of the cached entity, or None if a ValueError is raised
        while interpreting the descriptor as an entity."""
        try:
            return entity_dnode_from_descriptor(self._descriptor).assume_entity().name
        except ValueError:
            # Presumably the descriptor does not denote a single entity.
            return None

    @property
    def artifact_path(self):
        """File path of the artifact, or None if its URL is not a file URL."""
        return path_from_url_if_file(self.artifact_url)

    @property
    def metadata_path(self):
        """File path of the metadata, or None if its URL is not a file URL."""
        return path_from_url_if_file(self.metadata_url)

    def delete(self):
        """
        Safely deletes the artifact and its metadata from the cache.

        Returns True if the artifact was deleted and False if it was not found.
        Raises a ``CacheEntryDeletionError`` if the deletion fails.

        (Note that if two entries refer to the same artifact and ``delete`` is
        called on both, the first call will return True and the second will
        return False.)
        """

        # We delete the artifact first, since it's the thing that actually takes up
        # space. If we fail afterwards, the metadata file will contain an invalid URL,
        # but that's okay: we handle that gracefully when loading the metadata.
        try:
            artifact_was_deleted = self._inventory.delete_url(self.artifact_url)
        except Exception as e:
            message = f"Unable to delete artifact file at {self.artifact_url}"
            raise CacheEntryDeletionError(message) from e

        # TODO There's an unhandled edge case here: it's possible that this metadata
        # file was deleted and then another one was created with the same URL but
        # pointing to a different artifact URL. In that case, we'll delete the new
        # metadata file but not its artifact, leaving an orphaned artifact. That's not
        # the end of the world but it's lame. We could avoid this by reloading the
        # metadata file before deleting it and checking that the artifact URL is what
        # we expect.
        try:
            self._inventory.delete_url(self.metadata_url)
        except Exception as e:
            first_message = f"Unable to delete metadata file at {self.metadata_url}"
            # The triple-quoted messages carry layout whitespace; the final
            # oneline() call is relied on to tidy the combined message.
            if artifact_was_deleted:
                second_message = f"""
                however, the artifact at {self.artifact_url} was successfully deleted
                """
            else:
                second_message = f"""
                no artifact was deleted as the URL {self.artifact_url} no longer exists
                """
            full_message = first_message + " -- " + second_message
            raise CacheEntryDeletionError(oneline(full_message)) from e

        return artifact_was_deleted

    def __hash__(self):
        return hash(self._comparison_key)

    def __eq__(self, other):
        if not isinstance(other, CacheEntry):
            return False
        return self._comparison_key == other._comparison_key

    def __lt__(self, other):
        if not isinstance(other, CacheEntry):
            # This must be raised, not returned: a returned exception object is
            # truthy, which would make ``entry < other`` silently evaluate as True.
            raise TypeError(f"Can't compare {self!r} with non-CacheEntry {other!r}")
        return self._comparison_key < other._comparison_key

    def __repr__(self):
        return f"CacheEntry(metadata_url={self.metadata_url!r})"
class CacheEntryDeletionError(Exception):
    """Raised by ``CacheEntry.delete`` when an artifact or its metadata cannot
    be removed from the cache."""