"""
Contains the Cache class, which provides a user-facing API for directly interacting
with Bionic's cache.
"""
from functools import total_ordering
from .descriptors.parsing import entity_dnode_from_descriptor
from .utils.misc import oneline
from .utils.urls import path_from_url, is_file_url
class Cache:
    """
    User-facing handle on Bionic's persistent cache.

    Exposed as the ``cache`` attribute of a :class:`Flow <bionic.Flow>` object.
    Call ``get_entries`` to walk over the entries currently in the cache.
    """

    def __init__(self, deriver):
        # Only the persistent cache is needed today, but holding the whole deriver
        # lets us defer get_ready() until entries are actually requested, and leaves
        # room to later work out which artifacts belong to the current flow.
        self._deriver = deriver

    def get_entries(self):
        """
        Returns a lazy iterable of :class:`CacheEntry <bionic.cache_api.CacheEntry>`
        objects, one for each artifact in Bionic's persistent cache.

        Cached artifacts are stored by flow name, so this includes any artifacts
        generated by a flow with the same name as this one; this typically covers
        the current Flow object as well as any older or modified versions.

        Artifacts are returned for every cache tier that is enabled for the current
        flow. For example, if GCS caching is enabled, entries from both the "local"
        (on-disk) and "cloud" (GCS) tiers are produced.
        """

        # Reaching into private attributes is a little ugly, but it avoids adding
        # another API layer just for this.
        self._deriver.get_ready()
        cache_impl = self._deriver._core.persistent_cache
        candidate_stores = (cache_impl._local_store, cache_impl._cloud_store)
        # A tier that isn't enabled is represented by a store of None.
        enabled_stores = [
            candidate for candidate in candidate_stores if candidate is not None
        ]

        # The deriver is readied eagerly above; inventories are listed lazily as the
        # returned generator is consumed.
        return (
            CacheEntry(cache=self, inv_item=item)
            for enabled_store in enabled_stores
            for item in enabled_store.inventory.list_items()
        )
def path_from_url_if_file(url):
    """Returns the file path for ``url`` when it is a file URL, and None otherwise."""

    return path_from_url(url) if is_file_url(url) else None
@total_ordering
class CacheEntry:
    """
    Represents an artifact in Bionic's persistent cache.

    Has the following fields:

    - ``tier``: "local" or "cloud", depending on which tier of the cache the artifact
      is in.
    - ``entity``: the name of the cached entity, or ``None`` if the artifact does
      not correspond to an entity
    - ``artifact_url``: a URL to the cached artifact file or blob
    - ``metadata_url``: a URL to the metadata file or blob describing the artifact
    - ``artifact_path``: a Path object locating the artifact file (if it's a local
      file) or None (if it's a cloud blob)
    - ``metadata_path``: a Path object locating the metadata file (if it's a local
      file) or None (if it's a cloud blob)

    Entries are compared, ordered and hashed by their metadata URL.
    """

    def __init__(self, cache, inv_item):
        self._cache = cache

        # Equality, ordering and hashing are all based on the metadata URL.
        self._comparison_key = inv_item.abs_metadata_url

        self.tier = inv_item.inventory.tier
        self.artifact_url = inv_item.abs_artifact_url
        self.metadata_url = inv_item.abs_metadata_url

        self._descriptor = inv_item.descriptor
        self._inventory = inv_item.inventory

    @property
    def entity(self):
        """The entity name for this entry's descriptor, or None if it has none."""
        try:
            return entity_dnode_from_descriptor(self._descriptor).assume_entity().name
        except ValueError:
            # A ValueError here means the descriptor doesn't map to an entity.
            return None

    @property
    def artifact_path(self):
        """The artifact's file path if ``artifact_url`` is a file URL, else None."""
        return path_from_url_if_file(self.artifact_url)

    @property
    def metadata_path(self):
        """The metadata's file path if ``metadata_url`` is a file URL, else None."""
        return path_from_url_if_file(self.metadata_url)

    def delete(self):
        """
        Safely deletes the artifact and its metadata from the cache.

        Returns True if the artifact was deleted and False if it was not found. Throws a
        ``CacheEntryDeletionError`` if the deletion fails.

        (Note that if two entries refer to the same artifact and ``delete`` is called on
        both, the first call will return True and the second will return False.)
        """

        # We delete the artifact first, since it's the thing that actually takes up
        # space. If we fail afterwards, the metadata file will contain an invalid URL,
        # but that's okay: we handle that gracefully when loading the metadata.
        try:
            artifact_was_deleted = self._inventory.delete_url(self.artifact_url)
        except Exception as e:
            message = f"Unable to delete artifact file at {self.artifact_url}"
            raise CacheEntryDeletionError(message) from e

        # TODO There's an unhandled edge case here: it's possible that this metadata
        # file was deleted and then another one was created with the same URL but
        # pointing to a different artifact URL. In that case, we'll delete the new
        # metadata file but not its artifact, leaving an orphaned artifact. That's not
        # the end of the world but it's lame. We could avoid this by reloading the
        # metadata file before deleting it and checking that the artifact URL is what
        # we expect.
        try:
            self._inventory.delete_url(self.metadata_url)
        except Exception as e:
            # Report what happened to the artifact too, so the caller knows how far
            # the deletion got before it failed.
            first_message = f"Unable to delete metadata file at {self.metadata_url}"
            if artifact_was_deleted:
                second_message = f"""
                however, the artifact at {self.artifact_url} was successfully deleted
                """
            else:
                second_message = f"""
                no artifact was deleted as the URL {self.artifact_url} no longer exists
                """
            full_message = first_message + " -- " + second_message
            raise CacheEntryDeletionError(oneline(full_message)) from e

        return artifact_was_deleted

    def __hash__(self):
        return hash(self._comparison_key)

    def __eq__(self, other):
        if not isinstance(other, CacheEntry):
            return False
        return self._comparison_key == other._comparison_key

    def __lt__(self, other):
        if not isinstance(other, CacheEntry):
            # This must be raised, not returned: a returned exception object is
            # truthy, which would make `entry < non_entry` silently evaluate as True.
            raise TypeError(f"Can't compare {self!r} with non-CacheEntry {other!r}")
        return self._comparison_key < other._comparison_key

    def __repr__(self):
        return f"CacheEntry(metadata_url={self.metadata_url!r})"
class CacheEntryDeletionError(Exception):
    """Raised when a cache entry's artifact or metadata file cannot be deleted."""