Skip to content

Commit

Permalink
Refactor metadata classes (elyra-ai#708)
Browse files Browse the repository at this point in the history
To accommodate future development and support for other storage
models, the metadata core has been refactored into separate
storage and manager files.  In addition, a set of custom errors
that is not tied to the storage model has been introduced.

The MetadataManager methods have changed for easier use and the
MetadataStore methods and FileMetadataStore implementation have
been redefined to be completely specific to storage.

All refactored code now includes type hints in method signatures,
and all tests have been updated and rewritten.

* Move SchemaManager into its own module - schema

* Add parameters and text to custom error classes; add tests

* Isolate storage-specific ops, all tests use available storage mgrs

* fix test, improve type hints

* Disassociate MetadataStore from Metadata instances
  • Loading branch information
kevin-bates authored Jul 7, 2020
1 parent 69ccd62 commit d2b0c0f
Show file tree
Hide file tree
Showing 13 changed files with 1,060 additions and 817 deletions.
7 changes: 5 additions & 2 deletions elyra/metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from .metadata import FileMetadataStore, Metadata, MetadataManager, MetadataStore, SchemaManager, \
METADATA_TEST_NAMESPACE
from .error import MetadataNotFoundError, MetadataExistsError, SchemaNotFoundError
from .manager import MetadataManager
from .metadata import Metadata
from .schema import SchemaManager, METADATA_TEST_NAMESPACE
from .storage import FileMetadataStore, MetadataStore
49 changes: 49 additions & 0 deletions elyra/metadata/error.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#
# Copyright 2018-2020 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""This module includes custom error classes pertaining to the metadata service."""
import errno


class MetadataNotFoundError(FileNotFoundError):
    """Signals that a requested metadata instance does not exist.

    Specializes FileNotFoundError so the error text names the namespace
    involved; the instance name is passed as the OSError ``filename``.
    """

    def __init__(self, namespace: str, name: str):
        message = "No such metadata instance found in namespace '{}'".format(namespace)
        super().__init__(errno.ENOENT, message, name)


class MetadataExistsError(FileExistsError):
    """Signals that a metadata instance is already present when it should not be.

    Specializes FileExistsError so the error text names the namespace
    involved; the instance name is passed as the OSError ``filename``.
    """

    def __init__(self, namespace: str, name: str):
        message = "Metadata instance already exists in namespace '{}'".format(namespace)
        super().__init__(errno.EEXIST, message, name)


class SchemaNotFoundError(FileNotFoundError):
    """Signals that a requested schema does not exist.

    Specializes FileNotFoundError so the error text names the namespace
    involved; the schema name is passed as the OSError ``filename``.
    """

    def __init__(self, namespace: str, name: str):
        message = "No such schema instance found in namespace '{}'".format(namespace)
        super().__init__(errno.ENOENT, message, name)
28 changes: 16 additions & 12 deletions elyra/metadata/handlers.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@
from tornado import web
from notebook.base.handlers import APIHandler
from notebook.utils import url_unescape, url_path_join
from .metadata import MetadataManager, SchemaManager, Metadata

from .error import MetadataNotFoundError, MetadataExistsError, SchemaNotFoundError
from .metadata import Metadata
from .manager import MetadataManager
from .schema import SchemaManager
from ..util.http import HttpErrorMixin


Expand All @@ -34,7 +38,7 @@ async def get(self, namespace):
metadata = metadata_manager.get_all()
except (ValidationError, ValueError) as err:
raise web.HTTPError(400, str(err)) from err
except FileNotFoundError as err:
except MetadataNotFoundError as err:
raise web.HTTPError(404, str(err)) from err
except Exception as err:
raise web.HTTPError(500, repr(err)) from err
Expand All @@ -53,12 +57,12 @@ async def post(self, namespace):
self.log.debug("MetadataHandler: Creating metadata instance '{}' in namespace '{}'...".
format(instance.name, namespace))
metadata_manager = MetadataManager(namespace=namespace)
metadata = metadata_manager.add(instance.name, instance, replace=False)
metadata = metadata_manager.create(instance.name, instance)
except (ValidationError, ValueError, SyntaxError) as err:
raise web.HTTPError(400, str(err)) from err
except FileNotFoundError as err:
except (MetadataNotFoundError, SchemaNotFoundError) as err:
raise web.HTTPError(404, str(err)) from err
except FileExistsError as err:
except MetadataExistsError as err:
raise web.HTTPError(409, str(err)) from err
except Exception as err:
raise web.HTTPError(500, repr(err)) from err
Expand Down Expand Up @@ -107,7 +111,7 @@ async def get(self, namespace, resource):
metadata = metadata_manager.get(resource)
except (ValidationError, ValueError, NotImplementedError) as err:
raise web.HTTPError(400, str(err)) from err
except FileNotFoundError as err:
except MetadataNotFoundError as err:
raise web.HTTPError(404, str(err)) from err
except Exception as err:
raise web.HTTPError(500, repr(err)) from err
Expand All @@ -132,10 +136,10 @@ async def put(self, namespace, resource):
instance = Metadata(**payload)
self.log.debug("MetadataHandler: Updating metadata instance '{}' in namespace '{}'...".
format(resource, namespace))
metadata = metadata_manager.add(resource, instance, replace=True)
metadata = metadata_manager.update(resource, instance)
except (ValidationError, ValueError, NotImplementedError) as err:
raise web.HTTPError(400, str(err)) from err
except FileNotFoundError as err:
except MetadataNotFoundError as err:
raise web.HTTPError(404, str(err)) from err
except Exception as err:
raise web.HTTPError(500, repr(err)) from err
Expand All @@ -158,7 +162,7 @@ async def delete(self, namespace, resource):
raise web.HTTPError(400, str(err)) from err
except PermissionError as err:
raise web.HTTPError(403, str(err)) from err
except FileNotFoundError as err:
except MetadataNotFoundError as err:
raise web.HTTPError(404, str(err)) from err
except Exception as err:
raise web.HTTPError(500, repr(err)) from err
Expand All @@ -177,7 +181,7 @@ async def get(self, namespace):
try:
self.log.debug("SchemaHandler: Fetching all schemas for namespace '{}'...".format(namespace))
schemas = schema_manager.get_namespace_schemas(namespace)
except (ValidationError, ValueError, FileNotFoundError) as err:
except (ValidationError, ValueError, SchemaNotFoundError) as err:
raise web.HTTPError(404, str(err)) from err
except Exception as err:
raise web.HTTPError(500, repr(err)) from err
Expand All @@ -200,7 +204,7 @@ async def get(self, namespace, resource):
self.log.debug("SchemaResourceHandler: Fetching schema '{}' for namespace '{}'...".
format(resource, namespace))
schema = schema_manager.get_schema(namespace, resource)
except (ValidationError, ValueError, FileNotFoundError) as err:
except (ValidationError, ValueError, SchemaNotFoundError) as err:
raise web.HTTPError(404, str(err)) from err
except Exception as err:
raise web.HTTPError(500, repr(err)) from err
Expand All @@ -218,7 +222,7 @@ async def get(self):
try:
self.log.debug("NamespaceHandler: Fetching namespaces...")
namespaces = schema_manager.get_namespaces()
except (ValidationError, ValueError, FileNotFoundError) as err:
except (ValidationError, ValueError) as err:
raise web.HTTPError(404, str(err)) from err
except Exception as err:
raise web.HTTPError(500, repr(err)) from err
Expand Down
185 changes: 185 additions & 0 deletions elyra/metadata/manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
#
# Copyright 2018-2020 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import io
import json
import os
import re

from jsonschema import validate, ValidationError, draft7_format_checker
from traitlets import Type
from traitlets.config import LoggingConfigurable
from typing import Optional, List

from .metadata import Metadata
from .schema import SchemaManager
from .storage import MetadataStore, FileMetadataStore


class MetadataManager(LoggingConfigurable):
    """Manages the metadata instances of a single namespace.

    Instances are validated against their schema (looked up through the
    SchemaManager singleton) when they are read and before they are written.
    Persistence is delegated to the MetadataStore held in ``metadata_store``.
    """

    # System-owned namespaces
    NAMESPACE_RUNTIMES = "runtimes"
    NAMESPACE_CODE_SNIPPETS = "code-snippets"
    NAMESPACE_RUNTIME_IMAGES = "runtime-images"

    metadata_class = Type(Metadata, config=True,
                          help="""The metadata class. This is configurable to allow subclassing of
the MetadataManager for customized behavior.""")

    def __init__(self, namespace: str, store: Optional[MetadataStore] = None, **kwargs):
        """Create a manager bound to ``namespace``.

        :param namespace: the partition where the metadata is stored; this might have
            a unique meaning for each of the supported metadata stores
        :param store: the metadata store to be used; when None, a FileMetadataStore
            is created for the namespace
        :param kwargs: additional arguments, passed to the base class and to the
            FileMetadataStore that is created when no store is supplied
        """
        super().__init__(**kwargs)

        self.schema_mgr = SchemaManager.instance()
        self.schema_mgr.validate_namespace(namespace)
        self.namespace = namespace
        # Compare against None (not truthiness) so that a supplied store is
        # always honored, even if the object happens to evaluate as falsy.
        if store is not None:
            self.metadata_store = store
        else:
            self.metadata_store = FileMetadataStore(namespace, **kwargs)

    def namespace_exists(self) -> bool:
        """Returns True if the namespace for this instance exists"""
        return self.metadata_store.namespace_exists()

    def get_all(self, include_invalid: bool = False) -> List[Metadata]:
        """Returns the metadata instances fetched from the store for this namespace.

        Every fetched instance is validated.  Instances that fail validation are
        skipped unless ``include_invalid`` is True, in which case they are returned
        with ``reason`` set to the class name of the exception that was raised.

        :param include_invalid: whether instances failing validation are returned
        :return: the list of Metadata instances
        """
        instances = []
        for metadata_dict in self.metadata_store.fetch_instances():
            # validate the instance prior to return, include invalid instances as appropriate
            metadata = Metadata.from_dict(metadata_dict)
            try:
                self.validate(metadata.name, metadata)
            except Exception as ex:  # Ignore ValidationError and others when fetching all instances
                self.log.debug("Fetch of instance '{}' of namespace '{}' encountered an exception: {}".
                               format(metadata.name, self.namespace, ex))
                if include_invalid:
                    metadata.reason = ex.__class__.__name__
                    instances.append(metadata)
            else:
                instances.append(metadata)

        return instances

    def get(self, name: str) -> Metadata:
        """Returns the metadata instance corresponding to the given name.

        The instance is validated before it is returned, so validation errors
        propagate to the caller.
        """
        instance_list = self.metadata_store.fetch_instances(name=name)
        # NOTE(review): presumably the store raises when 'name' is unknown rather than
        # returning an empty list (which would surface here as IndexError) - confirm.
        metadata_dict = instance_list[0]
        metadata = Metadata.from_dict(metadata_dict)
        # validate the instance prior to return...
        self.validate(name, metadata)
        return metadata

    def create(self, name: str, metadata: Metadata) -> Metadata:
        """Creates the given metadata, returning the created instance"""
        return self._save(name, metadata)

    def update(self, name: str, metadata: Metadata) -> Metadata:
        """Updates the given metadata, returning the updated instance"""
        return self._save(name, metadata, for_update=True)

    def remove(self, name: str) -> None:
        """Removes the metadata instance corresponding to the given name"""
        self.log.info("Removing metadata resource '{}' from namespace '{}'.".format(name, self.namespace))
        self.metadata_store.delete_instance(name)

    def validate(self, name: str, metadata: Metadata) -> None:
        """Validate metadata against its schema.

        :param name: the instance name, used in log and error messages
        :param metadata: the instance to validate
        :raises ValidationError: if the instance has no 'schema_name', if the schema
            file is missing, or if the instance does not conform to its schema.
            Errors raised by the schema lookup itself are not caught here.
        """
        metadata_dict = metadata.to_dict(trim=True)
        schema_name = metadata_dict.get('schema_name')
        if not schema_name:
            raise ValidationError("Metadata instance '{}' in namespace '{}' is missing a 'schema_name' field!".
                                  format(name, self.namespace))

        schema = self._get_schema(schema_name)  # returns a value or throws

        self.log.debug("Validating metadata resource '{}' against schema '{}'...".format(name, schema_name))
        try:
            validate(instance=metadata_dict, schema=schema, format_checker=draft7_format_checker)
        except ValidationError as ve:
            # Because validation errors are so verbose, only provide the first line.
            first_line = str(ve).partition('\n')[0]
            msg = "Schema validation failed for metadata '{}' in namespace '{}' with error: {}.".\
                format(name, self.namespace, first_line)
            self.log.error(msg)
            raise ValidationError(msg) from ve

    @staticmethod
    def _get_normalized_name(name: str) -> str:
        """Derives an instance name from free-form text such as a display name.

        The text is lowercased, whitespace runs become underscores and characters
        outside ``a-z``, ``0-9``, ``-`` and ``_`` are dropped.  The result is then
        prefixed with ``a_`` if it does not begin with a letter and suffixed with
        ``_0`` if it does not end with a letter or digit.

        :return: the normalized name, or an empty string when no valid character
            remains (the caller is expected to treat that as a missing name)
        """
        # lowercase and replaces spaces with underscore
        name = re.sub('\\s+', '_', name.lower())
        # remove all invalid characters
        name = re.sub('[^a-z0-9-_]+', '', name)
        if not name:
            # Nothing usable is left (e.g. the input was "!!!"); indexing below
            # would raise IndexError, so hand back the empty result instead.
            return name
        # begin with alpha
        if not name[0].isalpha():
            name = 'a_' + name
        # end with alpha numeric
        if not name[-1].isalnum():
            name = name + '_0'
        return name

    def _get_schema(self, schema_name: str) -> dict:
        """Loads the schema based on the schema_name and returns the loaded schema json.

        The schema manager is consulted first.  If it returns None, the schema is
        read from the 'schemas' directory next to this module and registered with
        the schema manager.

        Throws ValidationError if schema file is not present.
        """
        schema_json = self.schema_mgr.get_schema(self.namespace, schema_name)
        if schema_json is None:
            schema_file = os.path.join(os.path.dirname(__file__), 'schemas', schema_name + '.json')
            if not os.path.exists(schema_file):
                raise ValidationError("Metadata schema file '{}' is missing!".format(schema_file))

            self.log.debug("Loading metadata schema from: '{}'".format(schema_file))
            with io.open(schema_file, 'r', encoding='utf-8') as f:
                schema_json = json.load(f)
            self.schema_mgr.add_schema(self.namespace, schema_name, schema_json)

        return schema_json

    def _save(self, name: str, metadata: Metadata, for_update: bool = False) -> Metadata:
        """Validates the given metadata and hands it to the store.

        :param name: the instance name; on creates it may be empty, in which case it
            is derived from ``metadata.display_name``
        :param metadata: the instance to persist
        :param for_update: True when an existing instance is being updated
        :return: a Metadata instance built from what the store returns
        :raises ValueError: if no metadata is given, or the name is missing or malformed
        :raises TypeError: if ``metadata`` is not a Metadata instance
        """
        if not metadata:
            raise ValueError("An instance of class 'Metadata' was not provided.")

        if not isinstance(metadata, Metadata):
            raise TypeError("'metadata' is not an instance of class 'Metadata'.")

        if not name and not for_update:  # name is derived from display_name only on creates
            if metadata.display_name:
                name = self._get_normalized_name(metadata.display_name)
                metadata.name = name

        if not name:  # At this point, name must be set
            raise ValueError('Name of metadata was not provided.')

        # The final character class must be alphanumeric only; a stray comma in
        # the class would let names ending in ',' pass validation.
        match = re.search("^[a-z]([a-z0-9-_]*[a-z0-9])?$", name)
        if match is None:
            raise ValueError("Name of metadata must be lowercase alphanumeric, beginning with alpha and can include "
                             "embedded hyphens ('-') and underscores ('_').")

        # Validate the metadata prior to storage then store the instance.
        self.validate(name, metadata)
        metadata_dict = self.metadata_store.store_instance(name, metadata.prepare_write(), for_update=for_update)
        return Metadata.from_dict(metadata_dict)
Loading

0 comments on commit d2b0c0f

Please sign in to comment.