Commit 34ab64e5 authored by Yan Sushchynski (EPAM)
Browse files

Merge branch 'GONRG-3773_Improve_performance' into 'master'

GONRG-3773: Improve_performance

See merge request !13
parents 6cc4c77e 21d8b7c5
Pipeline #92681 passed with stages
in 3 minutes and 10 seconds
......@@ -217,7 +217,10 @@ class ManifestAnalyzer:
self._find_invalid_nodes()
def _create_entity_node(self, entity: ManifestEntity):
_id = entity.entity_data.get("id", f"surrogate-key:{str(uuid4())}")
try:
_id = entity.entity_data["id"]
except KeyError:
_id = f"surrogate-key:{str(uuid4())}"
entity_id = split_id(_id)
self.entity_id_node_table[entity_id] = EntityNode(
entity_id,
......
......@@ -16,6 +16,7 @@
"""Util functions to work with OSDU Manifests."""
import dataclasses
from functools import lru_cache
from itertools import islice
from typing import Any, Generator, Iterable, List, TypeVar
from uuid import uuid4
......@@ -31,15 +32,12 @@ class EntityId:
raw_value: str
version: str = ""
@property
def srn(self) -> str:
    """
    Build the SRN of the entity.

    :return: ``"<id>:<version>"`` when the version is non-empty, otherwise the bare id
    """
    return f"{self.id}:{self.version}" if self.version else self.id
def __post_init__(self):
self.srn = f"{self.id}:{self.version}" if self.version else self.id
self._hash = hash(self.srn)
def __hash__(self) -> int:
return hash(self.srn)
return self._hash
def __eq__(self, other: "EntityId") -> bool:
return self.srn == self.srn
......@@ -54,7 +52,7 @@ def remove_trailing_colon(id_value: str) -> str:
"""
return id_value[:-1] if id_value.endswith(":") else id_value
@lru_cache()
def split_id(id_value: str) -> EntityId:
"""
Get id without a version for searching later.
......
......@@ -16,6 +16,7 @@
"""Provides SchemaValidator."""
import copy
import json
import logging
import re
from functools import lru_cache
......@@ -127,6 +128,7 @@ class SchemaValidator(HeadersMixin):
self.schema_client = SchemaClient(token_refresher=token_refresher, data_partition_id=context.data_partition_id)
self.surrogate_key_fields_paths = surrogate_key_fields_paths or []
self.data_types_with_surrogate_ids = data_types_with_surrogate_ids or []
self._schema_validators = {}
def _clear_data_fields(self, schema_part: Union[dict, list]):
"""
......@@ -170,6 +172,41 @@ class SchemaValidator(HeadersMixin):
return None
return schema
def _get_schema_validator(self, schema: dict, kind: str = None):
    """
    Return a jsonschema validator for the schema, creating it on first use.

    Validators are stored in ``self._schema_validators`` and reused on later calls.
    The cache key is the kind when one is given; otherwise it is the hash of the
    schema serialized to JSON with sorted keys.

    :param schema: schema data
    :type schema: dict
    :param kind: schema kind
    :type kind: str
    :return: Validator object
    """
    if kind:
        schema_id = kind
    else:
        # A dict is not hashable, so hash its JSON form (sorted keys) instead.
        schema_id = hash(json.dumps(schema, sort_keys=True, default=str))

    # Single lookup; test for presence explicitly rather than by truthiness.
    validator = self._schema_validators.get(schema_id)
    if validator is not None:
        return validator

    resolver = OSDURefResolver(
        base_uri=schema.get("$id", ""),
        referrer=schema,
        cache_remote=True,
    )
    # Pick the validator class that matches the schema's declared draft.
    validator_class = jsonschema.validators.validator_for(schema)
    validator = validator_class(
        schema=schema,
        resolver=resolver,
        format_checker=FormatChecker(),
    )
    self._schema_validators[schema_id] = validator
    return validator
@staticmethod
def _extend_pattern_with_surrogate_key(pattern: str) -> str:
"""
......@@ -295,17 +332,11 @@ class SchemaValidator(HeadersMixin):
:param data: Any data to validate against schema.
:return:
"""
resolver = OSDURefResolver(
base_uri=schema.get("$id", ""),
referrer=schema,
cache_remote=True
)
jsonschema.validate(
schema=schema,
instance=data,
resolver=resolver,
format_checker=FormatChecker()
)
try:
validator = self._get_schema_validator(schema, data["kind"])
except (KeyError, TypeError):
validator = self._get_schema_validator(schema)
validator.validate(data)
@staticmethod
def get_manifest_kind(manifest: dict) -> str:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment