Commit e992b3ef authored by Yan Sushchynski (EPAM)
Browse files

GONRG-2103: Fix entity passes validation with wrong formats

parent e59469e5
Pipeline #38142 passed with stages
in 8 minutes and 4 seconds
......@@ -23,7 +23,7 @@ from typing import Any, List, Union, Tuple, Union
import jsonschema
import requests
import tenacity
from jsonschema import exceptions
from jsonschema import exceptions, FormatChecker
from libs.constants import DATA_SECTION, DATASETS_SECTION, MASTER_DATA_SECTION, \
REFERENCE_DATA_SECTION, WORK_PRODUCT_SECTION, WORK_PRODUCT_COMPONENTS_SECTION
......@@ -305,7 +305,14 @@ class SchemaValidator(HeadersMixin):
handlers=self.resolver_handlers,
cache_remote=True
)
jsonschema.validate(schema=schema, instance=data, resolver=resolver)
jsonschema.validate(
schema=schema,
instance=data,
resolver=resolver,
format_checker=FormatChecker(
formats=("date-time", "time", "date")
)
)
@staticmethod
def get_manifest_kind(manifest: dict) -> str:
......
{
"id": "namespace:dataset--File.Generic:7c6c0a0d-9e6d-5087-bed9-106233ba57ea",
"kind": "osdu:wks:dataset--File.Generic:1.0.0",
"version": 1562066009929332,
"acl": {
"owners": [
"someone@company.com"
],
"viewers": [
"someone@company.com"
]
},
"legal": {
"legaltags": [
"Example legaltags"
],
"otherRelevantDataCountries": [
"US"
],
"status": "compliant"
},
"tags": {
"NameOfKey": "String value"
},
"createTime": "2020-12-16T11:46:20.163Z",
"createUser": "some-user@some-company-cloud.com",
"modifyTime": "2020-12-16T11:52:24.477Z",
"modifyUser": "some-user@some-company-cloud.com",
"ancestry": {
"parents": []
},
"meta": [],
"data": {
"ResourceHomeRegionID": "namespace:reference-data--OSDURegion:AWSEastUSA:",
"ResourceHostRegionIDs": [
"namespace:reference-data--OSDURegion:AWSEastUSA:"
],
"ResourceCurationStatus": "namespace:reference-data--ResourceCurationStatus:CREATED:",
"ResourceLifecycleStatus": "namespace:reference-data--ResourceLifecycleStatus:LOADING:",
"ResourceSecurityClassification": "namespace:reference-data--ResourceSecurityClassification:RESTRICTED:",
"Source": "Example Data Source",
"ExistenceKind": "namespace:reference-data--ExistenceKind:Prototype:",
"Name": "Dataset X221/15",
"Description": "As originally delivered by ACME.com.",
"TotalSize": "13245217273",
"EncodingFormatTypeID": "namespace:reference-data--EncodingFormatType:text%2Fcsv:",
"SchemaFormatTypeID": "namespace:reference-data--SchemaFormatType:CWLS%20LAS3:",
"Endian": "BIG",
"DatasetProperties": {
"FileSourceInfo": {
"FileSource": "s3://default_bucket/r1/data/provided/documents/1000.witsml",
"PreloadFilePath": "s3://staging-area/r7/raw-data/provided/documents/1000.witsml",
"PreloadFileCreateUser": "somebody@acme.org",
"PreloadFileCreateDate": "2019-12-16T11:46:20.163Z",
"PreloadFileModifyUser": "somebody.else@acme.org",
"PreloadFileModifyDate": "2019-12-20T17:20:05.356Z",
"Name": "1000.witsml",
"FileSize": "95463",
"EncodingFormatTypeID": "namespace:reference-data--EncodingFormatType:application%2Fgeo%2Bjson:",
"Checksum": "d41d8cd98f00b204e9800998ecf8427e",
"ChecksumAlgorithm": "SHA-256"
}
},
"Checksum": "d41d8cd98f00b204e9800998ecf8427e",
"ExtensionProperties": {}
}
}
{
"id": "namespace:dataset--File.Generic:7c6c0a0d-9e6d-5087-bed9-106233ba57ea",
"kind": "osdu:wks:dataset--File.Generic:1.0.0",
"version": 1562066009929332,
"acl": {
"owners": [
"someone@company.com"
],
"viewers": [
"someone@company.com"
]
},
"legal": {
"legaltags": [
"Example legaltags"
],
"otherRelevantDataCountries": [
"US"
],
"status": "compliant"
},
"tags": {
"NameOfKey": "String value"
},
"createTime": "2020-12-16T11:46:20.163Z",
"createUser": "some-user@some-company-cloud.com",
"modifyTime": "2020-12-16T11:52:24.477Z",
"modifyUser": "some-user@some-company-cloud.com",
"ancestry": {
"parents": []
},
"meta": [],
"data": {
"ResourceHomeRegionID": "namespace:reference-data--OSDURegion:AWSEastUSA:",
"ResourceHostRegionIDs": [
"namespace:reference-data--OSDURegion:AWSEastUSA:"
],
"ResourceCurationStatus": "namespace:reference-data--ResourceCurationStatus:CREATED:",
"ResourceLifecycleStatus": "namespace:reference-data--ResourceLifecycleStatus:LOADING:",
"ResourceSecurityClassification": "namespace:reference-data--ResourceSecurityClassification:RESTRICTED:",
"Source": "Example Data Source",
"ExistenceKind": "namespace:reference-data--ExistenceKind:Prototype:",
"Name": "Dataset X221/15",
"Description": "As originally delivered by ACME.com.",
"TotalSize": "13245217273",
"EncodingFormatTypeID": "namespace:reference-data--EncodingFormatType:text%2Fcsv:",
"SchemaFormatTypeID": "namespace:reference-data--SchemaFormatType:CWLS%20LAS3:",
"Endian": "BIG",
"DatasetProperties": {
"FileSourceInfo": {
"FileSource": "s3://default_bucket/r1/data/provided/documents/1000.witsml",
"PreloadFilePath": "s3://staging-area/r7/raw-data/provided/documents/1000.witsml",
"PreloadFileCreateUser": "somebody@acme.org",
"PreloadFileCreateDate": "mar 11",
"PreloadFileModifyUser": "somebody.else@acme.org",
"PreloadFileModifyDate": "mar 11",
"Name": "1000.witsml",
"FileSize": "95463",
"EncodingFormatTypeID": "namespace:reference-data--EncodingFormatType:application%2Fgeo%2Bjson:",
"Checksum": "d41d8cd98f00b204e9800998ecf8427e",
"ChecksumAlgorithm": "SHA-256"
}
},
"Checksum": "d41d8cd98f00b204e9800998ecf8427e",
"ExtensionProperties": {}
}
}
......@@ -78,3 +78,5 @@ SCHEMA_WORK_PRODUCT = f"{DATA_PATH_PREFIX}/surrogate/schemas/WorkProduct.1.0.0.j
# Manifest fixtures stored under the surrogate/manifests test-data directory.
SURROGATE_WPC_DATA_QUALITY = f"{DATA_PATH_PREFIX}/surrogate/manifests/DataQuality.1.0.0.json"
SURROGATE_WORK_PRODUCT = f"{DATA_PATH_PREFIX}/surrogate/manifests/WorkProduct.1.0.0.json"
# File.Generic record and schema consumed together by the date-time format
# validation test. NOTE(review): judging by its name, the record is expected to
# carry malformed date-time values on purpose -- confirm against the fixture.
FILE_GENERIC_WRONG_DATE_TIME = f"{DATA_PATH_PREFIX}/datasets/File.Generic.1.0.0_wrong_date_time.json"
SCHEMA_FILE_GENERIC = f"{DATA_PATH_PREFIX}/datasets/schema_File.Generic.1.0.0.json"
......@@ -14,21 +14,23 @@
# limitations under the License.
import copy
import http
import requests
import json
import os
import sys
import jsonschema
sys.path.append(f"{os.getenv('AIRFLOW_SRC_DIR')}/plugins")
sys.path.append(f"{os.getenv('AIRFLOW_SRC_DIR')}/dags")
from mock_providers import get_test_credentials
from file_paths import (
DATA_PATH_PREFIX,
MANIFEST_EMPTY_PATH,
FILE_GENERIC_WRONG_DATE_TIME,
SCHEMA_FILE_VALID_PATH,
SCHEMA_FILE_GENERIC,
SCHEMA_GENERIC_MASTERDATA_PATH,
SCHEMA_SEISMIC_TRACE_DATA_VALID_PATH,
SCHEMA_WORK_PRODUCT_VALID_PATH,
......@@ -37,14 +39,11 @@ from file_paths import (
SCHEMA_TEST_MASTERDATA_PATH,
SURROGATE_WPC_DATA_QUALITY,
SURROGATE_WORK_PRODUCT,
MANIFEST_SEISMIC_TRACE_DATA_VALID_PATH,
MANIFEST_WELLBORE_VALID_PATH,
MANIFEST_GENERIC_PATH,
MANIFEST_NEW_GENERIC_SCHEMA_PATH,
SCHEMA_WELLBORE_VALID_PATH,
TRAVERSAL_WELLBORE_VALID_PATH,
TRAVERSAL_SEISMIC_TRACE_DATA_VALID_PATH,
TRAVERSAL_MANIFEST_EMPTY_PATH
)
from mock_responses import MockSchemaResponse
from libs.context import Context
......@@ -291,7 +290,6 @@ class TestSchemaValidator:
manifest_file: str,
schema_file
):
import jsonschema
def mock_validate_against_schema(*args, **kwargs):
raise jsonschema.exceptions.ValidationError("Something wrong")
......@@ -379,3 +377,28 @@ class TestSchemaValidator:
)
schema = validator._add_surrogate_keys_to_patterns(schema)
validator._validate_against_schema(schema, manifest)
@pytest.mark.parametrize(
    "schema, data",
    [
        pytest.param(
            SCHEMA_FILE_GENERIC,
            FILE_GENERIC_WRONG_DATE_TIME,
            id="Wrong date-time"
        )
    ]
)
def test_validate_against_schema_raises_wrong_date_time_format(self, schema, data):
    """
    Check that a record with a malformed date-time value fails validation.

    :param schema: path to the JSON schema fixture
    :param data: path to the JSON record fixture that should be rejected
    """
    # Keep the parametrized paths intact and bind the parsed JSON to new
    # names, so a failure report still shows which files were used.
    with open(schema, encoding="utf-8") as schema_file:
        schema_content = json.load(schema_file)
    with open(data, encoding="utf-8") as data_file:
        record = json.load(data_file)

    context = Context(app_key="", data_partition_id="")
    schema_validator = SchemaValidator(
        "",
        AirflowTokenRefresher(get_test_credentials()),
        context
    )

    with pytest.raises(jsonschema.exceptions.ValidationError) as err:
        schema_validator._validate_against_schema(schema_content, record)

    # The assertion must live outside the ``raises`` block (code after the
    # raising call inside it never runs) and must inspect the exception itself:
    # ``err`` is pytest's ExceptionInfo wrapper, whose string form is not the
    # exception message.
    assert "is not a \'date-time\'" in str(err.value)
......@@ -2,6 +2,7 @@ pip uninstall enum34 -y
# Test-only Python dependencies.
pip install pytest
pip install pytest-mock
pip install responses
# NOTE(review): presumably needed so that jsonschema's FormatChecker can
# actually verify the "date-time" format -- confirm against the jsonschema docs.
pip install strict-rfc3339
pip install --upgrade google-api-python-client
# Make the Airflow environment helper executable and point the tests at the Airflow sources.
chmod +x tests/set_airflow_env.sh
export AIRFLOW_SRC_DIR="/usr/local/airflow/"
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment