AWS M25: EDS Ingestion Failure: WPC+Dataset testcase - Schema validation error on dataset--ConnectedSource.Generic
In AWS/M25 pre-ship, the test case "Basic Fetch and Ingest – WPC + Dataset" fails with the reason "Entity doesn't pass the schema validation."
- CSRE: osdu:master-data--ConnectedSourceRegistryEntry:arpit-singh-gcp-m25
- CSDJ: osdu:master-data--ConnectedSourceDataJob:arpit_singh_testing_GCP_wpc
- eds_ingest run id: 26e52b22-488e-4065-ae84-ddfa11b085b9
- Osdu_ingest run id: 2d45c69d-c46d-4835-8c6a-ad56537fd5a5
I get a schema validation error for dataset--ConnectedSource.Generic:
[
{
"2d45c69d-c46d-4835-8c6a-ad56537fd5a5": [
{
"id": "osdu:dataset--ConnectedSource.Generic:GCP_Logo",
"kind": "osdu:wks:dataset--ConnectedSource.Generic:1.0.1",
"reason": "Entity doesn't pass the schema validation."
}
]
}
]
The Osdu_ingest logs hint at the problem: the value "None" was supplied for the "ancestry" property, where an object was expected:
[2025-04-10, 16:57:59 UTC] {validate_schema.py:319} ERROR - Schema validation error. Data field.
[2025-04-10, 16:57:59 UTC] {validate_schema.py:320} ERROR - Manifest kind: osdu:wks:dataset--ConnectedSource.Generic:1.0.1
[2025-04-10, 16:57:59 UTC] {validate_schema.py:321} ERROR - Error: None is not of type 'object'
Failed validating 'type' in schema['properties']['ancestry']:
Interestingly, the payload submitted to Osdu_ingest contains a lot of null/None values. This is somewhat similar to #746 (closed), where properties with null values were sent instead of being omitted:
{
"runId": "50af28b6-5afa-4a0f-984e-35ff5b8136a3",
"executionContext": {
"acl": {
"viewers": [
"data.default.viewers@osdu.example.com"
],
"owners": [
"data.default.owners@osdu.example.com"
]
},
"legal": {
"legaltags": [
"osdu-EDS-Legal-Tag-7786769"
],
"otherRelevantDataCountries": [
"US"
]
},
"Payload": {
"AppKey": "test-app",
"data-partition-id": "osdu"
},
"manifest": {
"kind": "osdu:wks:Manifest:1.0.0",
"Data": {
"WorkProductComponents": [
{
"id": "osdu:work-product-component--GenericImage:LogoImage_GCP",
"version": 1744231437161818,
"kind": "osdu:wks:work-product-component--GenericImage:1.2.0",
"acl": {
"viewers": [
"data.default.viewers@osdu.example.com"
],
"owners": [
"data.default.owners@osdu.example.com"
]
},
"legal": {
"legaltags": [
"osdu-EDS-Legal-Tag-7786769"
],
"otherRelevantDataCountries": [
"US"
]
},
"data": {
"AssociatedObjectID": "osdu:master-data--GenericSite:DataCenter_GCP:",
"Datasets": [
"osdu:dataset--ConnectedSource.Generic:GCP_Logo:"
],
"Name": "LogoImage GCP",
"Source": "osdu:master-data--Organisation:NAM:",
"NameAliases": [
{
"AliasName": ":work-product-component--GenericImage:LogoImage_GCP",
"AliasNameTypeID": "osdu:reference-data--AliasNameType:EDSConnectedSourceIdentifier:"
}
]
},
"tags": {
"purpose": "M25_GCP_EDS_testing"
},
"createUser": "preshipping_test_user_m25@gcp.gnrg-osdu.projects.epam.com",
"createTime": "2025-04-02T22:34:06.751Z",
"modifyUser": "preshipping_test_user_m25@gcp.gnrg-osdu.projects.epam.com",
"modifyTime": "2025-04-09T20:43:57.238Z"
}
],
"Datasets": [
{
"ancestry": None,
"data": {
"Endian": None,
"Description": None,
"DatasetProperties": {
"SourceDataPartitionID": "m25",
"SourceRecordID": "dataset--File.Image.JPEG:GCP_Logo",
"ConnectedSourceDataJobID": "osdu:master-data--ConnectedSourceDataJob:arpit_singh_testing_GCP_wpc:",
"ConnectedSourceRegistryEntryID": "osdu:master-data--ConnectedSourceRegistryEntry:arpit_singh_testing_GCP:"
},
"TotalSize": None,
"EncodingFormatTypeID": None,
"Name": None,
"SchemaFormatTypeID": None,
"ResourceHomeRegionID": None,
"ResourceHostRegionIDs": None,
"ResourceLifecycleStatus": None,
"ResourceSecurityClassification": None,
"ResourceCurationStatus": None,
"ExistenceKind": None,
"TechnicalAssuranceID": None,
"Source": None,
"ExtensionProperties": None
},
"kind": "osdu:wks:dataset--ConnectedSource.Generic:1.0.1",
"acl": {
"viewers": [
"data.default.viewers@osdu.example.com"
],
"owners": [
"data.default.owners@osdu.example.com"
]
},
"version": None,
"tags": None,
"modifyUser": None,
"modifyTime": None,
"createTime": None,
"meta": None,
"legal": {
"legaltags": [
"osdu-EDS-Legal-Tag-7786769"
],
"otherRelevantDataCountries": [
"US"
],
"status": None
},
"createUser": None,
"id": "osdu:dataset--ConnectedSource.Generic:GCP_Logo"
}
]
}
}
}
}