Commit 7dac9d1d authored by Thomas Gehrmann [SLB]
Browse files

update scripts to support the new schemas convention

parent 738748f5
{
"authority": "osdu",
"source": "osdu",
"majorVersion": 0,
"minorVersion": 3,
"source": "wks",
"majorVersion": 1,
"minorVersion": 0,
"patchVersion": 0,
"createdBy": "OSDU Data Definition Group",
"status": "PUBLISHED",
"status": "DEVELOPMENT",
"scope": "SHARED",
"exclude": []
}
\ No newline at end of file
import argparse
import os
import copy
import pathlib
from Utility import Utility
......@@ -9,27 +11,46 @@ class ImportFromOSDU(object):
def __init__(self):
parser = argparse.ArgumentParser(
description="Given a path to an OSDU schema sub-folder release, move and rename schemas for deployment.")
description="Given a path to an LOAD_SEQUENCE_FOLDERS/SLB schema sub-folder release, "
"move and rename schemas for deployment.")
parser.add_argument('-f', type=str,
help='The folder path relative to "deployments"',
default='osdu-source/R3-json-schema')
help='The Generated folder path relative to "deployments"',
default='osdu-source/R3-json-schema/Generated')
parser.add_argument('-u', type=str,
help='The kind of Universe - OSDU', default='OSDU')
arguments = parser.parse_args()
self.info = None
self.schema_files = dict()
self.dependencies = list()
deployments = Utility.path_to_deployments()
self.target_path = os.path.join(deployments, 'shared-schemas', 'osdu')
self.discover_schemas(deployments, self.__get_sub_folder(arguments.f))
self.copy_and_record_dependencies()
self.order_dependencies()
self.write_load_sequence(deployments)
pass
self.target_path = os.path.join(deployments, 'shared-schemas', arguments.u.lower())
self.load_sequence_path = self.target_path
if arguments.u.lower() == 'osdu':
self.__get_info_file(deployments, self.__get_sub_folder(arguments.f))
self.discover_schemas(deployments, self.__get_sub_folder(arguments.f))
self.copy_and_record_dependencies()
self.order_dependencies()
self.write_load_sequence(deployments)
else:
print('Unrecognized universe: {}'.format(arguments.u))
@staticmethod
def __generated_target(file):
    """Translate an 'Authoring' path into its 'Generated' counterpart.

    Returns None for any path ending in 'abstractResources.json'; otherwise
    the path with every occurrence of 'Authoring' replaced by 'Generated'.
    """
    is_abstract_resources = file.endswith('abstractResources.json')
    return None if is_abstract_resources else file.replace('Authoring', 'Generated')
def discover_schemas(self, deployments, folder_parts):
files = Utility.find_files(folder_parts, deployments)
files = sorted(files) # this brings abstract to the front, less work for dependency chasing.
default_version = '0.0.0'
if self.info is not None:
default_version = '{}.{}.{}'.format(self.info['majorVersion'],
self.info['minorVersion'],
self.info['patchVersion'])
for file in files:
entity, folders = Utility.get_entity_folder_from_file(file, folder_parts)
group_type, entity, version, folders = Utility.get_entity_folder_from_file(file, folder_parts)
if entity == self.SCHEMA_INFO:
self.info = Utility.load_json(file)
else:
......@@ -39,8 +60,24 @@ class ImportFromOSDU(object):
'source': file,
'target': target
}
if version is not None:
schema_file['version'] = version
else:
schema_file['version'] = default_version
self.schema_files[entity] = schema_file
def __get_info_file(self, deployments, folder_parts):
    """Find the schema-info file for a release folder and load it into ``self.info``.

    The search starts in the folder described by *folder_parts* (relative to
    *deployments*). While nothing is found, it is repeated one directory
    level higher, ending with *deployments* itself. ``self.info`` is left
    unchanged when no matching file exists at any level.

    The previous implementation sliced with ``folder_parts[:-i]`` while ``i``
    was negative, which produced the prefixes ``[:1]``, ``[:2]``, ... instead
    of climbing towards the parent folders.
    """
    pattern = self.SCHEMA_INFO + '*'
    # depth == len(folder_parts) is the requested folder, depth == 0 is the
    # deployments root; every step in between drops one trailing component.
    for depth in range(len(folder_parts), -1, -1):
        infos = Utility.find_files(folder_parts[:depth], deployments, pattern)
        if infos:
            # several candidates: keep the established choice of the last hit
            self.info = Utility.load_json(infos[-1])
            return
def copy_and_record_dependencies(self):
remove_me = list()
if self.__status() == 'DEVELOPMENT':
......@@ -50,12 +87,12 @@ class ImportFromOSDU(object):
to_be_excluded = self.__to_be_excluded()
for key, schema_file in self.schema_files.items():
if schema_file['entity'] not in to_be_excluded:
kind_file = self.__make_kind(schema_file['entity'], is_file=True) + extension
kind_file = self.__make_kind(schema_file, is_file=True) + extension
os.makedirs(schema_file['target'], exist_ok=True)
path = os.path.join(schema_file['target'], kind_file)
schema = Utility.load_json(schema_file['source'])
schema_file['dependencies'] = self.find_references(schema)
schema_info = self.__make_schema_info(schema_file['entity'])
schema_info = self.__make_schema_info(schema_file, schema)
to_load = {'schemaInfo': schema_info, 'schema': schema}
Utility.save_json(to_load, path)
else:
......@@ -64,7 +101,6 @@ class ImportFromOSDU(object):
if len(remove_me) > 0:
for r in remove_me:
self.schema_files.pop(r)
pass
def find_references(self, schema):
my_dependencies = list()
......@@ -72,20 +108,7 @@ class ImportFromOSDU(object):
return my_dependencies
def order_dependencies(self):
for key, schema_file in self.schema_files.items():
append_this = True
insert_at = len(self.dependencies)
this_kind = self.__make_kind(schema_file['entity'], is_file=False)
if this_kind in self.dependencies:
insert_at = self.dependencies.index(this_kind)
append_this = False
for dep in schema_file['dependencies']:
if dep not in self.dependencies:
self.dependencies.insert(insert_at, dep)
insert_at += 1
if append_this:
self.dependencies.append(this_kind)
self.__build_dependencies_list()
# next round: re-order dependencies
cycle = 0
swapped = True
......@@ -93,7 +116,7 @@ class ImportFromOSDU(object):
cycle += 1 # recursion limit
swapped = False
for key, schema_file in self.schema_files.items():
this_kind = self.__make_kind(schema_file['entity'], is_file=False)
this_kind = self.__make_kind(schema_file, is_file=False)
this_idx = self.dependencies.index(this_kind)
for dep in schema_file['dependencies']:
other_idx = self.dependencies.index(dep)
......@@ -104,51 +127,106 @@ class ImportFromOSDU(object):
# print('Round {}: Moved {} in front of {}'.format(cycle, other, this_kind))
swapped = True
def __build_dependencies_list(self):
    """Fill ``self.dependencies`` with every schema kind and the kinds it references.

    For each schema, the not-yet-listed dependencies are inserted just before
    the schema's own kind when that kind is already listed, or at the end of
    the list followed by the schema's own kind when it is not.
    """
    ordered = self.dependencies
    for schema_file in self.schema_files.values():
        own_kind = self.__make_kind(schema_file, is_file=False)
        already_listed = own_kind in ordered
        position = ordered.index(own_kind) if already_listed else len(ordered)
        for required in schema_file['dependencies']:
            if required in ordered:
                continue
            ordered.insert(position, required)
            position += 1  # keep later insertions behind the one just made
        if not already_listed:
            ordered.append(own_kind)
def write_load_sequence(self, base_path):
    """Write the load-sequence JSON file for the ordered dependencies.

    For every kind in ``self.dependencies`` the group-type prefix is removed
    from the entity segment, the matching ``*.json`` file is looked up below
    ``self.target_path``, and the kind is recorded together with the file's
    path relative to *base_path*. Kinds without a matching file are reported
    on stdout and left out of the sequence.

    The result is saved in ``self.load_sequence_path`` under the name given
    by ``LOAD_SEQUENCE_FILE`` formatted with major, minor and patch version.

    Superseded lines left beside their replacements (a lookup rooted at
    *base_path* and an unterminated ``os.path.join`` on ``self.target_path``)
    have been removed.
    """
    sequence = []
    for dep in self.dependencies:
        parts = dep.split(':')
        if len(parts) > 2:
            # file names carry the bare entity, without the "<group-type>." prefix
            parts[2] = parts[2].split('.')[-1]
        dep_as_file = ':'.join(parts).replace(':', '..')
        abs_path = Utility.find_file(dep_as_file + '*.json', root=self.target_path)
        if abs_path is not None:
            rel_path = Utility.get_relative_path(base_path, abs_path)
            sequence.append({'kind': dep, 'relativePath': rel_path})
        else:
            print('Error: Reference to {} schema not found.'.format(dep))
    file_name = self.LOAD_SEQUENCE_FILE.format(self.__major(), self.__minor(), self.__patch())
    Utility.save_json(sequence, os.path.join(self.load_sequence_path, file_name))
def __process(self, raw, my_dependencies):
    """Recursively walk a parsed JSON fragment and resolve its ``$ref`` entries.

    Dictionaries are handed to ``__process_d_ref``, which handles the
    ``$ref`` entry or continues the walk into the dictionary's values.
    Lists are traversed element by element; any other value is ignored.
    Kinds discovered on the way are collected in *my_dependencies*.

    The former inline ``$ref`` handling has been removed here: it lives in
    ``__process_d_ref`` and would otherwise run twice per dictionary.
    """
    if isinstance(raw, dict):
        self.__process_d_ref(my_dependencies, raw)
    elif isinstance(raw, list):
        for item in raw:
            self.__process(item, my_dependencies)
def __make_schema_info(self, entity):
kind = self.__make_kind(entity, is_file=False)
def __process_d_ref(self, my_dependencies, raw):
    """Resolve the ``$ref`` of one dictionary or descend into its values.

    A dictionary without ``$ref`` has each of its values passed back to
    ``__process``. Internal references (``#/definitions/...``) stay as they
    are. References starting with one of the two schema URL prefixes go to
    the R2 swap routine, every other reference to the R3 swap routine.
    """
    if '$ref' not in raw:
        for child in raw.values():
            self.__process(child, my_dependencies)
        return

    target = raw['$ref']
    if target.startswith('#/definitions/'):  # ignore internal references
        return

    r2_prefixes = ('https://schema.osdu.opengroup', 'https://schema.sdu')
    if target.startswith(r2_prefixes):
        self.__swap_record_r2_reference(my_dependencies, raw, target)
    else:  # standard R3++
        self.__swap_record_r3_reference(my_dependencies, raw, target)
def __swap_record_r3_reference(self, my_dependencies, raw, value):
    """Replace a file-based ``$ref`` by its schema kind and record the kind.

    The kind is derived from *value* via ``__make_kind_from_file``, written
    back into ``raw['$ref']`` and appended to *my_dependencies* unless it is
    already listed there.
    """
    resolved = self.__make_kind_from_file(value)
    raw['$ref'] = resolved
    if resolved in my_dependencies:
        return
    my_dependencies.append(resolved)
def __make_kind_from_file(self, file_name):
    """Derive the schema kind for a ``$ref`` that names a versioned schema file.

    The reference is split on '.'; the last part is taken as the extension,
    the three parts before it as the version, and the final '/'-separated
    piece of the part before those as the entity name. Entity and version
    are then turned into a kind by ``__make_kind``.

    Raises:
        SystemExit: when the reference has fewer than five dot-separated
            parts and therefore cannot contain entity, version and extension.
    """
    file_name = pathlib.Path(file_name).as_posix()
    parts = file_name.split('.')
    if len(parts) < 5:
        # Raise SystemExit directly; the exit() helper is injected by the
        # site module for interactive use and is not guaranteed to exist.
        raise SystemExit('Error: unexpected $ref: {}'.format(file_name))
    entity = parts[-5].split('/')[-1]
    version = '.'.join(parts[-4:-1])
    return self.__make_kind({'entity': entity, 'version': version}, is_file=False)
def __swap_record_r2_reference(self, my_dependencies, raw, value):
    """Replace a URL-style ``$ref`` by its schema kind and record the kind.

    The entity name is the second-to-last '/'-separated segment of *value*
    (with any '.json' removed). References with fewer than two segments are
    left untouched.
    """
    segments = value.split('/')
    if len(segments) < 2:
        return
    # roll up from the back; a '.json' in this segment should not happen
    entity_name = segments[-2].replace('.json', '')
    resolved = self.__make_kind(entity_name, is_file=False)
    raw['$ref'] = resolved
    if resolved not in my_dependencies:
        my_dependencies.append(resolved)
def __make_schema_info(self, entity_info, schema=None):
if isinstance(schema, dict):
kind = schema.get('x-osdu-schema-source', 'error')
entity_info['authority'] = kind.split(':')[0]
entity_info['source'] = kind.split(':')[1]
entity_info['entity'] = kind.split(':')[2]
entity_info['version'] = kind.split(':')[3]
else:
kind = self.__make_kind(entity_info, is_file=False)
entity = entity_info['entity']
version = entity_info['version']
major = version.split('.')[0]
minor = version.split('.')[1]
patch = version.split('.')[2]
schema_info = {
"schemaIdentity": {
"authority": self.__authority(),
"source": self.__source(),
"entityType": entity,
"schemaVersionMajor": int(self.__major()),
"schemaVersionMinor": int(self.__minor()),
"schemaVersionPatch": int(self.__patch()),
"schemaVersionMajor": int(major),
"schemaVersionMinor": int(minor),
"schemaVersionPatch": int(patch),
"id": kind
},
"createdBy": self.__created_by(),
......@@ -157,8 +235,13 @@ class ImportFromOSDU(object):
}
return schema_info
def __make_kind(self, entity, is_file=True):
version = '.'.join([self.__major(), self.__minor(), self.__patch()])
def __make_kind(self, entity_file, is_file=True):
if isinstance(entity_file, dict):
version = entity_file['version']
entity = entity_file['entity']
else:
version = '.'.join([self.__major(), self.__minor(), self.__patch()])
entity = entity_file
if is_file:
sep = '..'
else:
......
......@@ -2,8 +2,10 @@ import os
import json
import fnmatch
import requests
import pathlib
import urllib.parse
class RunEnv(object):
BEARER_TOKEN = os.environ.get('BEARER_TOKEN')
......@@ -68,9 +70,8 @@ class Utility(object):
return None
@staticmethod
def find_files(directory_parts=None, root=os.path.abspath(__file__)):
def find_files(directory_parts=None, root=os.path.abspath(__file__), search_expression='*.json'):
"""Find all JSON files in optional sub-path components and root"""
wildcard = '*.json'
found = list()
if directory_parts is None:
directory_parts = []
......@@ -78,16 +79,25 @@ class Utility(object):
for part in directory_parts:
path = os.path.join(path, part)
for root, dirs, files in os.walk(path):
for one_file in fnmatch.filter(files, wildcard):
for one_file in fnmatch.filter(files, search_expression):
found.append(os.path.join(root, one_file))
return found
@staticmethod
def get_entity_folder_from_file(file, folder_parts):
version = None # OSDU R2 has no version in the file name
top_level = folder_parts[-1]
parts = os.path.split(file)
entity = parts[1].replace('.json', '')
if '.' in entity: # OSDU R3 contains version in file name
vps = entity.split('.') # filename: <entityType>.major.minor.patch - 4 parts
if len(vps) == 4:
entity = vps[0]
version = '.'.join(vps[1:])
else:
exit('Error in entity name/version: {} expected <entityType>.major.minor.patch.json'.format(entity))
parts = parts[0].split(os.sep)
group_type = parts[-1]
folders = list()
collect = False
for part in parts:
......@@ -95,7 +105,7 @@ class Utility(object):
collect = True
elif collect:
folders.append(part)
return entity, folders
return group_type, entity, version, folders
@staticmethod
def __get_root_path(root):
......@@ -115,12 +125,13 @@ class Utility(object):
@staticmethod
def save_json(schema, path, sort_keys=False):
"Save a JSON schema to a file given as path"
os.makedirs(os.path.dirname(path), exist_ok=True)
with open(path, "w") as text_file:
json.dump(schema, text_file, sort_keys=sort_keys, indent=2)
@staticmethod
def get_relative_path(base_path, path):
return os.path.relpath(path, base_path)
return pathlib.Path(os.path.relpath(path, base_path)).as_posix()
@staticmethod
def path_to_deployments():
......
......@@ -12,7 +12,8 @@ schema authors.
## 1. Raw Schemas (e.g. original OSDU)
Original OSDU schemas are kept under the [osdu folder](./osdu).
Original OSDU schemas are kept under the [osdu folder](./osdu). The
[source is located here](https://gitlab.opengroup.org/osdu/subcommittees/data-def/work-products/schema/-/tree/master/Generated).
The contents are produced by the Python script [ImportFromOSDU.py](../scripts/ImportFromOSDU.py).
The structure of JSON files to register matches the expected payload of the Schema Service
......@@ -23,12 +24,12 @@ POST/PUT requests:
"schemaInfo": {
"schemaIdentity": {
"authority": "osdu",
"source": "osdu",
"entity": "WellLogWorkProductComponent",
"schemaVersionMajor": 0,
"schemaVersionMinor": 2,
"source": "wks",
"entityType": "work-product-component.WellLog",
"schemaVersionMajor": 1,
"schemaVersionMinor": 0,
"schemaVersionPatch": 0,
"id": "osdu:osdu:WellLogWorkProductComponent:0.2.0"
"id": "osdu:wks:work-product-component.WellLog:1.0.0"
},
"createdBy": "OSDU Data Definition Group",
"scope": "SHARED",
......@@ -45,7 +46,7 @@ Schemas may refer to abstract entity definitions or other external schema fragme
Schema Service requires the abstract definitions and schema fragments to be registered prior
to the registration of the main entity schema. This is achieved by a file defining the
load sequence per schema version. An example can be found
[here for OSDU R2](./osdu/load_sequence.0.2.0.json).
[here for OSDU R3](../shared-schemas/osdu/load_sequence.1.0.0.json).
## 2. Upload schema definitions
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment