"""Convert R2 JSON schemas to the R3 layout, in place.

For every ``*.json`` file directly inside the directory named by the
``SCHEMAS_DIR`` environment variable the script:

* rewrites ``../abstract/<Kind>.<version>.json`` references to
  ``<TENANT>:<AUTHORITY>:<Kind>:<version>``;
* replaces every ``<namespace>`` placeholder with ``TENANT``;
* lower-cases the first letter of each top-level ``properties`` key and
  each ``required`` entry.

Origin: commit e9d48875aba8ec35d231f3323c4fff7409a7fe7e,
"GONRG-1183: Add script to replace invalid values in schemas"
(scripts/convert_R2_schemas_to_R3.py).
"""
import glob
import json
import os
import re

from collections import UserString

TENANT = "opendes"
AUTHORITY = "osdu"
# Read leniently so that importing this module never fails; main() still
# raises KeyError when the variable is missing and no directory is given.
SCHEMAS_DIR = os.environ.get("SCHEMAS_DIR")


class JsonString(UserString):
    """Text of one JSON schema file with chainable in-place rewrites.

    Each ``replace_*`` / ``to_pascal_case`` method updates ``self.data`` and
    returns ``self`` so the calls can be chained.
    """

    REF_REGEXP = r"(?P<abstract_perfix>\.\.\/abstract/)(?P<kind_name>\w+)\.(?P<version>\d+\.\d+\.\d+)\.json"
    NAMESPACE_REGEXP = r"\<namespace\>"

    # Names that are lower-cased entirely instead of only in the first letter.
    _FULLY_LOWERED = frozenset({"ACL", "Legals", "ID"})

    def repl_closure(self, match: "re.Match") -> str:
        """Return the replacement text for one ``REF_REGEXP`` match.

        Args:
            match: Match object handed over by ``re.sub``; it always carries
                the ``kind_name`` and ``version`` groups.

        Returns:
            ``"<TENANT>:<AUTHORITY>:<kind_name>:<version>"``.
        """
        # re.sub only invokes the callback for successful matches, so no
        # "missing groups" guard is needed here.
        kind_name = match.group("kind_name")
        version = match.group("version")
        return f"{TENANT}:{AUTHORITY}:{kind_name}:{version}"

    def replace_refs(self):
        """Rewrite every abstract-schema file reference; return ``self``."""
        self.data = re.sub(self.REF_REGEXP, self.repl_closure, self.data)
        return self

    def replace_namespaces(self):
        """Replace every ``<namespace>`` placeholder with ``TENANT``; return ``self``."""
        self.data = re.sub(self.NAMESPACE_REGEXP, TENANT, self.data)
        return self

    @staticmethod
    def lower_first_letter(val: str) -> str:
        """Return *val* with its first letter lower-cased.

        ``"ACL"``, ``"Legals"`` and ``"ID"`` are lower-cased entirely.
        Strings that are empty or already start with a lower-case letter are
        returned unchanged.
        """
        # Guard the empty string: an empty JSON property name is legal and
        # must not crash the conversion with an IndexError.
        if not val or val[0].islower():
            return val
        if val in JsonString._FULLY_LOWERED:
            return val.lower()
        return val[0].lower() + val[1:]

    def to_pascal_case(self):
        """Lower-case the first letter of property and required names.

        Despite the historical name, the keys of the top-level ``properties``
        object and the entries of the top-level ``required`` list are passed
        through ``lower_first_letter``. When the document has a
        ``schemaInfo`` key, the object under ``schema`` is processed instead
        of the document root. The result is re-serialised with a four-space
        indent.

        Returns:
            ``self``, with ``data`` replaced by the re-serialised JSON.

        Raises:
            json.JSONDecodeError: if ``data`` is not valid JSON.
            Exception: any error raised while rewriting is re-raised after
                the current ``data`` has been printed for diagnosis.
        """
        json_file_dict = json.loads(self.data)
        try:
            if "schemaInfo" in json_file_dict:  # schema is wrapped with extra metadata
                content = json_file_dict["schema"]
            else:
                content = json_file_dict
            if "properties" in content:
                content["properties"] = {
                    self.lower_first_letter(key): value
                    for key, value in content["properties"].items()
                }
            if "required" in content:
                content["required"] = [
                    self.lower_first_letter(name) for name in content["required"]
                ]
            self.data = json.dumps(json_file_dict, indent=4)
            return self
        except Exception:
            print(self.data)
            raise


def main(schemas_dir=None):
    """Convert every ``*.json`` file directly inside *schemas_dir* in place.

    Args:
        schemas_dir: Directory to process. Defaults to the ``SCHEMAS_DIR``
            environment variable captured at import time.

    Raises:
        KeyError: if no directory is given and ``SCHEMAS_DIR`` is not set.
        Exception: any error hit while converting a file is re-raised after
            the offending path has been printed.
    """
    if schemas_dir is None:
        schemas_dir = SCHEMAS_DIR
    if schemas_dir is None:
        raise KeyError("SCHEMAS_DIR")

    for file_path in glob.glob(os.path.join(schemas_dir, "*.json")):
        try:
            with open(file_path, "r", encoding="utf-8") as file:
                content = file.read()
            # Convert fully before reopening for writing, so a failure
            # leaves the original file untouched.
            content = JsonString(content).replace_refs().replace_namespaces().to_pascal_case().data
            with open(file_path, "w", encoding="utf-8") as file:
                file.write(content)
        except Exception:
            print(f"Error on file {file_path}")
            raise


if __name__ == "__main__":
    main()