Commit dd62c7b9 authored by Mark Hewitt's avatar Mark Hewitt
Browse files

Add first draft of options.

parent 74c95250
Pipeline #85471 failed with stages
in 1 minute and 24 seconds
......@@ -22,7 +22,7 @@ from osducli.click_cli import CustomClickCommand, State, command_with_output
from osducli.cliclient import CliOsduClient, handle_cli_exceptions
from osducli.commands.dataload.status import check_status
from osducli.commands.dataload.verify import batch_verify
from osducli.config import (
from osducli.config import ( # CONFIG_STORAGE_URL,
CONFIG_ACL_OWNER,
CONFIG_ACL_VIEWER,
CONFIG_DATA_PARTITION_ID,
......@@ -52,7 +52,7 @@ VERIFY_BATCH_SIZE = 200
@click.option(
"-b",
"--batch",
help="Batch size (per file). If not specified no batching is performed.",
help="Batch size (per file). If not specified manifests are uploaded as is.",
is_flag=False,
flag_value=200,
type=int,
......@@ -76,6 +76,34 @@ VERIFY_BATCH_SIZE = 200
show_default=True,
)
@click.option("--simulate", help="Simulate ingestion only.", is_flag=True, show_default=True)
@click.option(
"-a",
"--authority",
help="Schema authority to use when generating manifest files.",
default="osdu",
show_default=True,
required=False,
)
@click.option(
"-d",
"--data-partition",
help="A data partition name to use when generating manifest files. If not specified the global value is used.",
)
@click.option(
"-l",
"--legal-tags",
help="Legal tag to use when generating manifest files. If not specified the global value is used.",
)
@click.option(
"-aclo",
"--acl-owners",
help="Acl owners to use when generating manifest files. If not specified the global value is used.",
)
@click.option(
"-aclv",
"--acl-viewers",
help="Acl viewers to use when generating manifest files. If not specified the global value is used.",
)
@handle_cli_exceptions
@command_with_output(None)
def _click_command(
......@@ -87,13 +115,32 @@ def _click_command(
wait: bool = False,
skip_existing: str = False,
simulate: bool = False,
authority: str = None,
data_partition: str = None,
legal_tags: str = None,
acl_owners: str = None,
acl_viewers: str = None,
):
"""Ingest manifest files into OSDU.
This command will take existing manfiest files and load them into OSDU via the workflow
service and Airflow. 'legal' and 'acl' tags will be updated based upon the current
configuration"""
return ingest(state, path, files, batch, runid_log, wait, skip_existing, simulate)
return ingest(
state,
path,
files,
batch,
runid_log,
wait,
skip_existing,
simulate,
authority,
data_partition,
legal_tags,
acl_owners,
acl_viewers,
)
def ingest(
......@@ -105,6 +152,11 @@ def ingest(
wait: bool = False,
skip_existing: bool = False,
simulate: bool = False,
authority: str = None,
data_partition: str = None,
legal_tags: str = None,
acl_owners: str = None,
acl_viewers: str = None,
) -> dict:
"""Ingest manifest files into OSDU
......@@ -352,14 +404,60 @@ def _upload_file(config: CLIConfig, filepath):
if response.status_code not in [200, 201]:
raise CliError(f"({response.status_code}) {response.text[:250]}")
# Need to figure how metadata is handled wrt. ariflow, fileid (returned v's manifest)
# file_metadata = _populate_file_metadata_req_body(file_source)
# _update_legal_and_acl_tags(config, file_metadata)
# print(file_metadata)
# update_metadata_response = connection.cli_post_returning_json(
# CONFIG_FILE_URL,
# "files/metadata",
# json.dumps(file_metadata),
# [200, 201],
# )
# generated_file_id = update_metadata_response.get("id")
# logger.info(
# f"{filepath} is uploaded with file id {generated_file_id} with file source {file_source}"
# )
# # Get record version
# file_record_version_response = connection.cli_get_returning_json(
# CONFIG_STORAGE_URL, "records/versions/" + generated_file_id
# )
# file_record_version = file_record_version_response.get("versions")[0]
# metadata = {
# "file_id": generated_file_id,
# "file_source": file_source,
# "file_record_version": str(file_record_version),
# }
# print(metadata)
# generated_file_id = upload_metadata_response_json.get("id")
# logger.info("%s is uploaded with file id %s with file source %s", filepath, generated_file_id, file_source)
logger.info("%s is uploaded with file source %s", filepath, file_source)
# return generated_file_id, file_source
return file_source
raise CliError(f"No upload location returned: {initiate_upload_response_json}")
def _populate_file_metadata_req_body(file_source):
return {
"kind": "osdu:wks:dataset--File.Generic:1.0.0",
"acl": {
"viewers": [],
"owners": [],
},
"legal": {
"legaltags": [],
"otherRelevantDataCountries": ["US"],
"status": "compliant",
},
"data": {"DatasetProperties": {"FileSourceInfo": {"FileSource": file_source}}},
}
def _update_work_products_metadata(config: CLIConfig, data, files, simulate):
if "WorkProduct" in data:
_update_legal_and_acl_tags(config, data["WorkProduct"])
......@@ -375,16 +473,17 @@ def _update_work_products_metadata(config: CLIConfig, data, files, simulate):
dataset.get("data", {}).get("DatasetProperties", {}).get("FileSourceInfo")
)
# only process if FileSource isn't already specified
if file_source_info and not file_source_info.get("FileSource"):
if not simulate:
file_source_info["FileSource"] = _upload_file(
config, os.path.join(files, file_source_info["Name"])
if file_source_info:
file_path = os.path.join(files, file_source_info["Name"])
if os.path.exists(file_path):
if not simulate:
file_source_info["FileSource"] = _upload_file(config, file_path)
else:
logger.info(
"Local file '%s' not found - skipping.",
file_path,
)
else:
logger.info(
"FileSource already especified for '%s' - skipping.",
file_source_info["Name"],
)
# TO DO: Here we scan by name from filemap
# with open(file_location_map) as file:
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment