Commit 696ce38d authored by Mark Hewitt
Browse files

Fix ingestion batch sizes

parent 6728d3eb
Pipeline #69949 passed with stages
in 2 minutes and 11 seconds
......@@ -62,7 +62,7 @@ VERIFY_BATCH_SIZE = 200
@click.option(
"-rl",
"--runid-log",
help="Path to a file containing run ids to get status of (see dataload ingest -h).",
help="Path to a file to save run ids to to later check status of (see dataload status -h).",
)
@click.option(
"-w", "--wait", help="Whether to wait for runs to complete.", is_flag=True, show_default=True
......@@ -141,17 +141,19 @@ def _ingest_files( # noqa:C901 pylint: disable=R0912,too-many-nested-blocks
logger.error("Error with file %s. File is empty.", filepath)
else:
logger.info("Processing %s.", filepath)
if "ReferenceData" in manifest and len(manifest["ReferenceData"]) > 0:
_update_legal_and_acl_tags_all(config, manifest["ReferenceData"])
if batch_size is None and not skip_existing:
_create_and_submit(config, manifest, runids, runid_log_handle, simulate)
else:
data_objects += manifest["ReferenceData"]
if skip_existing and not batch_size:
batch_size = len(data_objects)
file_batch_size = (
len(data_objects) if skip_existing and not batch_size else batch_size
)
data_objects = _process_batch(
config,
batch_size,
file_batch_size,
"ReferenceData",
data_objects,
runids,
......@@ -165,11 +167,12 @@ def _ingest_files( # noqa:C901 pylint: disable=R0912,too-many-nested-blocks
_create_and_submit(config, manifest, runids, runid_log_handle, simulate)
else:
data_objects += manifest["MasterData"]
if skip_existing and not batch_size:
batch_size = len(data_objects)
file_batch_size = (
len(data_objects) if skip_existing and not batch_size else batch_size
)
data_objects = _process_batch(
config,
batch_size,
file_batch_size,
"MasterData",
data_objects,
runids,
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment