Commit 3a27324e authored by Jim King, committed by Jørgen Lind

Support multiple scan files for multi-file SEGY
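
With this change --scan writes one file-info JSON per input SEG-Y file, --file-info can be repeated so that each input file gets its own scan file, and on import the scan files for parts after the first are validated against the first part's header layout. A rough sketch of the resulting two-step workflow, in the style of the pytest ImportExecutor below; the executable name, paths and check=True handling are placeholders, not taken from this commit:

import subprocess

# Hypothetical file names; the real tests build these from segyimport_test_config.
parts = [f"ST0202R08_Gather_Time_pt{i:02}.segy" for i in range(1, 12)]
scan_files = [f"{name}.scan.json" for name in parts]

# Step 1: scan, emitting one file-info JSON per input part.
scan_args = ["SEGYImport", "--prestack", "--scan"]
for scan_file in scan_files:
    scan_args += ["--file-info", scan_file]
subprocess.run(scan_args + parts, check=True)

# Step 2: import, reusing the per-part file-info JSON files.
import_args = ["SEGYImport", "--prestack", "--vdsfile", "import_test.vds"]
for scan_file in scan_files:
    import_args += ["--file-info", scan_file]
subprocess.run(import_args + parts, check=True)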

parent 9ffe2943
@@ -18,3 +18,5 @@ libressl*
dms*
cmakerc*
!dms-git
azure-sdk-for-cpp*
libxml2*
@@ -55,6 +55,16 @@ struct HeaderField
HeaderField(int byteLocation, FieldWidth fieldWidth) : byteLocation(byteLocation), fieldWidth(fieldWidth) {}
bool Defined() const { return byteLocation != 0; }
bool operator==(const HeaderField& hf) const
{
return byteLocation == hf.byteLocation && fieldWidth == hf.fieldWidth;
}
bool operator!=(const HeaderField& hf) const
{
return !(*this == hf);
}
};
namespace BinaryHeader
......
"""
Global configuration information for SEGYImport pytests
"""
test_output_dir = "c:\\temp\\SEGY\\t"
test_data_dir = "c:\\temp\\SEGY\\RegressionTestData"
import os
import subprocess
from pathlib import Path
from typing import List
import pytest
import openvds
from segyimport_test_config import test_data_dir, test_output_dir
@pytest.fixture
def multi_file_input_base_name() -> str:
return "ST0202R08_Gather_Time_pt"
@pytest.fixture
def multi_file_input_parts_count() -> int:
# the multi-file SEGY consists of this many files
return 11
@pytest.fixture
def multi_file_input_dir() -> str:
return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "MultiFile", "ST0202R08_Gather_Time")
@pytest.fixture
def multi_file_input_glob(multi_file_input_dir) -> str:
return os.path.join(multi_file_input_dir, "ST0202R08_Gather_Time_pt??.segy")
@pytest.fixture
def multi_file_input_files(multi_file_input_parts_count, multi_file_input_base_name, multi_file_input_dir) -> List[str]:
filenames = []
for i in range(1, multi_file_input_parts_count + 1):
input_name = f"{multi_file_input_base_name}{i:02}.segy"
filenames.append(os.path.join(multi_file_input_dir, input_name))
return filenames
@pytest.fixture
def multi_file_scan_file_info_files(multi_file_input_parts_count, multi_file_input_base_name) -> List[str]:
"""File info filenames to be output when using --scan"""
filenames = []
for i in range(1, multi_file_input_parts_count + 1):
file_info_name = f"{multi_file_input_base_name}{i:02}.segy.scan.json"
filenames.append(os.path.join(test_output_dir, file_info_name))
return filenames
@pytest.fixture
def multi_file_input_file_info_files(multi_file_input_parts_count, multi_file_input_base_name, multi_file_input_dir) -> List[str]:
"""File info filenames to be used when importing"""
filenames = []
for i in range(1, multi_file_input_parts_count + 1):
file_info_name = f"{multi_file_input_base_name}{i:02}.segy.scan.json"
filenames.append(os.path.join(multi_file_input_dir, file_info_name))
return filenames
class ImportExecutor:
def __init__(self):
import_exe = os.getenv("SEGYIMPORT_EXECUTABLE")
if not import_exe:
raise EnvironmentError("SEGYIMPORT_EXECUTABLE environment variable is not set")
self.args = [import_exe]
self.run_result = None
def add_arg(self, more_arg: str):
self.args.append(more_arg)
def add_args(self, more_args: List[str]):
self.args.extend(more_args)
def run(self) -> int:
self.run_result = subprocess.run(self.args, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
return self.run_result.returncode
def output(self) -> str:
if self.run_result:
return str(self.run_result.stdout)
return ""
def command_line(self) -> str:
"""Convenience method to return a string showing the command and arguments"""
return " ".join(self.args)
pass
def test_multi_file_scan_one_file_info(multi_file_input_glob):
"""
Tests --scan with multiple input SEGY files, but only one file info file specified.
"""
file_info_filename = os.path.join(test_output_dir, "test.scan.json")
ex = ImportExecutor()
ex.add_args(["--prestack", "--scan", "--file-info", file_info_filename, multi_file_input_glob])
result = ex.run()
# import should have failed
assert result > 0, ex.output()
# error message should contain this
assert "Different number of input SEG-Y file names and file-info file names".lower() in ex.output().lower()
def test_multi_file_scan(multi_file_input_files, multi_file_scan_file_info_files):
ex = ImportExecutor()
ex.add_args(["--prestack", "--scan"])
for filename in multi_file_scan_file_info_files:
ex.add_args(["--file-info", filename])
# ensure the output file doesn't exist
if Path(filename).exists():
os.remove(filename)
ex.add_args(multi_file_input_files)
result = ex.run()
# import should have succeeded
assert result == 0, ex.output()
# output files should exist
for filename in multi_file_scan_file_info_files:
assert Path(filename).exists()
def test_multi_file_import_with_file_infos(multi_file_input_files, multi_file_input_file_info_files):
ex = ImportExecutor()
ex.add_arg("--prestack")
vds_filename = os.path.join(test_output_dir, "import_test.vds")
ex.add_args(["--vdsfile", vds_filename])
for filename in multi_file_input_file_info_files:
ex.add_args(["--file-info", filename])
ex.add_args(multi_file_input_files)
result = ex.run()
# import should have succeeded
assert result == 0, ex.output()
# output file should exist
assert Path(vds_filename).exists()
# check dimensions of VDS
with openvds.open(vds_filename, "") as handle:
layout = openvds.getLayout(handle)
assert layout.dimensionality == 4
assert layout.numSamples[0] == 851
assert layout.numSamples[1] == 100
assert layout.numSamples[2] == 71
assert layout.numSamples[3] == 21
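(Not part of the diff: a minimal sketch of inspecting one generated scan file, assuming the JSON keys match the ones read by parseSEGYFileInfoFile in the hunks below, and that the scan test above has already produced the file.)

import json
import os

from segyimport_test_config import test_output_dir

# Print the header fields that the importer cross-checks when validating
# scan files for parts after the first against the first part's values.
scan_path = os.path.join(test_output_dir, "ST0202R08_Gather_Time_pt01.segy.scan.json")
with open(scan_path) as scan_file:
    file_info = json.load(scan_file)
for key in ("persistentID", "headerEndianness", "dataSampleFormatCode",
            "sampleCount", "sampleInterval", "primaryKey", "secondaryKey"):
    print(key, file_info.get(key))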
@@ -1079,9 +1079,9 @@ createImportInformationMetadata(const std::vector<DataProvider> &dataProviders,
}
bool
parseSEGYFileInfoFile(DataProvider &dataProvider, SEGYFileInfo& fileInfo, OpenVDS::Error &error)
parseSEGYFileInfoFile(const DataProvider& dataProvider, SEGYFileInfo& fileInfo, const size_t fileIndex, const bool isValidateHeader, OpenVDS::Error& error)
{
int64_t fileSize = dataProvider.Size(error);
const int64_t fileSize = dataProvider.Size(error);
if (error.code != 0)
{
@@ -1129,22 +1129,52 @@ parseSEGYFileInfoFile(DataProvider &dataProvider, SEGYFileInfo& fileInfo, OpenVD
return false;
}
fileInfo.m_persistentID = strtoull(jsonFileInfo["persistentID"].asCString(), nullptr, 16);
fileInfo.m_headerEndianness = EndiannessFromJson(jsonFileInfo["headerEndianness"]);
fileInfo.m_dataSampleFormatCode = SEGY::BinaryHeader::DataSampleFormatCode(jsonFileInfo["dataSampleFormatCode"].asInt());
fileInfo.m_sampleCount = jsonFileInfo["sampleCount"].asInt();
fileInfo.m_startTimeMilliseconds = jsonFileInfo["startTime"].asDouble();
fileInfo.m_sampleIntervalMilliseconds = jsonFileInfo["sampleInterval"].asDouble();
fileInfo.m_traceCounts.clear();
fileInfo.m_traceCounts.push_back(jsonFileInfo["traceCount"].asInt64());
fileInfo.m_primaryKey = HeaderFieldFromJson(jsonFileInfo["primaryKey"]);
fileInfo.m_secondaryKey = HeaderFieldFromJson(jsonFileInfo["secondaryKey"]);
if (isValidateHeader)
{
const bool
isValid =
fileInfo.m_persistentID == strtoull(jsonFileInfo["persistentID"].asCString(), nullptr, 16) &&
fileInfo.m_headerEndianness == EndiannessFromJson(jsonFileInfo["headerEndianness"]) &&
fileInfo.m_dataSampleFormatCode == SEGY::BinaryHeader::DataSampleFormatCode(jsonFileInfo["dataSampleFormatCode"].asInt()) &&
fileInfo.m_sampleCount == jsonFileInfo["sampleCount"].asInt() &&
fileInfo.m_sampleIntervalMilliseconds == jsonFileInfo["sampleInterval"].asDouble() &&
fileInfo.m_primaryKey == HeaderFieldFromJson(jsonFileInfo["primaryKey"]) &&
fileInfo.m_secondaryKey == HeaderFieldFromJson(jsonFileInfo["secondaryKey"]);
if (!isValid)
{
error.string = "SEGY header data in JSON scan file does not match existing SEGY header data";
error.code = -1;
return false;
}
}
else
{
fileInfo.m_persistentID = strtoull(jsonFileInfo["persistentID"].asCString(), nullptr, 16);
fileInfo.m_headerEndianness = EndiannessFromJson(jsonFileInfo["headerEndianness"]);
fileInfo.m_dataSampleFormatCode = SEGY::BinaryHeader::DataSampleFormatCode(jsonFileInfo["dataSampleFormatCode"].asInt());
fileInfo.m_sampleCount = jsonFileInfo["sampleCount"].asInt();
fileInfo.m_startTimeMilliseconds = jsonFileInfo["startTime"].asDouble();
fileInfo.m_sampleIntervalMilliseconds = jsonFileInfo["sampleInterval"].asDouble();
fileInfo.m_traceCounts.clear();
fileInfo.m_traceCounts.push_back(jsonFileInfo["traceCount"].asInt64());
fileInfo.m_primaryKey = HeaderFieldFromJson(jsonFileInfo["primaryKey"]);
fileInfo.m_secondaryKey = HeaderFieldFromJson(jsonFileInfo["secondaryKey"]);
}
fileInfo.m_segmentInfoLists.clear();
fileInfo.m_segmentInfoLists.emplace_back();
if (fileInfo.m_traceCounts.size() <= fileIndex)
{
fileInfo.m_traceCounts.resize(fileIndex + 1);
}
fileInfo.m_traceCounts[fileIndex] = jsonFileInfo["traceCount"].asInt64();
if (fileInfo.m_segmentInfoLists.size() <= fileIndex)
{
fileInfo.m_segmentInfoLists.resize(fileIndex + 1);
}
auto&
segmentInfo = fileInfo.m_segmentInfoLists.back();
for (Json::Value jsonSegmentInfo : jsonFileInfo["segmentInfo"])
segmentInfo = fileInfo.m_segmentInfoLists[fileIndex];
segmentInfo.clear();
for (const auto& jsonSegmentInfo : jsonFileInfo["segmentInfo"])
{
segmentInfo.push_back(segmentInfoFromJson(jsonSegmentInfo));
}
@@ -1457,7 +1487,7 @@ main(int argc, char* argv[])
double sampleStart = 0;
bool littleEndian = false;
bool scan = false;
std::string fileInfoFileName;
std::vector<std::string> fileInfoFileNames;
int brickSize = 64;
int margin = 0;
bool force = false;
@@ -1499,7 +1529,7 @@ main(int argc, char* argv[])
options.add_option("", "", "crs-wkt", "A coordinate reference system in well-known text format can optionally be provided", cxxopts::value<std::string>(crsWkt), "<string>");
options.add_option("", "l", "little-endian", "Force little-endian trace headers.", cxxopts::value<bool>(littleEndian), "");
options.add_option("", "", "scan", "Generate a JSON file containing information about the input SEG-Y file.", cxxopts::value<bool>(scan), "");
options.add_option("", "i", "file-info", "A JSON file (generated by the --scan option) containing information about the input SEG-Y file.", cxxopts::value<std::string>(fileInfoFileName), "<file>");
options.add_option("", "i", "file-info", "A JSON file (generated by the --scan option) containing information about an input SEG-Y file.", cxxopts::value<std::vector<std::string>>(fileInfoFileNames), "<file>");
options.add_option("", "b", "brick-size", "The brick size for the volume data store.", cxxopts::value<int>(brickSize), "<value>");
options.add_option("", "", "margin", "The margin size (overlap) of the bricks.", cxxopts::value<int>(margin), "<value>");
options.add_option("", "f", "force", "Continue on upload error.", cxxopts::value<bool>(force), "");
@@ -1543,6 +1573,12 @@ main(int argc, char* argv[])
return EXIT_FAILURE;
}
if (!fileInfoFileNames.empty() && fileInfoFileNames.size() != fileNames.size())
{
OpenVDS::printError(jsonOutput, "Args", "Different number of input SEG-Y file names and file-info file names. Use multiple --file-info arguments to specify a file-info file for each input SEG-Y file.");
return EXIT_FAILURE;
}
if (help)
{
OpenVDS::printInfo(jsonOutput, "Args", options.help());
@@ -1733,7 +1769,7 @@ main(int argc, char* argv[])
fileInfo.m_segyType = segyType;
// Scan the file if '--scan' was passed or we're uploading but no fileInfo file was specified
if(scan || fileInfoFileName.empty())
if(scan || fileInfoFileNames.empty())
{
if(!uniqueID)
{
@@ -1764,98 +1800,107 @@ main(int argc, char* argv[])
// If we are in scan mode we serialize the result of the file scan either to a fileInfo file (if specified) or to stdout and exit
if(scan)
{
// TODO if we have multiple input files we need to serialize multiple scan files
Json::Value jsonFileInfo = SerializeSEGYFileInfo(fileInfo, 0);
Json::StreamWriterBuilder wbuilder;
wbuilder["indentation"] = " ";
std::string document = Json::writeString(wbuilder, jsonFileInfo);
if (fileInfoFileName.empty())
{
fmt::print(stdout, "{}", document);
}
else
for (int fileIndex = 0; fileIndex < fileNames.size(); ++fileIndex)
{
OpenVDS::Error
error;
Json::Value jsonFileInfo = SerializeSEGYFileInfo(fileInfo, fileIndex);
if (OpenVDS::IsSupportedProtocol(fileInfoFileName))
Json::StreamWriterBuilder wbuilder;
wbuilder["indentation"] = " ";
std::string document = Json::writeString(wbuilder, jsonFileInfo);
if (fileInfoFileNames.empty())
{
std::string dirname;
std::string basename;
std::string parameters;
splitUrl(fileInfoFileName, dirname, basename, parameters, error);
if (error.code)
{
OpenVDS::printError(jsonOutput, "IO", "Failed to creating IOManager for", fileInfoFileName, error.string);
return EXIT_FAILURE;
}
std::string scanUrl = dirname + parameters;
std::unique_ptr<OpenVDS::IOManager> ioManager(OpenVDS::IOManager::CreateIOManager(scanUrl, urlConnection, OpenVDS::IOManager::ReadWrite, error));
if (error.code)
{
OpenVDS::printError(jsonOutput, "IO", "Failed to creating IOManager for", fileInfoFileName, error.string);
return EXIT_FAILURE;
}
auto shared_data = std::make_shared<std::vector<uint8_t>>();
shared_data->insert(shared_data->end(), document.begin(), document.end());
auto req = ioManager->WriteObject(basename, "", "text/plain", {}, shared_data, {});
req->WaitForFinish(error);
if (error.code)
{
OpenVDS::printError(jsonOutput, "IO", "Failed to write", fileInfoFileName, error.string);
return EXIT_FAILURE;
}
fmt::print(stdout, "{}", document);
}
else
{
OpenVDS::File
fileInfoFile;
OpenVDS::Error
error;
const auto
& fileInfoFileName = fileInfoFileNames[fileIndex];
fileInfoFile.Open(fileInfoFileName.c_str(), true, false, true, error);
if (error.code != 0)
if (OpenVDS::IsSupportedProtocol(fileInfoFileName))
{
OpenVDS::printError(jsonOutput, "IO", "Could not create file info file", fileInfoFileName);
return EXIT_FAILURE;
std::string dirname;
std::string basename;
std::string parameters;
splitUrl(fileInfoFileName, dirname, basename, parameters, error);
if (error.code)
{
OpenVDS::printError(jsonOutput, "IO", "Failed to creating IOManager for", fileInfoFileName, error.string);
return EXIT_FAILURE;
}
std::string scanUrl = dirname + parameters;
std::unique_ptr<OpenVDS::IOManager> ioManager(OpenVDS::IOManager::CreateIOManager(scanUrl, urlConnection, OpenVDS::IOManager::ReadWrite, error));
if (error.code)
{
OpenVDS::printError(jsonOutput, "IO", "Failed to creating IOManager for", fileInfoFileName, error.string);
return EXIT_FAILURE;
}
auto shared_data = std::make_shared<std::vector<uint8_t>>();
shared_data->insert(shared_data->end(), document.begin(), document.end());
auto req = ioManager->WriteObject(basename, "", "text/plain", {}, shared_data, {});
req->WaitForFinish(error);
if (error.code)
{
OpenVDS::printError(jsonOutput, "IO", "Failed to write", fileInfoFileName, error.string);
return EXIT_FAILURE;
}
}
else
{
OpenVDS::File
fileInfoFile;
fileInfoFile.Write(document.data(), 0, (int32_t)document.size(), error);
fileInfoFile.Open(fileInfoFileName, true, false, true, error);
if (error.code != 0)
{
OpenVDS::printError(jsonOutput, "IO", "Could not write file info to file", fileInfoFileName);
return EXIT_FAILURE;
if (error.code != 0)
{
OpenVDS::printError(jsonOutput, "IO", "Could not create file info file", fileInfoFileName);
return EXIT_FAILURE;
}
fileInfoFile.Write(document.data(), 0, (int32_t)document.size(), error);
if (error.code != 0)
{
OpenVDS::printError(jsonOutput, "IO", "Could not write file info to file", fileInfoFileName);
return EXIT_FAILURE;
}
}
}
}
return EXIT_SUCCESS;
}
}
else if (!fileInfoFileName.empty())
else if (!fileInfoFileNames.empty())
{
OpenVDS::Error
error;
DataProvider fileInfoDataProvider = CreateDataProvider(fileInfoFileName, inputConnection, error);
if (error.code != 0)
for (int fileIndex = 0; fileIndex < fileInfoFileNames.size(); ++fileIndex)
{
OpenVDS::printError(jsonOutput, "IO", "Could not create data provider for", fileInfoFileName, error.string);
return EXIT_FAILURE;
}
const auto
& fileInfoFileName = fileInfoFileNames[fileIndex];
bool success = parseSEGYFileInfoFile(fileInfoDataProvider, fileInfo, error);
DataProvider fileInfoDataProvider = CreateDataProvider(fileInfoFileName, inputConnection, error);
if (!success)
{
OpenVDS::printError(jsonOutput, "FileInfo", "Parse SEGYFileInfo", fileInfoFileName, error.string);
return EXIT_FAILURE;
if (error.code != 0)
{
OpenVDS::printError(jsonOutput, "IO", "Could not create data provider for", fileInfoFileName, error.string);
return EXIT_FAILURE;
}
bool success = parseSEGYFileInfoFile(fileInfoDataProvider, fileInfo, fileIndex, fileIndex != 0, error);
if (!success)
{
OpenVDS::printError(jsonOutput, "FileInfo", "Parse SEGYFileInfo", fileInfoFileName, error.string);
return EXIT_FAILURE;
}
}
if(overrideSampleStart)
if (overrideSampleStart)
{
fileInfo.m_startTimeMilliseconds = sampleStart;
}
......