From 3a27324ed00187b8c27084ea820fd504f845d321 Mon Sep 17 00:00:00 2001 From: Jim King Date: Thu, 2 Sep 2021 13:20:08 -0500 Subject: [PATCH 01/46] multi scan file for multi-file SEGY --- 3rdparty/.gitignore | 2 + src/SEGYUtils/SEGYUtils/SEGY.h | 10 + tests/tools/SEGYImport/pytests/.gitignore | 1 + .../pytests/segyimport_test_config.py | 6 + .../pytests/test_multi_file_scan.py | 158 +++++++++++++ tools/SEGYImport/SEGYImport.cpp | 215 +++++++++++------- 6 files changed, 307 insertions(+), 85 deletions(-) create mode 100644 tests/tools/SEGYImport/pytests/.gitignore create mode 100644 tests/tools/SEGYImport/pytests/segyimport_test_config.py create mode 100644 tests/tools/SEGYImport/pytests/test_multi_file_scan.py diff --git a/3rdparty/.gitignore b/3rdparty/.gitignore index e8d3639a..2e418824 100644 --- a/3rdparty/.gitignore +++ b/3rdparty/.gitignore @@ -18,3 +18,5 @@ libressl* dms* cmakerc* !dms-git +azure-sdk-for-cpp* +libxml2* diff --git a/src/SEGYUtils/SEGYUtils/SEGY.h b/src/SEGYUtils/SEGYUtils/SEGY.h index 37038f63..6a0b57c7 100644 --- a/src/SEGYUtils/SEGYUtils/SEGY.h +++ b/src/SEGYUtils/SEGYUtils/SEGY.h @@ -55,6 +55,16 @@ struct HeaderField HeaderField(int byteLocation, FieldWidth fieldWidth) : byteLocation(byteLocation), fieldWidth(fieldWidth) {} bool Defined() const { return byteLocation != 0; } + + bool operator==(const HeaderField& hf) const + { + return byteLocation == hf.byteLocation && fieldWidth == hf.fieldWidth; + } + + bool operator!=(const HeaderField& hf) const + { + return !(*this == hf); + } }; namespace BinaryHeader diff --git a/tests/tools/SEGYImport/pytests/.gitignore b/tests/tools/SEGYImport/pytests/.gitignore new file mode 100644 index 00000000..a348e504 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/.gitignore @@ -0,0 +1 @@ +/__pycache__/ diff --git a/tests/tools/SEGYImport/pytests/segyimport_test_config.py b/tests/tools/SEGYImport/pytests/segyimport_test_config.py new file mode 100644 index 00000000..19b2fa08 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/segyimport_test_config.py @@ -0,0 +1,6 @@ +""" +Global configuration information for SEGYImport pytests +""" + +test_output_dir = "c:\\temp\\SEGY\\t" +test_data_dir = "c:\\temp\\SEGY\\RegressionTestData" diff --git a/tests/tools/SEGYImport/pytests/test_multi_file_scan.py b/tests/tools/SEGYImport/pytests/test_multi_file_scan.py new file mode 100644 index 00000000..fd297edb --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_multi_file_scan.py @@ -0,0 +1,158 @@ +import os +import subprocess +from pathlib import Path +from typing import List + +import pytest +import openvds + +from segyimport_test_config import test_data_dir, test_output_dir + + +@pytest.fixture +def multi_file_input_base_name() -> str: + return "ST0202R08_Gather_Time_pt" + + +@pytest.fixture +def multi_file_input_parts_count() -> int: + # the multi-file SEGY consists of this many files + return 11 + + +@pytest.fixture +def multi_file_input_dir() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "MultiFile", "ST0202R08_Gather_Time") + + +@pytest.fixture +def multi_file_input_glob(multi_file_input_dir) -> str: + return os.path.join(multi_file_input_dir, "ST0202R08_Gather_Time_pt??.segy") + + +@pytest.fixture +def multi_file_input_files(multi_file_input_parts_count, multi_file_input_base_name, multi_file_input_dir) -> List[str]: + filenames = [] + for i in range(1, multi_file_input_parts_count + 1): + input_name = f"{multi_file_input_base_name}{i:02}.segy" + filenames.append(os.path.join(multi_file_input_dir, input_name)) + return filenames + + +@pytest.fixture +def multi_file_scan_file_info_files(multi_file_input_parts_count, multi_file_input_base_name) -> List[str]: + """File info filenames to be output when using --scan""" + filenames = [] + for i in range(1, multi_file_input_parts_count + 1): + file_info_name = f"{multi_file_input_base_name}{i:02}.segy.scan.json" + filenames.append(os.path.join(test_output_dir, file_info_name)) + return filenames + + +@pytest.fixture +def multi_file_input_file_info_files(multi_file_input_parts_count, multi_file_input_base_name, multi_file_input_dir) -> List[str]: + """File info filenames to be used when importing""" + filenames = [] + for i in range(1, multi_file_input_parts_count + 1): + file_info_name = f"{multi_file_input_base_name}{i:02}.segy.scan.json" + filenames.append(os.path.join(multi_file_input_dir, file_info_name)) + return filenames + + +class ImportExecutor: + def __init__(self): + import_exe = os.getenv("SEGYIMPORT_EXECUTABLE") + if not import_exe: + raise EnvironmentError("SEGYIMPORT_EXECUTABLE environment variable is not set") + self.args = [import_exe] + self.run_result = None + + def add_arg(self, more_arg: str): + self.args.append(more_arg) + + def add_args(self, more_args: List[str]): + self.args.extend(more_args) + + def run(self) -> int: + self.run_result = subprocess.run(self.args, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + return self.run_result.returncode + + def output(self) -> str: + if self.run_result: + return str(self.run_result.stdout) + return "" + + def command_line(self) -> str: + """Convenience method to return a string showing the command and arguments""" + return " ".join(self.args) + pass + + +def test_multi_file_scan_one_file_info(multi_file_input_glob): + """ + Tests --scan with multiple input SEGY files, but only one file info file specified. + """ + file_info_filename = os.path.join(test_output_dir, "test.scan.json") + + ex = ImportExecutor() + ex.add_args(["--prestack", "--scan", "--file-info", file_info_filename, multi_file_input_glob]) + result = ex.run() + + # import should have failed + assert result > 0, ex.output() + + # error message should contain this + assert "Different number of input SEG-Y file names and file-info file names".lower() in ex.output().lower() + + +def test_multi_file_scan(multi_file_input_files, multi_file_scan_file_info_files): + ex = ImportExecutor() + ex.add_args(["--prestack", "--scan"]) + + for filename in multi_file_scan_file_info_files: + ex.add_args(["--file-info", filename]) + + # ensure the output file doesn't exist + if Path(filename).exists(): + os.remove(filename) + + ex.add_args(multi_file_input_files) + + result = ex.run() + + # import should have succeeded + assert result == 0, ex.output() + + # output files should exist + for filename in multi_file_scan_file_info_files: + assert Path(filename).exists() + + +def test_multi_file_import_with_file_infos(multi_file_input_files, multi_file_input_file_info_files): + ex = ImportExecutor() + ex.add_arg("--prestack") + + vds_filename = os.path.join(test_output_dir, "import_test.vds") + ex.add_args(["--vdsfile", vds_filename]) + + for filename in multi_file_input_file_info_files: + ex.add_args(["--file-info", filename]) + + ex.add_args(multi_file_input_files) + + result = ex.run() + + # import should have succeeded + assert result == 0, ex.output() + + # output file should exist + assert Path(vds_filename).exists() + + # check dimensions of VDS + with openvds.open(vds_filename, "") as handle: + layout = openvds.getLayout(handle) + assert layout.dimensionality == 4 + assert layout.numSamples[0] == 851 + assert layout.numSamples[1] == 100 + assert layout.numSamples[2] == 71 + assert layout.numSamples[3] == 21 diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index f45f094a..8d6332ed 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -1079,9 +1079,9 @@ createImportInformationMetadata(const std::vector &dataProviders, } bool -parseSEGYFileInfoFile(DataProvider &dataProvider, SEGYFileInfo& fileInfo, OpenVDS::Error &error) +parseSEGYFileInfoFile(const DataProvider& dataProvider, SEGYFileInfo& fileInfo, const size_t fileIndex, const bool isValidateHeader, OpenVDS::Error& error) { - int64_t fileSize = dataProvider.Size(error); + const int64_t fileSize = dataProvider.Size(error); if (error.code != 0) { @@ -1129,22 +1129,52 @@ parseSEGYFileInfoFile(DataProvider &dataProvider, SEGYFileInfo& fileInfo, OpenVD return false; } - fileInfo.m_persistentID = strtoull(jsonFileInfo["persistentID"].asCString(), nullptr, 16); - fileInfo.m_headerEndianness = EndiannessFromJson(jsonFileInfo["headerEndianness"]); - fileInfo.m_dataSampleFormatCode = SEGY::BinaryHeader::DataSampleFormatCode(jsonFileInfo["dataSampleFormatCode"].asInt()); - fileInfo.m_sampleCount = jsonFileInfo["sampleCount"].asInt(); - fileInfo.m_startTimeMilliseconds = jsonFileInfo["startTime"].asDouble(); - fileInfo.m_sampleIntervalMilliseconds = jsonFileInfo["sampleInterval"].asDouble(); - fileInfo.m_traceCounts.clear(); - fileInfo.m_traceCounts.push_back(jsonFileInfo["traceCount"].asInt64()); - fileInfo.m_primaryKey = HeaderFieldFromJson(jsonFileInfo["primaryKey"]); - fileInfo.m_secondaryKey = HeaderFieldFromJson(jsonFileInfo["secondaryKey"]); + if (isValidateHeader) + { + const bool + isValid = + fileInfo.m_persistentID == strtoull(jsonFileInfo["persistentID"].asCString(), nullptr, 16) && + fileInfo.m_headerEndianness == EndiannessFromJson(jsonFileInfo["headerEndianness"]) && + fileInfo.m_dataSampleFormatCode == SEGY::BinaryHeader::DataSampleFormatCode(jsonFileInfo["dataSampleFormatCode"].asInt()) && + fileInfo.m_sampleCount == jsonFileInfo["sampleCount"].asInt() && + fileInfo.m_sampleIntervalMilliseconds == jsonFileInfo["sampleInterval"].asDouble() && + fileInfo.m_primaryKey == HeaderFieldFromJson(jsonFileInfo["primaryKey"]) && + fileInfo.m_secondaryKey == HeaderFieldFromJson(jsonFileInfo["secondaryKey"]); + if (!isValid) + { + error.string = "SEGY header data in JSON scan file does not match existing SEGY header data"; + error.code = -1; + return false; + } + } + else + { + fileInfo.m_persistentID = strtoull(jsonFileInfo["persistentID"].asCString(), nullptr, 16); + fileInfo.m_headerEndianness = EndiannessFromJson(jsonFileInfo["headerEndianness"]); + fileInfo.m_dataSampleFormatCode = SEGY::BinaryHeader::DataSampleFormatCode(jsonFileInfo["dataSampleFormatCode"].asInt()); + fileInfo.m_sampleCount = jsonFileInfo["sampleCount"].asInt(); + fileInfo.m_startTimeMilliseconds = jsonFileInfo["startTime"].asDouble(); + fileInfo.m_sampleIntervalMilliseconds = jsonFileInfo["sampleInterval"].asDouble(); + fileInfo.m_traceCounts.clear(); + fileInfo.m_traceCounts.push_back(jsonFileInfo["traceCount"].asInt64()); + fileInfo.m_primaryKey = HeaderFieldFromJson(jsonFileInfo["primaryKey"]); + fileInfo.m_secondaryKey = HeaderFieldFromJson(jsonFileInfo["secondaryKey"]); + } - fileInfo.m_segmentInfoLists.clear(); - fileInfo.m_segmentInfoLists.emplace_back(); + if (fileInfo.m_traceCounts.size() <= fileIndex) + { + fileInfo.m_traceCounts.resize(fileIndex + 1); + } + fileInfo.m_traceCounts[fileIndex] = jsonFileInfo["traceCount"].asInt64(); + + if (fileInfo.m_segmentInfoLists.size() <= fileIndex) + { + fileInfo.m_segmentInfoLists.resize(fileIndex + 1); + } auto& - segmentInfo = fileInfo.m_segmentInfoLists.back(); - for (Json::Value jsonSegmentInfo : jsonFileInfo["segmentInfo"]) + segmentInfo = fileInfo.m_segmentInfoLists[fileIndex]; + segmentInfo.clear(); + for (const auto& jsonSegmentInfo : jsonFileInfo["segmentInfo"]) { segmentInfo.push_back(segmentInfoFromJson(jsonSegmentInfo)); } @@ -1457,7 +1487,7 @@ main(int argc, char* argv[]) double sampleStart = 0; bool littleEndian = false; bool scan = false; - std::string fileInfoFileName; + std::vector fileInfoFileNames; int brickSize = 64; int margin = 0; bool force = false; @@ -1499,7 +1529,7 @@ main(int argc, char* argv[]) options.add_option("", "", "crs-wkt", "A coordinate reference system in well-known text format can optionally be provided", cxxopts::value(crsWkt), ""); options.add_option("", "l", "little-endian", "Force little-endian trace headers.", cxxopts::value(littleEndian), ""); options.add_option("", "", "scan", "Generate a JSON file containing information about the input SEG-Y file.", cxxopts::value(scan), ""); - options.add_option("", "i", "file-info", "A JSON file (generated by the --scan option) containing information about the input SEG-Y file.", cxxopts::value(fileInfoFileName), ""); + options.add_option("", "i", "file-info", "A JSON file (generated by the --scan option) containing information about an input SEG-Y file.", cxxopts::value>(fileInfoFileNames), ""); options.add_option("", "b", "brick-size", "The brick size for the volume data store.", cxxopts::value(brickSize), ""); options.add_option("", "", "margin", "The margin size (overlap) of the bricks.", cxxopts::value(margin), ""); options.add_option("", "f", "force", "Continue on upload error.", cxxopts::value(force), ""); @@ -1543,6 +1573,12 @@ main(int argc, char* argv[]) return EXIT_FAILURE; } + if (!fileInfoFileNames.empty() && fileInfoFileNames.size() != fileNames.size()) + { + OpenVDS::printError(jsonOutput, "Args", "Different number of input SEG-Y file names and file-info file names. Use multiple --file-info arguments to specify a file-info file for each input SEG-Y file."); + return EXIT_FAILURE; + } + if (help) { OpenVDS::printInfo(jsonOutput, "Args", options.help()); @@ -1733,7 +1769,7 @@ main(int argc, char* argv[]) fileInfo.m_segyType = segyType; // Scan the file if '--scan' was passed or we're uploading but no fileInfo file was specified - if(scan || fileInfoFileName.empty()) + if(scan || fileInfoFileNames.empty()) { if(!uniqueID) { @@ -1764,98 +1800,107 @@ main(int argc, char* argv[]) // If we are in scan mode we serialize the result of the file scan either to a fileInfo file (if specified) or to stdout and exit if(scan) { - // TODO if we have multiple input files we need to serialize multiple scan files - Json::Value jsonFileInfo = SerializeSEGYFileInfo(fileInfo, 0); - - Json::StreamWriterBuilder wbuilder; - wbuilder["indentation"] = " "; - std::string document = Json::writeString(wbuilder, jsonFileInfo); - - if (fileInfoFileName.empty()) - { - fmt::print(stdout, "{}", document); - } - else + for (int fileIndex = 0; fileIndex < fileNames.size(); ++fileIndex) { - OpenVDS::Error - error; + Json::Value jsonFileInfo = SerializeSEGYFileInfo(fileInfo, fileIndex); - if (OpenVDS::IsSupportedProtocol(fileInfoFileName)) + Json::StreamWriterBuilder wbuilder; + wbuilder["indentation"] = " "; + std::string document = Json::writeString(wbuilder, jsonFileInfo); + + if (fileInfoFileNames.empty()) { - std::string dirname; - std::string basename; - std::string parameters; - splitUrl(fileInfoFileName, dirname, basename, parameters, error); - if (error.code) - { - OpenVDS::printError(jsonOutput, "IO", "Failed to creating IOManager for", fileInfoFileName, error.string); - return EXIT_FAILURE; - } - std::string scanUrl = dirname + parameters; - std::unique_ptr ioManager(OpenVDS::IOManager::CreateIOManager(scanUrl, urlConnection, OpenVDS::IOManager::ReadWrite, error)); - if (error.code) - { - OpenVDS::printError(jsonOutput, "IO", "Failed to creating IOManager for", fileInfoFileName, error.string); - return EXIT_FAILURE; - } - auto shared_data = std::make_shared>(); - shared_data->insert(shared_data->end(), document.begin(), document.end()); - auto req = ioManager->WriteObject(basename, "", "text/plain", {}, shared_data, {}); - req->WaitForFinish(error); - if (error.code) - { - OpenVDS::printError(jsonOutput, "IO", "Failed to write", fileInfoFileName, error.string); - return EXIT_FAILURE; - } + fmt::print(stdout, "{}", document); } else { - OpenVDS::File - fileInfoFile; + OpenVDS::Error + error; + const auto + & fileInfoFileName = fileInfoFileNames[fileIndex]; - fileInfoFile.Open(fileInfoFileName.c_str(), true, false, true, error); - - if (error.code != 0) + if (OpenVDS::IsSupportedProtocol(fileInfoFileName)) { - OpenVDS::printError(jsonOutput, "IO", "Could not create file info file", fileInfoFileName); - return EXIT_FAILURE; + std::string dirname; + std::string basename; + std::string parameters; + splitUrl(fileInfoFileName, dirname, basename, parameters, error); + if (error.code) + { + OpenVDS::printError(jsonOutput, "IO", "Failed to creating IOManager for", fileInfoFileName, error.string); + return EXIT_FAILURE; + } + std::string scanUrl = dirname + parameters; + std::unique_ptr ioManager(OpenVDS::IOManager::CreateIOManager(scanUrl, urlConnection, OpenVDS::IOManager::ReadWrite, error)); + if (error.code) + { + OpenVDS::printError(jsonOutput, "IO", "Failed to creating IOManager for", fileInfoFileName, error.string); + return EXIT_FAILURE; + } + auto shared_data = std::make_shared>(); + shared_data->insert(shared_data->end(), document.begin(), document.end()); + auto req = ioManager->WriteObject(basename, "", "text/plain", {}, shared_data, {}); + req->WaitForFinish(error); + if (error.code) + { + OpenVDS::printError(jsonOutput, "IO", "Failed to write", fileInfoFileName, error.string); + return EXIT_FAILURE; + } } + else + { + OpenVDS::File + fileInfoFile; - fileInfoFile.Write(document.data(), 0, (int32_t)document.size(), error); + fileInfoFile.Open(fileInfoFileName, true, false, true, error); - if (error.code != 0) - { - OpenVDS::printError(jsonOutput, "IO", "Could not write file info to file", fileInfoFileName); - return EXIT_FAILURE; + if (error.code != 0) + { + OpenVDS::printError(jsonOutput, "IO", "Could not create file info file", fileInfoFileName); + return EXIT_FAILURE; + } + + fileInfoFile.Write(document.data(), 0, (int32_t)document.size(), error); + + if (error.code != 0) + { + OpenVDS::printError(jsonOutput, "IO", "Could not write file info to file", fileInfoFileName); + return EXIT_FAILURE; + } } } - } return EXIT_SUCCESS; } } - else if (!fileInfoFileName.empty()) + else if (!fileInfoFileNames.empty()) { OpenVDS::Error error; - DataProvider fileInfoDataProvider = CreateDataProvider(fileInfoFileName, inputConnection, error); - - if (error.code != 0) + for (int fileIndex = 0; fileIndex < fileInfoFileNames.size(); ++fileIndex) { - OpenVDS::printError(jsonOutput, "IO", "Could not create data provider for", fileInfoFileName, error.string); - return EXIT_FAILURE; - } + const auto + & fileInfoFileName = fileInfoFileNames[fileIndex]; - bool success = parseSEGYFileInfoFile(fileInfoDataProvider, fileInfo, error); + DataProvider fileInfoDataProvider = CreateDataProvider(fileInfoFileName, inputConnection, error); - if (!success) - { - OpenVDS::printError(jsonOutput, "FileInfo", "Parse SEGYFileInfo", fileInfoFileName, error.string); - return EXIT_FAILURE; + if (error.code != 0) + { + OpenVDS::printError(jsonOutput, "IO", "Could not create data provider for", fileInfoFileName, error.string); + return EXIT_FAILURE; + } + + bool success = parseSEGYFileInfoFile(fileInfoDataProvider, fileInfo, fileIndex, fileIndex != 0, error); + + if (!success) + { + OpenVDS::printError(jsonOutput, "FileInfo", "Parse SEGYFileInfo", fileInfoFileName, error.string); + return EXIT_FAILURE; + } } - if(overrideSampleStart) + if (overrideSampleStart) { fileInfo.m_startTimeMilliseconds = sampleStart; } -- GitLab From d0f57c28581517655c7b433bb470de89a10b878c Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 3 Sep 2021 12:56:05 -0500 Subject: [PATCH 02/46] add options for attribute name and unit --- .../pytests/segyimport_test_config.py | 32 +++++ .../SEGYImport/pytests/test_attribute_name.py | 116 ++++++++++++++++++ .../pytests/test_multi_file_scan.py | 32 +---- tools/SEGYImport/SEGYImport.cpp | 102 +++++++++------ 4 files changed, 213 insertions(+), 69 deletions(-) create mode 100644 tests/tools/SEGYImport/pytests/test_attribute_name.py diff --git a/tests/tools/SEGYImport/pytests/segyimport_test_config.py b/tests/tools/SEGYImport/pytests/segyimport_test_config.py index 19b2fa08..b69135f5 100644 --- a/tests/tools/SEGYImport/pytests/segyimport_test_config.py +++ b/tests/tools/SEGYImport/pytests/segyimport_test_config.py @@ -1,6 +1,38 @@ """ Global configuration information for SEGYImport pytests """ +import os +import subprocess +from typing import List test_output_dir = "c:\\temp\\SEGY\\t" test_data_dir = "c:\\temp\\SEGY\\RegressionTestData" + + +class ImportExecutor: + def __init__(self): + import_exe = os.getenv("SEGYIMPORT_EXECUTABLE") + if not import_exe: + raise EnvironmentError("SEGYIMPORT_EXECUTABLE environment variable is not set") + self.args = [import_exe] + self.run_result = None + + def add_arg(self, more_arg: str): + self.args.append(more_arg) + + def add_args(self, more_args: List[str]): + self.args.extend(more_args) + + def run(self) -> int: + self.run_result = subprocess.run(self.args, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + return self.run_result.returncode + + def output(self) -> str: + if self.run_result: + return str(self.run_result.stdout) + return "" + + def command_line(self) -> str: + """Convenience method to return a string showing the command and arguments""" + return " ".join(self.args) + pass \ No newline at end of file diff --git a/tests/tools/SEGYImport/pytests/test_attribute_name.py b/tests/tools/SEGYImport/pytests/test_attribute_name.py new file mode 100644 index 00000000..b9c65377 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_attribute_name.py @@ -0,0 +1,116 @@ +import os +from pathlib import Path + +import pytest +import openvds + +from segyimport_test_config import test_data_dir, test_output_dir, ImportExecutor + + +@pytest.fixture +def poststack_segy() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "Teleport", "Teleport_Trim", + "3D_Stack", "ST0202R08_TIME.segy") + + +@pytest.fixture +def output_vds() -> str: + vds_filename = os.path.join(test_output_dir, "import_test.vds") + if Path(vds_filename).exists(): + os.remove(vds_filename) + return vds_filename + + +def test_default_attribute_and_unit(poststack_segy, output_vds): + ex = ImportExecutor() + + ex.add_args(["--vdsfile", output_vds]) + + ex.add_arg(poststack_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds).exists() + + with openvds.open(output_vds, "") as handle: + layout = openvds.getLayout(handle) + descriptor = layout.getChannelDescriptor(0) + assert descriptor.name == "Amplitude" + assert descriptor.unit == "" + + +def test_custom_attribute_name(poststack_segy, output_vds): + custom_attribute_name = "Vrms" + + ex = ImportExecutor() + + ex.add_args(["--vdsfile", output_vds]) + ex.add_args(["--attribute-name", custom_attribute_name]) + + ex.add_arg(poststack_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds).exists() + + with openvds.open(output_vds, "") as handle: + layout = openvds.getLayout(handle) + descriptor = layout.getChannelDescriptor(0) + assert descriptor.name == custom_attribute_name + assert descriptor.unit == "" + + +def test_custom_attribute_unit(poststack_segy, output_vds): + custom_attribute_unit = "Hz" + + ex = ImportExecutor() + + ex.add_args(["--vdsfile", output_vds]) + ex.add_args(["--attribute-unit", custom_attribute_unit]) + + ex.add_arg(poststack_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds).exists() + + with openvds.open(output_vds, "") as handle: + layout = openvds.getLayout(handle) + descriptor = layout.getChannelDescriptor(0) + assert descriptor.name == "Amplitude" + assert descriptor.unit == custom_attribute_unit + + +def test_invalid_attribute_name(poststack_segy, output_vds): + custom_attribute_name = "JulesVerne" + + ex = ImportExecutor() + + ex.add_args(["--vdsfile", output_vds]) + ex.add_args(["--attribute-name", custom_attribute_name]) + + ex.add_arg(poststack_segy) + + result = ex.run() + + assert result > 0, ex.output() + assert not Path(output_vds).exists() + + +def test_invalid_attribute_unit(poststack_segy, output_vds): + custom_attribute_unit = "furlong/fortnight" + + ex = ImportExecutor() + + ex.add_args(["--vdsfile", output_vds]) + ex.add_args(["--attribute-unit", custom_attribute_unit]) + + ex.add_arg(poststack_segy) + + result = ex.run() + + assert result > 0, ex.output() + assert not Path(output_vds).exists() diff --git a/tests/tools/SEGYImport/pytests/test_multi_file_scan.py b/tests/tools/SEGYImport/pytests/test_multi_file_scan.py index fd297edb..e7894590 100644 --- a/tests/tools/SEGYImport/pytests/test_multi_file_scan.py +++ b/tests/tools/SEGYImport/pytests/test_multi_file_scan.py @@ -1,12 +1,11 @@ import os -import subprocess from pathlib import Path from typing import List import pytest import openvds -from segyimport_test_config import test_data_dir, test_output_dir +from segyimport_test_config import test_data_dir, test_output_dir, ImportExecutor @pytest.fixture @@ -59,35 +58,6 @@ def multi_file_input_file_info_files(multi_file_input_parts_count, multi_file_in return filenames -class ImportExecutor: - def __init__(self): - import_exe = os.getenv("SEGYIMPORT_EXECUTABLE") - if not import_exe: - raise EnvironmentError("SEGYIMPORT_EXECUTABLE environment variable is not set") - self.args = [import_exe] - self.run_result = None - - def add_arg(self, more_arg: str): - self.args.append(more_arg) - - def add_args(self, more_args: List[str]): - self.args.extend(more_args) - - def run(self) -> int: - self.run_result = subprocess.run(self.args, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - return self.run_result.returncode - - def output(self) -> str: - if self.run_result: - return str(self.run_result.stdout) - return "" - - def command_line(self) -> str: - """Convenience method to return a string showing the command and arguments""" - return " ".join(self.args) - pass - - def test_multi_file_scan_one_file_info(multi_file_input_glob): """ Tests --scan with multiple input SEGY files, but only one file info file specified. diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 8d6332ed..8d1a65af 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -1306,13 +1306,13 @@ struct OffsetChannelInfo }; std::vector -createChannelDescriptors(SEGYFileInfo const& fileInfo, OpenVDS::FloatRange const& valueRange, const OffsetChannelInfo& offsetInfo) +createChannelDescriptors(SEGYFileInfo const& fileInfo, OpenVDS::FloatRange const& valueRange, const OffsetChannelInfo& offsetInfo, const std::string& attributeName, const std::string& attributeUnit) { std::vector channelDescriptors; // Primary channel - channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_R32, OpenVDS::VolumeDataChannelDescriptor::Components_1, AMPLITUDE_ATTRIBUTE_NAME, "", valueRange.Min, valueRange.Max); + channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_R32, OpenVDS::VolumeDataChannelDescriptor::Components_1, attributeName.c_str(), attributeUnit.c_str(), valueRange.Min, valueRange.Max); // Trace defined flag channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_U8, OpenVDS::VolumeDataChannelDescriptor::Components_1, "Trace", "", 0.0f, 1.0f, OpenVDS::VolumeDataMapping::PerTrace, OpenVDS::VolumeDataChannelDescriptor::DiscreteData); @@ -1507,14 +1507,16 @@ main(int argc, char* argv[]) bool help = false; bool helpConnection = false; bool version = false; + std::string attributeName = AMPLITUDE_ATTRIBUTE_NAME; + std::string attributeUnit; std::string supportedCompressionMethods = "None"; - if(OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::Wavelet)) supportedCompressionMethods += ", Wavelet"; - if(OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::RLE)) supportedCompressionMethods += ", RLE"; - if(OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::Zip)) supportedCompressionMethods += ", Zip"; - if(OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::WaveletNormalizeBlock)) supportedCompressionMethods += ", WaveletNormalizeBlock"; - if(OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::WaveletLossless)) supportedCompressionMethods += ", WaveletLossless"; - if(OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::WaveletNormalizeBlockLossless)) supportedCompressionMethods += ", WaveletNormalizeBlockLossless"; + if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::Wavelet)) supportedCompressionMethods += ", Wavelet"; + if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::RLE)) supportedCompressionMethods += ", RLE"; + if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::Zip)) supportedCompressionMethods += ", Zip"; + if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::WaveletNormalizeBlock)) supportedCompressionMethods += ", WaveletNormalizeBlock"; + if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::WaveletLossless)) supportedCompressionMethods += ", WaveletLossless"; + if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::WaveletNormalizeBlockLossless)) supportedCompressionMethods += ", WaveletNormalizeBlockLossless"; std::vector fileNames; @@ -1531,7 +1533,7 @@ main(int argc, char* argv[]) options.add_option("", "", "scan", "Generate a JSON file containing information about the input SEG-Y file.", cxxopts::value(scan), ""); options.add_option("", "i", "file-info", "A JSON file (generated by the --scan option) containing information about an input SEG-Y file.", cxxopts::value>(fileInfoFileNames), ""); options.add_option("", "b", "brick-size", "The brick size for the volume data store.", cxxopts::value(brickSize), ""); - options.add_option("", "", "margin", "The margin size (overlap) of the bricks.", cxxopts::value(margin), ""); + options.add_option("", "", "margin", "The margin size (overlap) of the bricks.", cxxopts::value(margin), ""); options.add_option("", "f", "force", "Continue on upload error.", cxxopts::value(force), ""); options.add_option("", "", "ignore-warnings", "Ignore warnings about import parameters.", cxxopts::value(ignoreWarnings), ""); options.add_option("", "", "compression-method", std::string("Compression method. Supported compression methods are: ") + supportedCompressionMethods + ".", cxxopts::value(compressionMethodString), ""); @@ -1545,6 +1547,8 @@ main(int argc, char* argv[]) options.add_option("", "", "uniqueID", "Generate a new globally unique ID when scanning the input SEG-Y file.", cxxopts::value(uniqueID), ""); options.add_option("", "", "disable-persistentID", "Disable the persistentID usage, placing the VDS directly into the url location.", cxxopts::value(disablePersistentID), ""); options.add_option("", "", "json-output", "Enable json output.", cxxopts::value(jsonOutput), ""); + options.add_option("", "", "attribute-name", "The name of the primary VDS channel. The name may be Amplitude (default), Attribute, Depth, Probability, Time, Vavg, Vint, or Vrms", cxxopts::value(attributeName)->default_value(AMPLITUDE_ATTRIBUTE_NAME), ""); + options.add_option("", "", "attribute-unit", "The units of the primary VDS channel. The unit name may be blank (default), ft, ft/s, Hz, m, m/s, ms, or s", cxxopts::value(attributeUnit), ""); // TODO add option for turning off traceOrderByOffset options.add_option("", "h", "help", "Print this help information", cxxopts::value(help), ""); @@ -1567,7 +1571,7 @@ main(int argc, char* argv[]) overrideBrickSize = result.count("brick-size") != 0; overrideMargin = result.count("margin") != 0; } - catch (cxxopts::OptionParseException &e) + catch (cxxopts::OptionParseException& e) { OpenVDS::printError(jsonOutput, "Args", e.what()); return EXIT_FAILURE; @@ -1601,33 +1605,33 @@ main(int argc, char* argv[]) std::transform(compressionMethodString.begin(), compressionMethodString.end(), compressionMethodString.begin(), asciitolower); - if(compressionMethodString.empty()) compressionMethod = OpenVDS::CompressionMethod::None; - else if(compressionMethodString == "none") compressionMethod = OpenVDS::CompressionMethod::None; - else if(compressionMethodString == "wavelet") compressionMethod = OpenVDS::CompressionMethod::Wavelet; - else if(compressionMethodString == "rle") compressionMethod = OpenVDS::CompressionMethod::RLE; - else if(compressionMethodString == "zip") compressionMethod = OpenVDS::CompressionMethod::Zip; - else if(compressionMethodString == "waveletnormalizeblock") compressionMethod = OpenVDS::CompressionMethod::WaveletNormalizeBlock; - else if(compressionMethodString == "waveletlossless") compressionMethod = OpenVDS::CompressionMethod::WaveletLossless; - else if(compressionMethodString == "waveletnormalizeblocklossless") compressionMethod = OpenVDS::CompressionMethod::WaveletNormalizeBlockLossless; + if (compressionMethodString.empty()) compressionMethod = OpenVDS::CompressionMethod::None; + else if (compressionMethodString == "none") compressionMethod = OpenVDS::CompressionMethod::None; + else if (compressionMethodString == "wavelet") compressionMethod = OpenVDS::CompressionMethod::Wavelet; + else if (compressionMethodString == "rle") compressionMethod = OpenVDS::CompressionMethod::RLE; + else if (compressionMethodString == "zip") compressionMethod = OpenVDS::CompressionMethod::Zip; + else if (compressionMethodString == "waveletnormalizeblock") compressionMethod = OpenVDS::CompressionMethod::WaveletNormalizeBlock; + else if (compressionMethodString == "waveletlossless") compressionMethod = OpenVDS::CompressionMethod::WaveletLossless; + else if (compressionMethodString == "waveletnormalizeblocklossless") compressionMethod = OpenVDS::CompressionMethod::WaveletNormalizeBlockLossless; else { OpenVDS::printError(jsonOutput, "CompressionMethod", "Unknown compression method", compressionMethodString); return EXIT_FAILURE; } - if(!OpenVDS::IsCompressionMethodSupported(compressionMethod)) + if (!OpenVDS::IsCompressionMethodSupported(compressionMethod)) { OpenVDS::printError(jsonOutput, "CompressionMethod", "Unsupported compression method", compressionMethodString); return EXIT_FAILURE; } - if(compressionMethod == OpenVDS::CompressionMethod::Wavelet || compressionMethod == OpenVDS::CompressionMethod::WaveletNormalizeBlock || compressionMethod == OpenVDS::CompressionMethod::WaveletLossless || compressionMethod == OpenVDS::CompressionMethod::WaveletNormalizeBlockLossless) + if (compressionMethod == OpenVDS::CompressionMethod::Wavelet || compressionMethod == OpenVDS::CompressionMethod::WaveletNormalizeBlock || compressionMethod == OpenVDS::CompressionMethod::WaveletLossless || compressionMethod == OpenVDS::CompressionMethod::WaveletNormalizeBlockLossless) { - if(!overrideBrickSize) + if (!overrideBrickSize) { brickSize = 128; } - if(!overrideMargin) + if (!overrideMargin) { margin = 4; } @@ -1639,9 +1643,9 @@ main(int argc, char* argv[]) SEGY::SEGYType segyType = SEGY::SEGYType::Poststack; - if (primaryKey == "inlinenumber" || primaryKey == "crosslinenumber" ) + if (primaryKey == "inlinenumber" || primaryKey == "crosslinenumber") { - if(prestack) + if (prestack) { segyType = SEGY::SEGYType::Prestack; } @@ -1684,13 +1688,13 @@ main(int argc, char* argv[]) if (singleConnection) inputConnection = urlConnection; - if(uniqueID && !persistentID.empty()) + if (uniqueID && !persistentID.empty()) { OpenVDS::printError(jsonOutput, "Args", "--uniqueID does not make sense when the persistentID is specified"); return EXIT_FAILURE; } - - if(disablePersistentID && !persistentID.empty()) + + if (disablePersistentID && !persistentID.empty()) { OpenVDS::printError(jsonOutput, "Args", "--disable-PersistentID does not make sense when the persistentID is specified"); return EXIT_FAILURE; @@ -1769,14 +1773,14 @@ main(int argc, char* argv[]) fileInfo.m_segyType = segyType; // Scan the file if '--scan' was passed or we're uploading but no fileInfo file was specified - if(scan || fileInfoFileNames.empty()) + if (scan || fileInfoFileNames.empty()) { - if(!uniqueID) + if (!uniqueID) { OpenVDS::HashCombiner hash; - for(std::string const &fileName : fileNames) + for (std::string const& fileName : fileNames) { hash.Add(fileName); } @@ -1792,13 +1796,13 @@ main(int argc, char* argv[]) return EXIT_FAILURE; } - if(overrideSampleStart) + if (overrideSampleStart) { fileInfo.m_startTimeMilliseconds = sampleStart; } // If we are in scan mode we serialize the result of the file scan either to a fileInfo file (if specified) or to stdout and exit - if(scan) + if (scan) { for (int fileIndex = 0; fileIndex < fileNames.size(); ++fileIndex) { @@ -1918,7 +1922,7 @@ main(int argc, char* argv[]) // Check for only a single segment - if(fileInfo.m_segmentInfoLists.size() == 1 && fileInfo.m_segmentInfoLists[0].size() == 1) + if (fileInfo.m_segmentInfoLists.size() == 1 && fileInfo.m_segmentInfoLists[0].size() == 1) { OpenVDS::printWarning_with_condition_fatal(jsonOutput, !ignoreWarnings, "SegmentInfoList", "Warning: There is only one segment, either this is (as of now unsupported) 2D data or this usually indicates using the wrong header format for the input dataset.", "Use --ignore-warnings to force the import to go ahead."); } @@ -1949,7 +1953,7 @@ main(int argc, char* argv[]) } else if (segyType == SEGY::SEGYType::Poststack || segyType == SEGY::SEGYType::Poststack2D) { - if(fold > 1) + if (fold > 1) { OpenVDS::printError(jsonOutput, "SEGY", fmt::format("Detected a fold of '{0}', this usually indicates using the wrong header format or primary key for the input dataset or that the input data is binned prestack data (PSTM/PSDM gathers) in which case the --prestack option should be used.", fold)); return EXIT_FAILURE; @@ -1991,15 +1995,15 @@ main(int argc, char* argv[]) SEGY::SampleUnits sampleUnits; - if(sampleUnit.empty() || sampleUnit == "ms" || sampleUnit == "millisecond" || sampleUnit == "milliseconds") + if (sampleUnit.empty() || sampleUnit == "ms" || sampleUnit == "millisecond" || sampleUnit == "milliseconds") { sampleUnits = SEGY::SampleUnits::Milliseconds; } - else if(sampleUnit == "m" || sampleUnit == "meter" || sampleUnit == "meters") + else if (sampleUnit == "m" || sampleUnit == "meter" || sampleUnit == "meters") { sampleUnits = SEGY::SampleUnits::Meters; } - else if(sampleUnit == "ft" || sampleUnit == "foot" || sampleUnit == "feet") + else if (sampleUnit == "ft" || sampleUnit == "foot" || sampleUnit == "feet") { sampleUnits = SEGY::SampleUnits::Feet; } @@ -2009,6 +2013,28 @@ main(int argc, char* argv[]) return EXIT_FAILURE; } + if (!(attributeName == DEFAULT_ATTRIBUTE_NAME || attributeName == AMPLITUDE_ATTRIBUTE_NAME || attributeName == DEPTH_ATTRIBUTE_NAME + || attributeName == PROBABILITY_ATTRIBUTE_NAME || attributeName == TIME_ATTRIBUTE_NAME || attributeName == AVERAGE_VELOCITY_ATTRIBUTE_NAME + || attributeName == INTERVAL_VELOCITY_ATTRIBUTE_NAME || attributeName == RMS_VELOCITY_ATTRIBUTE_NAME)) + { + const auto msg = fmt::format("Unknown attribute name: {}, legal names are '{}', '{}', '{}', '{}', '{}', '{}', '{}', or '{}'\n", + attributeName, DEFAULT_ATTRIBUTE_NAME, AMPLITUDE_ATTRIBUTE_NAME, DEPTH_ATTRIBUTE_NAME, PROBABILITY_ATTRIBUTE_NAME, TIME_ATTRIBUTE_NAME, AVERAGE_VELOCITY_ATTRIBUTE_NAME, + INTERVAL_VELOCITY_ATTRIBUTE_NAME, RMS_VELOCITY_ATTRIBUTE_NAME); + OpenVDS::printError(jsonOutput, "Args", msg); + return EXIT_FAILURE; + } + + if (!(attributeUnit.empty() || attributeUnit == KNOWNMETADATA_UNIT_FOOT || attributeUnit == KNOWNMETADATA_UNIT_FEET_PER_SECOND + || attributeUnit == "Hz" || attributeUnit == KNOWNMETADATA_UNIT_METER || attributeUnit == KNOWNMETADATA_UNIT_METERS_PER_SECOND + || attributeUnit == KNOWNMETADATA_UNIT_MILLISECOND || attributeUnit == KNOWNMETADATA_UNIT_SECOND)) + { + const auto msg = fmt::format("Unknown attribute unit: {}, legal units are blank (no units), '{}', '{}', '{}', '{}', '{}', '{}', or '{}'\n", + attributeUnit, KNOWNMETADATA_UNIT_FOOT, KNOWNMETADATA_UNIT_FEET_PER_SECOND, "Hz", KNOWNMETADATA_UNIT_METER, KNOWNMETADATA_UNIT_METERS_PER_SECOND, + KNOWNMETADATA_UNIT_MILLISECOND, KNOWNMETADATA_UNIT_SECOND); + OpenVDS::printError(jsonOutput, "Args", msg); + return EXIT_FAILURE; + } + // Create axis descriptors std::vector axisDescriptors = createAxisDescriptors(fileInfo, sampleUnits, fold, primaryStep, secondaryStep); @@ -2033,7 +2059,7 @@ main(int argc, char* argv[]) } // Create channel descriptors - std::vector channelDescriptors = createChannelDescriptors(fileInfo, valueRange, OffsetChannelInfo(fileInfo.HasGatherOffset(), offsetStart, offsetEnd, offsetStep)); + std::vector channelDescriptors = createChannelDescriptors(fileInfo, valueRange, OffsetChannelInfo(fileInfo.HasGatherOffset(), offsetStart, offsetEnd, offsetStep), attributeName, attributeUnit); // Create metadata OpenVDS::MetadataContainer -- GitLab From b1404cd7c4a1a1f26f3c25baf18e3e81e55c9071 Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 3 Sep 2021 16:15:33 -0500 Subject: [PATCH 03/46] use TemporaryDirectory for output files in attribute name/unit tests --- .../SEGYImport/pytests/test_attribute_name.py | 42 ++++++++++--------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_attribute_name.py b/tests/tools/SEGYImport/pytests/test_attribute_name.py index b9c65377..e7e859c1 100644 --- a/tests/tools/SEGYImport/pytests/test_attribute_name.py +++ b/tests/tools/SEGYImport/pytests/test_attribute_name.py @@ -1,10 +1,17 @@ import os +import tempfile from pathlib import Path import pytest import openvds -from segyimport_test_config import test_data_dir, test_output_dir, ImportExecutor +from segyimport_test_config import test_data_dir, ImportExecutor + + +class TempVDSGuard: + def __init__(self, base_name="import_test"): + self.temp_dir = tempfile.TemporaryDirectory() + self.filename = os.path.join(self.temp_dir.name, base_name + ".vds") @pytest.fixture @@ -14,26 +21,23 @@ def poststack_segy() -> str: @pytest.fixture -def output_vds() -> str: - vds_filename = os.path.join(test_output_dir, "import_test.vds") - if Path(vds_filename).exists(): - os.remove(vds_filename) - return vds_filename +def output_vds() -> TempVDSGuard: + return TempVDSGuard() def test_default_attribute_and_unit(poststack_segy, output_vds): ex = ImportExecutor() - ex.add_args(["--vdsfile", output_vds]) + ex.add_args(["--vdsfile", output_vds.filename]) ex.add_arg(poststack_segy) result = ex.run() assert result == 0, ex.output() - assert Path(output_vds).exists() + assert Path(output_vds.filename).exists() - with openvds.open(output_vds, "") as handle: + with openvds.open(output_vds.filename, "") as handle: layout = openvds.getLayout(handle) descriptor = layout.getChannelDescriptor(0) assert descriptor.name == "Amplitude" @@ -45,7 +49,7 @@ def test_custom_attribute_name(poststack_segy, output_vds): ex = ImportExecutor() - ex.add_args(["--vdsfile", output_vds]) + ex.add_args(["--vdsfile", output_vds.filename]) ex.add_args(["--attribute-name", custom_attribute_name]) ex.add_arg(poststack_segy) @@ -53,9 +57,9 @@ def test_custom_attribute_name(poststack_segy, output_vds): result = ex.run() assert result == 0, ex.output() - assert Path(output_vds).exists() + assert Path(output_vds.filename).exists() - with openvds.open(output_vds, "") as handle: + with openvds.open(output_vds.filename, "") as handle: layout = openvds.getLayout(handle) descriptor = layout.getChannelDescriptor(0) assert descriptor.name == custom_attribute_name @@ -67,7 +71,7 @@ def test_custom_attribute_unit(poststack_segy, output_vds): ex = ImportExecutor() - ex.add_args(["--vdsfile", output_vds]) + ex.add_args(["--vdsfile", output_vds.filename]) ex.add_args(["--attribute-unit", custom_attribute_unit]) ex.add_arg(poststack_segy) @@ -75,9 +79,9 @@ def test_custom_attribute_unit(poststack_segy, output_vds): result = ex.run() assert result == 0, ex.output() - assert Path(output_vds).exists() + assert Path(output_vds.filename).exists() - with openvds.open(output_vds, "") as handle: + with openvds.open(output_vds.filename, "") as handle: layout = openvds.getLayout(handle) descriptor = layout.getChannelDescriptor(0) assert descriptor.name == "Amplitude" @@ -89,7 +93,7 @@ def test_invalid_attribute_name(poststack_segy, output_vds): ex = ImportExecutor() - ex.add_args(["--vdsfile", output_vds]) + ex.add_args(["--vdsfile", output_vds.filename]) ex.add_args(["--attribute-name", custom_attribute_name]) ex.add_arg(poststack_segy) @@ -97,7 +101,7 @@ def test_invalid_attribute_name(poststack_segy, output_vds): result = ex.run() assert result > 0, ex.output() - assert not Path(output_vds).exists() + assert not Path(output_vds.filename).exists() def test_invalid_attribute_unit(poststack_segy, output_vds): @@ -105,7 +109,7 @@ def test_invalid_attribute_unit(poststack_segy, output_vds): ex = ImportExecutor() - ex.add_args(["--vdsfile", output_vds]) + ex.add_args(["--vdsfile", output_vds.filename]) ex.add_args(["--attribute-unit", custom_attribute_unit]) ex.add_arg(poststack_segy) @@ -113,4 +117,4 @@ def test_invalid_attribute_unit(poststack_segy, output_vds): result = ex.run() assert result > 0, ex.output() - assert not Path(output_vds).exists() + assert not Path(output_vds.filename).exists() -- GitLab From 4f59ec35cbef8c616d33c80f9325d6a689efe708 Mon Sep 17 00:00:00 2001 From: Jim King Date: Mon, 13 Sep 2021 10:03:54 -0500 Subject: [PATCH 04/46] use TemporaryDirectory for output files in other tests --- .../pytests/segyimport_test_config.py | 30 ++++++++++++++-- .../SEGYImport/pytests/test_attribute_name.py | 9 +---- .../pytests/test_multi_file_scan.py | 34 +++++++++---------- 3 files changed, 45 insertions(+), 28 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/segyimport_test_config.py b/tests/tools/SEGYImport/pytests/segyimport_test_config.py index b69135f5..1a69a068 100644 --- a/tests/tools/SEGYImport/pytests/segyimport_test_config.py +++ b/tests/tools/SEGYImport/pytests/segyimport_test_config.py @@ -3,11 +3,14 @@ Global configuration information for SEGYImport pytests """ import os import subprocess -from typing import List +import tempfile +import weakref +from typing import List, Union, Any -test_output_dir = "c:\\temp\\SEGY\\t" test_data_dir = "c:\\temp\\SEGY\\RegressionTestData" +_temp_dir = None + class ImportExecutor: def __init__(self): @@ -35,4 +38,25 @@ class ImportExecutor: def command_line(self) -> str: """Convenience method to return a string showing the command and arguments""" return " ".join(self.args) - pass \ No newline at end of file + pass + + +class TempFileGuard: + def __init__(self, base_name: str, extension: str): + global _temp_dir + if not _temp_dir or not _temp_dir(): + self.temp_dir = tempfile.TemporaryDirectory() + _temp_dir = weakref.ref(self.temp_dir) + else: + self.temp_dir = _temp_dir() + self.filename = os.path.join(self.temp_dir.name, base_name + extension) + + +class TempVDSGuard(TempFileGuard): + def __init__(self, base_name="import_test"): + super().__init__(base_name, ".vds") + + +class TempScanFileGuard(TempFileGuard): + def __init__(self, base_name="scan_test"): + super().__init__(base_name, ".scan.json") diff --git a/tests/tools/SEGYImport/pytests/test_attribute_name.py b/tests/tools/SEGYImport/pytests/test_attribute_name.py index e7e859c1..c79f3611 100644 --- a/tests/tools/SEGYImport/pytests/test_attribute_name.py +++ b/tests/tools/SEGYImport/pytests/test_attribute_name.py @@ -1,17 +1,10 @@ import os -import tempfile from pathlib import Path import pytest import openvds -from segyimport_test_config import test_data_dir, ImportExecutor - - -class TempVDSGuard: - def __init__(self, base_name="import_test"): - self.temp_dir = tempfile.TemporaryDirectory() - self.filename = os.path.join(self.temp_dir.name, base_name + ".vds") +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard @pytest.fixture diff --git a/tests/tools/SEGYImport/pytests/test_multi_file_scan.py b/tests/tools/SEGYImport/pytests/test_multi_file_scan.py index e7894590..4b8126f9 100644 --- a/tests/tools/SEGYImport/pytests/test_multi_file_scan.py +++ b/tests/tools/SEGYImport/pytests/test_multi_file_scan.py @@ -5,7 +5,7 @@ from typing import List import pytest import openvds -from segyimport_test_config import test_data_dir, test_output_dir, ImportExecutor +from segyimport_test_config import test_data_dir, ImportExecutor, TempScanFileGuard, TempVDSGuard @pytest.fixture @@ -39,13 +39,13 @@ def multi_file_input_files(multi_file_input_parts_count, multi_file_input_base_n @pytest.fixture -def multi_file_scan_file_info_files(multi_file_input_parts_count, multi_file_input_base_name) -> List[str]: +def multi_file_scan_file_info_files(multi_file_input_parts_count, multi_file_input_base_name) -> List[TempScanFileGuard]: """File info filenames to be output when using --scan""" - filenames = [] + guards = [] for i in range(1, multi_file_input_parts_count + 1): - file_info_name = f"{multi_file_input_base_name}{i:02}.segy.scan.json" - filenames.append(os.path.join(test_output_dir, file_info_name)) - return filenames + file_info_base_name = f"{multi_file_input_base_name}{i:02}.segy" + guards.append(TempScanFileGuard(file_info_base_name)) + return guards @pytest.fixture @@ -62,10 +62,10 @@ def test_multi_file_scan_one_file_info(multi_file_input_glob): """ Tests --scan with multiple input SEGY files, but only one file info file specified. """ - file_info_filename = os.path.join(test_output_dir, "test.scan.json") + file_info_guard = TempScanFileGuard("multi_single_test") ex = ImportExecutor() - ex.add_args(["--prestack", "--scan", "--file-info", file_info_filename, multi_file_input_glob]) + ex.add_args(["--prestack", "--scan", "--file-info", file_info_guard.filename, multi_file_input_glob]) result = ex.run() # import should have failed @@ -79,12 +79,12 @@ def test_multi_file_scan(multi_file_input_files, multi_file_scan_file_info_files ex = ImportExecutor() ex.add_args(["--prestack", "--scan"]) - for filename in multi_file_scan_file_info_files: - ex.add_args(["--file-info", filename]) + for scan_file_guard in multi_file_scan_file_info_files: + ex.add_args(["--file-info", scan_file_guard.filename]) # ensure the output file doesn't exist - if Path(filename).exists(): - os.remove(filename) + if Path(scan_file_guard.filename).exists(): + os.remove(scan_file_guard.filename) ex.add_args(multi_file_input_files) @@ -95,15 +95,15 @@ def test_multi_file_scan(multi_file_input_files, multi_file_scan_file_info_files # output files should exist for filename in multi_file_scan_file_info_files: - assert Path(filename).exists() + assert Path(scan_file_guard.filename).exists() def test_multi_file_import_with_file_infos(multi_file_input_files, multi_file_input_file_info_files): ex = ImportExecutor() ex.add_arg("--prestack") - vds_filename = os.path.join(test_output_dir, "import_test.vds") - ex.add_args(["--vdsfile", vds_filename]) + vds_guard = TempVDSGuard("import_test") + ex.add_args(["--vdsfile", vds_guard.filename]) for filename in multi_file_input_file_info_files: ex.add_args(["--file-info", filename]) @@ -116,10 +116,10 @@ def test_multi_file_import_with_file_infos(multi_file_input_files, multi_file_in assert result == 0, ex.output() # output file should exist - assert Path(vds_filename).exists() + assert Path(vds_guard.filename).exists() # check dimensions of VDS - with openvds.open(vds_filename, "") as handle: + with openvds.open(vds_guard.filename, "") as handle: layout = openvds.getLayout(handle) assert layout.dimensionality == 4 assert layout.numSamples[0] == 851 -- GitLab From 5547b8b3ed7354305d46f7c8af91780dc995c247 Mon Sep 17 00:00:00 2001 From: Jim King Date: Tue, 14 Sep 2021 18:25:38 -0500 Subject: [PATCH 05/46] add some 2D tests --- tests/tools/SEGYImport/pytests/test_2d.py | 202 ++++++++++++++++++++++ 1 file changed, 202 insertions(+) create mode 100644 tests/tools/SEGYImport/pytests/test_2d.py diff --git a/tests/tools/SEGYImport/pytests/test_2d.py b/tests/tools/SEGYImport/pytests/test_2d.py new file mode 100644 index 00000000..ddfade33 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_2d.py @@ -0,0 +1,202 @@ +import os +from pathlib import Path + +import pytest +import openvds + +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard + + +@pytest.fixture +def output_vds() -> TempVDSGuard: + return TempVDSGuard("import2d_test") + + +@pytest.fixture +def poststack_2d_segy() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "PICEANCE-2D", + "A_raw_migr_stack_FF04_03.segy") + + +@pytest.fixture +def prestack_2d_segy() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "PICEANCE-2D", + "PostMigration_CDP_NMO_FF04_03_031314.sgy") + + +def test_2d_poststack_volume_info(poststack_2d_segy, output_vds): + ex = ImportExecutor() + + ex.add_arg("--2d") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(poststack_2d_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + assert layout.getDimensionality() == 2 + assert layout.getChannelCount() == 3 + assert layout.getChannelName(0) == "Amplitude" + assert layout.getChannelIndex("Trace") > 0 + assert layout.getChannelIndex("SEGYTraceHeader") > 0 + + descriptor = layout.getAxisDescriptor(0) + assert descriptor.name == "Sample" + assert descriptor.unit == "ms" + assert descriptor.coordinateMin == 0.0 + assert descriptor.coordinateMax == 5000.0 + assert descriptor.coordinateStep == 2.0 + + descriptor = layout.getAxisDescriptor(1) + assert descriptor.name == "CDP" + assert descriptor.unit == "unitless" + assert descriptor.coordinateMin == 1.0 + assert descriptor.coordinateMax == 1977.0 + assert descriptor.coordinateStep == 1.0 + + assert layout.getChannelValueRangeMin(0) == -302681.94 + assert layout.getChannelValueRangeMax(0) == 303745.38 + + # check lattice metadata is NOT there + assert not layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name) + assert not layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().name) + assert not layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().name) + + # check 2D trace info metadata is present + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.tracePositions().category, + openvds.KnownMetadata.tracePositions().name) + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.traceVerticalOffsets().category, + openvds.KnownMetadata.traceVerticalOffsets().name) + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.energySourcePointNumbers().category, + openvds.KnownMetadata.energySourcePointNumbers().name) + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.ensembleNumbers().category, + openvds.KnownMetadata.ensembleNumbers().name) + + +def test_2d_poststack_read(poststack_2d_segy, output_vds): + # run importer + # open VDS + # read VDS (entire thing at once) + # check that each trace is non-zero + + ex = ImportExecutor() + + ex.add_arg("--2d") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(poststack_2d_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + +def test_2d_prestack_volume_info(prestack_2d_segy, output_vds): + # run importer + # open VDS + # check dimensionality (3), channel count (4), channel names + # check 3 axis descriptors + # check sample value range + # check lattice metadata is NOT there + # check 2D trace info metadata + + ex = ImportExecutor() + + ex.add_arg("--2d") + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(prestack_2d_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + +def test_2d_prestack_read(prestack_2d_segy, output_vds): + # run importer + # open VDS + # read VDS (entire thing at once) + # check that each trace is non-zero + + ex = ImportExecutor() + + ex.add_arg("--2d") + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(prestack_2d_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + assert layout.getDimensionality() == 3 + assert layout.getChannelCount() == 4 + assert layout.getChannelName(0) == "Amplitude" + assert layout.getChannelIndex("Offset") > 0 + assert layout.getChannelIndex("Trace") > 0 + assert layout.getChannelIndex("SEGYTraceHeader") > 0 + + descriptor = layout.getAxisDescriptor(0) + assert descriptor.name == "Sample" + assert descriptor.unit == "ms" + assert descriptor.coordinateMin == 0.0 + assert descriptor.coordinateMax == 5000.0 + assert descriptor.coordinateStep == 2.0 + + descriptor = layout.getAxisDescriptor(1) + assert descriptor.name == "Trace (offset)" + assert descriptor.unit == "unitless" + assert descriptor.coordinateMin == 1.0 + assert descriptor.coordinateMax == 64.0 + assert descriptor.coordinateStep == 1.0 + + descriptor = layout.getAxisDescriptor(2) + assert descriptor.name == "CDP" + assert descriptor.unit == "unitless" + assert descriptor.coordinateMin == 1.0 + assert descriptor.coordinateMax == 1977.0 + assert descriptor.coordinateStep == 1.0 + + assert layout.getChannelValueRangeMin(0) == -420799.12 + assert layout.getChannelValueRangeMax(0) == 421478.38 + + # check lattice metadata is NOT there + assert not layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name) + assert not layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().name) + assert not layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().name) + + # check 2D trace info metadata is present + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.tracePositions().category, + openvds.KnownMetadata.tracePositions().name) + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.traceVerticalOffsets().category, + openvds.KnownMetadata.traceVerticalOffsets().name) + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.energySourcePointNumbers().category, + openvds.KnownMetadata.energySourcePointNumbers().name) + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.ensembleNumbers().category, + openvds.KnownMetadata.ensembleNumbers().name) -- GitLab From 5a3db32f08a042fe2ec31cd9c643a5ae5d3a3e5a Mon Sep 17 00:00:00 2001 From: Jim King Date: Wed, 22 Sep 2021 18:34:07 -0500 Subject: [PATCH 06/46] poststack passes tests, prestack offset and trace flag isn't correct --- src/SEGYUtils/SEGYFileInfo.cpp | 8 +- src/SEGYUtils/SEGYUtils/SEGYFileInfo.h | 2 + tests/tools/SEGYImport/pytests/test_2d.py | 109 ++++-- tools/SEGYImport/CMakeLists.txt | 2 + tools/SEGYImport/SEGYImport.cpp | 409 +++++++++++++++++----- tools/SEGYImport/TraceInfo2DManager.cpp | 125 +++++++ tools/SEGYImport/TraceInfo2DManager.h | 100 ++++++ 7 files changed, 647 insertions(+), 108 deletions(-) create mode 100644 tools/SEGYImport/TraceInfo2DManager.cpp create mode 100644 tools/SEGYImport/TraceInfo2DManager.h diff --git a/src/SEGYUtils/SEGYFileInfo.cpp b/src/SEGYUtils/SEGYFileInfo.cpp index 6edcb364..944413e3 100644 --- a/src/SEGYUtils/SEGYFileInfo.cpp +++ b/src/SEGYUtils/SEGYFileInfo.cpp @@ -45,7 +45,13 @@ bool SEGYFileInfo::HasGatherOffset() const { // TODO unbinned gathers may be angle gathers? - return m_segyType == SEGY::SEGYType::Prestack || IsUnbinned(); + return m_segyType == SEGYType::Prestack || m_segyType == SEGYType::Prestack2D || IsUnbinned(); +} + +bool +SEGYFileInfo::Is2D() const +{ + return m_segyType == SEGYType::Poststack2D || m_segyType == SEGYType::Prestack2D; } SEGYBinInfo diff --git a/src/SEGYUtils/SEGYUtils/SEGYFileInfo.h b/src/SEGYUtils/SEGYUtils/SEGYFileInfo.h index 333715e6..4af81353 100644 --- a/src/SEGYUtils/SEGYUtils/SEGYFileInfo.h +++ b/src/SEGYUtils/SEGYUtils/SEGYFileInfo.h @@ -117,6 +117,8 @@ struct SEGYFileInfo OPENVDS_EXPORT bool IsUnbinned() const; OPENVDS_EXPORT bool HasGatherOffset() const; + + OPENVDS_EXPORT bool Is2D() const; }; #endif // SEGY_FILE_INFO_H diff --git a/tests/tools/SEGYImport/pytests/test_2d.py b/tests/tools/SEGYImport/pytests/test_2d.py index ddfade33..5200f194 100644 --- a/tests/tools/SEGYImport/pytests/test_2d.py +++ b/tests/tools/SEGYImport/pytests/test_2d.py @@ -60,8 +60,8 @@ def test_2d_poststack_volume_info(poststack_2d_segy, output_vds): assert descriptor.coordinateMax == 1977.0 assert descriptor.coordinateStep == 1.0 - assert layout.getChannelValueRangeMin(0) == -302681.94 - assert layout.getChannelValueRangeMax(0) == 303745.38 + assert layout.getChannelValueRangeMin(0) == -302681.9375 + assert layout.getChannelValueRangeMax(0) == 303745.375 # check lattice metadata is NOT there assert not layout.isMetadataDoubleVector2Available( @@ -103,6 +103,21 @@ def test_2d_poststack_read(poststack_2d_segy, output_vds): assert result == 0, ex.output() assert Path(output_vds.filename).exists() + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + access_manager = openvds.getAccessManager(handle) + dim0_size = layout.getDimensionNumSamples(0) + dim1_size = layout.getDimensionNumSamples(1) + request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), (dim0_size, dim1_size, 1, 1, 1, 1), + dimensionsND=openvds.DimensionsND.Dimensions_01, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32) + data = request.data.reshape(dim1_size, dim0_size) + for dim1 in range(dim1_size): + total = 0 + for dim0 in range(dim0_size): + total += abs(data[dim1, dim0]) + assert total > 0.0, f"trace at {dim1}" + def test_2d_prestack_volume_info(prestack_2d_segy, output_vds): # run importer @@ -126,26 +141,6 @@ def test_2d_prestack_volume_info(prestack_2d_segy, output_vds): assert result == 0, ex.output() assert Path(output_vds.filename).exists() - -def test_2d_prestack_read(prestack_2d_segy, output_vds): - # run importer - # open VDS - # read VDS (entire thing at once) - # check that each trace is non-zero - - ex = ImportExecutor() - - ex.add_arg("--2d") - ex.add_arg("--prestack") - ex.add_args(["--vdsfile", output_vds.filename]) - - ex.add_arg(prestack_2d_segy) - - result = ex.run() - - assert result == 0, ex.output() - assert Path(output_vds.filename).exists() - with openvds.open(output_vds.filename, "") as handle: layout = openvds.getLayout(handle) @@ -177,8 +172,8 @@ def test_2d_prestack_read(prestack_2d_segy, output_vds): assert descriptor.coordinateMax == 1977.0 assert descriptor.coordinateStep == 1.0 - assert layout.getChannelValueRangeMin(0) == -420799.12 - assert layout.getChannelValueRangeMax(0) == 421478.38 + assert layout.getChannelValueRangeMin(0) == -420799.125 + assert layout.getChannelValueRangeMax(0) == 421478.375 # check lattice metadata is NOT there assert not layout.isMetadataDoubleVector2Available( @@ -200,3 +195,69 @@ def test_2d_prestack_read(prestack_2d_segy, output_vds): openvds.KnownMetadata.energySourcePointNumbers().name) assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.ensembleNumbers().category, openvds.KnownMetadata.ensembleNumbers().name) + + +def test_2d_prestack_read(prestack_2d_segy, output_vds): + # run importer + # open VDS + # read VDS (entire thing at once) + # check that each trace is non-zero + + ex = ImportExecutor() + + ex.add_arg("--2d") + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(prestack_2d_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + access_manager = openvds.getAccessManager(handle) + dim0_size = layout.getDimensionNumSamples(0) + dim1_size = layout.getDimensionNumSamples(1) + dim2_size = layout.getDimensionNumSamples(2) + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + offset_channel = layout.getChannelIndex("Offset") + assert offset_channel > 0 + + request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), + (dim0_size, dim1_size, dim2_size, 1, 1, 1), + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_012) + offset_request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), + (1, dim1_size, dim2_size, 1, 1, 1), + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_012) + trace_flag_request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), + (1, dim1_size, dim2_size, 1, 1, 1), + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_012) + + data = request.data.reshape(dim2_size, dim1_size, dim0_size) + offset_data = offset_request.data.reshape(dim2_size, dim1_size) + trace_flag_data = trace_flag_request.data.reshape(dim2_size, dim1_size) + + for dim2 in range(dim2_size): + for dim1 in range(dim1_size): + trace_flag = trace_flag_data[dim2, dim1] + trace_offset = offset_data[dim2, dim1] + ensemble_number = layout.getAxisDescriptor(2).sampleIndexToCoordinate(dim2) + + total = 0 + for dim0 in range(dim0_size): + total += abs(data[dim1, dim0]) + + message = f"trace at dim1 {dim1} dim2 {dim2} offset {trace_offset} CDP {ensemble_number}" + + if trace_flag == 0: + assert total == 0.0, f"dead {message}" + else: + assert total > 0.0, message diff --git a/tools/SEGYImport/CMakeLists.txt b/tools/SEGYImport/CMakeLists.txt index b09f076d..71496143 100644 --- a/tools/SEGYImport/CMakeLists.txt +++ b/tools/SEGYImport/CMakeLists.txt @@ -1,5 +1,7 @@ add_executable(SEGYImport SEGYImport.cpp + TraceInfo2DManager.cpp + TraceInfo2DManager.h ) target_link_libraries(SEGYImport PRIVATE Threads::Threads openvds segyutils jsoncpp_lib_static fmt::fmt) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 8d1a65af..057849bb 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -21,6 +21,7 @@ #include "VDS/Hash.h" #include #include +#include "TraceInfo2DManager.h" #include #include @@ -59,6 +60,8 @@ #include #include +constexpr double METERS_PER_FOOT = 0.3048; + int64_t GetTotalSystemMemory() { MEMORYSTATUSEX status; @@ -701,9 +704,12 @@ copySamples(const void* data, SEGY::BinaryHeader::DataSampleFormatCode dataSampl } bool -analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSegmentInfo const& segmentInfo, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, const SEGY::SEGYType segyType, int& offsetStart, int& offsetEnd, int& offsetStep, bool jsonOutput, OpenVDS::Error& error) +analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSegmentInfo const& segmentInfo, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, const SEGY::SEGYType segyType, int& offsetStart, int& offsetEnd, int& offsetStep, TraceInfo2DManager * pTraceInfo2DManager, bool jsonOutput, OpenVDS::Error& error) { assert(segmentInfo.m_traceStop >= segmentInfo.m_traceStart && "A valid segment info should always have a stop trace greater or equal to the start trace"); + assert(pTraceInfo2DManager != nullptr); + + pTraceInfo2DManager->Clear(); bool success = true; @@ -747,7 +753,7 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg traceBufferStart = trace; traceBufferSize = (segmentInfo.m_traceStop - trace + 1) < 1000 ? int(segmentInfo.m_traceStop - trace + 1) : 1000; - buffer.reset(new char[traceByteSize * traceBufferSize]); + buffer.reset(new char[static_cast(traceByteSize) * traceBufferSize]); int64_t offset = SEGY::TextualFileHeaderSize + SEGY::BinaryFileHeaderSize + traceByteSize * traceBufferStart; success = dataProvider.Read(buffer.get(), offset, traceByteSize * traceBufferSize, error); @@ -760,7 +766,7 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg const void *header = buffer.get() + traceByteSize * (trace - traceBufferStart); const void *data = buffer.get() + traceByteSize * (trace - traceBufferStart) + SEGY::TraceHeaderSize; - int tracePrimaryKey = SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness); + int tracePrimaryKey = fileInfo.Is2D() ? 0 : SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness); int traceSecondaryKey = fileInfo.IsUnbinned() ? static_cast(trace - segmentInfo.m_traceStart + 1) : SEGY::ReadFieldFromHeader(header, fileInfo.m_secondaryKey, fileInfo.m_headerEndianness); if(tracePrimaryKey != segmentInfo.m_primaryKey) @@ -769,6 +775,8 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg continue; } + pTraceInfo2DManager->AddTraceInfo(static_cast(header)); + if(gatherFold > 0 && traceSecondaryKey == gatherSecondaryKey) { gatherFold++; @@ -882,6 +890,38 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg return success; } +double +ConvertDistance(SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, double distance) +{ + // convert SEGY distance to meters + if (segyMeasurementSystem == SEGY::BinaryHeader::MeasurementSystem::Feet) + { + return distance * METERS_PER_FOOT; + } + + // if measurement system field in header is Meters or Unknown then return the value unchanged + return distance; +} + +float +ConvertDistance(SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, float distance) +{ + return static_cast(ConvertDistance(segyMeasurementSystem, static_cast(distance))); +} + +double +ConvertDistanceInverse(SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, double distance) +{ + // convert meters to SEGY distance + if (segyMeasurementSystem == SEGY::BinaryHeader::MeasurementSystem::Feet) + { + return distance / METERS_PER_FOOT; + } + + // if measurement system field in header is Meters or Unknown then return the value unchanged + return distance; +} + bool createSEGYMetadata(DataProvider &dataProvider, SEGYFileInfo const &fileInfo, OpenVDS::MetadataContainer& metadataContainer, SEGY::BinaryHeader::MeasurementSystem &measurementSystem, OpenVDS::Error& error) { @@ -907,10 +947,12 @@ createSEGYMetadata(DataProvider &dataProvider, SEGYFileInfo const &fileInfo, Ope } void -createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryHeader::MeasurementSystem measurementSystem, std::string const &crsWkt, OpenVDS::MetadataContainer& metadataContainer) +createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, std::string const &crsWkt, OpenVDS::MetadataContainer& metadataContainer) { if (fileInfo.m_segmentInfoLists.empty()) return; + if (fileInfo.Is2D()) return; + double inlineSpacing[2] = { 0, 0 }; double crosslineSpacing[2] = { 0, 0 }; @@ -991,19 +1033,19 @@ createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryH origin[1] -= crosslineSpacing[1] * firstSegmentInfo.m_binInfoStart.m_crosslineNumber; // Set coordinate system - metadataContainer.SetMetadataDoubleVector2(LATTICE_CATEGORY, LATTICE_ORIGIN, OpenVDS::DoubleVector2(origin[0], origin[1])); - metadataContainer.SetMetadataDoubleVector2(LATTICE_CATEGORY, LATTICE_INLINE_SPACING, OpenVDS::DoubleVector2(inlineSpacing[0], inlineSpacing[1])); - metadataContainer.SetMetadataDoubleVector2(LATTICE_CATEGORY, LATTICE_CROSSLINE_SPACING, OpenVDS::DoubleVector2(crosslineSpacing[0], crosslineSpacing[1])); + metadataContainer.SetMetadataDoubleVector2(LATTICE_CATEGORY, LATTICE_ORIGIN, OpenVDS::DoubleVector2(ConvertDistance(segyMeasurementSystem, origin[0]), ConvertDistance(segyMeasurementSystem, origin[1]))); + metadataContainer.SetMetadataDoubleVector2(LATTICE_CATEGORY, LATTICE_INLINE_SPACING, OpenVDS::DoubleVector2(ConvertDistance(segyMeasurementSystem, inlineSpacing[0]), ConvertDistance(segyMeasurementSystem, inlineSpacing[1]))); + metadataContainer.SetMetadataDoubleVector2(LATTICE_CATEGORY, LATTICE_CROSSLINE_SPACING, OpenVDS::DoubleVector2(ConvertDistance(segyMeasurementSystem, crosslineSpacing[0]), ConvertDistance(segyMeasurementSystem, crosslineSpacing[1]))); if(!crsWkt.empty()) { metadataContainer.SetMetadataString(LATTICE_CATEGORY, CRS_WKT, crsWkt); } - if(measurementSystem == SEGY::BinaryHeader::MeasurementSystem::Meters) + if(segyMeasurementSystem == SEGY::BinaryHeader::MeasurementSystem::Meters) { metadataContainer.SetMetadataString(LATTICE_CATEGORY, LATTICE_UNIT, KNOWNMETADATA_UNIT_METER); } - else if(measurementSystem == SEGY::BinaryHeader::MeasurementSystem::Feet) + else if(segyMeasurementSystem == SEGY::BinaryHeader::MeasurementSystem::Feet) { metadataContainer.SetMetadataString(LATTICE_CATEGORY, LATTICE_UNIT, KNOWNMETADATA_UNIT_FOOT); } @@ -1078,6 +1120,54 @@ createImportInformationMetadata(const std::vector &dataProviders, return true; } +bool +create2DTraceInformationMetadata(const bool is2D, TraceInfo2DManager * pTraceInfo2DManager, OpenVDS::MetadataContainer& metadataContainer, OpenVDS::Error& error) +{ + if (!is2D || pTraceInfo2DManager == nullptr || pTraceInfo2DManager->Count() == 0) + { + metadataContainer.ClearMetadata(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_TRACEPOSITIONS); + metadataContainer.ClearMetadata(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_TRACEVERTICALOFFSETS); + metadataContainer.ClearMetadata(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_ENERGYSOURCEPOINTNUMBERS); + metadataContainer.ClearMetadata(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_ENSEMBLENUMBERS); + return true; + } + + // create KNOWNMETADATA_TRACECOORDINATES metadata items: KNOWNMETADATA_TRACEPOSITIONS, KNOWNMETADATA_TRACEVERTICALOFFSETS, KNOWNMETADATA_ENERGYSOURCEPOINTNUMBERS, KNOWNMETADATA_ENSEMBLENUMBERS + + const auto + nItems = pTraceInfo2DManager->Count(); + + std::vector + tracePositions, + verticalOffsets; + std::vector + espNumbers, + ensembleNumbers; + + tracePositions.reserve(nItems * 2); + verticalOffsets.reserve(nItems); + espNumbers.reserve(nItems); + ensembleNumbers.reserve(nItems); + + for (int i = 0; i < nItems; ++i) + { + const auto + & item = pTraceInfo2DManager->Get(i); + tracePositions.push_back(item.x); + tracePositions.push_back(item.y); + verticalOffsets.push_back(item.startTime); + espNumbers.push_back(item.energySourcePointNumber); + ensembleNumbers.push_back(item.ensembleNumber); + } + + metadataContainer.SetMetadataBLOB(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_TRACEPOSITIONS, tracePositions); + metadataContainer.SetMetadataBLOB(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_TRACEVERTICALOFFSETS, verticalOffsets); + metadataContainer.SetMetadataBLOB(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_ENERGYSOURCEPOINTNUMBERS, espNumbers); + metadataContainer.SetMetadataBLOB(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_ENSEMBLENUMBERS, ensembleNumbers); + + return true; +} + bool parseSEGYFileInfoFile(const DataProvider& dataProvider, SEGYFileInfo& fileInfo, const size_t fileIndex, const bool isValidateHeader, OpenVDS::Error& error) { @@ -1208,7 +1298,24 @@ createAxisDescriptors(SEGYFileInfo const& fileInfo, SEGY::SampleUnits sampleUnit axisDescriptors.emplace_back(fileInfo.m_sampleCount, KNOWNMETADATA_SURVEYCOORDINATE_INLINECROSSLINE_AXISNAME_SAMPLE, sampleUnit, (float)fileInfo.m_startTimeMilliseconds, (float)fileInfo.m_startTimeMilliseconds + (fileInfo.m_sampleCount - 1) * (float)fileInfo.m_sampleIntervalMilliseconds); - if (fileInfo.IsUnbinned()) + if (fileInfo.Is2D()) + { + assert(fileInfo.m_segmentInfoLists.size() == 1 && fileInfo.m_segmentInfoLists[0].size() == 1); + + if (fold > 1) + { + axisDescriptors.emplace_back(fold, VDS_DIMENSION_TRACE_NAME(VDS_DIMENSION_TRACE_SORT_OFFSET), KNOWNMETADATA_UNIT_UNITLESS, 1.0f, static_cast(fold)); + } + + const auto + & segmentInfo = fileInfo.m_segmentInfoLists[0][0]; + + // the secondary key axis will be 1..N + const auto + secondaryKeyCount = segmentInfo.m_binInfoStop.m_crosslineNumber - segmentInfo.m_binInfoStart.m_crosslineNumber + 1; + axisDescriptors.emplace_back(secondaryKeyCount, VDS_DIMENSION_CDP_NAME, KNOWNMETADATA_UNIT_UNITLESS, 1.0f, static_cast(secondaryKeyCount)); + } + else if (fileInfo.IsUnbinned()) { int maxTraceNumber = 0; @@ -1297,12 +1404,12 @@ createAxisDescriptors(SEGYFileInfo const& fileInfo, SEGY::SampleUnits sampleUnit struct OffsetChannelInfo { - int offsetStart; - int offsetEnd; - int offsetStep; + float offsetStart; + float offsetEnd; + float offsetStep; bool hasOffset; - OffsetChannelInfo(bool has, int start, int end, int step) : offsetStart(start), offsetEnd(end), offsetStep(step), hasOffset(has) {} + OffsetChannelInfo(bool has, float start, float end, float step) : offsetStart(start), offsetEnd(end), offsetStep(step), hasOffset(has) {} }; std::vector @@ -1323,7 +1430,7 @@ createChannelDescriptors(SEGYFileInfo const& fileInfo, OpenVDS::FloatRange const if (offsetInfo.hasOffset) { // offset channel - channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_R32, OpenVDS::VolumeDataChannelDescriptor::Components_1, "Offset", KNOWNMETADATA_UNIT_METER, static_cast(offsetInfo.offsetStart), static_cast(offsetInfo.offsetEnd),OpenVDS::VolumeDataMapping::PerTrace, OpenVDS::VolumeDataChannelDescriptor::NoLossyCompression); + channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_R32, OpenVDS::VolumeDataChannelDescriptor::Components_1, "Offset", KNOWNMETADATA_UNIT_METER, offsetInfo.offsetStart, offsetInfo.offsetEnd, OpenVDS::VolumeDataMapping::PerTrace, OpenVDS::VolumeDataChannelDescriptor::NoLossyCompression); // TODO channels for other gather types - "Angle", "Vrms", "Frequency" } @@ -1444,9 +1551,63 @@ findFirstTrace(TraceDataManager& traceDataManager, const SEGYSegmentInfo& segmen } int -SecondaryKeyDimension(const SEGYFileInfo& fileInfo) +VoxelIndexToDataIndex(const SEGYFileInfo& fileInfo, const int primaryIndex, const int secondaryIndex, const int tertiaryIndex, const int chunkMin[6], const int pitch[6]) { + // The given voxel position consists of the indices for sample, trace (offset), secondary key, primary key. Which of those indices is actually used will depend on the SEGY type. + + // In all cases we assume the sample index is 0, and we won't use it when computing the data index + + if (fileInfo.Is2D()) + { + if (fileInfo.HasGatherOffset()) + { + // use indices for trace, secondary key + return (secondaryIndex - chunkMin[2]) * pitch[2] + (tertiaryIndex - chunkMin[1]) * pitch[1]; + } + // else use index for secondary key + return (secondaryIndex - chunkMin[1]) * pitch[1]; + } + if (fileInfo.Is4D()) + { + // use all the indices + return (primaryIndex - chunkMin[3]) * pitch[3] + (secondaryIndex - chunkMin[2]) * pitch[2] + (tertiaryIndex - chunkMin[1]) * pitch[1]; + } + + // else it's 3D poststack; use the indices for secondary key, primary key + return (primaryIndex - chunkMin[2]) * pitch[2] + (secondaryIndex - chunkMin[1]) * pitch[1]; +} + +int +TraceIndex2DToEnsembleNumber(TraceInfo2DManager * pTraceInfo2DManager, int traceIndex, OpenVDS::Error & error) +{ + assert(pTraceInfo2DManager != nullptr); + + error = {}; + + if (pTraceInfo2DManager == nullptr) + { + error.code = 1; + error.string = "TraceIndex2DToEnsembleNumber: 2D trace information is missing"; + return 0; + } + + assert(traceIndex >= 0); + assert(traceIndex < pTraceInfo2DManager->Count()); + if (traceIndex < 0 || traceIndex >= pTraceInfo2DManager->Count()) + { + error.code = 1; + error.string = "TraceIndex2DToEnsembleNumber: Requested trace index is missing from 2D trace info"; + return 0; + } + + return pTraceInfo2DManager->Get(traceIndex).ensembleNumber; +} + +int +SecondaryKeyDimension(const SEGYFileInfo& fileInfo) +{ + if (fileInfo.m_segyType == SEGY::SEGYType::Prestack || fileInfo.m_segyType == SEGY::SEGYType::Prestack2D) { return 2; } @@ -1456,12 +1617,14 @@ SecondaryKeyDimension(const SEGYFileInfo& fileInfo) int PrimaryKeyDimension(const SEGYFileInfo& fileInfo) { - if (fileInfo.Is4D()) + if (fileInfo.m_segyType == SEGY::SEGYType::Prestack || fileInfo.m_segyType == SEGY::SEGYType::Prestack2D) { + // Prestack2D doesn't really have a primary key dimension, but we'll use the same one as for Prestack as sort of a placeholder. return 3; } return 2; } + int main(int argc, char* argv[]) { @@ -1476,8 +1639,8 @@ main(int argc, char* argv[]) std::string headerFormatFileName; std::vector headerFields; - std::string primaryKey = "InlineNumber"; - std::string secondaryKey = "CrosslineNumber"; + std::string primaryKey; + std::string secondaryKey; std::string sampleUnit; std::string crsWkt; double scale = 0; @@ -1502,6 +1665,7 @@ main(int argc, char* argv[]) bool uniqueID = false; bool disablePersistentID = false; bool prestack = false; + bool is2D = false; bool traceOrderByOffset = true; bool jsonOutput = false; bool help = false; @@ -1510,6 +1674,11 @@ main(int argc, char* argv[]) std::string attributeName = AMPLITUDE_ATTRIBUTE_NAME; std::string attributeUnit; + // default key names used if not supplied by user + std::string defaultPrimaryKey = "InlineNumber"; + std::string defaultSecondaryKey = "CrosslineNumber"; + + std::string supportedCompressionMethods = "None"; if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::Wavelet)) supportedCompressionMethods += ", Wavelet"; if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::RLE)) supportedCompressionMethods += ", RLE"; @@ -1522,8 +1691,8 @@ main(int argc, char* argv[]) options.add_option("", "", "header-format", "A JSON file defining the header format for the input SEG-Y file. The expected format is a dictonary of strings (field names) to pairs (byte position, field width) where field width can be \"TwoByte\" or \"FourByte\". Additionally, an \"Endianness\" key can be specified as \"BigEndian\" or \"LittleEndian\".", cxxopts::value(headerFormatFileName), ""); options.add_option("", "", "header-field", "A single definition of a header field. The expected format is a \"fieldname=offset:width\" where the \":width\" is optional. Its also possible to specify range: \"fieldname=begin-end\". Multiple header-fields is specified by providing multiple --header-field arguments.", cxxopts::value>(headerFields), "header_name=offset:width"); - options.add_option("", "p", "primary-key", "The name of the trace header field to use as the primary key.", cxxopts::value(primaryKey)->default_value("Inline"), ""); - options.add_option("", "s", "secondary-key", "The name of the trace header field to use as the secondary key.", cxxopts::value(secondaryKey)->default_value("Crossline"), ""); + options.add_option("", "p", "primary-key", "The name of the trace header field to use as the primary key.", cxxopts::value(primaryKey), ""); + options.add_option("", "s", "secondary-key", "The name of the trace header field to use as the secondary key.", cxxopts::value(secondaryKey), ""); options.add_option("", "", "prestack", "Import binned prestack data (PSTM/PSDM gathers).", cxxopts::value(prestack), ""); options.add_option("", "", "scale", "If a scale override (floating point) is given, it is used to scale the coordinates in the header instead of determining the scale factor from the coordinate scale trace header field.", cxxopts::value(scale), ""); options.add_option("", "", "sample-unit", "A sample unit of 'ms' is used for datasets in the time domain (default), while a sample unit of 'm' or 'ft' is used for datasets in the depth domain", cxxopts::value(sampleUnit), ""); @@ -1549,6 +1718,7 @@ main(int argc, char* argv[]) options.add_option("", "", "json-output", "Enable json output.", cxxopts::value(jsonOutput), ""); options.add_option("", "", "attribute-name", "The name of the primary VDS channel. The name may be Amplitude (default), Attribute, Depth, Probability, Time, Vavg, Vint, or Vrms", cxxopts::value(attributeName)->default_value(AMPLITUDE_ATTRIBUTE_NAME), ""); options.add_option("", "", "attribute-unit", "The units of the primary VDS channel. The unit name may be blank (default), ft, ft/s, Hz, m, m/s, ms, or s", cxxopts::value(attributeUnit), ""); + options.add_option("", "", "2d", "Import 2D data.", cxxopts::value(is2D), ""); // TODO add option for turning off traceOrderByOffset options.add_option("", "h", "help", "Print this help information", cxxopts::value(help), ""); @@ -1637,13 +1807,38 @@ main(int argc, char* argv[]) } } + if (is2D) + { + defaultSecondaryKey = "EnsembleNumber"; + } + + if (primaryKey.empty()) + { + primaryKey = defaultPrimaryKey; + } + if (secondaryKey.empty()) + { + secondaryKey = defaultSecondaryKey; + } + // get the canonical field name for the primary and secondary key ResolveAlias(primaryKey); ResolveAlias(secondaryKey); SEGY::SEGYType segyType = SEGY::SEGYType::Poststack; - if (primaryKey == "inlinenumber" || primaryKey == "crosslinenumber") + if (is2D) + { + if (prestack) + { + segyType = SEGY::SEGYType::Prestack2D; + } + else + { + segyType = SEGY::SEGYType::Poststack2D; + } + } + else if (primaryKey == "inlinenumber" || primaryKey == "crosslinenumber") { if (prestack) { @@ -1754,9 +1949,6 @@ main(int argc, char* argv[]) SEGY::HeaderField startTimeHeaderField = g_traceHeaderFields["starttime"]; - SEGYBinInfoHeaderFields - binInfoHeaderFields(g_traceHeaderFields[primaryKey], g_traceHeaderFields[secondaryKey], g_traceHeaderFields["coordinatescale"], g_traceHeaderFields["ensemblexcoordinate"], g_traceHeaderFields["ensembleycoordinate"], scale); - OpenVDS::Error error; std::string errorFileName; @@ -1772,6 +1964,13 @@ main(int argc, char* argv[]) fileInfo(headerEndianness); fileInfo.m_segyType = segyType; + const auto + xcoordHeaderFieldIndex = fileInfo.Is2D() ? "sourcexcoordinate" : "ensemblexcoordinate", + ycoordHeaderFieldIndex = fileInfo.Is2D() ? "sourceycoordinate" : "ensembleycoordinate"; + + SEGYBinInfoHeaderFields + binInfoHeaderFields(g_traceHeaderFields[primaryKey], g_traceHeaderFields[secondaryKey], g_traceHeaderFields["coordinatescale"], g_traceHeaderFields[xcoordHeaderFieldIndex], g_traceHeaderFields[ycoordHeaderFieldIndex], scale); + // Scan the file if '--scan' was passed or we're uploading but no fileInfo file was specified if (scan || fileInfoFileNames.empty()) { @@ -1922,9 +2121,9 @@ main(int argc, char* argv[]) // Check for only a single segment - if (fileInfo.m_segmentInfoLists.size() == 1 && fileInfo.m_segmentInfoLists[0].size() == 1) + if (!is2D && fileInfo.m_segmentInfoLists.size() == 1 && fileInfo.m_segmentInfoLists[0].size() == 1) { - OpenVDS::printWarning_with_condition_fatal(jsonOutput, !ignoreWarnings, "SegmentInfoList", "Warning: There is only one segment, either this is (as of now unsupported) 2D data or this usually indicates using the wrong header format for the input dataset.", "Use --ignore-warnings to force the import to go ahead."); + OpenVDS::printWarning_with_condition_fatal(jsonOutput, !ignoreWarnings, "SegmentInfoList", "Warning: There is only one segment, either this is 2D data or this usually indicates using the wrong header format for the input dataset.", "Use --2d for 2D data. Use --ignore-warnings to force the import to go ahead."); } // Determine value range, fold and primary/secondary step @@ -1935,9 +2134,20 @@ main(int argc, char* argv[]) const float valueRangePercentile = 99.5f; // 99.5f is the same default as Petrel uses. + // Create the appropriate TraceInfo2DManager here so that it can be called during AnalzyeSegment. + // + // Note: Piggybacking the gathering of 2D trace info onto AnalyzeSegment is possible because + // a) 2D SEGY has exactly one segment, and b) AnalyzeSegment reads every trace in the segment its + // analyzing. If (b) is no longer true because we've changed the analysis strategy then something + // will need to take the place of this piggybacking. + // + auto traceInfo2DManager = fileInfo.Is2D() + ? std::unique_ptr(new TraceInfo2DManagerImpl(fileInfo.m_headerEndianness, scale, g_traceHeaderFields["coordinatescale"], g_traceHeaderFields["sourcexcoordinate"], g_traceHeaderFields["sourceycoordinate"], g_traceHeaderFields["starttime"], g_traceHeaderFields["energysourcepointnumber"], g_traceHeaderFields["ensemblenumber"])) + : std::unique_ptr(new TraceInfo2DManager()); + int fileIndex; - auto representativeSegment = findRepresentativeSegment(fileInfo, primaryStep, fileIndex); - analyzeSegment(dataProviders[fileIndex], fileInfo, representativeSegment, valueRangePercentile, valueRange, fold, secondaryStep, segyType, offsetStart, offsetEnd, offsetStep, jsonOutput, error); + const auto& representativeSegment = findRepresentativeSegment(fileInfo, primaryStep, fileIndex); + analyzeSegment(dataProviders[fileIndex], fileInfo, representativeSegment, valueRangePercentile, valueRange, fold, secondaryStep, segyType, offsetStart, offsetEnd, offsetStep, traceInfo2DManager.get(), jsonOutput, error); if (error.code != 0) { @@ -2058,9 +2268,6 @@ main(int argc, char* argv[]) OpenVDS::printWarning_with_condition_fatal(jsonOutput, !ignoreWarnings, "SEGY", msg, fatal_msg); } - // Create channel descriptors - std::vector channelDescriptors = createChannelDescriptors(fileInfo, valueRange, OffsetChannelInfo(fileInfo.HasGatherOffset(), offsetStart, offsetEnd, offsetStep), attributeName, attributeUnit); - // Create metadata OpenVDS::MetadataContainer metadataContainer; @@ -2074,10 +2281,10 @@ main(int argc, char* argv[]) } SEGY::BinaryHeader::MeasurementSystem - measurementSystem; + segyMeasurementSystem; // get SEGY metadata from first file - createSEGYMetadata(dataProviders[0], fileInfo, metadataContainer, measurementSystem, error); + createSEGYMetadata(dataProviders[0], fileInfo, metadataContainer, segyMeasurementSystem, error); if (error.code != 0) { @@ -2085,9 +2292,23 @@ main(int argc, char* argv[]) return EXIT_FAILURE; } + OffsetChannelInfo + offsetInfo(fileInfo.HasGatherOffset(), ConvertDistance(segyMeasurementSystem, static_cast(offsetStart)), ConvertDistance(segyMeasurementSystem, static_cast(offsetEnd)), ConvertDistance(segyMeasurementSystem, static_cast(offsetStep))); + + // Create channel descriptors + std::vector channelDescriptors = createChannelDescriptors(fileInfo, valueRange, offsetInfo, attributeName, attributeUnit); + if (segyType == SEGY::SEGYType::Poststack || segyType == SEGY::SEGYType::Prestack) { - createSurveyCoordinateSystemMetadata(fileInfo, measurementSystem, crsWkt, metadataContainer); + createSurveyCoordinateSystemMetadata(fileInfo, segyMeasurementSystem, crsWkt, metadataContainer); + } + + create2DTraceInformationMetadata(is2D, traceInfo2DManager.get(), metadataContainer, error); + + if (error.code != 0) + { + OpenVDS::printError(jsonOutput, "Metadata", error.string); + return EXIT_FAILURE; } OpenVDS::Error createError; @@ -2130,7 +2351,7 @@ main(int argc, char* argv[]) OpenVDS::DimensionsND writeDimensionGroup = OpenVDS::DimensionsND::Dimensions_012; - if (IsSEGYTypeUnbinned(segyType)) + if (IsSEGYTypeUnbinned(segyType) || segyType == SEGY::SEGYType::Poststack2D) { writeDimensionGroup = OpenVDS::DimensionsND::Dimensions_01; } @@ -2195,11 +2416,19 @@ main(int argc, char* argv[]) chunkInfo.sampleStart = chunkInfo.min[0]; chunkInfo.sampleCount = chunkInfo.max[0] - chunkInfo.min[0]; - chunkInfo.secondaryKeyStart = (int)floorf(layout->GetAxisDescriptor(SecondaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.min[SecondaryKeyDimension(fileInfo)]) + 0.5f); - chunkInfo.secondaryKeyStop = (int)floorf(layout->GetAxisDescriptor(SecondaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.max[SecondaryKeyDimension(fileInfo)] - 1) + 0.5f); + OpenVDS::Error traceIndexError; + + chunkInfo.secondaryKeyStart = fileInfo.Is2D() ? TraceIndex2DToEnsembleNumber(traceInfo2DManager.get(), chunkInfo.min[SecondaryKeyDimension(fileInfo)], traceIndexError) : (int)floorf(layout->GetAxisDescriptor(SecondaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.min[SecondaryKeyDimension(fileInfo)]) + 0.5f); + chunkInfo.secondaryKeyStop = fileInfo.Is2D() ? TraceIndex2DToEnsembleNumber(traceInfo2DManager.get(), chunkInfo.max[SecondaryKeyDimension(fileInfo)] - 1, traceIndexError) : (int)floorf(layout->GetAxisDescriptor(SecondaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.max[SecondaryKeyDimension(fileInfo)] - 1) + 0.5f); - chunkInfo.primaryKeyStart = (int)floorf(layout->GetAxisDescriptor(PrimaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.min[PrimaryKeyDimension(fileInfo)]) + 0.5f); - chunkInfo.primaryKeyStop = (int)floorf(layout->GetAxisDescriptor(PrimaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.max[PrimaryKeyDimension(fileInfo)] - 1) + 0.5f); + if (fileInfo.Is2D() && traceIndexError.code != 0) + { + OpenVDS::printWarning(jsonOutput, "2DEnsembleIndex", "Could not translate trace index to ensemble number", fmt::format("{}", error.code), error.string); + break; + } + + chunkInfo.primaryKeyStart = fileInfo.Is2D() ? 0 : (int)floorf(layout->GetAxisDescriptor(PrimaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.min[PrimaryKeyDimension(fileInfo)]) + 0.5f); + chunkInfo.primaryKeyStop = fileInfo.Is2D() ? 0 : (int)floorf(layout->GetAxisDescriptor(PrimaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.max[PrimaryKeyDimension(fileInfo)] - 1) + 0.5f); // For each input file, find the lower/upper segments and then add data requests to that file's traceDataManager @@ -2225,7 +2454,13 @@ main(int argc, char* argv[]) lower, upper; - if (fileInfo.IsUnbinned()) + if (fileInfo.Is2D()) + { + // Hopefully 2D files always a single line + lower = segmentInfoList.begin(); + upper = segmentInfoList.end(); + } + else if (fileInfo.IsUnbinned()) { // For unbinned data we set lower and upper based on the 1-based indices instead of searching on the segment primary keys. // When we implement raw gathers mode we'll need to modify this. @@ -2392,7 +2627,7 @@ main(int argc, char* argv[]) const void* data = header + SEGY::TraceHeaderSize; - int primaryTest = SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness), + int primaryTest = fileInfo.Is2D() ? 0 : SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness), secondaryTest = fileInfo.IsUnbinned() ? static_cast(trace - segment->m_traceStart + 1) : SEGY::ReadFieldFromHeader(header, fileInfo.m_secondaryKey, fileInfo.m_headerEndianness); // Check if the trace is outside the secondary range and go to the next segment if it is @@ -2411,7 +2646,12 @@ main(int argc, char* argv[]) int primaryIndex, secondaryIndex; - if (fileInfo.IsUnbinned()) + if (fileInfo.Is2D()) + { + primaryIndex = 0; + secondaryIndex = traceInfo2DManager->GetIndexOfEnsembleNumber(secondaryTest); + } + else if (fileInfo.IsUnbinned()) { primaryIndex = static_cast(segment - segmentInfo.begin()); secondaryIndex = secondaryTest - 1; @@ -2446,65 +2686,68 @@ main(int argc, char* argv[]) } { - int targetOffset; - if (fileInfo.Is4D()) - { - targetOffset = (primaryIndex - chunkInfo.min[3]) * amplitudePitch[3] + (secondaryIndex - chunkInfo.min[2]) * amplitudePitch[2] + (tertiaryIndex - chunkInfo.min[1]) * amplitudePitch[1]; - } - else - { - targetOffset = (primaryIndex - chunkInfo.min[2]) * amplitudePitch[2] + (secondaryIndex - chunkInfo.min[1]) * amplitudePitch[1]; - } + //int targetOffset; + //if (fileInfo.Is4D()) + //{ + // targetOffset = (primaryIndex - chunkInfo.min[3]) * amplitudePitch[3] + (secondaryIndex - chunkInfo.min[2]) * amplitudePitch[2] + (tertiaryIndex - chunkInfo.min[1]) * amplitudePitch[1]; + //} + //else + //{ + // targetOffset = (primaryIndex - chunkInfo.min[2]) * amplitudePitch[2] + (secondaryIndex - chunkInfo.min[1]) * amplitudePitch[1]; + //} + const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, amplitudePitch); copySamples(data, fileInfo.m_dataSampleFormatCode, fileInfo.m_headerEndianness, &reinterpret_cast(amplitudeBuffer)[targetOffset], chunkInfo.sampleStart, chunkInfo.sampleCount); } if (traceFlagBuffer) { - int targetOffset; - if (fileInfo.Is4D()) - { - targetOffset = (primaryIndex - chunkInfo.min[3]) * traceFlagPitch[3] + (secondaryIndex - chunkInfo.min[2]) * traceFlagPitch[2] + (tertiaryIndex - chunkInfo.min[1]) * traceFlagPitch[1]; - } - else - { - targetOffset = (primaryIndex - chunkInfo.min[2]) * traceFlagPitch[2] + (secondaryIndex - chunkInfo.min[1]) * traceFlagPitch[1]; - } + //int targetOffset; + //if (fileInfo.Is4D()) + //{ + // targetOffset = (primaryIndex - chunkInfo.min[3]) * traceFlagPitch[3] + (secondaryIndex - chunkInfo.min[2]) * traceFlagPitch[2] + (tertiaryIndex - chunkInfo.min[1]) * traceFlagPitch[1]; + //} + //else + //{ + // targetOffset = (primaryIndex - chunkInfo.min[2]) * traceFlagPitch[2] + (secondaryIndex - chunkInfo.min[1]) * traceFlagPitch[1]; + //} + const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, traceFlagPitch); reinterpret_cast(traceFlagBuffer)[targetOffset] = true; } if (segyTraceHeaderBuffer) { - int targetOffset; - if (fileInfo.Is4D()) - { - targetOffset = (primaryIndex - chunkInfo.min[3]) * segyTraceHeaderPitch[3] + (secondaryIndex - chunkInfo.min[2]) * segyTraceHeaderPitch[2] + (tertiaryIndex - chunkInfo.min[1]) * segyTraceHeaderPitch[1]; - } - else - { - targetOffset = (primaryIndex - chunkInfo.min[2]) * segyTraceHeaderPitch[2] + (secondaryIndex - chunkInfo.min[1]) * segyTraceHeaderPitch[1]; - } + //int targetOffset; + //if (fileInfo.Is4D()) + //{ + // targetOffset = (primaryIndex - chunkInfo.min[3]) * segyTraceHeaderPitch[3] + (secondaryIndex - chunkInfo.min[2]) * segyTraceHeaderPitch[2] + (tertiaryIndex - chunkInfo.min[1]) * segyTraceHeaderPitch[1]; + //} + //else + //{ + // targetOffset = (primaryIndex - chunkInfo.min[2]) * segyTraceHeaderPitch[2] + (secondaryIndex - chunkInfo.min[1]) * segyTraceHeaderPitch[1]; + //} + const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, segyTraceHeaderPitch); memcpy(&reinterpret_cast(segyTraceHeaderBuffer)[targetOffset], header, SEGY::TraceHeaderSize); } if (offsetBuffer) { - // offset is only applicable to 4D? - int targetOffset; - if (fileInfo.Is4D()) - { - targetOffset = (primaryIndex - chunkInfo.min[3]) * offsetPitch[3] + (secondaryIndex - chunkInfo.min[2]) * offsetPitch[2] + (tertiaryIndex - chunkInfo.min[1]) * offsetPitch[1]; - } - else - { - targetOffset = (primaryIndex - chunkInfo.min[2]) * offsetPitch[2] + (secondaryIndex - chunkInfo.min[1]) * offsetPitch[1]; - } + //int targetOffset; + //if (fileInfo.Is4D()) + //{ + // targetOffset = (primaryIndex - chunkInfo.min[3]) * offsetPitch[3] + (secondaryIndex - chunkInfo.min[2]) * offsetPitch[2] + (tertiaryIndex - chunkInfo.min[1]) * offsetPitch[1]; + //} + //else + //{ + // targetOffset = (primaryIndex - chunkInfo.min[2]) * offsetPitch[2] + (secondaryIndex - chunkInfo.min[1]) * offsetPitch[1]; + //} + const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, offsetPitch); - const int - traceOffset = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness); - reinterpret_cast(offsetBuffer)[targetOffset] = static_cast(traceOffset); + const auto + traceOffset = ConvertDistance(segyMeasurementSystem, static_cast(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness))); + static_cast(offsetBuffer)[targetOffset] = traceOffset; } } } diff --git a/tools/SEGYImport/TraceInfo2DManager.cpp b/tools/SEGYImport/TraceInfo2DManager.cpp new file mode 100644 index 00000000..3ce5f514 --- /dev/null +++ b/tools/SEGYImport/TraceInfo2DManager.cpp @@ -0,0 +1,125 @@ +/**************************************************************************** +** Copyright 2021 The Open Group +** Copyright 2021 Bluware, Inc. +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +****************************************************************************/ + +#include "TraceInfo2DManager.h" + +TraceInfo2DManagerStore::TraceInfo2DManagerStore(const TraceInfo2D* pData, int64_t count) : m_traceInfo(pData, pData + count) +{ +} + +void TraceInfo2DManagerStore::Clear() +{ + m_traceInfo.clear(); + ClearEnsembleNumberMap(); +} + +int TraceInfo2DManagerStore::Count() +{ + return static_cast(m_traceInfo.size()); +} + +const TraceInfo2D& TraceInfo2DManagerStore::Get(int traceNumber) +{ + return m_traceInfo.at(traceNumber); +} + +const TraceInfo2D& TraceInfo2DManagerStore::Front() +{ + return m_traceInfo.front(); +} + +int TraceInfo2DManagerStore::GetIndexOfEnsembleNumber(int ensembleNumber) +{ + if (m_ensembleToIndex.size() != m_traceInfo.size()) + { + BuildEnsembleNumberMap(); + } + + const auto + iter = m_ensembleToIndex.find(ensembleNumber); + assert(iter != m_ensembleToIndex.end()); + if (iter == m_ensembleToIndex.end()) + { + return 0; + } + + return iter->second; +} + +void TraceInfo2DManagerStore::BuildEnsembleNumberMap() +{ + m_ensembleToIndex.clear(); + for (int index = 0; index < m_traceInfo.size(); ++index) + { + m_ensembleToIndex.insert(std::make_pair(m_traceInfo[index].ensembleNumber, index)); + } +} + +void TraceInfo2DManagerStore::ClearEnsembleNumberMap() +{ + m_ensembleToIndex.clear(); +} + +TraceInfo2DManagerImpl::TraceInfo2DManagerImpl(SEGY::Endianness endianness, double scaleOverride, const SEGY::HeaderField& coordinateScaleHeaderField, const SEGY::HeaderField& xCoordinateHeaderField, const SEGY::HeaderField& yCoordinateHeaderField, const SEGY::HeaderField& startTimeHeaderField, const SEGY::HeaderField& espnHeaderField, const SEGY::HeaderField& ensembleNumberHeaderField) + : m_endianness(endianness), m_scaleOverride(scaleOverride), m_coordinateScaleHeaderField(coordinateScaleHeaderField), m_xCoordinateHeaderField(xCoordinateHeaderField), m_yCoordinateHeaderField(yCoordinateHeaderField), m_startTimeHeaderField(startTimeHeaderField), m_espnHeaderField(espnHeaderField), m_ensembleNumberHeaderField(ensembleNumberHeaderField), m_previousEnsembleNumber(-1) +{ +} + +void TraceInfo2DManagerImpl::Clear() +{ + TraceInfo2DManagerStore::Clear(); + m_previousEnsembleNumber = -1; +} + +void TraceInfo2DManagerImpl::AddTraceInfo(const char* traceHeader) +{ + double coordScaleFactor = 1.0; + + if (m_scaleOverride == 0.0) + { + int scale = SEGY::ReadFieldFromHeader(traceHeader, m_coordinateScaleHeaderField, m_endianness); + if (scale < 0) + { + coordScaleFactor = 1.0 / static_cast(std::abs(scale)); + } + else if (scale > 0) + { + coordScaleFactor = 1.0 * static_cast(std::abs(scale)); + } + } + else + { + coordScaleFactor = m_scaleOverride; + } + + TraceInfo2D traceInfo{}; + + traceInfo.energySourcePointNumber = SEGY::ReadFieldFromHeader(traceHeader, m_espnHeaderField, m_endianness); + traceInfo.ensembleNumber = SEGY::ReadFieldFromHeader(traceHeader, m_ensembleNumberHeaderField, m_endianness); + traceInfo.x = coordScaleFactor * SEGY::ReadFieldFromHeader(traceHeader, m_xCoordinateHeaderField, m_endianness); + traceInfo.y = coordScaleFactor * SEGY::ReadFieldFromHeader(traceHeader, m_yCoordinateHeaderField, m_endianness); + traceInfo.startTime = SEGY::ReadFieldFromHeader(traceHeader, m_startTimeHeaderField, m_endianness); + + if (m_previousEnsembleNumber != traceInfo.ensembleNumber) + { + m_traceInfo.push_back(traceInfo); + m_previousEnsembleNumber = traceInfo.ensembleNumber; + + // any modification to trace info collection should invalidate the ensemble number mapping + ClearEnsembleNumberMap(); + } +} diff --git a/tools/SEGYImport/TraceInfo2DManager.h b/tools/SEGYImport/TraceInfo2DManager.h new file mode 100644 index 00000000..0b7be871 --- /dev/null +++ b/tools/SEGYImport/TraceInfo2DManager.h @@ -0,0 +1,100 @@ +/**************************************************************************** +** Copyright 2021 The Open Group +** Copyright 2021 Bluware, Inc. +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +****************************************************************************/ + +#pragma once + +#include "SEGYUtils/SEGYFileInfo.h" +#include + +struct TraceInfo2D +{ + int energySourcePointNumber; + int ensembleNumber; + double x; + double y; + int startTime; +}; + +class TraceInfo2DManager +{ +public: + virtual ~TraceInfo2DManager() = default; + + virtual void Clear() {} + + virtual void AddTraceInfo(const char* traceHeader) {} + + virtual int Count() { return 0; } + + virtual const TraceInfo2D& Get(int traceNumber) { return m_placeHolder; } + + virtual const TraceInfo2D& Front() { return m_placeHolder; } + + virtual int GetIndexOfEnsembleNumber(int ensembleNumber) { return 0; } + +private: + TraceInfo2D m_placeHolder{}; +}; + +class TraceInfo2DManagerStore : public TraceInfo2DManager +{ +public: + TraceInfo2DManagerStore() = default; + + TraceInfo2DManagerStore(const TraceInfo2D* pData, int64_t count); + + void Clear() override; + + int Count() override; + + const TraceInfo2D& Get(int traceNumber) override; + + const TraceInfo2D& Front() override; + + int GetIndexOfEnsembleNumber(int ensembleNumber) override; + +protected: + std::vector m_traceInfo; + std::unordered_map + m_ensembleToIndex; + + void BuildEnsembleNumberMap(); + + void ClearEnsembleNumberMap(); +}; + +class TraceInfo2DManagerImpl : public TraceInfo2DManagerStore +{ +public: + TraceInfo2DManagerImpl(SEGY::Endianness endianness, double scaleOverride, const SEGY::HeaderField& coordinateScaleHeaderField, const SEGY::HeaderField& xCoordinateHeaderField, const SEGY::HeaderField& yCoordinateHeaderField, const SEGY::HeaderField& startTimeHeaderField, const SEGY::HeaderField& espnHeaderField, const SEGY::HeaderField& ensembleNumberHeaderField); + + void Clear() override; + + void AddTraceInfo(const char* traceHeader) override; + +private: + SEGY::Endianness m_endianness; + double m_scaleOverride; + SEGY::HeaderField m_coordinateScaleHeaderField; + SEGY::HeaderField m_xCoordinateHeaderField; + SEGY::HeaderField m_yCoordinateHeaderField; + SEGY::HeaderField m_startTimeHeaderField; + SEGY::HeaderField m_espnHeaderField; + SEGY::HeaderField m_ensembleNumberHeaderField; + + int m_previousEnsembleNumber; +}; \ No newline at end of file -- GitLab From c7735d12d81fdb3b34baec8c9a3f265de13ebbe0 Mon Sep 17 00:00:00 2001 From: Jim King Date: Thu, 23 Sep 2021 10:42:38 -0500 Subject: [PATCH 07/46] fix prestack test to use the correct channel numbers for offset and valid trace --- tests/tools/SEGYImport/pytests/test_2d.py | 31 +++++++++++++++++++---- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_2d.py b/tests/tools/SEGYImport/pytests/test_2d.py index 5200f194..ec3caeb7 100644 --- a/tests/tools/SEGYImport/pytests/test_2d.py +++ b/tests/tools/SEGYImport/pytests/test_2d.py @@ -108,15 +108,33 @@ def test_2d_poststack_read(poststack_2d_segy, output_vds): access_manager = openvds.getAccessManager(handle) dim0_size = layout.getDimensionNumSamples(0) dim1_size = layout.getDimensionNumSamples(1) - request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), (dim0_size, dim1_size, 1, 1, 1, 1), - dimensionsND=openvds.DimensionsND.Dimensions_01, - format=openvds.VolumeDataChannelDescriptor.Format.Format_R32) + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + + request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), + (dim0_size, dim1_size, 1, 1, 1, 1), + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_01) + trace_flag_request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), + (1, dim1_size, 1, 1, 1, 1), + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_01) + data = request.data.reshape(dim1_size, dim0_size) + trace_flag_data = trace_flag_request.data + for dim1 in range(dim1_size): total = 0 for dim0 in range(dim0_size): total += abs(data[dim1, dim0]) - assert total > 0.0, f"trace at {dim1}" + + if trace_flag_data[dim1] == 0: + assert total == 0.0, f"dead trace at {dim1}" + else: + assert total > 0.0, f"trace at {dim1}" def test_2d_prestack_volume_info(prestack_2d_segy, output_vds): @@ -230,14 +248,17 @@ def test_2d_prestack_read(prestack_2d_segy, output_vds): request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), (dim0_size, dim1_size, dim2_size, 1, 1, 1), + channel=0, format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, dimensionsND=openvds.DimensionsND.Dimensions_012) offset_request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), (1, dim1_size, dim2_size, 1, 1, 1), + channel=offset_channel, format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, dimensionsND=openvds.DimensionsND.Dimensions_012) trace_flag_request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), (1, dim1_size, dim2_size, 1, 1, 1), + channel=trace_channel, format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, dimensionsND=openvds.DimensionsND.Dimensions_012) @@ -253,7 +274,7 @@ def test_2d_prestack_read(prestack_2d_segy, output_vds): total = 0 for dim0 in range(dim0_size): - total += abs(data[dim1, dim0]) + total += abs(data[dim2, dim1, dim0]) message = f"trace at dim1 {dim1} dim2 {dim2} offset {trace_offset} CDP {ensemble_number}" -- GitLab From db7421925f3c2dfe453daeba141583d865fe6898 Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 24 Sep 2021 11:13:47 -0500 Subject: [PATCH 08/46] pytests for offset-sorted --- .../SEGYImport/pytests/test_offset_sorted.py | 313 ++++++++++++++++++ 1 file changed, 313 insertions(+) create mode 100644 tests/tools/SEGYImport/pytests/test_offset_sorted.py diff --git a/tests/tools/SEGYImport/pytests/test_offset_sorted.py b/tests/tools/SEGYImport/pytests/test_offset_sorted.py new file mode 100644 index 00000000..3f8e1c2f --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_offset_sorted.py @@ -0,0 +1,313 @@ +import os +from pathlib import Path +from typing import Tuple + +import pytest +import openvds + +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard, TempScanFileGuard + + +@pytest.fixture +def output_vds() -> TempVDSGuard: + return TempVDSGuard("import_offset_sorted_test") + + +@pytest.fixture +def output_scan() -> TempScanFileGuard: + return TempScanFileGuard("ST0202.segy.test.scan.json") + + +@pytest.fixture +def offset_sorted_segy() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "CrosslineSorted", "ST0202.segy") + + +@pytest.fixture +def offset_sorted_scan() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "CrosslineSorted", + "ST0202.segy.scan.json") + + +@pytest.fixture +def conventional_sorted_segy() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "CrosslineSorted", + "ST0202_conventional.segy") + + +@pytest.fixture +def conventional_output_vds() -> TempVDSGuard: + return TempVDSGuard("import_conventional_sorted_test") + + +@pytest.fixture +def offset_sorted_executor(offset_sorted_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Fixture to setup an ImportExecutor with common options for offset-sorted SEGY""" + ex = ImportExecutor() + + ex.add_args(["--header-field", "offset=177:4"]) + + ex.add_arg("--offset-sorted") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(offset_sorted_segy) + + return ex, output_vds + + +@pytest.fixture +def conventional_sorted_executor(conventional_sorted_segy, conventional_output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Fixture to setup an ImportExecutor with common options for conventionally sorted SEGY""" + ex = ImportExecutor() + + ex.add_args(["--header-field", "offset=177:4"]) + + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", conventional_output_vds.filename]) + + ex.add_arg(conventional_sorted_segy) + + return ex, conventional_output_vds + + +def test_produce_status(offset_sorted_executor): + ex, output_vds = offset_sorted_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + access_manager = openvds.getAccessManager(handle) + + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_02) != openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_023) != openvds.VDSProduceStatus.Unavailable + + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_01) == openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_03) == openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_012) == openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_013) == openvds.VDSProduceStatus.Unavailable + + +def test_survey_coordinate_system(offset_sorted_executor): + ex, output_vds = offset_sorted_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + # verify that coordinate system metadata exists + assert layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name) + assert layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().name) + assert layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().name) + + # verify coordinate system component values + value = layout.getMetadataDoubleVector2( + openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name) + assert value.x == pytest.approx(431961.93, abs=0.05) + assert value.y == pytest.approx(6348554.9, abs=0.05) + + value = layout.getMetadataDoubleVector2( + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().name) + assert value.x == pytest.approx(6.05, abs=0.05) + assert value.y == pytest.approx(24.26, abs=0.05) + + value = layout.getMetadataDoubleVector2( + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().name) + assert value.x == pytest.approx(-12.13, abs=0.05) + assert value.y == pytest.approx(3.02, abs=0.05) + + +def test_read_gather(offset_sorted_executor): + ex, output_vds = offset_sorted_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + access_manager = openvds.getAccessManager(handle) + dim0_size = layout.getDimensionNumSamples(0) + dim1_size = layout.getDimensionNumSamples(1) + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + + inline_index = layout.getDimensionNumSamples(3) // 2 + crossline_index = layout.getDimensionNumSamples(2) // 2 + + request = access_manager.requestVolumeSubset((0, 0, crossline_index, inline_index, 0, 0), + (dim0_size, dim1_size, crossline_index + 1, inline_index + 1, 1, 1), + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_02) + trace_flag_request = access_manager.requestVolumeSubset((0, 0, crossline_index, inline_index, 0, 0), + (1, dim1_size, crossline_index + 1, inline_index + 1, 1, 1), + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_02) + + data = request.data.reshape(dim1_size, dim0_size) + trace_flag_data = trace_flag_request.data + + for dim1 in range(dim1_size): + total = 0 + for dim0 in range(dim0_size): + total += abs(data[dim1, dim0]) + + if trace_flag_data[dim1] == 0: + assert total == 0.0, f"dead trace at {dim1}" + else: + assert total > 0.0, f"trace at {dim1}" + + +def test_compare_with_conventional_sorted(offset_sorted_executor, conventional_sorted_executor): + ex, output_vds = offset_sorted_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + conventional_ex, conventional_vds = conventional_sorted_executor + + result = conventional_ex.run() + + assert result == 0, conventional_ex.output() + assert Path(conventional_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + with openvds.open(conventional_vds.filename, "") as handle_conv: + access_manager = openvds.getAccessManager(handle) + layout = openvds.getLayout(handle) + + access_manager_conv = openvds.getAccessManager(handle_conv) + layout_conv = openvds.getLayout(handle_conv) + + num_z = layout.getDimensionNumSamples(0) + fold = layout.getDimensionNumSamples(1) + num_crossline = layout.getDimensionNumSamples(2) + num_inline = layout.getDimensionNumSamples(3) + + assert num_z == layout_conv.getDimensionNumSamples(0) + assert fold == layout_conv.getDimensionNumSamples(1) + assert num_crossline == layout_conv.getDimensionNumSamples(2) + assert num_inline == layout_conv.getDimensionNumSamples(3) + + trace_channel = layout.getChannelIndex("Trace") + trace_channel_conv = layout_conv.getChannelIndex("Trace") + offset_channel = layout.getChannelIndex("Offset") + offset_channel_conv = layout_conv.getChannelIndex("Offset") + + assert trace_channel > 0 + assert trace_channel_conv > 0 + assert offset_channel > 0 + assert offset_channel_conv > 0 + + for inline_index in range(num_inline): + for crossline_index in range(num_crossline): + voxel_min = (0, 0, crossline_index, inline_index, 0, 0) + voxel_max = (num_z, fold, crossline_index + 1, inline_index + 1, 1, 1) + trace_voxel_max = (1, fold, crossline_index + 1, inline_index + 1, 1, 1) + + request = access_manager.requestVolumeSubset(voxel_min, voxel_max, + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_02) + request_conv = access_manager_conv.requestVolumeSubset(voxel_min, voxel_max, + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_01) + + trace_flag_request = access_manager.requestVolumeSubset(voxel_min, trace_voxel_max, + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_02) + trace_flag_request_conv = access_manager_conv.requestVolumeSubset(voxel_min, trace_voxel_max, + channel=trace_channel_conv, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_01) + + offset_request = access_manager.requestVolumeSubset(voxel_min, trace_voxel_max, + channel=offset_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_02) + offset_request_conv = access_manager_conv.requestVolumeSubset(voxel_min, trace_voxel_max, + channel=offset_channel_conv, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_01) + + sample_data = request.data.reshape(fold, num_z) + sample_data_conv = request_conv.data.reshape(fold, num_z) + + for trace_index in range(fold): + assert trace_flag_request.data[trace_index] == trace_flag_request_conv.data[trace_index] + assert offset_request.data[trace_index] == offset_request_conv.data[trace_index] + + if trace_flag_request.data[trace_index] != 0: + for sample_index in range(num_z): + assert sample_data[trace_index, sample_index] == sample_data_conv[trace_index, sample_index] + + +def test_create_scan_file(offset_sorted_segy, output_scan): + ex = ImportExecutor() + + ex.add_args(["--header-field", "offset=177:4"]) + + ex.add_arg("--offset-sorted") + ex.add_arg("--scan") + ex.add_args(["--file-info", output_scan.filename]) + + ex.add_arg(offset_sorted_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_scan.filename).exists() + + # might be nice to load the scan file do some light verification of contents... + + +def test_with_scan_file(offset_sorted_segy, offset_sorted_scan, output_vds): + ex = ImportExecutor() + + ex.add_args(["--header-field", "offset=177:4"]) + + ex.add_arg("--offset-sorted") + ex.add_args(["--file-info", offset_sorted_scan]) + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(offset_sorted_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_scan.filename).exists() + + # check produce status as a quick test for output VDS validity + with openvds.open(output_vds.filename, "") as handle: + access_manager = openvds.getAccessManager(handle) + + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_02) != openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_023) != openvds.VDSProduceStatus.Unavailable + + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_01) == openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_03) == openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_012) == openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_013) == openvds.VDSProduceStatus.Unavailable -- GitLab From 4b104c5bc58d6cbd9d567ad38eda5efb3c967113 Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 24 Sep 2021 12:32:43 -0500 Subject: [PATCH 09/46] remove obsolete commented out code --- tools/SEGYImport/SEGYImport.cpp | 36 --------------------------------- 1 file changed, 36 deletions(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 057849bb..9b18b864 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -2686,15 +2686,6 @@ main(int argc, char* argv[]) } { - //int targetOffset; - //if (fileInfo.Is4D()) - //{ - // targetOffset = (primaryIndex - chunkInfo.min[3]) * amplitudePitch[3] + (secondaryIndex - chunkInfo.min[2]) * amplitudePitch[2] + (tertiaryIndex - chunkInfo.min[1]) * amplitudePitch[1]; - //} - //else - //{ - // targetOffset = (primaryIndex - chunkInfo.min[2]) * amplitudePitch[2] + (secondaryIndex - chunkInfo.min[1]) * amplitudePitch[1]; - //} const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, amplitudePitch); copySamples(data, fileInfo.m_dataSampleFormatCode, fileInfo.m_headerEndianness, &reinterpret_cast(amplitudeBuffer)[targetOffset], chunkInfo.sampleStart, chunkInfo.sampleCount); @@ -2702,15 +2693,6 @@ main(int argc, char* argv[]) if (traceFlagBuffer) { - //int targetOffset; - //if (fileInfo.Is4D()) - //{ - // targetOffset = (primaryIndex - chunkInfo.min[3]) * traceFlagPitch[3] + (secondaryIndex - chunkInfo.min[2]) * traceFlagPitch[2] + (tertiaryIndex - chunkInfo.min[1]) * traceFlagPitch[1]; - //} - //else - //{ - // targetOffset = (primaryIndex - chunkInfo.min[2]) * traceFlagPitch[2] + (secondaryIndex - chunkInfo.min[1]) * traceFlagPitch[1]; - //} const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, traceFlagPitch); reinterpret_cast(traceFlagBuffer)[targetOffset] = true; @@ -2718,15 +2700,6 @@ main(int argc, char* argv[]) if (segyTraceHeaderBuffer) { - //int targetOffset; - //if (fileInfo.Is4D()) - //{ - // targetOffset = (primaryIndex - chunkInfo.min[3]) * segyTraceHeaderPitch[3] + (secondaryIndex - chunkInfo.min[2]) * segyTraceHeaderPitch[2] + (tertiaryIndex - chunkInfo.min[1]) * segyTraceHeaderPitch[1]; - //} - //else - //{ - // targetOffset = (primaryIndex - chunkInfo.min[2]) * segyTraceHeaderPitch[2] + (secondaryIndex - chunkInfo.min[1]) * segyTraceHeaderPitch[1]; - //} const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, segyTraceHeaderPitch); memcpy(&reinterpret_cast(segyTraceHeaderBuffer)[targetOffset], header, SEGY::TraceHeaderSize); @@ -2734,15 +2707,6 @@ main(int argc, char* argv[]) if (offsetBuffer) { - //int targetOffset; - //if (fileInfo.Is4D()) - //{ - // targetOffset = (primaryIndex - chunkInfo.min[3]) * offsetPitch[3] + (secondaryIndex - chunkInfo.min[2]) * offsetPitch[2] + (tertiaryIndex - chunkInfo.min[1]) * offsetPitch[1]; - //} - //else - //{ - // targetOffset = (primaryIndex - chunkInfo.min[2]) * offsetPitch[2] + (secondaryIndex - chunkInfo.min[1]) * offsetPitch[1]; - //} const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, offsetPitch); const auto -- GitLab From 19e6a8cdd250050479ccfff033989a4e305aa3f1 Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 24 Sep 2021 16:44:53 -0500 Subject: [PATCH 10/46] supporting changes for offset sorted --- src/SEGYUtils/SEGYFileInfo.cpp | 70 ++++++++++++++++----- src/SEGYUtils/SEGYUtils/SEGY.h | 1 + src/SEGYUtils/SEGYUtils/SEGYFileInfo.h | 9 ++- tools/SEGYImport/SEGYImport.cpp | 86 +++++++++++++++++++++----- 4 files changed, 135 insertions(+), 31 deletions(-) diff --git a/src/SEGYUtils/SEGYFileInfo.cpp b/src/SEGYUtils/SEGYFileInfo.cpp index 944413e3..179bf826 100644 --- a/src/SEGYUtils/SEGYFileInfo.cpp +++ b/src/SEGYUtils/SEGYFileInfo.cpp @@ -54,6 +54,12 @@ SEGYFileInfo::Is2D() const return m_segyType == SEGYType::Poststack2D || m_segyType == SEGYType::Prestack2D; } +bool +SEGYFileInfo::IsOffsetSorted() const +{ + return m_segyType == SEGYType::PrestackOffsetSorted; +} + SEGYBinInfo SEGYFileInfo::readBinInfoFromHeader(const char *header, SEGYBinInfoHeaderFields const &headerFields, Endianness endianness, int segmentTraceIndex) const { @@ -99,8 +105,15 @@ SEGYFileInfo::StaticGetUniqueID() } bool -SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Error &error, HeaderField const &primaryKeyHeaderField, HeaderField const &secondaryKeyHeaderField, SEGY::HeaderField const &startTimeHeaderField, SEGYBinInfoHeaderFields const &binInfoHeaderFields) +SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Error &error, HeaderField const &primaryKeyHeaderField, HeaderField const &secondaryKeyHeaderField, SEGY::HeaderField const &startTimeHeaderField, SEGY::HeaderField const& offsetHeaderField, SEGYBinInfoHeaderFields const &binInfoHeaderFields) { + std::function + readOffset = [](const char* traceHeader) { return -1; }; + if (IsOffsetSorted()) + { + readOffset = [offsetHeaderField, this](const char* traceHeader) { return ReadFieldFromHeader(traceHeader, offsetHeaderField, m_headerEndianness); }; + } + char textualFileHeader[TextualFileHeaderSize]; char binaryFileHeader[BinaryFileHeaderSize]; char traceHeader[TraceHeaderSize]; @@ -148,14 +161,6 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro for (const auto& dataProvider : dataProviders) { - m_segmentInfoLists.emplace_back(); - m_traceCounts.emplace_back(); - - auto& - segmentInfos = m_segmentInfoLists.back(); - auto& - traceCount = m_traceCounts.back(); - int64_t fileSize = dataProvider.Size(error); if (error.code != 0) @@ -176,6 +181,32 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro return false; } + int + currentOffset = readOffset(traceHeader); + + std::function + pushSegmentInfo; + + if (IsOffsetSorted()) + { + m_segmentInfoListsByOffset.emplace_back(); + auto& + offsetMap = m_segmentInfoListsByOffset.back(); + offsetMap.emplace(currentOffset, std::vector()); + + pushSegmentInfo = [this](int offset, const SEGYSegmentInfo& segmentInfo) { m_segmentInfoListsByOffset.back()[offset].push_back(segmentInfo); }; + } + else + { + m_segmentInfoLists.emplace_back(); + + pushSegmentInfo = [this](int offset, const SEGYSegmentInfo& segmentInfo) { m_segmentInfoLists.back().push_back(segmentInfo); }; + } + + m_traceCounts.emplace_back(); + auto& + traceCount = m_traceCounts.back(); + // If the sample count is not set in the binary header we try to find it from the first trace header if (m_sampleCount == 0) { @@ -188,7 +219,7 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro m_sampleIntervalMilliseconds = ReadFieldFromHeader(traceHeader, TraceHeader::SampleIntervalHeaderField, m_headerEndianness) / 1000.0; } - int64_t traceDataSize = (fileSize - TextualFileHeaderSize - BinaryFileHeaderSize); + const int64_t traceDataSize = (fileSize - TextualFileHeaderSize - BinaryFileHeaderSize); traceCount = traceDataSize / TraceByteSize(); @@ -217,6 +248,10 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro int readCount = 1; + int + testOffset, + nextOffset; + while (segmentInfo.m_traceStop != lastTrace) { dataProvider.Read(traceHeader, TextualFileHeaderSize + BinaryFileHeaderSize + trace * TraceByteSize(), TraceHeaderSize, error); @@ -227,9 +262,10 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro } readCount++; - int primaryKey = ReadFieldFromHeader(traceHeader, primaryKeyHeaderField, m_headerEndianness); + primaryKey = ReadFieldFromHeader(traceHeader, primaryKeyHeaderField, m_headerEndianness); + testOffset = readOffset(traceHeader); - if (primaryKey == segmentInfo.m_primaryKey) // expand current segment if the primary key matches + if (primaryKey == segmentInfo.m_primaryKey && (!IsOffsetSorted() || testOffset == currentOffset)) // expand current segment if the primary key matches { assert(trace > segmentInfo.m_traceStop); segmentInfo.m_traceStop = trace; @@ -241,17 +277,21 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro outsideTrace = trace; outsideBinInfo = readBinInfoFromHeader(traceHeader, binInfoHeaderFields, m_headerEndianness, 1); nextPrimaryKey = primaryKey; + nextOffset = testOffset; } if (outsideTrace == segmentInfo.m_traceStop + 1) // current segment is finished { - segmentInfos.push_back(segmentInfo); - int64_t segmentLength = segmentInfo.m_traceStop - segmentInfo.m_traceStart + 1; + pushSegmentInfo(currentOffset, segmentInfo); + + const int64_t + segmentLength = segmentInfo.m_traceStop - segmentInfo.m_traceStart + 1; // start a new segment segmentInfo = SEGYSegmentInfo(nextPrimaryKey, outsideTrace, outsideBinInfo); trace = std::min(lastTrace, outsideTrace + segmentLength); outsideTrace = 0, jump = 1; + currentOffset = nextOffset; } else if (outsideTrace == 0) // looking for a trace outside the current segment { @@ -270,7 +310,7 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro } // final segment in this file is finished - segmentInfos.push_back(segmentInfo); + pushSegmentInfo(testOffset, segmentInfo); } return true; diff --git a/src/SEGYUtils/SEGYUtils/SEGY.h b/src/SEGYUtils/SEGYUtils/SEGY.h index 6a0b57c7..5363039e 100644 --- a/src/SEGYUtils/SEGYUtils/SEGY.h +++ b/src/SEGYUtils/SEGYUtils/SEGY.h @@ -327,6 +327,7 @@ enum class SEGYType CDPGathers = 4, ShotGathers = 5, ReceiverGathers = 6, + PrestackOffsetSorted = 7 }; OPENVDS_EXPORT bool IsSEGYTypeUnbinned(SEGYType segyType); diff --git a/src/SEGYUtils/SEGYUtils/SEGYFileInfo.h b/src/SEGYUtils/SEGYUtils/SEGYFileInfo.h index 4af81353..0bbae4cf 100644 --- a/src/SEGYUtils/SEGYUtils/SEGYFileInfo.h +++ b/src/SEGYUtils/SEGYUtils/SEGYFileInfo.h @@ -93,6 +93,11 @@ struct SEGYFileInfo std::vector> m_segmentInfoLists; + // vector of per-file offset-vector maps for SEGYType::PrestackOffsetSorted + std::vector>> + m_segmentInfoListsByOffset; + + SEGY::HeaderField m_primaryKey, m_secondaryKey; @@ -108,7 +113,7 @@ struct SEGYFileInfo OPENVDS_EXPORT int TraceByteSize() const; - OPENVDS_EXPORT bool Scan(const std::vector& dataProviders, OpenVDS::Error &error, SEGY::HeaderField const &primaryKeyHeaderField, SEGY::HeaderField const &secondaryKeyHeaderField = SEGY::HeaderField(), SEGY::HeaderField const &startTimeHeaderField = SEGY::TraceHeader::StartTimeHeaderField, SEGYBinInfoHeaderFields const &binInfoHeaderFields = SEGYBinInfoHeaderFields::StandardHeaderFields()); + OPENVDS_EXPORT bool Scan(const std::vector& dataProviders, OpenVDS::Error &error, SEGY::HeaderField const &primaryKeyHeaderField, SEGY::HeaderField const &secondaryKeyHeaderField = SEGY::HeaderField(), SEGY::HeaderField const &startTimeHeaderField = SEGY::TraceHeader::StartTimeHeaderField, SEGY::HeaderField const& offsetHeaderField = SEGY::TraceHeader::OffsetHeaderField, SEGYBinInfoHeaderFields const &binInfoHeaderFields = SEGYBinInfoHeaderFields::StandardHeaderFields()); OPENVDS_EXPORT SEGYBinInfo readBinInfoFromHeader(const char* header, SEGYBinInfoHeaderFields const& headerFields, SEGY::Endianness endianness, int segmentTraceIndex) const; @@ -119,6 +124,8 @@ struct SEGYFileInfo OPENVDS_EXPORT bool HasGatherOffset() const; OPENVDS_EXPORT bool Is2D() const; + + OPENVDS_EXPORT bool IsOffsetSorted() const; }; #endif // SEGY_FILE_INFO_H diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 9b18b864..134526cc 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -218,15 +218,38 @@ SerializeSEGYFileInfo(SEGYFileInfo const& fileInfo, const int fileIndex) jsonFileInfo["primaryKey"] = SerializeSEGYHeaderField(fileInfo.m_primaryKey); jsonFileInfo["secondaryKey"] = SerializeSEGYHeaderField(fileInfo.m_secondaryKey); - Json::Value - jsonSegmentInfoArray(Json::ValueType::arrayValue); - - for (auto const& segmentInfo : fileInfo.m_segmentInfoLists[fileIndex]) + if (fileInfo.m_segyType == SEGY::SEGYType::PrestackOffsetSorted) { - jsonSegmentInfoArray.append(SerializeSEGYSegmentInfo(segmentInfo)); + Json::Value + jsonOffsetMap; + + for (const auto& entry : fileInfo.m_segmentInfoListsByOffset[fileIndex]) + { + Json::Value + jsonSegmentInfoArray(Json::ValueType::arrayValue); + + for (auto const& segmentInfo : entry.second) + { + jsonSegmentInfoArray.append(SerializeSEGYSegmentInfo(segmentInfo)); + } + + jsonOffsetMap[std::to_string(entry.first)] = jsonSegmentInfoArray; + } + + jsonFileInfo["segmentInfo"] = jsonOffsetMap; } + else + { + Json::Value + jsonSegmentInfoArray(Json::ValueType::arrayValue); - jsonFileInfo["segmentInfo"] = jsonSegmentInfoArray; + for (auto const& segmentInfo : fileInfo.m_segmentInfoLists[fileIndex]) + { + jsonSegmentInfoArray.append(SerializeSEGYSegmentInfo(segmentInfo)); + } + + jsonFileInfo["segmentInfo"] = jsonSegmentInfoArray; + } return jsonFileInfo; } @@ -1257,16 +1280,42 @@ parseSEGYFileInfoFile(const DataProvider& dataProvider, SEGYFileInfo& fileInfo, } fileInfo.m_traceCounts[fileIndex] = jsonFileInfo["traceCount"].asInt64(); - if (fileInfo.m_segmentInfoLists.size() <= fileIndex) + if (fileInfo.m_segyType == SEGY::SEGYType::PrestackOffsetSorted) { - fileInfo.m_segmentInfoLists.resize(fileIndex + 1); + if (fileInfo.m_segmentInfoListsByOffset.size() <= fileIndex) + { + fileInfo.m_segmentInfoListsByOffset.resize(fileIndex + 1); + } + auto& + offsetMap = fileInfo.m_segmentInfoListsByOffset[fileIndex]; + offsetMap.clear(); + + const auto& + jsonSegmentInfo = jsonFileInfo["segmentInfo"]; + for (Json::ValueConstIterator iter = jsonSegmentInfo.begin(); iter != jsonSegmentInfo.end(); ++iter) + { + const auto + offsetValue = std::stoi(iter.key().asString()); + auto& + segmentInfoList = offsetMap[offsetValue]; + for (const auto& jsonSegmentInfo : *iter) + { + segmentInfoList.push_back(segmentInfoFromJson(jsonSegmentInfo)); + } + } } - auto& - segmentInfo = fileInfo.m_segmentInfoLists[fileIndex]; - segmentInfo.clear(); - for (const auto& jsonSegmentInfo : jsonFileInfo["segmentInfo"]) + else { - segmentInfo.push_back(segmentInfoFromJson(jsonSegmentInfo)); + if (fileInfo.m_segmentInfoLists.size() <= fileIndex) + { + fileInfo.m_segmentInfoLists.resize(fileIndex + 1); + } + auto& + segmentInfo = fileInfo.m_segmentInfoLists[fileIndex]; + for (Json::Value jsonSegmentInfo : jsonFileInfo["segmentInfo"]) + { + segmentInfo.push_back(segmentInfoFromJson(jsonSegmentInfo)); + } } } catch (Json::Exception &e) @@ -1666,6 +1715,7 @@ main(int argc, char* argv[]) bool disablePersistentID = false; bool prestack = false; bool is2D = false; + bool isOffsetSorted = false; bool traceOrderByOffset = true; bool jsonOutput = false; bool help = false; @@ -1719,6 +1769,7 @@ main(int argc, char* argv[]) options.add_option("", "", "attribute-name", "The name of the primary VDS channel. The name may be Amplitude (default), Attribute, Depth, Probability, Time, Vavg, Vint, or Vrms", cxxopts::value(attributeName)->default_value(AMPLITUDE_ATTRIBUTE_NAME), ""); options.add_option("", "", "attribute-unit", "The units of the primary VDS channel. The unit name may be blank (default), ft, ft/s, Hz, m, m/s, ms, or s", cxxopts::value(attributeUnit), ""); options.add_option("", "", "2d", "Import 2D data.", cxxopts::value(is2D), ""); + options.add_option("", "", "offset-sorted", "Import prestack data sorted by trace header Offset value.", cxxopts::value(isOffsetSorted), ""); // TODO add option for turning off traceOrderByOffset options.add_option("", "h", "help", "Print this help information", cxxopts::value(help), ""); @@ -1827,6 +1878,10 @@ main(int argc, char* argv[]) SEGY::SEGYType segyType = SEGY::SEGYType::Poststack; + if (isOffsetSorted) + { + segyType = SEGY::SEGYType::PrestackOffsetSorted; + } if (is2D) { if (prestack) @@ -1947,7 +2002,8 @@ main(int argc, char* argv[]) } SEGY::HeaderField - startTimeHeaderField = g_traceHeaderFields["starttime"]; + startTimeHeaderField = g_traceHeaderFields["starttime"], + offsetHeaderField = g_traceHeaderFields["offset"]; OpenVDS::Error error; @@ -1987,7 +2043,7 @@ main(int argc, char* argv[]) fileInfo.m_persistentID = OpenVDS::HashCombiner(hash); } - bool success = fileInfo.Scan(dataProviders, error, primaryKeyHeaderField, secondaryKeyHeaderField, startTimeHeaderField, binInfoHeaderFields); + bool success = fileInfo.Scan(dataProviders, error, primaryKeyHeaderField, secondaryKeyHeaderField, startTimeHeaderField, offsetHeaderField, binInfoHeaderFields); if (!success) { -- GitLab From 35ebaf56b7a7c9c95e04738233eded589faeadfd Mon Sep 17 00:00:00 2001 From: Jim King Date: Mon, 27 Sep 2021 17:34:10 -0500 Subject: [PATCH 11/46] add test for offset sorted axis dimensions --- .../SEGYImport/pytests/test_offset_sorted.py | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/tools/SEGYImport/pytests/test_offset_sorted.py b/tests/tools/SEGYImport/pytests/test_offset_sorted.py index 3f8e1c2f..92681d59 100644 --- a/tests/tools/SEGYImport/pytests/test_offset_sorted.py +++ b/tests/tools/SEGYImport/pytests/test_offset_sorted.py @@ -132,6 +132,36 @@ def test_survey_coordinate_system(offset_sorted_executor): assert value.y == pytest.approx(3.02, abs=0.05) +def test_axes(offset_sorted_executor): + ex, output_vds = offset_sorted_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + assert 4 == layout.getDimensionality() + + assert 1876 == layout.getDimensionNumSamples(0) + assert 0.0 == layout.getDimensionMin(0) + assert 7500.0 == layout.getDimensionMax(0) + + assert 65 == layout.getDimensionNumSamples(1) + assert 1.0 == layout.getDimensionMin(1) + assert 65.0 == layout.getDimensionMax(1) + + assert 160 == layout.getDimensionNumSamples(2) + assert 1961.0 == layout.getDimensionMin(2) + assert 2120.0 == layout.getDimensionMax(2) + + assert 40 == layout.getDimensionNumSamples(3) + assert 4982.0 == layout.getDimensionMin(3) + assert 5021.0 == layout.getDimensionMax(3) + + def test_read_gather(offset_sorted_executor): ex, output_vds = offset_sorted_executor -- GitLab From eeb15eb0614112329821c438b55640947aebaec6 Mon Sep 17 00:00:00 2001 From: Jim King Date: Mon, 27 Sep 2021 17:36:24 -0500 Subject: [PATCH 12/46] mostly ported; probably need to pull in SEGYValueRangeEstimator to avoid code duplication in analyzeSegment/analyzePrimaryKey --- tools/SEGYImport/SEGYImport.cpp | 705 ++++++++++++++++++++++++++++---- 1 file changed, 628 insertions(+), 77 deletions(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 134526cc..41a1b66e 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -52,6 +52,7 @@ #include #include +#include #if defined(WIN32) #undef WIN32_LEAN_AND_MEAN // avoid warnings if defined on command line @@ -590,29 +591,56 @@ getOrderedSegmentListIndices(SEGYFileInfo const& fileInfo, size_t& globalTotalSe std::vector orderedListIndices; - size_t - longestList = 0; globalTotalSegments = 0; - for (size_t i = 0; i < fileInfo.m_segmentInfoLists.size(); ++i) + + if (fileInfo.IsOffsetSorted()) { - orderedListIndices.push_back(static_cast(i)); - globalTotalSegments += fileInfo.m_segmentInfoLists[i].size(); - if (fileInfo.m_segmentInfoLists[i].size() > fileInfo.m_segmentInfoLists[longestList].size()) + for (int i = 0; i < fileInfo.m_segmentInfoListsByOffset.size(); ++i) { - longestList = i; + orderedListIndices.push_back(i); + for (const auto& entry : fileInfo.m_segmentInfoListsByOffset[i]) + { + globalTotalSegments += entry.second.size(); + } } + + // don't need to check for ascending/descending primary key because the map will have offsets (keys) sorted in ascending order + + auto + comparator = [&](int i1, int i2) + { + const auto + & v1 = fileInfo.m_segmentInfoListsByOffset[i1], + & v2 = fileInfo.m_segmentInfoListsByOffset[i2]; + return (*v1.begin()).first < (*v2.begin()).first; + }; + std::sort(orderedListIndices.begin(), orderedListIndices.end(), comparator); } - const bool - isAscending = fileInfo.m_segmentInfoLists[longestList].front().m_binInfoStart.m_inlineNumber <= fileInfo.m_segmentInfoLists[longestList].back().m_binInfoStart.m_inlineNumber; - auto - comparator = [&](int i1, int i2) + else { - const auto - & v1 = fileInfo.m_segmentInfoLists[i1], - & v2 = fileInfo.m_segmentInfoLists[i2]; - return isAscending ? v1.front().m_binInfoStart.m_inlineNumber < v2.front().m_binInfoStart.m_inlineNumber : v2.front().m_binInfoStart.m_inlineNumber < v1.front().m_binInfoStart.m_inlineNumber; - }; - std::sort(orderedListIndices.begin(), orderedListIndices.end(), comparator); + size_t + longestList = 0; + for (size_t i = 0; i < fileInfo.m_segmentInfoLists.size(); ++i) + { + orderedListIndices.push_back(static_cast(i)); + globalTotalSegments += fileInfo.m_segmentInfoLists[i].size(); + if (fileInfo.m_segmentInfoLists[i].size() > fileInfo.m_segmentInfoLists[longestList].size()) + { + longestList = i; + } + } + const bool + isAscending = fileInfo.m_segmentInfoLists[longestList].front().m_binInfoStart.m_inlineNumber <= fileInfo.m_segmentInfoLists[longestList].back().m_binInfoStart.m_inlineNumber; + auto + comparator = [&](int i1, int i2) + { + const auto + & v1 = fileInfo.m_segmentInfoLists[i1], + & v2 = fileInfo.m_segmentInfoLists[i2]; + return isAscending ? v1.front().m_binInfoStart.m_inlineNumber < v2.front().m_binInfoStart.m_inlineNumber : v2.front().m_binInfoStart.m_inlineNumber < v1.front().m_binInfoStart.m_inlineNumber; + }; + std::sort(orderedListIndices.begin(), orderedListIndices.end(), comparator); + } return orderedListIndices; } @@ -620,6 +648,8 @@ getOrderedSegmentListIndices(SEGYFileInfo const& fileInfo, size_t& globalTotalSe SEGYSegmentInfo const& findRepresentativeSegment(SEGYFileInfo const& fileInfo, int& primaryStep, int& bestListIndex) { + assert(!fileInfo.IsOffsetSorted()); + // Since we give more weight to segments near the center of the data we need a sorted index of segment lists so that we can // traverse the lists in data order, instead of the arbitrary order given by the filename ordering. size_t @@ -686,6 +716,97 @@ findRepresentativeSegment(SEGYFileInfo const& fileInfo, int& primaryStep, int& b return fileInfo.m_segmentInfoLists[bestListIndex][bestIndex]; } +// Analog of findRepresentativeSegment for offset-sorted prestack +int +findRepresentativePrimaryKey(SEGYFileInfo const& fileInfo, int& primaryStep) +{ + // If primary keys are reverse-sorted in the file the structures we build below will hide that. + // For the computations we're doing in this method it's OK that we're not preserving the original order. + + // scan segment infos to get primary key values and total trace count per primary key + std::map + primaryKeyLengths; + for (const auto& offsetMap : fileInfo.m_segmentInfoListsByOffset) + { + for (const auto& entry : offsetMap) + { + for (const auto& segmentInfo : entry.second) + { + const auto + segmentLength = segmentInfo.m_traceStop - segmentInfo.m_traceStart + 1; + const auto + newSum = segmentLength + primaryKeyLengths[segmentInfo.m_primaryKey]; + primaryKeyLengths[segmentInfo.m_primaryKey] = newSum; + } + } + } + + // determine primary step + primaryStep = 0; + int + segmentPrimaryStep = 0, + previousPrimaryKey = 0, + firstPrimaryKey = 0, + lastPrimaryKey = 0; + bool + firstSegment = true; + for (const auto& entry : primaryKeyLengths) + { + if (firstSegment) + { + firstSegment = false; + previousPrimaryKey = entry.first; + firstPrimaryKey = entry.first; + lastPrimaryKey = entry.first; + } + else + { + lastPrimaryKey = entry.first; + + // Updating the primary step with the step for the previous segment intentionally ignores the step of the last segment since it can be anomalous + if (segmentPrimaryStep && (!primaryStep || std::abs(segmentPrimaryStep) < std::abs(primaryStep))) + { + primaryStep = segmentPrimaryStep; + } + + segmentPrimaryStep = entry.first - previousPrimaryKey; + previousPrimaryKey = entry.first; + } + } + + // If the primary step couldn't be determined, set it to the last step or 1 + primaryStep = primaryStep ? primaryStep : std::max(segmentPrimaryStep, 1); + + float + bestScore = 0.0f; + int + bestPrimaryKey = 0; + + for (const auto& entry : primaryKeyLengths) + { + const auto& + numTraces = entry.second; + + // index of this segment within the entirety of segments from all input files + const auto + factor = abs(static_cast(entry.first - firstPrimaryKey) / static_cast(lastPrimaryKey - firstPrimaryKey)); + + const auto + multiplier = 1.5f - abs(factor - 0.5f); // give 50% more importance to a segment in the middle of the dataset + + const auto + score = static_cast(numTraces) * multiplier; + + if (score > bestScore) + { + bestScore = score; + bestPrimaryKey = entry.first; + } + } + + return bestPrimaryKey; +} + void copySamples(const void* data, SEGY::BinaryHeader::DataSampleFormatCode dataSampleFormatCode, SEGY::Endianness endianness, float* target, int sampleStart, int sampleCount) { @@ -764,10 +885,8 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg // Determine fold and secondary step int gatherSecondaryKey = 0, gatherFold = 0, gatherSecondaryStep = 0; - bool - hasPreviousGatherOffset = false; - int - previousGatherOffset = 0; + std::set + offsetValues; for (int64_t trace = segmentInfo.m_traceStart; trace <= segmentInfo.m_traceStop; trace++) { @@ -823,25 +942,9 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg if (fileInfo.HasGatherOffset()) { - auto + const auto thisOffset = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness); - if (hasPreviousGatherOffset) - { - offsetStart = std::min(offsetStart, thisOffset); - offsetEnd = std::max(offsetEnd, thisOffset); - if (thisOffset != previousGatherOffset) - { - offsetStep = std::min(offsetStep, std::abs(thisOffset - previousGatherOffset)); - } - } - else - { - offsetStart = thisOffset; - offsetEnd = thisOffset; - offsetStep = INT32_MAX; - hasPreviousGatherOffset = true; - } - previousGatherOffset = thisOffset; + offsetValues.insert(thisOffset); } // Update value range @@ -880,13 +983,42 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg if (fileInfo.HasGatherOffset() && !fileInfo.IsUnbinned()) { + offsetStart = *offsetValues.begin(); + offsetEnd = *offsetValues.rbegin(); + + // iterate over offsetValues to get offset step + if (offsetValues.size() > 1) + { + auto + iter = offsetValues.begin(); + auto + previousOffset = *iter; + offsetStep = INT32_MAX; + while (++iter != offsetValues.end()) + { + const auto + thisOffset = *iter; + offsetStep = std::min(offsetStep, thisOffset - previousOffset); + previousOffset = thisOffset; + } + } + else + { + offsetStep = 1; + } + + // 'fold' calculated by counting traces in a gather may be incorrect if there are duplicate-key + // traces, so we'll ignore it and use the count of unique offset values + fold = static_cast(offsetValues.size()); + // check that offset start/end/step is consistent if (offsetStart + (fold - 1) * offsetStep != offsetEnd) { const auto - msgFormat = "The detected gather offset start/end/step of '{0}/{1}/{2}' is not consistent with the detected fold of '{3}'. This usually indicates using the wrong header format for the input dataset.\n."; - error.string = fmt::format(msgFormat, offsetStart, offsetEnd, offsetStep, fold); - error.code = -1; + msgFormat = "The detected gather offset start/end/step of '{0}/{1}/{2}' is not consistent with the detected fold of '{3}'. This may indicate an incorrect header format for Offset trace header."; + const auto + msg = fmt::format(msgFormat, offsetStart, offsetEnd, offsetStep, fold); + OpenVDS::printError(jsonOutput, "analyzeSegment", msg); return false; } @@ -913,6 +1045,312 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg return success; } +// Analog of analyzeSegment for offset-sorted SEGY +bool +analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo const& fileInfo, const int primaryKey, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, int& offsetStart, int& offsetEnd, int& offsetStep, bool jsonOutput, OpenVDS::Error& error) +{ + assert(fileInfo.IsOffsetSorted()); + + // TODO what kind of sanity checking should we do on offset-sorted segments? + //if (segmentInfo.m_traceStop < segmentInfo.m_traceStart) + //{ + // m_errorString = "Invalid parameters, a valid segment info should always have a stop trace greater than or equal to the start trace"; + // VDSImportSEGY::pLoggingInterface->LogError("VDSImportSEGY::analyzeSegment", m_errorString); + // return false; + //} + + bool + success = true; + + valueRange = OpenVDS::FloatRange(0.0f, 1.0f); + secondaryStep = 0; + fold = 1; + offsetStart = 0; + offsetEnd = 0; + offsetStep = 0; + + if (fileInfo.m_segmentInfoListsByOffset.empty() || fileInfo.m_segmentInfoListsByOffset.front().empty()) + { + // no data to analyze + return true; + } + + // get all offset values from all files + std::set + globalOffsets; + for (const auto& offsetSegments : fileInfo.m_segmentInfoListsByOffset) + { + for (const auto& entry : offsetSegments) + { + globalOffsets.insert(entry.first); + } + } + + fold = static_cast(globalOffsets.size()); + + offsetStart = *globalOffsets.begin(); + offsetEnd = *globalOffsets.rbegin(); + + // find offset step + if (globalOffsets.size() > 1) + { + auto + offsetsIter = ++globalOffsets.begin(); + offsetStep = abs(*offsetsIter - offsetStart); + auto + previousOffset = *offsetsIter; + while (++offsetsIter != globalOffsets.end()) + { + offsetStep = std::min(offsetStep, abs(*offsetsIter - previousOffset)); + previousOffset = *offsetsIter; + } + } + else + { + offsetStep = 1; + } + + // check that offset start/end/step is consistent + if (offsetStart + (fold - 1) * offsetStep != offsetEnd) + { + const auto + msgFormat = "The detected gather offset start/end/step of '{0}/{1}/{2}' is not consistent with the detected fold of '{3}'. This may indicate an incorrect header format for Offset trace header."; + const auto + msg = fmt::format(msgFormat, offsetStart, offsetEnd, offsetStep, fold); + OpenVDS::printError(jsonOutput, "analyzePrimaryKey", msg); + return false; + } + + + const int32_t + traceByteSize = fileInfo.TraceByteSize(); + + int64_t + traceBufferStart = 0, + traceBufferSize = 0; + + std::vector + buffer; + + // 1. Find all the segments in all files that match the primary key + // 2. Get a total trace count of all the segments; pass trace count to value range estimator + std::map>> + providerSegments; + int64_t + primaryKeyTraceCount = 0; + for (int fileIndex = 0; fileIndex < fileInfo.m_segmentInfoListsByOffset.size(); ++fileIndex) + { + const auto& + offsetSegments = fileInfo.m_segmentInfoListsByOffset[fileIndex]; + for (const auto& entry : offsetSegments) + { + for (const auto& segmentInfo : entry.second) + { + if (segmentInfo.m_primaryKey == primaryKey) + { + providerSegments[fileIndex].push_back(std::cref(segmentInfo)); + primaryKeyTraceCount += segmentInfo.m_traceStop - segmentInfo.m_traceStart + 1; + } + } + } + } + + auto + segyValueRangeEstimator = CreateSEGYValueRangeEstimator(fileInfo, primaryKeyTraceCount, valueRangePercentile); + + // For secondary step need to scan all traces in all segments to get a complete (hopefully) list of secondary keys, then figure out start/end/step. + // We need to get all secondary keys before doing any analysis because we may not encounter them in least-to-greatest order in the offset-sorted segments. + std::set + allSecondaryKeys; + + for (const auto& entry : providerSegments) + { + auto& + dataProvider = dataProviders[entry.first]; + auto& + primaryKeySegments = entry.second; + + for (const auto segmentInfoRef : primaryKeySegments) + { + const auto& + segmentInfo = segmentInfoRef.get(); + for (int64_t trace = segmentInfo.m_traceStart; trace <= segmentInfo.m_traceStop; trace++) + { + if (trace - traceBufferStart >= traceBufferSize) + { + traceBufferStart = trace; + traceBufferSize = (segmentInfo.m_traceStop - trace + 1) < 1000 ? int(segmentInfo.m_traceStop - trace + 1) : 1000; + + buffer.resize(traceByteSize * traceBufferSize); + + const int64_t + offset = SEGY::TextualFileHeaderSize + SEGY::BinaryFileHeaderSize + traceByteSize * traceBufferStart; + + OpenVDS::Error + error; + + success = dataProvider.Read(buffer.data(), offset, (int32_t)(traceByteSize * traceBufferSize), error); + + if (!success) + { + OpenVDS::printError(jsonOutput, "analyzePrimaryKey", error.string); + break; + } + } + + const void + * header = buffer.data() + traceByteSize * (trace - traceBufferStart), + * data = buffer.data() + traceByteSize * (trace - traceBufferStart) + SEGY::TraceHeaderSize; + + const int + tracePrimaryKey = SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness), + traceSecondaryKey = SEGY::ReadFieldFromHeader(header, fileInfo.m_secondaryKey, fileInfo.m_headerEndianness); + + allSecondaryKeys.insert(traceSecondaryKey); + + if (tracePrimaryKey != segmentInfo.m_primaryKey) + { + auto + warnString = fmt::format("Warning: trace {} has a primary key that doesn't match with the segment. This trace will be ignored.", segmentInfo.m_traceStart + trace); + OpenVDS::printWarning(jsonOutput, "analyzePrimaryKey", warnString); + continue; + } + + // Update value range + int + sampleMin = 0; + + segyValueRangeEstimator->UpdateValueRange((unsigned char*)data, sampleMin, fileInfo.m_sampleCount); + } + } + } + + // Determine the secondary step + if (allSecondaryKeys.size() <= 1) + { + secondaryStep = 1; + } + else if (allSecondaryKeys.size() == 2) + { + secondaryStep = *allSecondaryKeys.rbegin() - *allSecondaryKeys.begin(); + } + else + { + auto + secondaryIter = allSecondaryKeys.begin(); + auto + previousSecondary = *secondaryIter; + auto + workingStep = 0; + secondaryStep = 0; + while (++secondaryIter != allSecondaryKeys.end()) + { + // Updating the secondary step with the previously computed step intentionally ignores the step of the final secondary key since it can be anomalous + if (workingStep && (!secondaryStep || std::abs(workingStep) < std::abs(secondaryStep))) + { + secondaryStep = workingStep; + } + + workingStep = *secondaryIter - previousSecondary; + previousSecondary = *secondaryIter; + } + + // If the secondary step couldn't be determined, set it to the last step or 1 + secondaryStep = secondaryStep ? secondaryStep : std::max(workingStep, 1); + } + + // Set value range + segyValueRangeEstimator->GetValueRange(valueRange.Min, valueRange.Max); + return success; +} + +std::pair +findFirstLastInlineSegmentInfos(const SEGYFileInfo& fileInfo, const std::vector& orderedListIndices) +{ + if (fileInfo.IsOffsetSorted()) + { + // Find least/greatest inline numbers + // Create synthetic segment infos for least/greatest inline using least/greatest crossline numbers on those inlines + // If inline is ascending: return (least-inline segment info, greatest-inline segment info) + // else: return (greatest-inline segment info, least-inline segment info) + + int + minInline = INT32_MAX, + maxInline = -INT32_MAX; + const std::vector + * pLongestList = nullptr; + for (const auto& offsetMap : fileInfo.m_segmentInfoListsByOffset) + { + for (const auto& entry : offsetMap) + { + if (pLongestList == nullptr || (entry.second.size() > pLongestList->size())) + { + pLongestList = &entry.second; + } + for (const auto& segmentInfo : entry.second) + { + minInline = std::min(minInline, segmentInfo.m_primaryKey); + maxInline = std::max(maxInline, segmentInfo.m_primaryKey); + } + } + } + + SEGYSegmentInfo + minSegmentInfo{}, + maxSegmentInfo{}; + minSegmentInfo.m_primaryKey = minInline; + minSegmentInfo.m_binInfoStart = { minInline, INT32_MAX, 0.0, 0.0 }; + minSegmentInfo.m_binInfoStop = { minInline, -INT32_MAX, 0.0, 0.0 }; + maxSegmentInfo.m_primaryKey = maxInline; + maxSegmentInfo.m_binInfoStart = { maxInline, INT32_MAX, 0.0, 0.0 }; + maxSegmentInfo.m_binInfoStop = { maxInline, -INT32_MAX, 0.0, 0.0 }; + + for (const auto& offsetMap : fileInfo.m_segmentInfoListsByOffset) + { + for (const auto& entry : offsetMap) + { + for (const auto& segmentInfo : entry.second) + { + if (segmentInfo.m_primaryKey == minInline) + { + if (segmentInfo.m_binInfoStart.m_crosslineNumber < minSegmentInfo.m_binInfoStart.m_crosslineNumber) + { + minSegmentInfo.m_binInfoStart = segmentInfo.m_binInfoStart; + } + if (segmentInfo.m_binInfoStop.m_crosslineNumber > minSegmentInfo.m_binInfoStop.m_crosslineNumber) + { + minSegmentInfo.m_binInfoStop = segmentInfo.m_binInfoStop; + } + } + if (segmentInfo.m_primaryKey == maxInline) + { + if (segmentInfo.m_binInfoStart.m_crosslineNumber < maxSegmentInfo.m_binInfoStart.m_crosslineNumber) + { + maxSegmentInfo.m_binInfoStart = segmentInfo.m_binInfoStart; + } + if (segmentInfo.m_binInfoStop.m_crosslineNumber > maxSegmentInfo.m_binInfoStop.m_crosslineNumber) + { + maxSegmentInfo.m_binInfoStop = segmentInfo.m_binInfoStop; + } + } + } + } + } + + const bool + isAscending = pLongestList->front().m_primaryKey <= pLongestList->back().m_primaryKey; + + if (isAscending) + { + return { minSegmentInfo, maxSegmentInfo }; + } + return { maxSegmentInfo, minSegmentInfo }; + } + + // else it's not offset-sorted + return { std::cref(fileInfo.m_segmentInfoLists[orderedListIndices.front()].front()), std::cref(fileInfo.m_segmentInfoLists[orderedListIndices.back()].back()) }; +} + double ConvertDistance(SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, double distance) { @@ -972,7 +1410,7 @@ createSEGYMetadata(DataProvider &dataProvider, SEGYFileInfo const &fileInfo, Ope void createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, std::string const &crsWkt, OpenVDS::MetadataContainer& metadataContainer) { - if (fileInfo.m_segmentInfoLists.empty()) return; + if (fileInfo.m_segmentInfoLists.empty() && fileInfo.m_segmentInfoListsByOffset.empty()) return; if (fileInfo.Is2D()) return; @@ -987,11 +1425,9 @@ createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryH // Determine crossline spacing int countedCrosslineSpacings = 0; - for (size_t listIndex : orderedListIndices) + auto + crosslineUpdater = [&countedCrosslineSpacings, &crosslineSpacing](const std::vector& segmentInfoList) { - const auto - & segmentInfoList = fileInfo.m_segmentInfoLists[listIndex]; - for (auto const& segmentInfo : segmentInfoList) { int crosslineCount = segmentInfo.m_binInfoStop.m_crosslineNumber - segmentInfo.m_binInfoStart.m_crosslineNumber; @@ -1008,6 +1444,24 @@ createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryH countedCrosslineSpacings++; } + }; + + for (size_t listIndex : orderedListIndices) + { + if (fileInfo.IsOffsetSorted()) + { + for (const auto& offsetSegmentMap : fileInfo.m_segmentInfoListsByOffset) + { + for (const auto& entry : offsetSegmentMap) + { + crosslineUpdater(entry.second); + } + } + } + else + { + crosslineUpdater(fileInfo.m_segmentInfoLists[listIndex]); + } } if (countedCrosslineSpacings > 0) @@ -1022,8 +1476,11 @@ createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryH } // Determine inline spacing - SEGYSegmentInfo const& firstSegmentInfo = fileInfo.m_segmentInfoLists[orderedListIndices.front()].front(); - SEGYSegmentInfo const& lastSegmentInfo = fileInfo.m_segmentInfoLists[orderedListIndices.back()].back(); + const auto + firstLastSegmentInfos = findFirstLastInlineSegmentInfos(fileInfo, orderedListIndices); + const SEGYSegmentInfo + & firstSegmentInfo = firstLastSegmentInfos.first, + & lastSegmentInfo = firstLastSegmentInfos.second; if (firstSegmentInfo.m_binInfoStart.m_inlineNumber != lastSegmentInfo.m_binInfoStart.m_inlineNumber) { @@ -1415,24 +1872,58 @@ createAxisDescriptors(SEGYFileInfo const& fileInfo, SEGY::SampleUnits sampleUnit axisDescriptors.emplace_back(fold, VDS_DIMENSION_TRACE_NAME(VDS_DIMENSION_TRACE_SORT_OFFSET), KNOWNMETADATA_UNIT_UNITLESS, 1.0f, static_cast(fold)); } - int minInline = fileInfo.m_segmentInfoLists[0][0].m_binInfoStart.m_inlineNumber, - minCrossline = fileInfo.m_segmentInfoLists[0][0].m_binInfoStart.m_crosslineNumber, - maxInline = minInline, + int + minInline, + minCrossline, + maxInline, + maxCrossline; + + auto updateMinMaxes = [&minInline, &minCrossline, &maxInline, &maxCrossline](const SEGYSegmentInfo& segmentInfo) + { + minInline = std::min(minInline, segmentInfo.m_binInfoStart.m_inlineNumber); + minInline = std::min(minInline, segmentInfo.m_binInfoStop.m_inlineNumber); + maxInline = std::max(maxInline, segmentInfo.m_binInfoStart.m_inlineNumber); + maxInline = std::max(maxInline, segmentInfo.m_binInfoStop.m_inlineNumber); + + minCrossline = std::min(minCrossline, segmentInfo.m_binInfoStart.m_crosslineNumber); + minCrossline = std::min(minCrossline, segmentInfo.m_binInfoStop.m_crosslineNumber); + maxCrossline = std::max(maxCrossline, segmentInfo.m_binInfoStart.m_crosslineNumber); + maxCrossline = std::max(maxCrossline, segmentInfo.m_binInfoStop.m_crosslineNumber); + }; + + if (fileInfo.IsOffsetSorted()) + { + const auto& + firstSegment = fileInfo.m_segmentInfoListsByOffset.front().begin()->second.front(); + minInline = firstSegment.m_binInfoStart.m_inlineNumber; + minCrossline = firstSegment.m_binInfoStart.m_crosslineNumber; + maxInline = minInline; maxCrossline = minCrossline; - for (const auto& segmentInfoList : fileInfo.m_segmentInfoLists) + for (const auto& offsetSegments : fileInfo.m_segmentInfoListsByOffset) + { + for (const auto& entry : offsetSegments) + { + for (const auto& segmentInfo : entry.second) + { + updateMinMaxes(segmentInfo); + } + } + } + } + else { - for (const auto& segmentInfo : segmentInfoList) + minInline = fileInfo.m_segmentInfoLists[0][0].m_binInfoStart.m_inlineNumber; + minCrossline = fileInfo.m_segmentInfoLists[0][0].m_binInfoStart.m_crosslineNumber; + maxInline = minInline; + maxCrossline = minCrossline; + + for (const auto& segmentInfoList : fileInfo.m_segmentInfoLists) { - minInline = std::min(minInline, segmentInfo.m_binInfoStart.m_inlineNumber); - minInline = std::min(minInline, segmentInfo.m_binInfoStop.m_inlineNumber); - maxInline = std::max(maxInline, segmentInfo.m_binInfoStart.m_inlineNumber); - maxInline = std::max(maxInline, segmentInfo.m_binInfoStop.m_inlineNumber); - - minCrossline = std::min(minCrossline, segmentInfo.m_binInfoStart.m_crosslineNumber); - minCrossline = std::min(minCrossline, segmentInfo.m_binInfoStop.m_crosslineNumber); - maxCrossline = std::max(maxCrossline, segmentInfo.m_binInfoStart.m_crosslineNumber); - maxCrossline = std::max(maxCrossline, segmentInfo.m_binInfoStop.m_crosslineNumber); + for (const auto& segmentInfo : segmentInfoList) + { + updateMinMaxes(segmentInfo); + } } } @@ -1656,7 +2147,7 @@ TraceIndex2DToEnsembleNumber(TraceInfo2DManager * pTraceInfo2DManager, int trace int SecondaryKeyDimension(const SEGYFileInfo& fileInfo) { - if (fileInfo.m_segyType == SEGY::SEGYType::Prestack || fileInfo.m_segyType == SEGY::SEGYType::Prestack2D) + if (fileInfo.m_segyType == SEGY::SEGYType::Prestack || fileInfo.m_segyType == SEGY::SEGYType::Prestack2D || fileInfo.m_segyType == SEGY::SEGYType::PrestackOffsetSorted) { return 2; } @@ -2201,11 +2692,23 @@ main(int argc, char* argv[]) ? std::unique_ptr(new TraceInfo2DManagerImpl(fileInfo.m_headerEndianness, scale, g_traceHeaderFields["coordinatescale"], g_traceHeaderFields["sourcexcoordinate"], g_traceHeaderFields["sourceycoordinate"], g_traceHeaderFields["starttime"], g_traceHeaderFields["energysourcepointnumber"], g_traceHeaderFields["ensemblenumber"])) : std::unique_ptr(new TraceInfo2DManager()); - int fileIndex; - const auto& representativeSegment = findRepresentativeSegment(fileInfo, primaryStep, fileIndex); - analyzeSegment(dataProviders[fileIndex], fileInfo, representativeSegment, valueRangePercentile, valueRange, fold, secondaryStep, segyType, offsetStart, offsetEnd, offsetStep, traceInfo2DManager.get(), jsonOutput, error); + bool + analyzeResult = false; + if (fileInfo.IsOffsetSorted()) + { + const auto + primaryKey = findRepresentativePrimaryKey(fileInfo, primaryStep); + analyzeResult = analyzePrimaryKey(dataProviders, fileInfo, primaryKey, valueRangePercentile, valueRange, fold, secondaryStep, offsetStart, offsetEnd, offsetStep, jsonOutput, error); + } + else + { + int fileIndex; + const auto& representativeSegment = findRepresentativeSegment(fileInfo, primaryStep, fileIndex); + assert(fileIndex < dataProviders.size()); + analyzeResult = analyzeSegment(dataProviders[fileIndex], fileInfo, representativeSegment, valueRangePercentile, valueRange, fold, secondaryStep, segyType, offsetStart, offsetEnd, offsetStep, traceInfo2DManager.get(), jsonOutput, error); + } - if (error.code != 0) + if (!analyzeResult || error.code != 0) { OpenVDS::printError(jsonOutput, "SEGY", error.string); return EXIT_FAILURE; @@ -2415,6 +2918,10 @@ main(int argc, char* argv[]) { writeDimensionGroup = OpenVDS::DimensionsND::Dimensions_013; } + else if (fileInfo.IsOffsetSorted()) + { + writeDimensionGroup = OpenVDS::DimensionsND::Dimensions_023; + } auto amplitudeAccessor = accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, 0, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create); auto traceFlagAccessor = accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, 1, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create); @@ -2454,7 +2961,9 @@ main(int argc, char* argv[]) const auto perFileMemoryLimit = dvmMemoryLimit / dataProviders.size(); - for (size_t fileIndex = 0; fileIndex < fileInfo.m_segmentInfoLists.size(); ++fileIndex) + assert(dataProviders.size() == fileInfo.IsOffsetSorted() ? fileInfo.m_segmentInfoListsByOffset.size() : fileInfo.m_segmentInfoLists.size()); + + for (size_t fileIndex = 0; fileIndex < dataProviders.size(); ++fileIndex) { dataViewManagers.emplace_back(std::make_shared(dataProviders[fileIndex], perFileMemoryLimit)); traceDataManagers.emplace_back(dataViewManagers.back(), 128, traceByteSize, fileInfo.m_traceCounts[fileIndex]); @@ -2490,8 +2999,25 @@ main(int argc, char* argv[]) for (size_t fileIndex = 0; fileIndex < fileInfo.m_segmentInfoLists.size(); ++fileIndex) { + const int + offetSortedOffsetValue = fileInfo.IsOffsetSorted() ? offsetStart + offsetStep * chunkInfo.min[1] : 0; + + if (fileInfo.IsOffsetSorted()) + { + // for offset sorted chunks should only have a single offset (i.e. dim 02 or 023) + assert(chunkInfo.min[1] + 1 == chunkInfo.max[1]); + + // If the calculated offset value isn't present in this file's map then skip + if (fileInfo.m_segmentInfoListsByOffset[fileIndex].find(offetSortedOffsetValue) == fileInfo.m_segmentInfoListsByOffset[fileIndex].end()) + { + continue; + } + } + auto& - segmentInfoList = fileInfo.m_segmentInfoLists[fileIndex]; + segmentInfoList = fileInfo.IsOffsetSorted() + ? fileInfo.m_segmentInfoListsByOffset[fileIndex][offetSortedOffsetValue] + : fileInfo.m_segmentInfoLists[fileIndex]; // does this file have any segments in the primary key range? bool hasSegments; @@ -2566,9 +3092,15 @@ main(int argc, char* argv[]) auto &chunkInfo = chunkInfos[chunk]; + const int + offsetSortedOffsetValue = fileInfo.IsOffsetSorted() ? offsetStart + offsetStep * chunkInfo.min[1] : 0; + // if we've crossed to a new inline then trim the trace page cache if (chunk > 0) { + + // TODO alternate version for offset sorted + const auto& previousChunkInfo = chunkInfos[chunk - 1]; for (size_t chunkFileIndex = 0; chunkFileIndex < dataProviders.size(); ++chunkFileIndex) @@ -2641,7 +3173,7 @@ main(int argc, char* argv[]) const auto upperSegmentIndex = std::get<1>(result->second); const auto& - segmentInfo = fileInfo.m_segmentInfoLists[fileIndex]; + segmentInfo = fileInfo.IsOffsetSorted() ? fileInfo.m_segmentInfoListsByOffset[fileIndex][offsetSortedOffsetValue] : fileInfo.m_segmentInfoLists[fileIndex]; auto& traceDataManager = traceDataManagers[fileIndex]; @@ -2723,14 +3255,33 @@ main(int argc, char* argv[]) if (fileInfo.Is4D() && traceOrderByOffset) { - // recalculate tertiaryIndex from header offset value - const auto - thisOffset = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness); - tertiaryIndex = (thisOffset - offsetStart) / offsetStep; + if (fileInfo.IsOffsetSorted()) + { + // force the tertiary index to the index specified for the current chunk + tertiaryIndex = chunkInfo.min[1]; + + assert(offsetSortedOffsetValue == SEGY::ReadFieldFromHeader(header, offsetHeaderField, fileInfo.m_headerEndianness)); + } + else + { + if (traceOrderByOffset) + { + // recalculate tertiaryIndex from header offset value + const auto + thisOffset = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness); + tertiaryIndex = (thisOffset - offsetStart) / offsetStep; + + // sanity check the new index + if (tertiaryIndex < 0 || tertiaryIndex >= fold) + { + continue; + } + } + } - // sanity check the new index - if (tertiaryIndex < 0 || tertiaryIndex >= fold) + if (tertiaryIndex < chunkInfo.min[1] || tertiaryIndex >= chunkInfo.max[1]) { + // the current gather trace number is not within the request bounds continue; } } -- GitLab From 16ec2eb3f70c778ea11964a1943c2606b06c445f Mon Sep 17 00:00:00 2001 From: Jim King Date: Tue, 28 Sep 2021 13:22:44 -0500 Subject: [PATCH 13/46] make unsafe expression less unsafe --- tools/SEGYImport/SEGYImport.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 41a1b66e..cc408661 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -2961,7 +2961,7 @@ main(int argc, char* argv[]) const auto perFileMemoryLimit = dvmMemoryLimit / dataProviders.size(); - assert(dataProviders.size() == fileInfo.IsOffsetSorted() ? fileInfo.m_segmentInfoListsByOffset.size() : fileInfo.m_segmentInfoLists.size()); + assert(dataProviders.size() == (fileInfo.IsOffsetSorted() ? fileInfo.m_segmentInfoListsByOffset.size() : fileInfo.m_segmentInfoLists.size())); for (size_t fileIndex = 0; fileIndex < dataProviders.size(); ++fileIndex) { -- GitLab From b9a408df625ac23caa8b667202fe8e38ca2a6e7a Mon Sep 17 00:00:00 2001 From: Jim King Date: Tue, 28 Sep 2021 18:08:48 -0500 Subject: [PATCH 14/46] fix tests --- .../SEGYImport/pytests/test_offset_sorted.py | 51 ++++++++++--------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_offset_sorted.py b/tests/tools/SEGYImport/pytests/test_offset_sorted.py index 92681d59..1488e6ad 100644 --- a/tests/tools/SEGYImport/pytests/test_offset_sorted.py +++ b/tests/tools/SEGYImport/pytests/test_offset_sorted.py @@ -81,9 +81,10 @@ def test_produce_status(offset_sorted_executor): with openvds.open(output_vds.filename, "") as handle: access_manager = openvds.getAccessManager(handle) - assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_02) != openvds.VDSProduceStatus.Unavailable assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_023) != openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_02) == openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_01) == openvds.VDSProduceStatus.Unavailable assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_03) == openvds.VDSProduceStatus.Unavailable assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_012) == openvds.VDSProduceStatus.Unavailable @@ -116,20 +117,20 @@ def test_survey_coordinate_system(offset_sorted_executor): value = layout.getMetadataDoubleVector2( openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, openvds.KnownMetadata.surveyCoordinateSystemOrigin().name) - assert value.x == pytest.approx(431961.93, abs=0.05) - assert value.y == pytest.approx(6348554.9, abs=0.05) + assert value[0] == pytest.approx(431961.93, abs=0.05) + assert value[1] == pytest.approx(6348554.9, abs=0.05) value = layout.getMetadataDoubleVector2( openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().category, openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().name) - assert value.x == pytest.approx(6.05, abs=0.05) - assert value.y == pytest.approx(24.26, abs=0.05) + assert value[0] == pytest.approx(6.05, abs=0.05) + assert value[1] == pytest.approx(24.26, abs=0.05) value = layout.getMetadataDoubleVector2( openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().category, openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().name) - assert value.x == pytest.approx(-12.13, abs=0.05) - assert value.y == pytest.approx(3.02, abs=0.05) + assert value[0] == pytest.approx(-12.13, abs=0.05) + assert value[1] == pytest.approx(3.02, abs=0.05) def test_axes(offset_sorted_executor): @@ -161,6 +162,9 @@ def test_axes(offset_sorted_executor): assert 4982.0 == layout.getDimensionMin(3) assert 5021.0 == layout.getDimensionMax(3) + assert pytest.approx(-0.137178, abs=1.0e-05) == layout.getChannelValueRangeMin(0) + assert pytest.approx(0.141104, abs=1.0e-05) == layout.getChannelValueRangeMax(0) + def test_read_gather(offset_sorted_executor): ex, output_vds = offset_sorted_executor @@ -186,12 +190,12 @@ def test_read_gather(offset_sorted_executor): (dim0_size, dim1_size, crossline_index + 1, inline_index + 1, 1, 1), channel=0, format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, - dimensionsND=openvds.DimensionsND.Dimensions_02) + dimensionsND=openvds.DimensionsND.Dimensions_023) trace_flag_request = access_manager.requestVolumeSubset((0, 0, crossline_index, inline_index, 0, 0), (1, dim1_size, crossline_index + 1, inline_index + 1, 1, 1), channel=trace_channel, format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, - dimensionsND=openvds.DimensionsND.Dimensions_02) + dimensionsND=openvds.DimensionsND.Dimensions_023) data = request.data.reshape(dim1_size, dim0_size) trace_flag_data = trace_flag_request.data @@ -259,40 +263,43 @@ def test_compare_with_conventional_sorted(offset_sorted_executor, conventional_s request = access_manager.requestVolumeSubset(voxel_min, voxel_max, channel=0, format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, - dimensionsND=openvds.DimensionsND.Dimensions_02) + dimensionsND=openvds.DimensionsND.Dimensions_023) request_conv = access_manager_conv.requestVolumeSubset(voxel_min, voxel_max, channel=0, format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, - dimensionsND=openvds.DimensionsND.Dimensions_01) + dimensionsND=openvds.DimensionsND.Dimensions_012) trace_flag_request = access_manager.requestVolumeSubset(voxel_min, trace_voxel_max, channel=trace_channel, format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, - dimensionsND=openvds.DimensionsND.Dimensions_02) + dimensionsND=openvds.DimensionsND.Dimensions_023) trace_flag_request_conv = access_manager_conv.requestVolumeSubset(voxel_min, trace_voxel_max, channel=trace_channel_conv, format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, - dimensionsND=openvds.DimensionsND.Dimensions_01) + dimensionsND=openvds.DimensionsND.Dimensions_012) offset_request = access_manager.requestVolumeSubset(voxel_min, trace_voxel_max, channel=offset_channel, format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, - dimensionsND=openvds.DimensionsND.Dimensions_02) + dimensionsND=openvds.DimensionsND.Dimensions_023) offset_request_conv = access_manager_conv.requestVolumeSubset(voxel_min, trace_voxel_max, channel=offset_channel_conv, format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, - dimensionsND=openvds.DimensionsND.Dimensions_01) + dimensionsND=openvds.DimensionsND.Dimensions_012) sample_data = request.data.reshape(fold, num_z) sample_data_conv = request_conv.data.reshape(fold, num_z) for trace_index in range(fold): - assert trace_flag_request.data[trace_index] == trace_flag_request_conv.data[trace_index] - assert offset_request.data[trace_index] == offset_request_conv.data[trace_index] + assert trace_flag_request.data[trace_index] == trace_flag_request_conv.data[trace_index],\ + f"trace index {trace_index} xl index {crossline_index} il index {inline_index}" + assert offset_request.data[trace_index] == offset_request_conv.data[trace_index],\ + f"trace index {trace_index} xl index {crossline_index} il index {inline_index}" if trace_flag_request.data[trace_index] != 0: for sample_index in range(num_z): - assert sample_data[trace_index, sample_index] == sample_data_conv[trace_index, sample_index] + assert sample_data[trace_index, sample_index] == sample_data_conv[trace_index, sample_index],\ + f"sample index {sample_index} trace index {trace_index} xl index {crossline_index} il index {inline_index}" def test_create_scan_file(offset_sorted_segy, output_scan): @@ -328,16 +335,10 @@ def test_with_scan_file(offset_sorted_segy, offset_sorted_scan, output_vds): result = ex.run() assert result == 0, ex.output() - assert Path(output_scan.filename).exists() + assert Path(output_vds.filename).exists() # check produce status as a quick test for output VDS validity with openvds.open(output_vds.filename, "") as handle: access_manager = openvds.getAccessManager(handle) - assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_02) != openvds.VDSProduceStatus.Unavailable assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_023) != openvds.VDSProduceStatus.Unavailable - - assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_01) == openvds.VDSProduceStatus.Unavailable - assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_03) == openvds.VDSProduceStatus.Unavailable - assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_012) == openvds.VDSProduceStatus.Unavailable - assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_013) == openvds.VDSProduceStatus.Unavailable -- GitLab From 8d0e8201ce0834bfd4c590b5eb442677147a9d13 Mon Sep 17 00:00:00 2001 From: Jim King Date: Tue, 28 Sep 2021 18:09:07 -0500 Subject: [PATCH 15/46] get offset sorted import mostly working --- src/SEGYUtils/SEGYFileInfo.cpp | 2 +- tools/SEGYImport/SEGYImport.cpp | 121 +++++++++++++++++++------------- 2 files changed, 75 insertions(+), 48 deletions(-) diff --git a/src/SEGYUtils/SEGYFileInfo.cpp b/src/SEGYUtils/SEGYFileInfo.cpp index 179bf826..70f5f88d 100644 --- a/src/SEGYUtils/SEGYFileInfo.cpp +++ b/src/SEGYUtils/SEGYFileInfo.cpp @@ -45,7 +45,7 @@ bool SEGYFileInfo::HasGatherOffset() const { // TODO unbinned gathers may be angle gathers? - return m_segyType == SEGYType::Prestack || m_segyType == SEGYType::Prestack2D || IsUnbinned(); + return m_segyType == SEGYType::Prestack || m_segyType == SEGYType::Prestack2D || IsOffsetSorted() || IsUnbinned(); } bool diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index cc408661..39ecc8d3 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -847,6 +847,43 @@ copySamples(const void* data, SEGY::BinaryHeader::DataSampleFormatCode dataSampl } } +void +updateValueRangeHeaps(const SEGYFileInfo& fileInfo, const int heapSizeMax, std::vector& minHeap, std::vector& maxHeap, const void * data, std::vector& sampleBuffer) +{ + if (fileInfo.m_dataSampleFormatCode == SEGY::BinaryHeader::DataSampleFormatCode::IBMFloat || fileInfo.m_dataSampleFormatCode == SEGY::BinaryHeader::DataSampleFormatCode::IEEEFloat) + { + sampleBuffer.resize(fileInfo.m_sampleCount); + copySamples(data, fileInfo.m_dataSampleFormatCode, fileInfo.m_headerEndianness, sampleBuffer.data(), 0, fileInfo.m_sampleCount); + + for (int sample = 0; sample < fileInfo.m_sampleCount; sample++) + { + if (int(minHeap.size()) < heapSizeMax) + { + minHeap.push_back(sampleBuffer[sample]); + std::push_heap(minHeap.begin(), minHeap.end(), std::less()); + } + else if (sampleBuffer[sample] < minHeap[0]) + { + std::pop_heap(minHeap.begin(), minHeap.end(), std::less()); + minHeap.back() = sampleBuffer[sample]; + std::push_heap(minHeap.begin(), minHeap.end(), std::less()); + } + + if (int(maxHeap.size()) < heapSizeMax) + { + maxHeap.push_back(sampleBuffer[sample]); + std::push_heap(maxHeap.begin(), maxHeap.end(), std::greater()); + } + else if (sampleBuffer[sample] > maxHeap[0]) + { + std::pop_heap(maxHeap.begin(), maxHeap.end(), std::greater()); + maxHeap.back() = sampleBuffer[sample]; + std::push_heap(maxHeap.begin(), maxHeap.end(), std::greater()); + } + } + } +} + bool analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSegmentInfo const& segmentInfo, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, const SEGY::SEGYType segyType, int& offsetStart, int& offsetEnd, int& offsetStep, TraceInfo2DManager * pTraceInfo2DManager, bool jsonOutput, OpenVDS::Error& error) { @@ -879,8 +916,7 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg maxHeap.reserve(heapSizeMax); // Allocate sample buffer for converting samples to float - std::unique_ptr sampleBuffer(new float[fileInfo.m_sampleCount]); - float* samples = sampleBuffer.get(); + std::vector sampleBuffer(fileInfo.m_sampleCount); // Determine fold and secondary step int gatherSecondaryKey = 0, gatherFold = 0, gatherSecondaryStep = 0; @@ -948,37 +984,7 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg } // Update value range - if (fileInfo.m_dataSampleFormatCode == SEGY::BinaryHeader::DataSampleFormatCode::IBMFloat || fileInfo.m_dataSampleFormatCode == SEGY::BinaryHeader::DataSampleFormatCode::IEEEFloat) - { - copySamples(data, fileInfo.m_dataSampleFormatCode, fileInfo.m_headerEndianness, samples, 0, fileInfo.m_sampleCount); - - for (int sample = 0; sample < fileInfo.m_sampleCount; sample++) - { - if (int(minHeap.size()) < heapSizeMax) - { - minHeap.push_back(samples[sample]); - std::push_heap(minHeap.begin(), minHeap.end(), std::less()); - } - else if (samples[sample] < minHeap[0]) - { - std::pop_heap(minHeap.begin(), minHeap.end(), std::less()); - minHeap.back() = samples[sample]; - std::push_heap(minHeap.begin(), minHeap.end(), std::less()); - } - - if (int(maxHeap.size()) < heapSizeMax) - { - maxHeap.push_back(samples[sample]); - std::push_heap(maxHeap.begin(), maxHeap.end(), std::greater()); - } - else if (samples[sample] > maxHeap[0]) - { - std::pop_heap(maxHeap.begin(), maxHeap.end(), std::greater()); - maxHeap.back() = samples[sample]; - std::push_heap(maxHeap.begin(), maxHeap.end(), std::greater()); - } - } - } + updateValueRangeHeaps(fileInfo, heapSizeMax, minHeap, maxHeap, data, sampleBuffer); } if (fileInfo.HasGatherOffset() && !fileInfo.IsUnbinned()) @@ -1118,6 +1124,8 @@ analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo c const auto msg = fmt::format(msgFormat, offsetStart, offsetEnd, offsetStep, fold); OpenVDS::printError(jsonOutput, "analyzePrimaryKey", msg); + error.code = 1; + error.string = msg; return false; } @@ -1155,8 +1163,16 @@ analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo c } } - auto - segyValueRangeEstimator = CreateSEGYValueRangeEstimator(fileInfo, primaryKeyTraceCount, valueRangePercentile); + // Create min/max heaps for determining value range + int heapSizeMax = int(((100.0f - valueRangePercentile) / 100.0f) * primaryKeyTraceCount * fileInfo.m_sampleCount / 2) + 1; + + std::vector minHeap, maxHeap; + + minHeap.reserve(heapSizeMax); + maxHeap.reserve(heapSizeMax); + + // Allocate sample buffer for converting samples to float + std::vector sampleBuffer(fileInfo.m_sampleCount); // For secondary step need to scan all traces in all segments to get a complete (hopefully) list of secondary keys, then figure out start/end/step. // We need to get all secondary keys before doing any analysis because we may not encounter them in least-to-greatest order in the offset-sorted segments. @@ -1187,9 +1203,9 @@ analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo c offset = SEGY::TextualFileHeaderSize + SEGY::BinaryFileHeaderSize + traceByteSize * traceBufferStart; OpenVDS::Error - error; + readError; - success = dataProvider.Read(buffer.data(), offset, (int32_t)(traceByteSize * traceBufferSize), error); + success = dataProvider.Read(buffer.data(), offset, (int32_t)(traceByteSize * traceBufferSize), readError); if (!success) { @@ -1217,10 +1233,7 @@ analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo c } // Update value range - int - sampleMin = 0; - - segyValueRangeEstimator->UpdateValueRange((unsigned char*)data, sampleMin, fileInfo.m_sampleCount); + updateValueRangeHeaps(fileInfo, heapSizeMax, minHeap, maxHeap, data, sampleBuffer); } } } @@ -1260,7 +1273,20 @@ analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo c } // Set value range - segyValueRangeEstimator->GetValueRange(valueRange.Min, valueRange.Max); + if (!minHeap.empty()) + { + assert(!maxHeap.empty()); + + if (minHeap[0] != maxHeap[0]) + { + valueRange = OpenVDS::FloatRange(minHeap[0], maxHeap[0]); + } + else + { + valueRange = OpenVDS::FloatRange(minHeap[0], minHeap[0] + 1.0f); + } + } + return success; } @@ -2373,7 +2399,7 @@ main(int argc, char* argv[]) { segyType = SEGY::SEGYType::PrestackOffsetSorted; } - if (is2D) + else if (is2D) { if (prestack) { @@ -2857,7 +2883,7 @@ main(int argc, char* argv[]) // Create channel descriptors std::vector channelDescriptors = createChannelDescriptors(fileInfo, valueRange, offsetInfo, attributeName, attributeUnit); - if (segyType == SEGY::SEGYType::Poststack || segyType == SEGY::SEGYType::Prestack) + if (segyType == SEGY::SEGYType::Poststack || segyType == SEGY::SEGYType::Prestack || fileInfo.IsOffsetSorted()) { createSurveyCoordinateSystemMetadata(fileInfo, segyMeasurementSystem, crsWkt, metadataContainer); } @@ -3156,10 +3182,11 @@ main(int argc, char* argv[]) void* segyTraceHeaderBuffer = segyTraceHeaderPage ? segyTraceHeaderPage->GetWritableBuffer(segyTraceHeaderPitch) : nullptr; void* offsetBuffer = offsetPage ? offsetPage->GetWritableBuffer(offsetPitch) : nullptr; + const int pitchCheckDimension = fileInfo.IsOffsetSorted() ? 2 : 1; assert(amplitudePitch[0] == 1); - assert(!traceFlagBuffer || traceFlagPitch[1] == 1); - assert(!segyTraceHeaderBuffer || segyTraceHeaderPitch[1] == SEGY::TraceHeaderSize); - assert(!offsetBuffer || offsetPitch[1] == 1); + assert(!traceFlagBuffer || traceFlagPitch[pitchCheckDimension] == 1); + assert(!segyTraceHeaderBuffer || segyTraceHeaderPitch[pitchCheckDimension] == SEGY::TraceHeaderSize); + assert(!offsetBuffer || offsetPitch[pitchCheckDimension] == 1); for (size_t fileIndex = 0; fileIndex < fileInfo.m_segmentInfoLists.size(); ++fileIndex) { -- GitLab From 4f905cb8ca84934e013d1b4874832a8e90f3a913 Mon Sep 17 00:00:00 2001 From: Jim King Date: Wed, 29 Sep 2021 19:10:37 -0500 Subject: [PATCH 16/46] several small fixes to make actual data import work --- src/SEGYUtils/SEGYFileInfo.cpp | 2 +- tools/SEGYImport/SEGYImport.cpp | 24 ++++++++++++++++++++---- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/SEGYUtils/SEGYFileInfo.cpp b/src/SEGYUtils/SEGYFileInfo.cpp index 70f5f88d..e3220731 100644 --- a/src/SEGYUtils/SEGYFileInfo.cpp +++ b/src/SEGYUtils/SEGYFileInfo.cpp @@ -32,7 +32,7 @@ using namespace SEGY; bool SEGYFileInfo::Is4D() const { - return m_segyType == SEGY::SEGYType::Prestack; + return m_segyType == SEGY::SEGYType::Prestack || m_segyType == SEGY::SEGYType::PrestackOffsetSorted; } bool diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 39ecc8d3..1b1fe6d9 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -2183,7 +2183,7 @@ SecondaryKeyDimension(const SEGYFileInfo& fileInfo) int PrimaryKeyDimension(const SEGYFileInfo& fileInfo) { - if (fileInfo.m_segyType == SEGY::SEGYType::Prestack || fileInfo.m_segyType == SEGY::SEGYType::Prestack2D) + if (fileInfo.m_segyType == SEGY::SEGYType::Prestack || fileInfo.m_segyType == SEGY::SEGYType::Prestack2D || fileInfo.m_segyType == SEGY::SEGYType::PrestackOffsetSorted) { // Prestack2D doesn't really have a primary key dimension, but we'll use the same one as for Prestack as sort of a placeholder. return 3; @@ -2694,7 +2694,11 @@ main(int argc, char* argv[]) // Check for only a single segment - if (!is2D && fileInfo.m_segmentInfoLists.size() == 1 && fileInfo.m_segmentInfoLists[0].size() == 1) + const bool isOneSegment = fileInfo.IsOffsetSorted() + ? fileInfo.m_segmentInfoListsByOffset.size() == 1 && fileInfo.m_segmentInfoListsByOffset[0].size() == 1 + : fileInfo.m_segmentInfoLists.size() == 1 && fileInfo.m_segmentInfoLists[0].size() == 1; + + if (!is2D && isOneSegment) { OpenVDS::printWarning_with_condition_fatal(jsonOutput, !ignoreWarnings, "SegmentInfoList", "Warning: There is only one segment, either this is 2D data or this usually indicates using the wrong header format for the input dataset.", "Use --2d for 2D data. Use --ignore-warnings to force the import to go ahead."); } @@ -3023,7 +3027,9 @@ main(int argc, char* argv[]) // For each input file, find the lower/upper segments and then add data requests to that file's traceDataManager - for (size_t fileIndex = 0; fileIndex < fileInfo.m_segmentInfoLists.size(); ++fileIndex) + const auto segmentInfoListsSize = fileInfo.IsOffsetSorted() ? fileInfo.m_segmentInfoListsByOffset.size() : fileInfo.m_segmentInfoLists.size(); + + for (size_t fileIndex = 0; fileIndex < segmentInfoListsSize; ++fileIndex) { const int offetSortedOffsetValue = fileInfo.IsOffsetSorted() ? offsetStart + offsetStep * chunkInfo.min[1] : 0; @@ -3093,6 +3099,10 @@ main(int argc, char* argv[]) } } + int64_t + amplitudePageCount = 0L, + traceCopyCount = 0L; + for (int64_t chunk = 0; chunk < amplitudeAccessor->GetChunkCount() && error.code == 0; chunk++) { int new_percentage = int(double(chunk) / amplitudeAccessor->GetChunkCount() * 100); @@ -3188,7 +3198,9 @@ main(int argc, char* argv[]) assert(!segyTraceHeaderBuffer || segyTraceHeaderPitch[pitchCheckDimension] == SEGY::TraceHeaderSize); assert(!offsetBuffer || offsetPitch[pitchCheckDimension] == 1); - for (size_t fileIndex = 0; fileIndex < fileInfo.m_segmentInfoLists.size(); ++fileIndex) + const auto segmentInfoListsSize = fileInfo.IsOffsetSorted() ? fileInfo.m_segmentInfoListsByOffset.size() : fileInfo.m_segmentInfoLists.size(); + + for (size_t fileIndex = 0; fileIndex < segmentInfoListsSize; ++fileIndex) { auto result = chunkInfo.lowerUpperSegmentIndices.find(fileIndex); if (result == chunkInfo.lowerUpperSegmentIndices.end()) @@ -3323,6 +3335,7 @@ main(int argc, char* argv[]) const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, amplitudePitch); copySamples(data, fileInfo.m_dataSampleFormatCode, fileInfo.m_headerEndianness, &reinterpret_cast(amplitudeBuffer)[targetOffset], chunkInfo.sampleStart, chunkInfo.sampleCount); + ++traceCopyCount; } if (traceFlagBuffer) @@ -3366,6 +3379,9 @@ main(int argc, char* argv[]) traceDataManagers.clear(); dataViewManagers.clear(); + fmt::print(stderr, "\nCreated {} amplitude pages\n", amplitudePageCount); + fmt::print(stderr, "Copied {} traces to amplitude pages\n", traceCopyCount); + if (error.code != 0) { return EXIT_FAILURE; -- GitLab From cfe86840ab4d74880210f85cdb6fd0ba653fef47 Mon Sep 17 00:00:00 2001 From: Jim King Date: Wed, 29 Sep 2021 19:10:58 -0500 Subject: [PATCH 17/46] disable one test that takes a very long time (after verifying that it passes) --- tests/tools/SEGYImport/pytests/test_offset_sorted.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/tools/SEGYImport/pytests/test_offset_sorted.py b/tests/tools/SEGYImport/pytests/test_offset_sorted.py index 1488e6ad..7a05e4b3 100644 --- a/tests/tools/SEGYImport/pytests/test_offset_sorted.py +++ b/tests/tools/SEGYImport/pytests/test_offset_sorted.py @@ -211,6 +211,7 @@ def test_read_gather(offset_sorted_executor): assert total > 0.0, f"trace at {dim1}" +@pytest.mark.skip(reason="This test is really inefficient and takes ~100 minutes to run") def test_compare_with_conventional_sorted(offset_sorted_executor, conventional_sorted_executor): ex, output_vds = offset_sorted_executor -- GitLab From b4b6c9ab3f45285dc9a91868af6e94d86e196fc9 Mon Sep 17 00:00:00 2001 From: Jim King Date: Thu, 30 Sep 2021 17:21:15 -0500 Subject: [PATCH 18/46] beginnings of tests for azimuth and mutes channels --- .../pytests/test_azimuth_channel.py | 193 ++++++++++++++++++ .../SEGYImport/pytests/test_mutes_channel.py | 97 +++++++++ 2 files changed, 290 insertions(+) create mode 100644 tests/tools/SEGYImport/pytests/test_azimuth_channel.py create mode 100644 tests/tools/SEGYImport/pytests/test_mutes_channel.py diff --git a/tests/tools/SEGYImport/pytests/test_azimuth_channel.py b/tests/tools/SEGYImport/pytests/test_azimuth_channel.py new file mode 100644 index 00000000..b2f809c5 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_azimuth_channel.py @@ -0,0 +1,193 @@ +import os +from pathlib import Path +from typing import Tuple + +import pytest +import openvds + +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard, TempScanFileGuard + + +@pytest.fixture +def output_vds() -> TempVDSGuard: + return TempVDSGuard("import_mutes_test") + + +@pytest.fixture +def azimuth_degrees_segy() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "WAZ", "waz_trial.segy") + + +@pytest.fixture +def azimuth_offset_xy_segy() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "Teleport", "Teleport_Trim", + "waz", "CG3_090008B32871_subset.segy") + + +@pytest.fixture +def azimuth_from_azimuth_executor(azimuth_degrees_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Fixture to setup an ImportExecutor with options for SEGY with azimuth trace header field""" + ex = ImportExecutor() + + ex.add_args(["--header-field", "azimuth=25:4"]) + ex.add_arg("--azimuth") + ex.add_args(["--azimuth-type", "angle"]) + ex.add_args(["--azimuth-unit", "degrees"]) + ex.add_arg("--prestack") + # TODO disable trace order by offset? + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(azimuth_degrees_segy) + + return ex, output_vds + + +@pytest.fixture +def azimuth_from_offset_xy_executor(azimuth_offset_xy_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Fixture to setup an ImportExecutor with common options for SEGY with mutes""" + ex = ImportExecutor() + + ex.add_args(["--header-field", "offsetx=97:2"]) + ex.add_args(["--header-field", "offsety=95:2"]) + ex.add_arg("--azimuth") + ex.add_args(["--azimuth-type", "offsetxy"]) + ex.add_arg("--prestack") + # TODO disable trace order by offset? + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(azimuth_offset_xy_segy) + + return ex, output_vds + + +def test_azimuth_channel_descriptor(azimuth_from_azimuth_executor): + ex, output_vds = azimuth_from_azimuth_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + mutes_channel = layout.getChannelIndex("Azimuth") + assert mutes_channel > 0 + + +def test_azimuth_range_from_azimuth(azimuth_from_azimuth_executor): + ex, output_vds = azimuth_from_azimuth_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + access_manager = openvds.getAccessManager(handle) + layout = openvds.getLayout(handle) + + # TODO check axis dimensions? + + azimuth_channel = layout.getChannelIndex("Azimuth") + assert azimuth_channel > 0 + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + + dim2_size = layout.getDimensionNumSamples(2) + dim1_size = layout.getDimensionNumSamples(1) + + for dim3 in range(layout.getDimensionNumSamples(3)): + # read one inline of trace flags and azimuths + + voxel_min = (0, 0, 0, dim3, 0, 0) + trace_voxel_max = (1, dim1_size, dim2_size, dim3 + 1, 1, 1) + + trace_flag_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_012) + azimuth_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=azimuth_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + mensionsND=openvds.DimensionsND.Dimensions_012) + + trace_data = trace_flag_request.data.reshape(dim2_size, dim1_size) + azimuth_data = azimuth_request.data.reshape(dim2_size, dim1_size) + + for dim2 in range(dim2_size): + for dim1 in range(dim1_size): + if trace_data[dim2, dim1] > 0: + azimuth_value = azimuth_data[dim2, dim1] + assert 1.0 < azimuth_value <= 107.0,\ + f"azimuth value {azimuth_value} dim1 {dim1} dim2 {dim2} dim3 {dim3}" + + +def test_azimuth_range_from_offset_xy(azimuth_from_offset_xy_executor): + ex, output_vds = azimuth_from_offset_xy_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + access_manager = openvds.getAccessManager(handle) + layout = openvds.getLayout(handle) + + # TODO check axis dimensions? + + azimuth_channel = layout.getChannelIndex("Azimuth") + assert azimuth_channel > 0 + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + + dim2_size = layout.getDimensionNumSamples(2) + dim1_size = layout.getDimensionNumSamples(1) + + for dim3 in range(layout.getDimensionNumSamples(3)): + # read one inline of trace flags and azimuths + + voxel_min = (0, 0, 0, dim3, 0, 0) + trace_voxel_max = (1, dim1_size, dim2_size, dim3 + 1, 1, 1) + + trace_flag_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_012) + azimuth_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=azimuth_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + mensionsND=openvds.DimensionsND.Dimensions_012) + + trace_data = trace_flag_request.data.reshape(dim2_size, dim1_size) + azimuth_data = azimuth_request.data.reshape(dim2_size, dim1_size) + + for dim2 in range(dim2_size): + for dim1 in range(dim1_size): + if trace_data[dim2, dim1] > 0: + azimuth_value = azimuth_data[dim2, dim1] + assert 3.691 <= azimuth_value <= 356.31, \ + f"azimuth value {azimuth_value} dim1 {dim1} dim2 {dim2} dim3 {dim3}" + + +def test_samples_with_azimuth(azimuth_from_azimuth_executor): + """Tests that samples can be read when the azimuth channel is enabled""" + ex, output_vds = azimuth_from_azimuth_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + access_manager = openvds.getAccessManager(handle) + layout = openvds.getLayout(handle) + + assert False, "not implemented" diff --git a/tests/tools/SEGYImport/pytests/test_mutes_channel.py b/tests/tools/SEGYImport/pytests/test_mutes_channel.py new file mode 100644 index 00000000..13dd7bed --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_mutes_channel.py @@ -0,0 +1,97 @@ +from pathlib import Path +from typing import Tuple + +import pytest +import openvds + +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard, TempScanFileGuard + + +@pytest.fixture +def output_vds() -> TempVDSGuard: + return TempVDSGuard("import_mutes_test") + + +@pytest.fixture +def mutes_segy() -> str: + return "C:\\temp\\SEGY\\mutes\\ST0202R08_PS_PrSDM_CIP_gathers_in_PP_Time_modified.segy" + + +@pytest.fixture +def mutes_executor(mutes_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Fixture to setup an ImportExecutor with common options for SEGY with mutes""" + ex = ImportExecutor() + + ex.add_arg("--mutes") + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(mutes_segy) + + return ex, output_vds + + +def test_mutes_channel_descriptor(mutes_executor): + ex, output_vds = mutes_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + mutes_channel = layout.getChannelIndex("Mute") + assert mutes_channel > 0 + + +def test_mutes_read(mutes_executor): + ex, output_vds = mutes_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + access_manager = openvds.getAccessManager(handle) + layout = openvds.getLayout(handle) + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + + mutes_channel = layout.getChannelIndex("Mute") + assert mutes_channel > 0 + + inline_index = layout.getDimensionNumSamples(3) // 2 + dim2_size = layout.getDimensionNumSamples(2) + dim1_size = layout.getDimensionNumSamples(1) + dim0_size = layout.getDimensionNumSamples(0) + + for crossline_index in range(dim2_size): + # read one gather of samples and trace flags and mutes + + voxel_min = (0, 0, crossline_index, inline_index, 0, 0) + # voxel_max = (dim0_size, dim1_size, crossline_index + 1, inline_index + 1, 1, 1) + trace_voxel_max = (1, dim1_size, crossline_index + 1, inline_index + 1, 1, 1) + + # request = access_manager.requestVolumeSubset(voxel_min, + # voxel_max, + # channel=0, + # format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + # dimensionsND=openvds.DimensionsND.Dimensions_012) + trace_flag_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_012) + mutes_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=mutes_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U16, + dimensionsND=openvds.DimensionsND.Dimensions_012) + + for trace_index in range(dim1_size): + if trace_flag_request.data[trace_index] > 0: + assert 0 < mutes_request.data[trace_index * 2] < mutes_request.data[trace_index * 2 + 1] -- GitLab From a195f9c647d09444f1b2b777d8df8840fae74506 Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 1 Oct 2021 15:56:57 -0500 Subject: [PATCH 19/46] flesh out azimuth tests --- .../pytests/test_azimuth_channel.py | 68 +++++++++++++++++-- 1 file changed, 64 insertions(+), 4 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_azimuth_channel.py b/tests/tools/SEGYImport/pytests/test_azimuth_channel.py index b2f809c5..1d1289ab 100644 --- a/tests/tools/SEGYImport/pytests/test_azimuth_channel.py +++ b/tests/tools/SEGYImport/pytests/test_azimuth_channel.py @@ -4,8 +4,9 @@ from typing import Tuple import pytest import openvds +import numpy as np -from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard, TempScanFileGuard +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard @pytest.fixture @@ -87,7 +88,23 @@ def test_azimuth_range_from_azimuth(azimuth_from_azimuth_executor): access_manager = openvds.getAccessManager(handle) layout = openvds.getLayout(handle) - # TODO check axis dimensions? + # check axis dimensions + + assert layout.getDimensionMin(0) == 0 + assert layout.getDimensionMax(0) == 3100 + assert layout.getDimensionNumSamples(0) == 1551 + + assert layout.getDimensionMin(1) == 1 + assert layout.getDimensionMax(1) == 107 + assert layout.getDimensionNumSamples(1) == 107 + + assert layout.getDimensionMin(2) == 495 + assert layout.getDimensionMax(2) == 2050 + assert layout.getDimensionNumSamples(2) == 1556 + + assert layout.getDimensionMin(3) == 1000 + assert layout.getDimensionMax(3) == 1000 + assert layout.getDimensionNumSamples(3) == 1 azimuth_channel = layout.getChannelIndex("Azimuth") assert azimuth_channel > 0 @@ -138,7 +155,23 @@ def test_azimuth_range_from_offset_xy(azimuth_from_offset_xy_executor): access_manager = openvds.getAccessManager(handle) layout = openvds.getLayout(handle) - # TODO check axis dimensions? + # check axis dimensions + + assert layout.getDimensionMin(0) == 0 + assert layout.getDimensionMax(0) == 7000 + assert layout.getDimensionNumSamples(0) == 1751 + + assert layout.getDimensionMin(1) == 1 + assert layout.getDimensionMax(1) == 280 + assert layout.getDimensionNumSamples(1) == 280 + + assert layout.getDimensionMin(2) == 4715 + assert layout.getDimensionMax(2) == 4715 + assert layout.getDimensionNumSamples(2) == 1 + + assert layout.getDimensionMin(3) == 6117 + assert layout.getDimensionMax(3) == 6163 + assert layout.getDimensionNumSamples(3) == 47 azimuth_channel = layout.getChannelIndex("Azimuth") assert azimuth_channel > 0 @@ -190,4 +223,31 @@ def test_samples_with_azimuth(azimuth_from_azimuth_executor): access_manager = openvds.getAccessManager(handle) layout = openvds.getLayout(handle) - assert False, "not implemented" + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + + # we're not going to read azimuths, but we require that the azimuth channel is present + azimuth_channel = layout.getChannelIndex("Azimuth") + assert azimuth_channel > 0 + + for dim3 in range(layout.getDimensionNumSamples(3)): + for dim2 in range(layout.getDimensionNumSamples(2)): + voxel_min = (0, 0, dim2, dim3, 0, 0) + voxel_max = (layout.getDimensionNumSamples(0), layout.getDimensionNumSamples(1), dim2 + 1, dim3 + 1, 1, 1) + trace_voxel_max = (1, layout.getDimensionNumSamples(1), dim2 + 1, dim3 + 1, 1, 1) + + sample_request = access_manager.requestVolumeSubset(voxel_min, + voxel_max, + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32) + trace_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8) + + sample_data = sample_request.data.reshape(layout.getDimensionNumSamples(1), + layout.getDimensionNumSamples(0)) + for dim1 in range(layout.getDimensionNumSamples(1)): + if trace_request.data[dim1] > 0: + total = np.sum(np.abs(sample_data[dim1, :])) + assert total > 0.0, f"zero-data trace at dim1 {dim1} dim2 {dim2} dim3 {dim3}" -- GitLab From 7102c7c42aace38d244b320fac5222f40372fd35 Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 1 Oct 2021 17:20:39 -0500 Subject: [PATCH 20/46] add header fields and command line options for azimuth and mute channels --- src/SEGYUtils/SEGYUtils/SEGY.h | 21 +++++++ .../pytests/test_azimuth_channel.py | 5 +- tools/SEGYImport/SEGYImport.cpp | 56 ++++++++++++++++++- 3 files changed, 80 insertions(+), 2 deletions(-) diff --git a/src/SEGYUtils/SEGYUtils/SEGY.h b/src/SEGYUtils/SEGYUtils/SEGY.h index 5363039e..b1ba2374 100644 --- a/src/SEGYUtils/SEGYUtils/SEGY.h +++ b/src/SEGYUtils/SEGYUtils/SEGY.h @@ -204,6 +204,11 @@ static const HeaderField InlineNumberHeaderField(189, FieldWidth::FourByte); static const HeaderField CrosslineNumberHeaderField(193, FieldWidth::FourByte); static const HeaderField ReceiverHeaderField(13, FieldWidth::FourByte); static const HeaderField OffsetHeaderField(37, FieldWidth::FourByte); +static const HeaderField OffsetXHeaderField(97, FieldWidth::TwoByte); +static const HeaderField OffsetYHeaderField(95, FieldWidth::TwoByte); +static const HeaderField Azimuth(61, FieldWidth::FourByte); +static const HeaderField MuteStartTime(111, FieldWidth::TwoByte); +static const HeaderField MuteEndTime(113, FieldWidth::TwoByte); } // end namespace TraceHeader @@ -318,6 +323,22 @@ enum class SampleUnits Meters = 2 }; +enum class AzimuthUnits +{ + Radians = 0, + Degrees = 1, + MinValue = Radians, + MaxValue = Degrees +}; + +enum class AzimuthType +{ + Azimuth = 0, + OffsetXY = 1, + MinValue = Azimuth, + MaxValue = OffsetXY +}; + enum class SEGYType { Poststack = 0, diff --git a/tests/tools/SEGYImport/pytests/test_azimuth_channel.py b/tests/tools/SEGYImport/pytests/test_azimuth_channel.py index 1d1289ab..da4a1030 100644 --- a/tests/tools/SEGYImport/pytests/test_azimuth_channel.py +++ b/tests/tools/SEGYImport/pytests/test_azimuth_channel.py @@ -32,12 +32,15 @@ def azimuth_from_azimuth_executor(azimuth_degrees_segy, output_vds) -> Tuple[Imp ex.add_args(["--header-field", "azimuth=25:4"]) ex.add_arg("--azimuth") - ex.add_args(["--azimuth-type", "angle"]) + ex.add_args(["--azimuth-type", "azimuth"]) ex.add_args(["--azimuth-unit", "degrees"]) ex.add_arg("--prestack") # TODO disable trace order by offset? ex.add_args(["--vdsfile", output_vds.filename]) + # Need to ignore warnings because this data is only one segment + ex.add_arg("--ignore-warnings") + ex.add_arg(azimuth_degrees_segy) return ex, output_vds diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 1b1fe6d9..c05ed49f 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -278,7 +278,12 @@ g_traceHeaderFields = { "inlinenumber", SEGY::TraceHeader::InlineNumberHeaderField }, { "crosslinenumber", SEGY::TraceHeader::CrosslineNumberHeaderField }, { "receiver", SEGY::TraceHeader::ReceiverHeaderField }, - { "offset", SEGY::TraceHeader::OffsetHeaderField } + { "offset", SEGY::TraceHeader::OffsetHeaderField }, + { "offsetx", SEGY::TraceHeader::OffsetXHeaderField }, + { "offsety", SEGY::TraceHeader::OffsetYHeaderField }, + { "azimuth", SEGY::TraceHeader::Azimuth }, + { "mutestarttime", SEGY::TraceHeader::MuteStartTime }, + { "muteendtime", SEGY::TraceHeader::MuteEndTime }, }; std::map @@ -2240,6 +2245,15 @@ main(int argc, char* argv[]) bool version = false; std::string attributeName = AMPLITUDE_ATTRIBUTE_NAME; std::string attributeUnit; + bool isMutes = false; + bool isAzimuth = false; + SEGY::AzimuthType azimuthType = SEGY::AzimuthType::Azimuth; + SEGY::AzimuthUnits azimuthUnit = SEGY::AzimuthUnits::Degrees; + std::string azimuthTypeString; + std::string azimuthUnitString; + + const std::string supportedAzimuthTypes("Azimuth (from trace header field) (default), OffsetXY (computed from OffsetX and OffsetY header fields)"); + const std::string supportedAzimuthUnits("Radians, Degrees (default)"); // default key names used if not supplied by user std::string defaultPrimaryKey = "InlineNumber"; @@ -2287,6 +2301,10 @@ main(int argc, char* argv[]) options.add_option("", "", "attribute-unit", "The units of the primary VDS channel. The unit name may be blank (default), ft, ft/s, Hz, m, m/s, ms, or s", cxxopts::value(attributeUnit), ""); options.add_option("", "", "2d", "Import 2D data.", cxxopts::value(is2D), ""); options.add_option("", "", "offset-sorted", "Import prestack data sorted by trace header Offset value.", cxxopts::value(isOffsetSorted), ""); + options.add_option("", "", "mute", "Enable Mutes channel in output VDS.", cxxopts::value(isMutes), ""); + options.add_option("", "", "azimuth", "Enable Azimuth channel in output VDS.", cxxopts::value(isAzimuth), ""); + options.add_option("", "", "azimuth-type", std::string("Azimuth type. Supported azimuth types are: ") + supportedAzimuthTypes + ".", cxxopts::value(azimuthTypeString), ""); + options.add_option("", "", "azimuth-unit", std::string("Azimuth unit. Supported azimuth units are: ") + supportedAzimuthUnits + ".", cxxopts::value(azimuthUnitString), ""); // TODO add option for turning off traceOrderByOffset options.add_option("", "h", "help", "Print this help information", cxxopts::value(help), ""); @@ -2499,6 +2517,42 @@ main(int argc, char* argv[]) } } + if (!azimuthTypeString.empty()) + { + std::transform(azimuthTypeString.begin(), azimuthTypeString.end(), azimuthTypeString.begin(), asciitolower); + if (azimuthTypeString == "azimuth") + { + azimuthType = SEGY::AzimuthType::Azimuth; + } + else if (azimuthTypeString == "offsetxy") + { + azimuthType = SEGY::AzimuthType::OffsetXY; + } + else + { + OpenVDS::printError(jsonOutput, "Args", fmt::format("Unknown Azimuth type '{}'", azimuthTypeString)); + return EXIT_FAILURE; + } + } + + if (!azimuthUnitString.empty()) + { + std::transform(azimuthUnitString.begin(), azimuthUnitString.end(), azimuthUnitString.begin(), asciitolower); + if (azimuthUnitString == "radians") + { + azimuthUnit = SEGY::AzimuthUnits::Radians; + } + else if (azimuthUnitString == "degrees") + { + azimuthUnit = SEGY::AzimuthUnits::Degrees; + } + else + { + OpenVDS::printError(jsonOutput, "Args", fmt::format("Unknown Azimuth unit '{}'", azimuthUnitString)); + return EXIT_FAILURE; + } + } + SEGY::HeaderField primaryKeyHeaderField, secondaryKeyHeaderField; -- GitLab From 42b7e6070ef2b176cae83420253d3de2f8145d66 Mon Sep 17 00:00:00 2001 From: Jim King Date: Mon, 4 Oct 2021 17:13:26 -0500 Subject: [PATCH 21/46] all code ported, not tested --- tools/SEGYImport/SEGYImport.cpp | 179 +++++++++++++++++++++++++++----- 1 file changed, 154 insertions(+), 25 deletions(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index c05ed49f..2248b093 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -42,6 +42,9 @@ #include #include +#define _USE_MATH_DEFINES +#include + #include #include #include @@ -890,7 +893,7 @@ updateValueRangeHeaps(const SEGYFileInfo& fileInfo, const int heapSizeMax, std:: } bool -analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSegmentInfo const& segmentInfo, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, const SEGY::SEGYType segyType, int& offsetStart, int& offsetEnd, int& offsetStep, TraceInfo2DManager * pTraceInfo2DManager, bool jsonOutput, OpenVDS::Error& error) +analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSegmentInfo const& segmentInfo, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, const SEGY::SEGYType segyType, int& offsetStart, int& offsetEnd, int& offsetStep, TraceInfo2DManager * pTraceInfo2DManager, bool isTraceOrderByOffset, bool jsonOutput, OpenVDS::Error& error) { assert(segmentInfo.m_traceStop >= segmentInfo.m_traceStart && "A valid segment info should always have a stop trace greater or equal to the start trace"); assert(pTraceInfo2DManager != nullptr); @@ -1018,20 +1021,22 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg offsetStep = 1; } - // 'fold' calculated by counting traces in a gather may be incorrect if there are duplicate-key - // traces, so we'll ignore it and use the count of unique offset values - fold = static_cast(offsetValues.size()); - - // check that offset start/end/step is consistent - if (offsetStart + (fold - 1) * offsetStep != offsetEnd) + if (isTraceOrderByOffset) { - const auto - msgFormat = "The detected gather offset start/end/step of '{0}/{1}/{2}' is not consistent with the detected fold of '{3}'. This may indicate an incorrect header format for Offset trace header."; - const auto - msg = fmt::format(msgFormat, offsetStart, offsetEnd, offsetStep, fold); - OpenVDS::printError(jsonOutput, "analyzeSegment", msg); - return false; + // 'fold' calculated by counting traces in a gather may be incorrect if there are duplicate-key + // traces, so we'll ignore it and use the count of unique offset values + fold = static_cast(offsetValues.size()); + // check that offset start/end/step is consistent + if (offsetStart + (fold - 1) * offsetStep != offsetEnd) + { + const auto + msgFormat = "The detected gather offset start/end/step of '{0}/{1}/{2}' is not consistent with the detected fold of '{3}'. This may indicate an incorrect header format for Offset trace header."; + const auto + msg = fmt::format(msgFormat, offsetStart, offsetEnd, offsetStep, fold); + OpenVDS::printError(jsonOutput, "analyzeSegment", msg); + return false; + } } } @@ -1531,6 +1536,15 @@ createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryH inlineSpacing[1] = -crosslineSpacing[0]; } + if (crosslineSpacing[0] == 0.0 && crosslineSpacing[1] == 1.0 && inlineSpacing[0] == 0.0) + { + // Deal with Headwave quirk: If crossline spacing is (0, 1) (i.e. only one crossline) and the inline spacing is (0, anything) then Headwave + // won't display sample data. In this case we'll change the crossline spacing to (-1, 0) (which seems to be how the Headwave SEGY importer + // handles this case). + crosslineSpacing[0] = -1; + crosslineSpacing[1] = 0; + } + // Determine origin double origin[2]; @@ -1984,7 +1998,7 @@ struct OffsetChannelInfo }; std::vector -createChannelDescriptors(SEGYFileInfo const& fileInfo, OpenVDS::FloatRange const& valueRange, const OffsetChannelInfo& offsetInfo, const std::string& attributeName, const std::string& attributeUnit) +createChannelDescriptors(SEGYFileInfo const& fileInfo, OpenVDS::FloatRange const& valueRange, const OffsetChannelInfo& offsetInfo, const std::string& attributeName, const std::string& attributeUnit, bool isAzimuthEnabled, bool isMuteEnabled) { std::vector channelDescriptors; @@ -2006,6 +2020,16 @@ createChannelDescriptors(SEGYFileInfo const& fileInfo, OpenVDS::FloatRange const // TODO channels for other gather types - "Angle", "Vrms", "Frequency" } + if (isAzimuthEnabled) + { + channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_R32, OpenVDS::VolumeDataChannelDescriptor::Components_1, "Azimuth", "deg", FLT_MIN, FLT_MAX, OpenVDS::VolumeDataMapping::PerTrace, OpenVDS::VolumeDataChannelDescriptor::NoLossyCompression); + } + + if (isMuteEnabled) + { + channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_U16, OpenVDS::VolumeDataChannelDescriptor::Components_2, "Mute", KNOWNMETADATA_UNIT_MILLISECOND, 0.0f, 65535.0f, OpenVDS::VolumeDataMapping::PerTrace, OpenVDS::VolumeDataChannelDescriptor::NoLossyCompression); + } + return channelDescriptors; } @@ -2196,6 +2220,21 @@ PrimaryKeyDimension(const SEGYFileInfo& fileInfo) return 2; } +int +findChannelDescriptorIndex(const std::string& channelName, const std::vector& channelDescriptors) +{ + for (int index = 0; index < channelDescriptors.size(); ++index) + { + if (channelName == channelDescriptors[index].GetName()) + { + return index; + } + } + + // not found + return -1; +} + int main(int argc, char* argv[]) { @@ -2249,6 +2288,7 @@ main(int argc, char* argv[]) bool isAzimuth = false; SEGY::AzimuthType azimuthType = SEGY::AzimuthType::Azimuth; SEGY::AzimuthUnits azimuthUnit = SEGY::AzimuthUnits::Degrees; + float azimuthScaleFactor = 1.0f; std::string azimuthTypeString; std::string azimuthUnitString; @@ -2305,6 +2345,7 @@ main(int argc, char* argv[]) options.add_option("", "", "azimuth", "Enable Azimuth channel in output VDS.", cxxopts::value(isAzimuth), ""); options.add_option("", "", "azimuth-type", std::string("Azimuth type. Supported azimuth types are: ") + supportedAzimuthTypes + ".", cxxopts::value(azimuthTypeString), ""); options.add_option("", "", "azimuth-unit", std::string("Azimuth unit. Supported azimuth units are: ") + supportedAzimuthUnits + ".", cxxopts::value(azimuthUnitString), ""); + options.add_option("", "", "azimuth-scale", "Azimuth scale factor. Trace header field Azimuth values will be multiplied by this factor.", cxxopts::value(azimuthScaleFactor), ""); // TODO add option for turning off traceOrderByOffset options.add_option("", "h", "help", "Print this help information", cxxopts::value(help), ""); @@ -2789,7 +2830,7 @@ main(int argc, char* argv[]) int fileIndex; const auto& representativeSegment = findRepresentativeSegment(fileInfo, primaryStep, fileIndex); assert(fileIndex < dataProviders.size()); - analyzeResult = analyzeSegment(dataProviders[fileIndex], fileInfo, representativeSegment, valueRangePercentile, valueRange, fold, secondaryStep, segyType, offsetStart, offsetEnd, offsetStep, traceInfo2DManager.get(), jsonOutput, error); + analyzeResult = analyzeSegment(dataProviders[fileIndex], fileInfo, representativeSegment, valueRangePercentile, valueRange, fold, secondaryStep, segyType, offsetStart, offsetEnd, offsetStep, traceInfo2DManager.get(), traceOrderByOffset, jsonOutput, error); } if (!analyzeResult || error.code != 0) @@ -2939,7 +2980,7 @@ main(int argc, char* argv[]) offsetInfo(fileInfo.HasGatherOffset(), ConvertDistance(segyMeasurementSystem, static_cast(offsetStart)), ConvertDistance(segyMeasurementSystem, static_cast(offsetEnd)), ConvertDistance(segyMeasurementSystem, static_cast(offsetStep))); // Create channel descriptors - std::vector channelDescriptors = createChannelDescriptors(fileInfo, valueRange, offsetInfo, attributeName, attributeUnit); + std::vector channelDescriptors = createChannelDescriptors(fileInfo, valueRange, offsetInfo, attributeName, attributeUnit, isAzimuth, isMutes); if (segyType == SEGY::SEGYType::Poststack || segyType == SEGY::SEGYType::Prestack || fileInfo.IsOffsetSorted()) { @@ -3007,10 +3048,44 @@ main(int argc, char* argv[]) writeDimensionGroup = OpenVDS::DimensionsND::Dimensions_023; } + int offsetChannelIndex = 0, azimuthChannelIndex = 0, muteChannelIndex = 0; + + if (fileInfo.HasGatherOffset()) + { + offsetChannelIndex = findChannelDescriptorIndex("Offset", channelDescriptors); + if (offsetChannelIndex < 0) + { + OpenVDS::printError(jsonOutput, "VDS", "Could not find VDS channel descriptor for Offset"); + return EXIT_FAILURE; + } + } + + if (isAzimuth) + { + azimuthChannelIndex = findChannelDescriptorIndex("Azimuth", channelDescriptors); + if (azimuthChannelIndex < 0) + { + OpenVDS::printError(jsonOutput, "VDS", "Could not find VDS channel descriptor for Azimuth"); + return EXIT_FAILURE; + } + } + + if (isMutes) + { + muteChannelIndex = findChannelDescriptorIndex("Mute", channelDescriptors); + if (muteChannelIndex < 0) + { + OpenVDS::printError(jsonOutput, "VDS", "Could not find VDS channel descriptor for Mute"); + return EXIT_FAILURE; + } + } + auto amplitudeAccessor = accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, 0, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create); auto traceFlagAccessor = accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, 1, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create); auto segyTraceHeaderAccessor = accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, 2, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create); - auto offsetAccessor = fileInfo.HasGatherOffset() ? accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, 3, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create) : nullptr; + auto offsetAccessor = fileInfo.HasGatherOffset() ? accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, offsetChannelIndex, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create) : nullptr; + auto azimuthAccessor = isAzimuth ? accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, azimuthChannelIndex, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create) : nullptr; + auto muteAccessor = isMutes ? accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, muteChannelIndex, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create) : nullptr; int64_t traceByteSize = fileInfo.TraceByteSize(); @@ -3153,10 +3228,6 @@ main(int argc, char* argv[]) } } - int64_t - amplitudePageCount = 0L, - traceCopyCount = 0L; - for (int64_t chunk = 0; chunk < amplitudeAccessor->GetChunkCount() && error.code == 0; chunk++) { int new_percentage = int(double(chunk) / amplitudeAccessor->GetChunkCount() * 100); @@ -3225,6 +3296,8 @@ main(int argc, char* argv[]) OpenVDS::VolumeDataPage* traceFlagPage = nullptr; OpenVDS::VolumeDataPage* segyTraceHeaderPage = nullptr; OpenVDS::VolumeDataPage* offsetPage = nullptr; + OpenVDS::VolumeDataPage* azimuthPage = nullptr; + OpenVDS::VolumeDataPage* mutePage = nullptr; if (chunkInfo.min[0] == 0) { @@ -3234,23 +3307,37 @@ main(int argc, char* argv[]) { offsetPage = offsetAccessor->CreatePage(offsetAccessor->GetMappedChunkIndex(chunk)); } + if (azimuthAccessor != nullptr) + { + azimuthPage = azimuthAccessor->CreatePage(azimuthAccessor->GetMappedChunkIndex(chunk)); + } + if (muteAccessor != nullptr) + { + mutePage = muteAccessor->CreatePage(muteAccessor->GetMappedChunkIndex(chunk)); + } } int amplitudePitch[OpenVDS::Dimensionality_Max]; int traceFlagPitch[OpenVDS::Dimensionality_Max]; int segyTraceHeaderPitch[OpenVDS::Dimensionality_Max]; int offsetPitch[OpenVDS::Dimensionality_Max]; + int azimuthPitch[OpenVDS::Dimensionality_Max]; + int mutePitch[OpenVDS::Dimensionality_Max]; void* amplitudeBuffer = amplitudePage->GetWritableBuffer(amplitudePitch); void* traceFlagBuffer = traceFlagPage ? traceFlagPage->GetWritableBuffer(traceFlagPitch) : nullptr; void* segyTraceHeaderBuffer = segyTraceHeaderPage ? segyTraceHeaderPage->GetWritableBuffer(segyTraceHeaderPitch) : nullptr; void* offsetBuffer = offsetPage ? offsetPage->GetWritableBuffer(offsetPitch) : nullptr; + void* azimuthBuffer = azimuthPage ? azimuthPage->GetWritableBuffer(azimuthPitch) : nullptr; + void* muteBuffer = mutePage ? mutePage->GetWritableBuffer(mutePitch) : nullptr; const int pitchCheckDimension = fileInfo.IsOffsetSorted() ? 2 : 1; assert(amplitudePitch[0] == 1); assert(!traceFlagBuffer || traceFlagPitch[pitchCheckDimension] == 1); assert(!segyTraceHeaderBuffer || segyTraceHeaderPitch[pitchCheckDimension] == SEGY::TraceHeaderSize); assert(!offsetBuffer || offsetPitch[pitchCheckDimension] == 1); + assert(!azimuthBuffer || azimuthPitch[pitchCheckDimension] == 1); + assert(!muteBuffer || mutePitch[pitchCheckDimension] == 2); const auto segmentInfoListsSize = fileInfo.IsOffsetSorted() ? fileInfo.m_segmentInfoListsByOffset.size() : fileInfo.m_segmentInfoLists.size(); @@ -3389,7 +3476,6 @@ main(int argc, char* argv[]) const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, amplitudePitch); copySamples(data, fileInfo.m_dataSampleFormatCode, fileInfo.m_headerEndianness, &reinterpret_cast(amplitudeBuffer)[targetOffset], chunkInfo.sampleStart, chunkInfo.sampleCount); - ++traceCopyCount; } if (traceFlagBuffer) @@ -3414,6 +3500,52 @@ main(int argc, char* argv[]) traceOffset = ConvertDistance(segyMeasurementSystem, static_cast(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness))); static_cast(offsetBuffer)[targetOffset] = traceOffset; } + + if (azimuthBuffer) + { + float + azimuth; + if (azimuthType == SEGY::AzimuthType::Azimuth) + { + azimuth = static_cast(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["azimuth"], fileInfo.m_headerEndianness)); + + // apply azimuth scale + azimuth = static_cast(azimuth * azimuthScaleFactor); + + if (azimuthUnit == SEGY::AzimuthUnits::Radians) + { + azimuth = static_cast(azimuth * 180.0 / M_PI); + } + } + else + { + auto + offsetX = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offsetx"], fileInfo.m_headerEndianness), + offsetY = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offsety"], fileInfo.m_headerEndianness); + azimuth = static_cast(atan2(offsetY, offsetX) * 180.0 / M_PI); + + // transform from unit circle azimuth to compass azimuth + azimuth = fmod(450.0f - azimuth, 360.0f); + } + + const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, azimuthPitch); + + static_cast(azimuthBuffer)[targetOffset] = azimuth; + } + + if (muteBuffer) + { + auto + muteStartTime = static_cast(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["mutestarttime"], fileInfo.m_headerEndianness)), + muteEndTime = static_cast(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["muteendtime"], fileInfo.m_headerEndianness)); + + // TODO Need to multiply by 2 the result of VoxelIndexToDataIndex, because it gives the two-component offset, and we want the plain offset into a uint16_t buffer? + const int + targetOffset = 2 * VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, mutePitch); + + static_cast(muteBuffer)[targetOffset] = muteStartTime; + static_cast(muteBuffer)[targetOffset + 1] = muteEndTime; + } } } } @@ -3433,9 +3565,6 @@ main(int argc, char* argv[]) traceDataManagers.clear(); dataViewManagers.clear(); - fmt::print(stderr, "\nCreated {} amplitude pages\n", amplitudePageCount); - fmt::print(stderr, "Copied {} traces to amplitude pages\n", traceCopyCount); - if (error.code != 0) { return EXIT_FAILURE; -- GitLab From a2b0830ec8e5d06e62acad6ac97be9c1ec2f7c22 Mon Sep 17 00:00:00 2001 From: Jim King Date: Tue, 5 Oct 2021 12:10:28 -0500 Subject: [PATCH 22/46] fixes for tests --- tests/tools/SEGYImport/pytests/test_azimuth_channel.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_azimuth_channel.py b/tests/tools/SEGYImport/pytests/test_azimuth_channel.py index da4a1030..841e9ae1 100644 --- a/tests/tools/SEGYImport/pytests/test_azimuth_channel.py +++ b/tests/tools/SEGYImport/pytests/test_azimuth_channel.py @@ -35,7 +35,7 @@ def azimuth_from_azimuth_executor(azimuth_degrees_segy, output_vds) -> Tuple[Imp ex.add_args(["--azimuth-type", "azimuth"]) ex.add_args(["--azimuth-unit", "degrees"]) ex.add_arg("--prestack") - # TODO disable trace order by offset? + ex.add_arg("--order-by-offset=false") ex.add_args(["--vdsfile", output_vds.filename]) # Need to ignore warnings because this data is only one segment @@ -56,7 +56,7 @@ def azimuth_from_offset_xy_executor(azimuth_offset_xy_segy, output_vds) -> Tuple ex.add_arg("--azimuth") ex.add_args(["--azimuth-type", "offsetxy"]) ex.add_arg("--prestack") - # TODO disable trace order by offset? + ex.add_arg("--order-by-offset=false") ex.add_args(["--vdsfile", output_vds.filename]) ex.add_arg(azimuth_offset_xy_segy) @@ -133,7 +133,7 @@ def test_azimuth_range_from_azimuth(azimuth_from_azimuth_executor): trace_voxel_max, channel=azimuth_channel, format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, - mensionsND=openvds.DimensionsND.Dimensions_012) + dimensionsND=openvds.DimensionsND.Dimensions_012) trace_data = trace_flag_request.data.reshape(dim2_size, dim1_size) azimuth_data = azimuth_request.data.reshape(dim2_size, dim1_size) @@ -142,7 +142,7 @@ def test_azimuth_range_from_azimuth(azimuth_from_azimuth_executor): for dim1 in range(dim1_size): if trace_data[dim2, dim1] > 0: azimuth_value = azimuth_data[dim2, dim1] - assert 1.0 < azimuth_value <= 107.0,\ + assert 1.0 <= azimuth_value <= 107.0,\ f"azimuth value {azimuth_value} dim1 {dim1} dim2 {dim2} dim3 {dim3}" @@ -200,7 +200,7 @@ def test_azimuth_range_from_offset_xy(azimuth_from_offset_xy_executor): trace_voxel_max, channel=azimuth_channel, format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, - mensionsND=openvds.DimensionsND.Dimensions_012) + dimensionsND=openvds.DimensionsND.Dimensions_012) trace_data = trace_flag_request.data.reshape(dim2_size, dim1_size) azimuth_data = azimuth_request.data.reshape(dim2_size, dim1_size) -- GitLab From dab662c27210011c5742c2711a3f75f515210fa7 Mon Sep 17 00:00:00 2001 From: Jim King Date: Tue, 5 Oct 2021 12:11:21 -0500 Subject: [PATCH 23/46] omg release/commit azimuth and mute data --- tools/SEGYImport/SEGYImport.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 2248b093..5bc1b4a8 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -2346,7 +2346,9 @@ main(int argc, char* argv[]) options.add_option("", "", "azimuth-type", std::string("Azimuth type. Supported azimuth types are: ") + supportedAzimuthTypes + ".", cxxopts::value(azimuthTypeString), ""); options.add_option("", "", "azimuth-unit", std::string("Azimuth unit. Supported azimuth units are: ") + supportedAzimuthUnits + ".", cxxopts::value(azimuthUnitString), ""); options.add_option("", "", "azimuth-scale", "Azimuth scale factor. Trace header field Azimuth values will be multiplied by this factor.", cxxopts::value(azimuthScaleFactor), ""); - // TODO add option for turning off traceOrderByOffset + + // TODO temporary option that will be removed/changed when new respace algo is implemented (traceOrderByOffset) + options.add_option("", "", "order-by-offset", "Order traces within a gather by offset.", cxxopts::value(traceOrderByOffset), ""); options.add_option("", "h", "help", "Print this help information", cxxopts::value(help), ""); options.add_option("", "H", "help-connection", "Print help information about the connection string", cxxopts::value(helpConnection), ""); @@ -3554,12 +3556,16 @@ main(int argc, char* argv[]) if (traceFlagPage) traceFlagPage->Release(); if (segyTraceHeaderPage) segyTraceHeaderPage->Release(); if (offsetPage) offsetPage->Release(); + if (azimuthPage) azimuthPage->Release(); + if (mutePage) mutePage->Release(); } amplitudeAccessor->Commit(); traceFlagAccessor->Commit(); segyTraceHeaderAccessor->Commit(); if (offsetAccessor) offsetAccessor->Commit(); + if (azimuthAccessor) azimuthAccessor->Commit(); + if (muteAccessor) muteAccessor->Commit(); dataView.reset(); traceDataManagers.clear(); -- GitLab From 053ae8f2dc016681598ce49eee2df2141f589b84 Mon Sep 17 00:00:00 2001 From: Jim King Date: Tue, 5 Oct 2021 16:08:54 -0500 Subject: [PATCH 24/46] tweaks for mutes --- tests/tools/SEGYImport/pytests/test_mutes_channel.py | 10 ++++++---- tools/SEGYImport/SEGYImport.cpp | 4 ++-- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_mutes_channel.py b/tests/tools/SEGYImport/pytests/test_mutes_channel.py index 13dd7bed..a0d95e97 100644 --- a/tests/tools/SEGYImport/pytests/test_mutes_channel.py +++ b/tests/tools/SEGYImport/pytests/test_mutes_channel.py @@ -1,10 +1,11 @@ +import os from pathlib import Path from typing import Tuple import pytest import openvds -from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard, TempScanFileGuard +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard @pytest.fixture @@ -14,7 +15,8 @@ def output_vds() -> TempVDSGuard: @pytest.fixture def mutes_segy() -> str: - return "C:\\temp\\SEGY\\mutes\\ST0202R08_PS_PrSDM_CIP_gathers_in_PP_Time_modified.segy" + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "Mutes", + "ST0202R08_PS_PrSDM_CIP_gathers_in_PP_Time_modified_subset.segy") @pytest.fixture @@ -22,7 +24,7 @@ def mutes_executor(mutes_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard """Fixture to setup an ImportExecutor with common options for SEGY with mutes""" ex = ImportExecutor() - ex.add_arg("--mutes") + ex.add_arg("--mute") ex.add_arg("--prestack") ex.add_args(["--vdsfile", output_vds.filename]) @@ -94,4 +96,4 @@ def test_mutes_read(mutes_executor): for trace_index in range(dim1_size): if trace_flag_request.data[trace_index] > 0: - assert 0 < mutes_request.data[trace_index * 2] < mutes_request.data[trace_index * 2 + 1] + assert 0 <= mutes_request.data[trace_index * 2] < mutes_request.data[trace_index * 2 + 1] diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 5bc1b4a8..ad497cac 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -3339,7 +3339,7 @@ main(int argc, char* argv[]) assert(!segyTraceHeaderBuffer || segyTraceHeaderPitch[pitchCheckDimension] == SEGY::TraceHeaderSize); assert(!offsetBuffer || offsetPitch[pitchCheckDimension] == 1); assert(!azimuthBuffer || azimuthPitch[pitchCheckDimension] == 1); - assert(!muteBuffer || mutePitch[pitchCheckDimension] == 2); + assert(!muteBuffer || mutePitch[pitchCheckDimension] == 1); const auto segmentInfoListsSize = fileInfo.IsOffsetSorted() ? fileInfo.m_segmentInfoListsByOffset.size() : fileInfo.m_segmentInfoLists.size(); @@ -3541,7 +3541,7 @@ main(int argc, char* argv[]) muteStartTime = static_cast(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["mutestarttime"], fileInfo.m_headerEndianness)), muteEndTime = static_cast(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["muteendtime"], fileInfo.m_headerEndianness)); - // TODO Need to multiply by 2 the result of VoxelIndexToDataIndex, because it gives the two-component offset, and we want the plain offset into a uint16_t buffer? + // Need to multiply by 2 the result of VoxelIndexToDataIndex, because it gives the two-component offset, and we want the plain offset into a uint16_t buffer const int targetOffset = 2 * VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, mutePitch); -- GitLab From 77fa795cee9cd52b4eb2079fadbc33cdcc4f6a41 Mon Sep 17 00:00:00 2001 From: Jim King Date: Wed, 6 Oct 2021 17:01:01 -0500 Subject: [PATCH 25/46] test skeletons for gather respacing --- .../pytests/test_prestack_gather_spacing.py | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py diff --git a/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py b/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py new file mode 100644 index 00000000..302f6f00 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py @@ -0,0 +1,150 @@ +import os +from pathlib import Path +from typing import Tuple, Union + +import pytest +import openvds + +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard + + +def construct_respace_executor(output_vds: TempVDSGuard, segy_name: str, respace_setting: Union[str, None]) -> ImportExecutor: + ex = ImportExecutor() + + if respace_setting: + ex.add_args(["--respace-gathers", respace_setting]) + + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(segy_name) + + return ex + + +@pytest.fixture +def output_vds() -> TempVDSGuard: + return TempVDSGuard("import_mutes_test") + + +@pytest.fixture +def prestack_segy() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "Mutes", + "") + + +@pytest.fixture +def default_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Setup an ImportExecutor with no arg for respacing""" + ex = construct_respace_executor(output_vds, prestack_segy, None) + return ex, output_vds + + +@pytest.fixture +def auto_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Setup an ImportExecutor with respacing set to Auto""" + ex = construct_respace_executor(output_vds, prestack_segy, "Auto") + return ex, output_vds + + +@pytest.fixture +def off_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Setup an ImportExecutor with respacing set to Off""" + ex = construct_respace_executor(output_vds, prestack_segy, "Off") + return ex, output_vds + + +@pytest.fixture +def on_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Setup an ImportExecutor with respacing set to On""" + ex = construct_respace_executor(output_vds, prestack_segy, "On") + return ex, output_vds + + +@pytest.fixture +def invalid_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Setup an ImportExecutor with respacing set to an invalid value""" + ex = construct_respace_executor(output_vds, prestack_segy, "Partial") + return ex, output_vds + + +def test_gather_spacing_invalid_arg(invalid_executor): + ex, output_vds = invalid_executor + + result = ex.run() + + assert result != 0, ex.output() + # TODO what text to look for? + assert "some message" in ex.output().lower() + + +def test_gather_spacing_default(default_executor): + ex, output_vds = default_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + # TODO read an inline (or all inlines) + # TODO for each gather, check location of dead traces + # TODO assert dead traces occur elsewhere besides the end of the gather + + assert False, "not implemented" + + +def test_gather_spacing_auto(auto_executor): + ex, output_vds = auto_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + # TODO read an inline (or all inlines) + # TODO for each gather, check location of dead traces + # TODO assert dead traces occur elsewhere besides the end of the gather + + assert False, "not implemented" + + +def test_gather_spacing_on(on_executor): + ex, output_vds = on_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + # TODO read an inline (or all inlines) + # TODO for each gather, check location of dead traces + # TODO assert dead traces occur elsewhere besides the end of the gather + + assert False, "not implemented" + + +def test_gather_spacing_off(off_executor): + ex, output_vds = off_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + # TODO read an inline (or all inlines) + # TODO for each gather, check location of dead traces + # TODO assert dead traces occur only at the end of the gather + + assert False, "not implemented" -- GitLab From 72dc2bcfadd62df9ac8ef5822ec4c012b25d3e14 Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 8 Oct 2021 10:41:22 -0500 Subject: [PATCH 26/46] mostly complete tests --- .../pytests/test_prestack_gather_spacing.py | 135 ++++++++++++++---- 1 file changed, 105 insertions(+), 30 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py b/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py index 302f6f00..8039d3fa 100644 --- a/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py +++ b/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py @@ -29,8 +29,8 @@ def output_vds() -> TempVDSGuard: @pytest.fixture def prestack_segy() -> str: - return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "Mutes", - "") + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "Teleport_Trim", "3D_Prestack", + "ST0202R08_Gather_Time.segy") @pytest.fixture @@ -68,6 +68,81 @@ def invalid_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDS return ex, output_vds +def get_gathers_stats(vds_filename: str) -> Tuple[int, int, int, int, int, int]: + """ + Read all the gathers in a single inline and return stats on how many gathers have dead traces in various + regions of the gather. + + :return tuple of ( + total gathers, + number of gathers with dead traces only at front of gather, + number of gathers with dead traces only within the gather, + number of gathers with dead traces only at end of gather, + number of gathers with dead traces in multiple places, + number of gathers with no dead traces + ) + """ + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + access_manager = openvds.getAccessManager(handle) + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0, "Trace channel not found" + + # only read one inline; make it the middle inline + inline_index = layout.getDimensionNumSamples(3) // 2 + dim2_size = layout.getDimensionNumSamples(2) + dim1_size = layout.getDimensionNumSamples(1) + + # stats counters + front_only = 0 + middle_only = 0 + end_only = 0 + mixed = 0 + no_dead = 0 + + for crossline_index in range(dim2_size): + trace_flag_request = access_manager.requestVolumeSubset((0, 0, crossline_index, inline_index, 0, 0), + (1, dim1_size, crossline_index + 1, inline_index + 1, 1, + 1), + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_023) + + assert dim1_size == trace_flag_request.data.shape[0] + + # analyze trace_flag_request.data to figure out which stats counter to bump + + dead_trace_ranges = [] + i = 0 + while i < dim1_size: + if trace_flag_request.data[i] == 0: + start = i + i += 1 + while i < dim1_size and trace_flag_request.data[i] == 0: + i += 1 + end = i - 1 + dead_trace_ranges.append((start, end)) + else: + i += 1 + + if len(dead_trace_ranges) == 0: + no_dead += 1 + elif len(dead_trace_ranges) == 1: + # either front, middle, end + start, stop = dead_trace_ranges[0] + if start == 0: + front_only += 1 + elif stop == 0: + end_only += 1 + else: + middle_only += 1 + else: + mixed += 1 + + return dim2_size, front_only, middle_only, end_only, mixed, no_dead + + def test_gather_spacing_invalid_arg(invalid_executor): ex, output_vds = invalid_executor @@ -86,14 +161,14 @@ def test_gather_spacing_default(default_executor): assert result == 0, ex.output() assert Path(output_vds.filename).exists() - with openvds.open(output_vds.filename, "") as handle: - layout = openvds.getLayout(handle) + total, leading_only, middle_only, trailing_only, mixed, no_dead = get_gathers_stats(output_vds.filename) - # TODO read an inline (or all inlines) - # TODO for each gather, check location of dead traces - # TODO assert dead traces occur elsewhere besides the end of the gather - - assert False, "not implemented" + assert total == 60 + assert middle_only > 0 + assert mixed > 0 + assert trailing_only == 0 + assert leading_only > 0 + assert no_dead == 0 def test_gather_spacing_auto(auto_executor): @@ -104,14 +179,14 @@ def test_gather_spacing_auto(auto_executor): assert result == 0, ex.output() assert Path(output_vds.filename).exists() - with openvds.open(output_vds.filename, "") as handle: - layout = openvds.getLayout(handle) - - # TODO read an inline (or all inlines) - # TODO for each gather, check location of dead traces - # TODO assert dead traces occur elsewhere besides the end of the gather + total, leading_only, middle_only, trailing_only, mixed, no_dead = get_gathers_stats(output_vds.filename) - assert False, "not implemented" + assert total == 60 + assert middle_only > 0 + assert mixed > 0 + assert trailing_only == 0 + assert leading_only > 0 + assert no_dead == 0 def test_gather_spacing_on(on_executor): @@ -122,14 +197,14 @@ def test_gather_spacing_on(on_executor): assert result == 0, ex.output() assert Path(output_vds.filename).exists() - with openvds.open(output_vds.filename, "") as handle: - layout = openvds.getLayout(handle) - - # TODO read an inline (or all inlines) - # TODO for each gather, check location of dead traces - # TODO assert dead traces occur elsewhere besides the end of the gather + total, leading_only, middle_only, trailing_only, mixed, no_dead = get_gathers_stats(output_vds.filename) - assert False, "not implemented" + assert total == 60 + assert middle_only > 0 + assert mixed > 0 + assert trailing_only == 0 + assert leading_only > 0 + assert no_dead == 0 def test_gather_spacing_off(off_executor): @@ -140,11 +215,11 @@ def test_gather_spacing_off(off_executor): assert result == 0, ex.output() assert Path(output_vds.filename).exists() - with openvds.open(output_vds.filename, "") as handle: - layout = openvds.getLayout(handle) - - # TODO read an inline (or all inlines) - # TODO for each gather, check location of dead traces - # TODO assert dead traces occur only at the end of the gather + total, leading_only, middle_only, trailing_only, mixed, no_dead = get_gathers_stats(output_vds.filename) - assert False, "not implemented" + assert total == 60 + assert trailing_only == total + assert middle_only == 0 + assert mixed == 0 + assert leading_only == 0 + assert no_dead == 0 -- GitLab From 79f1cac18dd346c114c2345b878bdd6fcdaaefa3 Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 8 Oct 2021 14:02:37 -0500 Subject: [PATCH 27/46] add new command line option --- .../pytests/test_prestack_gather_spacing.py | 19 +++++----- tools/SEGYImport/SEGYImport.cpp | 37 ++++++++++++++++++- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py b/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py index 8039d3fa..10910da6 100644 --- a/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py +++ b/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py @@ -29,8 +29,8 @@ def output_vds() -> TempVDSGuard: @pytest.fixture def prestack_segy() -> str: - return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "Teleport_Trim", "3D_Prestack", - "ST0202R08_Gather_Time.segy") + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "Teleport", "Teleport_Trim", + "3D_Prestack", "ST0202R08_Gather_Time.segy") @pytest.fixture @@ -82,7 +82,7 @@ def get_gathers_stats(vds_filename: str) -> Tuple[int, int, int, int, int, int]: number of gathers with no dead traces ) """ - with openvds.open(output_vds.filename, "") as handle: + with openvds.open(vds_filename, "") as handle: layout = openvds.getLayout(handle) access_manager = openvds.getAccessManager(handle) @@ -107,7 +107,7 @@ def get_gathers_stats(vds_filename: str) -> Tuple[int, int, int, int, int, int]: 1), channel=trace_channel, format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, - dimensionsND=openvds.DimensionsND.Dimensions_023) + dimensionsND=openvds.DimensionsND.Dimensions_012) assert dim1_size == trace_flag_request.data.shape[0] @@ -149,8 +149,7 @@ def test_gather_spacing_invalid_arg(invalid_executor): result = ex.run() assert result != 0, ex.output() - # TODO what text to look for? - assert "some message" in ex.output().lower() + assert "unknown --respace-gathers option" in ex.output().lower() def test_gather_spacing_default(default_executor): @@ -163,7 +162,7 @@ def test_gather_spacing_default(default_executor): total, leading_only, middle_only, trailing_only, mixed, no_dead = get_gathers_stats(output_vds.filename) - assert total == 60 + assert total == 71 assert middle_only > 0 assert mixed > 0 assert trailing_only == 0 @@ -181,7 +180,7 @@ def test_gather_spacing_auto(auto_executor): total, leading_only, middle_only, trailing_only, mixed, no_dead = get_gathers_stats(output_vds.filename) - assert total == 60 + assert total == 71 assert middle_only > 0 assert mixed > 0 assert trailing_only == 0 @@ -199,7 +198,7 @@ def test_gather_spacing_on(on_executor): total, leading_only, middle_only, trailing_only, mixed, no_dead = get_gathers_stats(output_vds.filename) - assert total == 60 + assert total == 71 assert middle_only > 0 assert mixed > 0 assert trailing_only == 0 @@ -217,7 +216,7 @@ def test_gather_spacing_off(off_executor): total, leading_only, middle_only, trailing_only, mixed, no_dead = get_gathers_stats(output_vds.filename) - assert total == 60 + assert total == 71 assert trailing_only == total assert middle_only == 0 assert mixed == 0 diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index ad497cac..3a4071dc 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -2277,6 +2277,7 @@ main(int argc, char* argv[]) bool prestack = false; bool is2D = false; bool isOffsetSorted = false; + // TODO remove traceOrderByOffset bool traceOrderByOffset = true; bool jsonOutput = false; bool help = false; @@ -2292,8 +2293,19 @@ main(int argc, char* argv[]) std::string azimuthTypeString; std::string azimuthUnitString; + enum class TraceSpacingByOffset + { + Off = 0, + On = 1, + Auto = 2 + }; + + TraceSpacingByOffset traceSpacingByOffset = TraceSpacingByOffset::Auto; + std::string traceSpacingByOffsetString; + const std::string supportedAzimuthTypes("Azimuth (from trace header field) (default), OffsetXY (computed from OffsetX and OffsetY header fields)"); const std::string supportedAzimuthUnits("Radians, Degrees (default)"); + const std::string supportedTraceSpacingTypes("Off, On, Auto (default)"); // default key names used if not supplied by user std::string defaultPrimaryKey = "InlineNumber"; @@ -2346,8 +2358,9 @@ main(int argc, char* argv[]) options.add_option("", "", "azimuth-type", std::string("Azimuth type. Supported azimuth types are: ") + supportedAzimuthTypes + ".", cxxopts::value(azimuthTypeString), ""); options.add_option("", "", "azimuth-unit", std::string("Azimuth unit. Supported azimuth units are: ") + supportedAzimuthUnits + ".", cxxopts::value(azimuthUnitString), ""); options.add_option("", "", "azimuth-scale", "Azimuth scale factor. Trace header field Azimuth values will be multiplied by this factor.", cxxopts::value(azimuthScaleFactor), ""); + options.add_option("", "", "respace-gathers", std::string("Respace traces in prestack gathers by Offset trace header field. Supported options are: ") + supportedTraceSpacingTypes + ".", cxxopts::value(traceSpacingByOffsetString), ""); - // TODO temporary option that will be removed/changed when new respace algo is implemented (traceOrderByOffset) + // TODO remove traceOrderByOffset option options.add_option("", "", "order-by-offset", "Order traces within a gather by offset.", cxxopts::value(traceOrderByOffset), ""); options.add_option("", "h", "help", "Print this help information", cxxopts::value(help), ""); @@ -2596,6 +2609,28 @@ main(int argc, char* argv[]) } } + if (!traceSpacingByOffsetString.empty()) + { + std::transform(traceSpacingByOffsetString.begin(), traceSpacingByOffsetString.end(), traceSpacingByOffsetString.begin(), asciitolower); + if (traceSpacingByOffsetString == "off") + { + traceSpacingByOffset = TraceSpacingByOffset::Off; + } + else if (traceSpacingByOffsetString == "on") + { + traceSpacingByOffset = TraceSpacingByOffset::On; + } + else if (traceSpacingByOffsetString == "auto") + { + traceSpacingByOffset = TraceSpacingByOffset::Auto; + } + else + { + OpenVDS::printError(jsonOutput, "Args", fmt::format("Unknown --respace-gathers option '{}'", traceSpacingByOffsetString)); + return EXIT_FAILURE; + } + } + SEGY::HeaderField primaryKeyHeaderField, secondaryKeyHeaderField; -- GitLab From 323261c2eccebd643632c6fc8683d3807965197b Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 8 Oct 2021 16:10:38 -0500 Subject: [PATCH 28/46] update analyzeSegment and analyzePrimaryKey --- tools/SEGYImport/SEGYImport.cpp | 109 ++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 42 deletions(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 3a4071dc..ab38fa30 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -66,6 +66,13 @@ constexpr double METERS_PER_FOOT = 0.3048; +enum class TraceSpacingByOffset +{ + Off = 0, + On = 1, + Auto = 2 +}; + int64_t GetTotalSystemMemory() { MEMORYSTATUSEX status; @@ -893,7 +900,7 @@ updateValueRangeHeaps(const SEGYFileInfo& fileInfo, const int heapSizeMax, std:: } bool -analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSegmentInfo const& segmentInfo, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, const SEGY::SEGYType segyType, int& offsetStart, int& offsetEnd, int& offsetStep, TraceInfo2DManager * pTraceInfo2DManager, bool isTraceOrderByOffset, bool jsonOutput, OpenVDS::Error& error) +analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSegmentInfo const& segmentInfo, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, const SEGY::SEGYType segyType, TraceInfo2DManager * pTraceInfo2DManager, TraceSpacingByOffset& traceSpacingByOffset, std::vector& offsetValues, bool jsonOutput, OpenVDS::Error& error) { assert(segmentInfo.m_traceStop >= segmentInfo.m_traceStart && "A valid segment info should always have a stop trace greater or equal to the start trace"); assert(pTraceInfo2DManager != nullptr); @@ -905,9 +912,7 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg valueRange = OpenVDS::FloatRange(0.0f, 1.0f); secondaryStep = 0; fold = 1; - offsetStart = 0; - offsetEnd = 0; - offsetStep = 0; + offsetValues.clear(); const int traceByteSize = fileInfo.TraceByteSize(); @@ -929,8 +934,7 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg // Determine fold and secondary step int gatherSecondaryKey = 0, gatherFold = 0, gatherSecondaryStep = 0; - std::set - offsetValues; + std::map> gatherOffsetValues; for (int64_t trace = segmentInfo.m_traceStart; trace <= segmentInfo.m_traceStop; trace++) { @@ -988,56 +992,84 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg { const auto thisOffset = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness); - offsetValues.insert(thisOffset); + gatherOffsetValues[gatherSecondaryKey].push_back(thisOffset); } // Update value range updateValueRangeHeaps(fileInfo, heapSizeMax, minHeap, maxHeap, data, sampleBuffer); } - if (fileInfo.HasGatherOffset() && !fileInfo.IsUnbinned()) + if (fileInfo.HasGatherOffset() && !fileInfo.IsUnbinned() && !gatherOffsetValues.empty()) { - offsetStart = *offsetValues.begin(); - offsetEnd = *offsetValues.rbegin(); + // use the longest gather in the segment to populate the list of offset values - // iterate over offsetValues to get offset step - if (offsetValues.size() > 1) + // find the longest gather + int + longestSecondaryKey = (*gatherOffsetValues.begin()).first; + + for (const auto& entry : gatherOffsetValues) { - auto - iter = offsetValues.begin(); - auto - previousOffset = *iter; - offsetStep = INT32_MAX; - while (++iter != offsetValues.end()) + if (entry.second.size() > gatherOffsetValues[longestSecondaryKey].size()) { - const auto - thisOffset = *iter; - offsetStep = std::min(offsetStep, thisOffset - previousOffset); - previousOffset = thisOffset; + longestSecondaryKey = entry.first; } } - else - { - offsetStep = 1; - } - if (isTraceOrderByOffset) + // copy the longest gather's offset values to the method arg + const auto + & longestGatherValues = gatherOffsetValues[longestSecondaryKey]; + offsetValues.assign(longestGatherValues.begin(), longestGatherValues.end()); + + if (traceSpacingByOffset != TraceSpacingByOffset::Off) { - // 'fold' calculated by counting traces in a gather may be incorrect if there are duplicate-key - // traces, so we'll ignore it and use the count of unique offset values - fold = static_cast(offsetValues.size()); + // Determine if offset values are suitable for computed respacing: Values are monotonically increasing + bool + isSane = false; + size_t + numSorted = 0; - // check that offset start/end/step is consistent - if (offsetStart + (fold - 1) * offsetStep != offsetEnd) + // check each gather to see if offsets are ordered within every gather + for (const auto& entry : gatherOffsetValues) { const auto - msgFormat = "The detected gather offset start/end/step of '{0}/{1}/{2}' is not consistent with the detected fold of '{3}'. This may indicate an incorrect header format for Offset trace header."; - const auto - msg = fmt::format(msgFormat, offsetStart, offsetEnd, offsetStep, fold); + & offsets = entry.second; + if (std::is_sorted(offsets.begin(), offsets.end())) + { + ++numSorted; + } + else + { + // if a gather is unordered then we're done + break; + } + } + + if (numSorted == gatherOffsetValues.size()) + { + // we can do respacing + if (traceSpacingByOffset == TraceSpacingByOffset::Auto) + { + // change the parameter to On to indicate that respacing is in effect + traceSpacingByOffset = TraceSpacingByOffset::On; + } + } + else if (traceSpacingByOffset == TraceSpacingByOffset::Auto) + { + // the offset spacing doesn't conform to our needs for respacing, so force the setting from Auto to Off + traceSpacingByOffset = TraceSpacingByOffset::Off; + } + else + { + // traceSpacingByOffset is set to On, but detected offset values aren't compatible with the respacing scheme + + auto msg = "The detected gather offset values are not monotonically increasing. This may indicate an incorrect header format for the trace header Offset field and/or an incorrect option value for --respace-gathers."; OpenVDS::printError(jsonOutput, "analyzeSegment", msg); return false; + + return false; } } + } // If the secondary step couldn't be determined, set it to the last step or 1 @@ -2293,13 +2325,6 @@ main(int argc, char* argv[]) std::string azimuthTypeString; std::string azimuthUnitString; - enum class TraceSpacingByOffset - { - Off = 0, - On = 1, - Auto = 2 - }; - TraceSpacingByOffset traceSpacingByOffset = TraceSpacingByOffset::Auto; std::string traceSpacingByOffsetString; -- GitLab From 468abd97439e4939403304e6f311c1df43a6c487 Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 8 Oct 2021 16:20:05 -0500 Subject: [PATCH 29/46] update usage of createChannelDescriptors, analyzeSegment, and analyzePrimaryKey --- tools/SEGYImport/SEGYImport.cpp | 61 ++++++++++----------------------- 1 file changed, 19 insertions(+), 42 deletions(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index ab38fa30..e253fa94 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -1095,7 +1095,7 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg // Analog of analyzeSegment for offset-sorted SEGY bool -analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo const& fileInfo, const int primaryKey, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, int& offsetStart, int& offsetEnd, int& offsetStep, bool jsonOutput, OpenVDS::Error& error) +analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo const& fileInfo, const int primaryKey, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, std::vector& offsetValues, bool jsonOutput, OpenVDS::Error& error) { assert(fileInfo.IsOffsetSorted()); @@ -1113,9 +1113,7 @@ analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo c valueRange = OpenVDS::FloatRange(0.0f, 1.0f); secondaryStep = 0; fold = 1; - offsetStart = 0; - offsetEnd = 0; - offsetStep = 0; + offsetValues.clear(); if (fileInfo.m_segmentInfoListsByOffset.empty() || fileInfo.m_segmentInfoListsByOffset.front().empty()) { @@ -1136,41 +1134,7 @@ analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo c fold = static_cast(globalOffsets.size()); - offsetStart = *globalOffsets.begin(); - offsetEnd = *globalOffsets.rbegin(); - - // find offset step - if (globalOffsets.size() > 1) - { - auto - offsetsIter = ++globalOffsets.begin(); - offsetStep = abs(*offsetsIter - offsetStart); - auto - previousOffset = *offsetsIter; - while (++offsetsIter != globalOffsets.end()) - { - offsetStep = std::min(offsetStep, abs(*offsetsIter - previousOffset)); - previousOffset = *offsetsIter; - } - } - else - { - offsetStep = 1; - } - - // check that offset start/end/step is consistent - if (offsetStart + (fold - 1) * offsetStep != offsetEnd) - { - const auto - msgFormat = "The detected gather offset start/end/step of '{0}/{1}/{2}' is not consistent with the detected fold of '{3}'. This may indicate an incorrect header format for Offset trace header."; - const auto - msg = fmt::format(msgFormat, offsetStart, offsetEnd, offsetStep, fold); - OpenVDS::printError(jsonOutput, "analyzePrimaryKey", msg); - error.code = 1; - error.string = msg; - return false; - } - + offsetValues.assign(globalOffsets.begin(), globalOffsets.end()); const int32_t traceByteSize = fileInfo.TraceByteSize(); @@ -2864,7 +2828,7 @@ main(int argc, char* argv[]) OpenVDS::FloatRange valueRange; int fold = 1, primaryStep = 1, secondaryStep = 1; - int offsetStart, offsetEnd, offsetStep; + std::vector gatherOffsetValues; const float valueRangePercentile = 99.5f; // 99.5f is the same default as Petrel uses. @@ -2885,7 +2849,7 @@ main(int argc, char* argv[]) { const auto primaryKey = findRepresentativePrimaryKey(fileInfo, primaryStep); - analyzeResult = analyzePrimaryKey(dataProviders, fileInfo, primaryKey, valueRangePercentile, valueRange, fold, secondaryStep, offsetStart, offsetEnd, offsetStep, jsonOutput, error); + analyzeResult = analyzePrimaryKey(dataProviders, fileInfo, primaryKey, valueRangePercentile, valueRange, fold, secondaryStep, gatherOffsetValues, jsonOutput, error); } else { @@ -3038,8 +3002,21 @@ main(int argc, char* argv[]) return EXIT_FAILURE; } + int + offsetStart = 0, + offsetEnd = 0; + + if (!gatherOffsetValues.empty()) + { + // use minmax_element because the offset values are not necessarily sorted (and use 1 for the offset step because the values aren't necessarily spaced consistently) + const auto + minMax = std::minmax_element(gatherOffsetValues.begin(), gatherOffsetValues.end()); + offsetStart = *minMax.first, + offsetEnd = *minMax.second;; + } + OffsetChannelInfo - offsetInfo(fileInfo.HasGatherOffset(), ConvertDistance(segyMeasurementSystem, static_cast(offsetStart)), ConvertDistance(segyMeasurementSystem, static_cast(offsetEnd)), ConvertDistance(segyMeasurementSystem, static_cast(offsetStep))); + offsetInfo(fileInfo.HasGatherOffset(), ConvertDistance(segyMeasurementSystem, static_cast(offsetStart)), ConvertDistance(segyMeasurementSystem, static_cast(offsetEnd)), ConvertDistance(segyMeasurementSystem, 1.0f)); // Create channel descriptors std::vector channelDescriptors = createChannelDescriptors(fileInfo, valueRange, offsetInfo, attributeName, attributeUnit, isAzimuth, isMutes); -- GitLab From 7263efa9b1958ab2741ec584c3b7ac3fd35345b6 Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 8 Oct 2021 16:42:58 -0500 Subject: [PATCH 30/46] add CalculateGatherSpacing --- tools/SEGYImport/SEGYImport.cpp | 151 ++++++++++++++++++++++++++++++-- 1 file changed, 146 insertions(+), 5 deletions(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index e253fa94..d7e40f28 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -2231,6 +2231,149 @@ findChannelDescriptorIndex(const std::string& channelName, const std::vector traceIndices) : m_isRespace(true), m_firstTrace(firstTrace), m_traceIndices(std::move(traceIndices)) {} + + int GetTertiaryIndex(int64_t traceNumber) + { + if (m_isRespace) + { + assert(traceNumber >= m_firstTrace && traceNumber < m_firstTrace + static_cast(m_traceIndices.size())); + return m_traceIndices[traceNumber]; + } + + assert(traceNumber >= m_firstTrace); + return static_cast(traceNumber - m_firstTrace); + } + +private: + bool m_isRespace; + int64_t m_firstTrace; + std::map + m_traceIndices; +}; + +std::unique_ptr +CalculateGatherSpacing(const SEGYFileInfo& fileInfo, const int fold, const std::vector& globalOffsetValues, TraceDataManager& traceDataManager, TraceSpacingByOffset traceSpacingByOffset, int64_t firstTrace, bool jsonOutput) +{ + if (fileInfo.Is4D() && traceSpacingByOffset != TraceSpacingByOffset::Off) + { + OpenVDS::Error + error; + const char + * header = traceDataManager.getTraceData(firstTrace, error); + if (error.code != 0) + { + return std::unique_ptr(new GatherSpacing(firstTrace)); + } + + const auto + primaryKey = SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness), + secondaryKey = SEGY::ReadFieldFromHeader(header, fileInfo.m_secondaryKey, fileInfo.m_headerEndianness); + + std::vector + gatherOffsets; + gatherOffsets.reserve(fold); + gatherOffsets.push_back(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness)); + + // read traces and stuff offset into a vector while primaryKey/secondaryKey match + int64_t + trace = firstTrace + 1; + for (int64_t trace = firstTrace + 1; trace < firstTrace + fold; ++trace) + { + header = traceDataManager.getTraceData(trace, error); + if (error.code != 0) + { + break; + } + const auto + testPrimary = SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness), + testSecondary = SEGY::ReadFieldFromHeader(header, fileInfo.m_secondaryKey, fileInfo.m_headerEndianness); + if (testPrimary != primaryKey || testSecondary != secondaryKey) + { + break; + } + + gatherOffsets.push_back(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness)); + } + + // apply respace algorithm + int + gatherIndex = 0, + offsetIndex = 0, + tertiaryIndex = 0; + std::map + traceIndices; + auto + hasRoom = [&fold, &tertiaryIndex, &gatherOffsets, &gatherIndex]() { return (fold - tertiaryIndex) - (gatherOffsets.size() - gatherIndex) > 0; }; + auto + offsetAtIndex = [&globalOffsetValues](int index) { return globalOffsetValues[index]; }; + + // skip tertiary indices before the first trace's offset + while (hasRoom() && offsetIndex < fold && offsetAtIndex(offsetIndex) != gatherOffsets[0]) + { + ++offsetIndex; + ++tertiaryIndex; + } + + // map traces to tertiary indices while we have room for dead traces and we have more input traces + while (hasRoom() && gatherIndex < gatherOffsets.size()) + { + // process all the traces whose offset matches the current offset + const auto + currentOffset = offsetAtIndex(offsetIndex); + while (hasRoom() && gatherIndex < gatherOffsets.size() && gatherOffsets[gatherIndex] == currentOffset) + { + traceIndices[firstTrace + gatherIndex] = tertiaryIndex; + ++gatherIndex; + ++tertiaryIndex; + } + + // advance to the next offset + ++offsetIndex; + + // try to advance to tertiary index so that the next trace's offset is placed near-ish to where it might be expected + if (gatherIndex < gatherOffsets.size() && tertiaryIndex <= offsetIndex) + { + while (hasRoom() && gatherOffsets[gatherIndex] != offsetAtIndex(offsetIndex)) + { + ++tertiaryIndex; + ++offsetIndex; + + // hasRoom() will effectively take care of bounds-checking tertiaryIndex, but we need to explicitly check offsetIndex + if (offsetIndex >= fold) + { + // if the trace's offset doesn't match any expected offset values then this is unexpected and may indicate a data problem + const auto + warnString = fmt::format("Warning: Unknown trace header Offset value {} found at primary key {}, secondary key {}. This may indicate corrupt data or an incorrect header format.", gatherOffsets[gatherIndex], primaryKey, secondaryKey); + OpenVDS::printWarning(jsonOutput, "Data", warnString); + + // return a no-respacing GatherSpacing + return std::unique_ptr(new GatherSpacing(firstTrace)); + } + } + } + } + + // map remaining traces to remaining indices + while (gatherIndex < gatherOffsets.size()) + { + traceIndices[firstTrace + gatherIndex] = tertiaryIndex; + ++gatherIndex; + ++tertiaryIndex; + } + + return std::unique_ptr(new GatherSpacing(firstTrace, traceIndices)); + } + + // else return a GatherSpacing that doesn't alter the spacing + return std::unique_ptr(new GatherSpacing(firstTrace)); +} + int main(int argc, char* argv[]) { @@ -2856,7 +2999,7 @@ main(int argc, char* argv[]) int fileIndex; const auto& representativeSegment = findRepresentativeSegment(fileInfo, primaryStep, fileIndex); assert(fileIndex < dataProviders.size()); - analyzeResult = analyzeSegment(dataProviders[fileIndex], fileInfo, representativeSegment, valueRangePercentile, valueRange, fold, secondaryStep, segyType, offsetStart, offsetEnd, offsetStep, traceInfo2DManager.get(), traceOrderByOffset, jsonOutput, error); + analyzeResult = analyzeSegment(dataProviders[fileIndex], fileInfo, representativeSegment, valueRangePercentile, valueRange, fold, secondaryStep, segyType, traceInfo2DManager.get(), traceSpacingByOffset, gatherOffsetValues, jsonOutput, error); } if (!analyzeResult || error.code != 0) @@ -3002,9 +3145,7 @@ main(int argc, char* argv[]) return EXIT_FAILURE; } - int - offsetStart = 0, - offsetEnd = 0; + int offsetStart = 0, offsetEnd = 0; if (!gatherOffsetValues.empty()) { @@ -3012,7 +3153,7 @@ main(int argc, char* argv[]) const auto minMax = std::minmax_element(gatherOffsetValues.begin(), gatherOffsetValues.end()); offsetStart = *minMax.first, - offsetEnd = *minMax.second;; + offsetEnd = *minMax.second; } OffsetChannelInfo -- GitLab From 45e697855a2f0fe5b36a55ba2682f4fb6b85171a Mon Sep 17 00:00:00 2001 From: Jim King Date: Fri, 8 Oct 2021 16:58:02 -0500 Subject: [PATCH 31/46] port remainder of changes --- tools/SEGYImport/SEGYImport.cpp | 40 ++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index d7e40f28..0e0dd7fd 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -3340,8 +3340,10 @@ main(int argc, char* argv[]) for (size_t fileIndex = 0; fileIndex < segmentInfoListsSize; ++fileIndex) { + assert(fileInfo.IsOffsetSorted() ? chunkInfo.min[1] < gatherOffsetValues.size() : true); + const int - offetSortedOffsetValue = fileInfo.IsOffsetSorted() ? offsetStart + offsetStep * chunkInfo.min[1] : 0; + offsetSortedOffsetValue = fileInfo.IsOffsetSorted() ? gatherOffsetValues[chunkInfo.min[1]] : 0; if (fileInfo.IsOffsetSorted()) { @@ -3349,7 +3351,7 @@ main(int argc, char* argv[]) assert(chunkInfo.min[1] + 1 == chunkInfo.max[1]); // If the calculated offset value isn't present in this file's map then skip - if (fileInfo.m_segmentInfoListsByOffset[fileIndex].find(offetSortedOffsetValue) == fileInfo.m_segmentInfoListsByOffset[fileIndex].end()) + if (fileInfo.m_segmentInfoListsByOffset[fileIndex].find(offsetSortedOffsetValue) == fileInfo.m_segmentInfoListsByOffset[fileIndex].end()) { continue; } @@ -3357,7 +3359,7 @@ main(int argc, char* argv[]) auto& segmentInfoList = fileInfo.IsOffsetSorted() - ? fileInfo.m_segmentInfoListsByOffset[fileIndex][offetSortedOffsetValue] + ? fileInfo.m_segmentInfoListsByOffset[fileIndex][offsetSortedOffsetValue] : fileInfo.m_segmentInfoLists[fileIndex]; // does this file have any segments in the primary key range? @@ -3433,9 +3435,6 @@ main(int argc, char* argv[]) auto &chunkInfo = chunkInfos[chunk]; - const int - offsetSortedOffsetValue = fileInfo.IsOffsetSorted() ? offsetStart + offsetStep * chunkInfo.min[1] : 0; - // if we've crossed to a new inline then trim the trace page cache if (chunk > 0) { @@ -3529,6 +3528,11 @@ main(int argc, char* argv[]) continue; } + assert(fileInfo.IsOffsetSorted() ? chunkInfo.min[1] < gatherOffsetValues.size() : true); + + const int + offsetSortedOffsetValue = fileInfo.IsOffsetSorted() ? gatherOffsetValues[chunkInfo.min[1]] : 0; + const auto lowerSegmentIndex = std::get<0>(result->second); const auto upperSegmentIndex = std::get<1>(result->second); @@ -3564,6 +3568,9 @@ main(int argc, char* argv[]) tertiaryIndex = 0, currentSecondaryKey = chunkInfo.secondaryKeyStart; + auto + gatherSpacing = CalculateGatherSpacing(fileInfo, fold, gatherOffsetValues, traceDataManager, traceSpacingByOffset, firstTrace, jsonOutput); + for (int64_t trace = firstTrace; trace <= segment->m_traceStop; trace++, tertiaryIndex++) { const char* header = traceDataManager.getTraceData(trace, error); @@ -3589,6 +3596,9 @@ main(int argc, char* argv[]) // we've progressed to a new secondary key, so reset the tertiary (gather) index currentSecondaryKey = secondaryTest; tertiaryIndex = 0; + + // then get respace info for the next gather + gatherSpacing = CalculateGatherSpacing(fileInfo, fold, gatherOffsetValues, traceDataManager, traceSpacingByOffset, trace, jsonOutput); } int @@ -3613,7 +3623,7 @@ main(int argc, char* argv[]) assert(primaryIndex >= chunkInfo.min[PrimaryKeyDimension(fileInfo)] && primaryIndex < chunkInfo.max[PrimaryKeyDimension(fileInfo)]); assert(secondaryIndex >= chunkInfo.min[SecondaryKeyDimension(fileInfo)] && secondaryIndex < chunkInfo.max[SecondaryKeyDimension(fileInfo)]); - if (fileInfo.Is4D() && traceOrderByOffset) + if (fileInfo.Is4D()) { if (fileInfo.IsOffsetSorted()) { @@ -3624,18 +3634,12 @@ main(int argc, char* argv[]) } else { - if (traceOrderByOffset) + tertiaryIndex = gatherSpacing->GetTertiaryIndex(trace); + + // sanity check the new index + if (tertiaryIndex < 0 || tertiaryIndex >= fold) { - // recalculate tertiaryIndex from header offset value - const auto - thisOffset = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness); - tertiaryIndex = (thisOffset - offsetStart) / offsetStep; - - // sanity check the new index - if (tertiaryIndex < 0 || tertiaryIndex >= fold) - { - continue; - } + continue; } } -- GitLab From 8145c9b9c09ab0df6efeeec07f44313f7c185ff1 Mon Sep 17 00:00:00 2001 From: Jim King Date: Mon, 11 Oct 2021 14:16:50 -0500 Subject: [PATCH 32/46] refactor tests to avoid code duplication; add check for traces with duplicate offset --- .../pytests/test_prestack_gather_spacing.py | 118 +++++++----------- 1 file changed, 46 insertions(+), 72 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py b/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py index 10910da6..a7f56a7f 100644 --- a/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py +++ b/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py @@ -33,20 +33,6 @@ def prestack_segy() -> str: "3D_Prestack", "ST0202R08_Gather_Time.segy") -@pytest.fixture -def default_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: - """Setup an ImportExecutor with no arg for respacing""" - ex = construct_respace_executor(output_vds, prestack_segy, None) - return ex, output_vds - - -@pytest.fixture -def auto_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: - """Setup an ImportExecutor with respacing set to Auto""" - ex = construct_respace_executor(output_vds, prestack_segy, "Auto") - return ex, output_vds - - @pytest.fixture def off_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: """Setup an ImportExecutor with respacing set to Off""" @@ -54,13 +40,6 @@ def off_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuar return ex, output_vds -@pytest.fixture -def on_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: - """Setup an ImportExecutor with respacing set to On""" - ex = construct_respace_executor(output_vds, prestack_segy, "On") - return ex, output_vds - - @pytest.fixture def invalid_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: """Setup an ImportExecutor with respacing set to an invalid value""" @@ -68,7 +47,7 @@ def invalid_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDS return ex, output_vds -def get_gathers_stats(vds_filename: str) -> Tuple[int, int, int, int, int, int]: +def get_gathers_stats(vds_filename: str) -> Tuple[int, int, int, int, int, int, int]: """ Read all the gathers in a single inline and return stats on how many gathers have dead traces in various regions of the gather. @@ -80,6 +59,7 @@ def get_gathers_stats(vds_filename: str) -> Tuple[int, int, int, int, int, int]: number of gathers with dead traces only at end of gather, number of gathers with dead traces in multiple places, number of gathers with no dead traces + number of consecutive duplicate offset values within a gather ) """ with openvds.open(vds_filename, "") as handle: @@ -89,6 +69,9 @@ def get_gathers_stats(vds_filename: str) -> Tuple[int, int, int, int, int, int]: trace_channel = layout.getChannelIndex("Trace") assert trace_channel > 0, "Trace channel not found" + offset_channel = layout.getChannelIndex("Offset") + assert offset_channel > 0, "Offset channel not found" + # only read one inline; make it the middle inline inline_index = layout.getDimensionNumSamples(3) // 2 dim2_size = layout.getDimensionNumSamples(2) @@ -100,18 +83,27 @@ def get_gathers_stats(vds_filename: str) -> Tuple[int, int, int, int, int, int]: end_only = 0 mixed = 0 no_dead = 0 + duplicate_offsets = 0 for crossline_index in range(dim2_size): trace_flag_request = access_manager.requestVolumeSubset((0, 0, crossline_index, inline_index, 0, 0), - (1, dim1_size, crossline_index + 1, inline_index + 1, 1, - 1), + (1, dim1_size, crossline_index + 1, + inline_index + 1, 1, 1), channel=trace_channel, format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, dimensionsND=openvds.DimensionsND.Dimensions_012) + offset_request = access_manager.requestVolumeSubset((0, 0, crossline_index, inline_index, 0, 0), + (1, dim1_size, crossline_index + 1, inline_index + 1, 1, + 1), + channel=offset_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_012) + assert dim1_size == trace_flag_request.data.shape[0] + assert dim1_size == offset_request.data.shape[0] - # analyze trace_flag_request.data to figure out which stats counter to bump + # analyze trace flag and offset data to figure out which stats counters to bump dead_trace_ranges = [] i = 0 @@ -133,14 +125,19 @@ def get_gathers_stats(vds_filename: str) -> Tuple[int, int, int, int, int, int]: start, stop = dead_trace_ranges[0] if start == 0: front_only += 1 - elif stop == 0: + elif stop == trace_flag_request.data.shape[0] - 1: end_only += 1 else: middle_only += 1 else: mixed += 1 - return dim2_size, front_only, middle_only, end_only, mixed, no_dead + # Find any traces with duplicate offset values within the gather + for i in range(1, offset_request.data.shape[0]): + if offset_request.data[i] == offset_request.data[i - 1]: + duplicate_offsets += 1 + + return dim2_size, front_only, middle_only, end_only, mixed, no_dead, duplicate_offsets def test_gather_spacing_invalid_arg(invalid_executor): @@ -152,58 +149,31 @@ def test_gather_spacing_invalid_arg(invalid_executor): assert "unknown --respace-gathers option" in ex.output().lower() -def test_gather_spacing_default(default_executor): - ex, output_vds = default_executor - - result = ex.run() - - assert result == 0, ex.output() - assert Path(output_vds.filename).exists() - - total, leading_only, middle_only, trailing_only, mixed, no_dead = get_gathers_stats(output_vds.filename) - - assert total == 71 - assert middle_only > 0 - assert mixed > 0 - assert trailing_only == 0 - assert leading_only > 0 - assert no_dead == 0 - - -def test_gather_spacing_auto(auto_executor): - ex, output_vds = auto_executor +@pytest.mark.parametrize("respace_option", [None, "Auto", "On"]) +def test_gather_spacing_with_on_variations(prestack_segy, output_vds, respace_option): + """ + Parameterized test for the different ways to execute the importer that all result in having gather + respacing turned On. + """ + ex = construct_respace_executor(output_vds, prestack_segy, respace_option) result = ex.run() assert result == 0, ex.output() assert Path(output_vds.filename).exists() - total, leading_only, middle_only, trailing_only, mixed, no_dead = get_gathers_stats(output_vds.filename) + total, leading_only, middle_only, trailing_only, mixed, no_dead, duplicate_offsets =\ + get_gathers_stats(output_vds.filename) assert total == 71 - assert middle_only > 0 - assert mixed > 0 + assert middle_only == 23 + assert mixed == 0 assert trailing_only == 0 - assert leading_only > 0 - assert no_dead == 0 - - -def test_gather_spacing_on(on_executor): - ex, output_vds = on_executor - - result = ex.run() - - assert result == 0, ex.output() - assert Path(output_vds.filename).exists() + assert leading_only == 0 + assert no_dead == 48 - total, leading_only, middle_only, trailing_only, mixed, no_dead = get_gathers_stats(output_vds.filename) - - assert total == 71 - assert middle_only > 0 - assert mixed > 0 - assert trailing_only == 0 - assert leading_only > 0 - assert no_dead == 0 + # There traces with duplicate offsets in this data, and the importer must preserve all those traces + assert duplicate_offsets == 40 def test_gather_spacing_off(off_executor): @@ -214,11 +184,15 @@ def test_gather_spacing_off(off_executor): assert result == 0, ex.output() assert Path(output_vds.filename).exists() - total, leading_only, middle_only, trailing_only, mixed, no_dead = get_gathers_stats(output_vds.filename) + total, leading_only, middle_only, trailing_only, mixed, no_dead, duplicate_offsets =\ + get_gathers_stats(output_vds.filename) assert total == 71 - assert trailing_only == total assert middle_only == 0 assert mixed == 0 + assert trailing_only == 23 assert leading_only == 0 - assert no_dead == 0 + assert no_dead == 48 + + # There traces with duplicate offsets in this data, and the importer must preserve all those traces + assert duplicate_offsets == 40 -- GitLab From f3d62e5dfd6fdcd6050bf68d287975d0c8e0c7d4 Mon Sep 17 00:00:00 2001 From: Jim King Date: Mon, 11 Oct 2021 15:40:24 -0500 Subject: [PATCH 33/46] fix bug where CalculateGatherSpacing may try to read past end of file --- src/SEGYUtils/SEGYUtils/TraceDataManager.h | 6 ++++++ tools/SEGYImport/SEGYImport.cpp | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/SEGYUtils/SEGYUtils/TraceDataManager.h b/src/SEGYUtils/SEGYUtils/TraceDataManager.h index 7cfc7b63..3bb073d0 100644 --- a/src/SEGYUtils/SEGYUtils/TraceDataManager.h +++ b/src/SEGYUtils/SEGYUtils/TraceDataManager.h @@ -141,6 +141,12 @@ public: m_dataViewManager->retireAllDataViews(); } + int64_t + fileTraceCount() const + { + return m_numTraces; + } + private: std::shared_ptr m_dataViewManager; diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 0e0dd7fd..26321cd9 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -2283,7 +2283,7 @@ CalculateGatherSpacing(const SEGYFileInfo& fileInfo, const int fold, const std:: // read traces and stuff offset into a vector while primaryKey/secondaryKey match int64_t trace = firstTrace + 1; - for (int64_t trace = firstTrace + 1; trace < firstTrace + fold; ++trace) + for (int64_t trace = firstTrace + 1; trace < traceDataManager.fileTraceCount() && trace < firstTrace + fold; ++trace) { header = traceDataManager.getTraceData(trace, error); if (error.code != 0) -- GitLab From 20571b5eb0b3d222d340cca71fc11ee3581f34e9 Mon Sep 17 00:00:00 2001 From: Jim King Date: Wed, 13 Oct 2021 13:11:02 -0500 Subject: [PATCH 34/46] tests for crossline sorted SEGY --- .../pytests/segyimport_test_config.py | 17 +- .../pytests/test_crossline_sorted.py | 295 ++++++++++++++++++ 2 files changed, 311 insertions(+), 1 deletion(-) create mode 100644 tests/tools/SEGYImport/pytests/test_crossline_sorted.py diff --git a/tests/tools/SEGYImport/pytests/segyimport_test_config.py b/tests/tools/SEGYImport/pytests/segyimport_test_config.py index 1a69a068..fdcd8148 100644 --- a/tests/tools/SEGYImport/pytests/segyimport_test_config.py +++ b/tests/tools/SEGYImport/pytests/segyimport_test_config.py @@ -5,7 +5,8 @@ import os import subprocess import tempfile import weakref -from typing import List, Union, Any +import pytest +from typing import List test_data_dir = "c:\\temp\\SEGY\\RegressionTestData" @@ -60,3 +61,17 @@ class TempVDSGuard(TempFileGuard): class TempScanFileGuard(TempFileGuard): def __init__(self, base_name="scan_test"): super().__init__(base_name, ".scan.json") + + +@pytest.fixture +def teleport_test_data_dir() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "Teleport") + + +@pytest.fixture +def segyimport_test_data_dir() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest") + + +def platform_integration_test_data_dir() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration") diff --git a/tests/tools/SEGYImport/pytests/test_crossline_sorted.py b/tests/tools/SEGYImport/pytests/test_crossline_sorted.py new file mode 100644 index 00000000..36c1e622 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_crossline_sorted.py @@ -0,0 +1,295 @@ +from pathlib import Path +from typing import List, Tuple + +import openvds +import pytest +import os + +from segyimport_test_config import ImportExecutor, TempVDSGuard, segyimport_test_data_dir, teleport_test_data_dir, \ + platform_integration_test_data_dir + + +def construct_crossline_executor(segy_filename: str, output_vds: TempVDSGuard, + additional_args: List[str] = None) -> ImportExecutor: + ex = ImportExecutor() + + ex.add_args(["--primary-key", "CrosslineNumber"]) + ex.add_args(["--secondary-key", "InlineNumber"]) + ex.add_args(["--vdsfile", output_vds.filename]) + if additional_args: + ex.add_args(additional_args) + ex.add_arg(segy_filename) + + return ex + + +@pytest.fixture +def crossline_output_vds() -> TempVDSGuard: + return TempVDSGuard("import_crossline_test") + + +@pytest.fixture +def conventional_output_vds() -> TempVDSGuard: + return TempVDSGuard("import_conventional_test") + + +@pytest.fixture +def teleport_conventional_segy(teleport_test_data_dir) -> str: + return os.path.join(teleport_test_data_dir, "Teleport_Trim", "3D_Stack", "ST0202R08_TIME.segy") + + +@pytest.fixture +def teleport_crossline_sorted_segy(segyimport_test_data_dir) -> str: + return os.path.join(segyimport_test_data_dir, "CrosslineSorted", "ST0202R08_TIME_crossline_sorted.segy") + + +@pytest.fixture +def teleport_crossline_executor(teleport_crossline_sorted_segy, crossline_output_vds)\ + -> Tuple[ImportExecutor, TempVDSGuard]: + ex = construct_crossline_executor(teleport_crossline_sorted_segy, crossline_output_vds) + return ex, crossline_output_vds + + +@pytest.fixture +def teleport_conventional_executor(teleport_conventional_segy, conventional_output_vds)\ + -> Tuple[ImportExecutor, TempVDSGuard]: + ex = ImportExecutor() + + ex.add_args(["--vdsfile", conventional_output_vds.filename]) + ex.add_arg(teleport_conventional_segy) + + return ex, conventional_output_vds + + +def get_scs_metadata_vectors(layout: openvds.VolumeDataLayout) -> Tuple[Tuple[float, float], Tuple[float, float], + Tuple[float, float]]: + """Convenience method to retrieve survey coordinate system vectors from VDS metadata""" + + origin = layout.getMetadataDoubleVector2(openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name) + inline_spacing = layout.getMetadataDoubleVector2( + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().name) + crossline_spacing = layout.getMetadataDoubleVector2( + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().name) + + return origin, inline_spacing, crossline_spacing + + +def test_coordinate_system(teleport_crossline_executor): + """ + Examine the survey coordinate system metadata for a crossline-sorted SEGY. + + Note that these survey coordinate system values should be the same as the values examined in + test_coordinate_system_comparison, but we'll check explicit values here to ensure that the importer + is producing expected values (and not just two sets of incorrect but identical values). + """ + ex, output_vds = teleport_crossline_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + origin, inline_spacing, crossline_spacing = get_scs_metadata_vectors(layout) + + assert origin[0] == pytest.approx(431990.979266) + assert origin[1] == pytest.approx(6348549.7101) + assert inline_spacing[0] == pytest.approx(3.021094) + assert inline_spacing[1] == pytest.approx(12.125) + assert crossline_spacing[0] == pytest.approx(-12.128862) + assert crossline_spacing[1] == pytest.approx(3.022056) + + +def test_coordinate_system_comparison(teleport_crossline_executor, teleport_conventional_executor): + """ + Compare the survey coordinate system metadata for a crossline-sorted SEGY with the same data stored in a + conventionally-sorted SEGY. + """ + xl_ex, xl_output_vds = teleport_crossline_executor + + result = xl_ex.run() + + assert result == 0, xl_ex.output() + assert Path(xl_output_vds.filename).exists() + + conv_ex, conv_output_vds = teleport_crossline_executor + + result = conv_ex.run() + + assert result == 0, conv_ex.output() + assert Path(conv_output_vds.filename).exists() + + with openvds.open(xl_output_vds.filename, "") as xl_handle,\ + openvds.open(conv_output_vds.filename, "") as conv_handle: + xl_layout = openvds.getLayout(xl_handle) + conv_layout = openvds.getLayout(conv_handle) + + xl_origin, xl_inline, xl_crossline = get_scs_metadata_vectors(xl_layout) + conv_origin, conv_inline, conv_crossline = get_scs_metadata_vectors(conv_layout) + + assert xl_origin == conv_origin + assert xl_inline == conv_inline + assert xl_crossline == conv_crossline + + +def test_samples_comparison(teleport_crossline_executor, teleport_conventional_executor): + xl_ex, xl_output_vds = teleport_crossline_executor + + result = xl_ex.run() + + assert result == 0, xl_ex.output() + assert Path(xl_output_vds.filename).exists() + + conv_ex, conv_vds = teleport_conventional_executor + + result = conv_ex.run() + + assert result == 0, conv_ex.output() + assert Path(conv_vds.filename).exists() + + with openvds.open(xl_output_vds.filename, "") as handle_xl, openvds.open(conv_vds.filename, "") as handle_conv: + access_manager_xl = openvds.getAccessManager(handle_xl) + layout_xl = openvds.getLayout(handle_xl) + + access_manager_conv = openvds.getAccessManager(handle_conv) + layout_conv = openvds.getLayout(handle_conv) + + num_z = layout_xl.getDimensionNumSamples(0) + num_crossline = layout_xl.getDimensionNumSamples(1) + num_inline = layout_xl.getDimensionNumSamples(2) + + assert num_z == layout_conv.getDimensionNumSamples(0) + assert num_crossline == layout_conv.getDimensionNumSamples(1) + assert num_inline == layout_conv.getDimensionNumSamples(2) + + for inline_index in range(num_inline): + voxel_min = (0, 0, inline_index, 0, 0, 0) + voxel_max = (num_z, num_crossline, inline_index + 1, 1, 1) + + request = access_manager_xl.requestVolumeSubset(voxel_min, voxel_max, + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_012) + request_conv = access_manager_conv.requestVolumeSubset(voxel_min, voxel_max, + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_012) + + sample_data = request.data.reshape(num_crossline, num_z) + sample_data_conv = request_conv.data.reshape(num_crossline, num_z) + + for crossline_index in range(num_crossline): + for sample_index in range(num_z): + assert sample_data[crossline_index, sample_index] == sample_data_conv[crossline_index, sample_index],\ + f"sample index {sample_index} xl index {crossline_index} il index {inline_index}" + + +poststack_axes = [ + (1477, "Sample", "ms", 0.0, 5904.0), + (101, "Crossline", "unitless", 2000.0, 2100.0), + (101, "Inline", "unitless", 1000.0, 1100.0), + () +] + +prestack_axes = [ + (1477, "Sample", "ms", 0.0, 5904.0), + (60, "Trace (offset)", "unitless", 1.0, 60.0), + (10, "Crossline", "unitless", 2031.0, 2040.0), + (101, "Inline", "unitless", 1000.0, 1100.0) +] + +poststack_survey_vectors = [ + (76290.81610169, 949398.18641265), + (-15.5, 19.609375), + (19.60459195, 15.49621929) +] + +prestack_survey_vectors = [ + (530620.06457521, 5789005.97294581), + (-15.51, 19.625), + (19.63167672, 15.51527674) +] + +poststack_segy_filename = os.path.join(platform_integration_test_data_dir(), "ImporterTests", + "poststack_Xl-inc_In-inc.segy") +prestack_segy_filename = os.path.join(platform_integration_test_data_dir(), "ImporterTests", + "prestack_Xl-inc_In-inc_2031-2040.segy") + + +@pytest.mark.parametrize("test_params", [(poststack_segy_filename, 3, poststack_axes, poststack_survey_vectors), + (prestack_segy_filename, 4, prestack_axes, prestack_survey_vectors)]) +def test_dimensions_and_produce_status(crossline_output_vds, + test_params: Tuple[str, int, List[Tuple[int, str, str, float, float]], + List[Tuple[float, float]]]): + segy_filename, dimensions, axes, scs_vectors = test_params + + additional_args = [ + "--header-field", "InlineNumber=17:4", + "--header-field", "CrosslineNumber=17:4", + "--header-field", "EnsembleXCoordinateByteLocation=73:4", + "--header-field", "EnsembleYCoordinateByteLocation=77:4" + ] + if dimensions == 4: + additional_args.append("--prestack") + + ex = construct_crossline_executor(segy_filename, crossline_output_vds, additional_args) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(crossline_output_vds.filename).exists() + + with openvds.open(crossline_output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + access_manager = openvds.getAccessManager(handle) + + sample_channel_name = layout.getChannelName(0) + + # check produce status + + if dimensions == 4: + # Dim_013 should be available/remapped + assert access_manager.getVDSProduceStatus( + openvds.DimensionsND.Dimensions_013) != openvds.VDSProduceStatus.Unavailable + + # These dimensions (and others) should be unavailable + assert access_manager.getVDSProduceStatus( + openvds.DimensionsND.Dimensions_01) == openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus( + openvds.DimensionsND.Dimensions_012) == openvds.VDSProduceStatus.Unavailable + elif dimensions == 3: + # Dim_012 should be available/remapped + assert access_manager.getVDSProduceStatus( + openvds.DimensionsND.Dimensions_012) != openvds.VDSProduceStatus.Unavailable + + # These dimensions (and others) should be unavailable + assert access_manager.getVDSProduceStatus( + openvds.DimensionsND.Dimensions_01) == openvds.VDSProduceStatus.Unavailable + else: + assert False, f"Invalid number of VDS dimensions: {dimensions}" + + # check axes + + for dim in range(dimensions): + descriptor = layout.getAxisDescriptor(dim) + axis_props = axes[dim] + assert descriptor.getNumSamples() == axis_props[0] + assert descriptor.getName() == axis_props[1] + assert descriptor.getUnit() == axis_props[2] + assert descriptor.getCoordinateMin() == axis_props[3] + assert descriptor.getCoordinateMax() == axis_props[4] + + # check survey coordinate system vectors + + origin, inline_spacing, crossline_spacing = get_scs_metadata_vectors(layout) + assert origin[0] == scs_vectors[0][0] + assert origin[1] == scs_vectors[0][1] + assert inline_spacing[0] == scs_vectors[1][0] + assert inline_spacing[1] == scs_vectors[1][1] + assert crossline_spacing[0] == scs_vectors[2][0] + assert crossline_spacing[1] == scs_vectors[2][1] -- GitLab From 4fec8c14e95b502ac7e1c81006cc570b21ba46b1 Mon Sep 17 00:00:00 2001 From: Jim King Date: Wed, 13 Oct 2021 17:11:34 -0500 Subject: [PATCH 35/46] tweak tests --- .../SEGYImport/pytests/test_crossline_sorted.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_crossline_sorted.py b/tests/tools/SEGYImport/pytests/test_crossline_sorted.py index 36c1e622..4ffb9c44 100644 --- a/tests/tools/SEGYImport/pytests/test_crossline_sorted.py +++ b/tests/tools/SEGYImport/pytests/test_crossline_sorted.py @@ -99,10 +99,10 @@ def test_coordinate_system(teleport_crossline_executor): assert origin[0] == pytest.approx(431990.979266) assert origin[1] == pytest.approx(6348549.7101) - assert inline_spacing[0] == pytest.approx(3.021094) - assert inline_spacing[1] == pytest.approx(12.125) - assert crossline_spacing[0] == pytest.approx(-12.128862) - assert crossline_spacing[1] == pytest.approx(3.022056) + assert inline_spacing[0] == pytest.approx(3.02145) + assert inline_spacing[1] == pytest.approx(12.13037) + assert crossline_spacing[0] == pytest.approx(-12.12871) + assert crossline_spacing[1] == pytest.approx(3.024143) def test_coordinate_system_comparison(teleport_crossline_executor, teleport_conventional_executor): @@ -180,6 +180,9 @@ def test_samples_comparison(teleport_crossline_executor, teleport_conventional_e format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, dimensionsND=openvds.DimensionsND.Dimensions_012) + assert len(request.data) == num_z * num_crossline + assert len(request_conv.data) == num_z * num_crossline + sample_data = request.data.reshape(num_crossline, num_z) sample_data_conv = request_conv.data.reshape(num_crossline, num_z) @@ -236,6 +239,8 @@ def test_dimensions_and_produce_status(crossline_output_vds, ] if dimensions == 4: additional_args.append("--prestack") + # Prestack file has lots of empty traces? Need to ignore it. + additional_args.append("--ignore-warnings") ex = construct_crossline_executor(segy_filename, crossline_output_vds, additional_args) -- GitLab From d4d7355f0688c6bd2dc16687622f319629ddbbfd Mon Sep 17 00:00:00 2001 From: Jim King Date: Wed, 13 Oct 2021 17:12:10 -0500 Subject: [PATCH 36/46] port implementation for crossline-sorted SEGY import --- src/SEGYUtils/SEGYFileInfo.cpp | 4 +- tools/SEGYImport/SEGYImport.cpp | 227 +++++++++++++++++++++++--------- 2 files changed, 169 insertions(+), 62 deletions(-) diff --git a/src/SEGYUtils/SEGYFileInfo.cpp b/src/SEGYUtils/SEGYFileInfo.cpp index e3220731..99f02f6c 100644 --- a/src/SEGYUtils/SEGYFileInfo.cpp +++ b/src/SEGYUtils/SEGYFileInfo.cpp @@ -240,7 +240,7 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro SEGYBinInfo outsideBinInfo; - int primaryKey = ReadFieldFromHeader(traceHeader, primaryKeyHeaderField, m_headerEndianness), nextPrimaryKey = 0; + int primaryKey = Is2D() ? 0 : ReadFieldFromHeader(traceHeader, primaryKeyHeaderField, m_headerEndianness), nextPrimaryKey = 0; SEGYSegmentInfo segmentInfo(primaryKey, 0, readBinInfoFromHeader(traceHeader, binInfoHeaderFields, m_headerEndianness, 1)); @@ -262,7 +262,7 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro } readCount++; - primaryKey = ReadFieldFromHeader(traceHeader, primaryKeyHeaderField, m_headerEndianness); + primaryKey = Is2D() ? 0 : ReadFieldFromHeader(traceHeader, primaryKeyHeaderField, m_headerEndianness); testOffset = readOffset(traceHeader); if (primaryKey == segmentInfo.m_primaryKey && (!IsOffsetSorted() || testOffset == currentOffset)) // expand current segment if the primary key matches diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 26321cd9..a158fceb 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -73,6 +73,13 @@ enum class TraceSpacingByOffset Auto = 2 }; +enum class PrimaryKeyValue +{ + Other = 0, + InlineNumber = 1, + CrosslineNumber = 2 +}; + int64_t GetTotalSystemMemory() { MEMORYSTATUSEX status; @@ -1383,6 +1390,26 @@ findFirstLastInlineSegmentInfos(const SEGYFileInfo& fileInfo, const std::vector< return { std::cref(fileInfo.m_segmentInfoLists[orderedListIndices.front()].front()), std::cref(fileInfo.m_segmentInfoLists[orderedListIndices.back()].back()) }; } +int +BinInfoPrimaryKeyValue(PrimaryKeyValue primaryKey, const SEGYBinInfo& binInfo) +{ + if (primaryKey == PrimaryKeyValue::CrosslineNumber) + { + return binInfo.m_crosslineNumber; + } + return binInfo.m_inlineNumber; +} + +int +BinInfoSecondaryKeyValue(PrimaryKeyValue primaryKey, const SEGYBinInfo& binInfo) +{ + if (primaryKey == PrimaryKeyValue::CrosslineNumber) + { + return binInfo.m_inlineNumber; + } + return binInfo.m_crosslineNumber; +} + double ConvertDistance(SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, double distance) { @@ -1440,41 +1467,41 @@ createSEGYMetadata(DataProvider &dataProvider, SEGYFileInfo const &fileInfo, Ope } void -createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, std::string const &crsWkt, OpenVDS::MetadataContainer& metadataContainer) +createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, std::string const &crsWkt, OpenVDS::MetadataContainer& metadataContainer, PrimaryKeyValue primaryKey) { if (fileInfo.m_segmentInfoLists.empty() && fileInfo.m_segmentInfoListsByOffset.empty()) return; if (fileInfo.Is2D()) return; - double inlineSpacing[2] = { 0, 0 }; - double crosslineSpacing[2] = { 0, 0 }; + double secondarySpacing[2] = { 0, 0 }; size_t globalTotalSegments; auto orderedListIndices = getOrderedSegmentListIndices(fileInfo, globalTotalSegments); - // Determine crossline spacing - int countedCrosslineSpacings = 0; + // Determine secondary spacing + int countedSecondarySpacings = 0; auto - crosslineUpdater = [&countedCrosslineSpacings, &crosslineSpacing](const std::vector& segmentInfoList) + secondaryUpdater = [&primaryKey, &countedSecondarySpacings, &secondarySpacing](const std::vector& segmentInfoList) { for (auto const& segmentInfo : segmentInfoList) { - int crosslineCount = segmentInfo.m_binInfoStop.m_crosslineNumber - segmentInfo.m_binInfoStart.m_crosslineNumber; + const auto + secondaryCount = BinInfoSecondaryKeyValue(primaryKey, segmentInfo.m_binInfoStop) - BinInfoSecondaryKeyValue(primaryKey, segmentInfo.m_binInfoStart); - if (crosslineCount == 0 || segmentInfo.m_binInfoStart.m_inlineNumber != segmentInfo.m_binInfoStop.m_inlineNumber) continue; + if (secondaryCount == 0 || BinInfoPrimaryKeyValue(primaryKey, segmentInfo.m_binInfoStart) != BinInfoPrimaryKeyValue(primaryKey, segmentInfo.m_binInfoStop)) continue; - double segmentCrosslineSpacing[3]; + double segmentSecondarySpacing[3]; - segmentCrosslineSpacing[0] = (segmentInfo.m_binInfoStop.m_ensembleXCoordinate - segmentInfo.m_binInfoStart.m_ensembleXCoordinate) / crosslineCount; - segmentCrosslineSpacing[1] = (segmentInfo.m_binInfoStop.m_ensembleYCoordinate - segmentInfo.m_binInfoStart.m_ensembleYCoordinate) / crosslineCount; + segmentSecondarySpacing[0] = (segmentInfo.m_binInfoStop.m_ensembleXCoordinate - segmentInfo.m_binInfoStart.m_ensembleXCoordinate) / secondaryCount; + segmentSecondarySpacing[1] = (segmentInfo.m_binInfoStop.m_ensembleYCoordinate - segmentInfo.m_binInfoStart.m_ensembleYCoordinate) / secondaryCount; - crosslineSpacing[0] += segmentCrosslineSpacing[0]; - crosslineSpacing[1] += segmentCrosslineSpacing[1]; + secondarySpacing[0] += segmentSecondarySpacing[0]; + secondarySpacing[1] += segmentSecondarySpacing[1]; - countedCrosslineSpacings++; + countedSecondarySpacings++; } }; @@ -1486,50 +1513,83 @@ createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryH { for (const auto& entry : offsetSegmentMap) { - crosslineUpdater(entry.second); + secondaryUpdater(entry.second); } } } else { - crosslineUpdater(fileInfo.m_segmentInfoLists[listIndex]); + secondaryUpdater(fileInfo.m_segmentInfoLists[listIndex]); } } - if (countedCrosslineSpacings > 0) + if (countedSecondarySpacings > 0) { - crosslineSpacing[0] /= countedCrosslineSpacings; - crosslineSpacing[1] /= countedCrosslineSpacings; + secondarySpacing[0] /= countedSecondarySpacings; + secondarySpacing[1] /= countedSecondarySpacings; } else { - crosslineSpacing[0] = 0; - crosslineSpacing[1] = 1; + secondarySpacing[0] = 0; + secondarySpacing[1] = 1; } - // Determine inline spacing + // Determine primary spacing + + double primarySpacing[2] = { 0, 0 }; + const auto firstLastSegmentInfos = findFirstLastInlineSegmentInfos(fileInfo, orderedListIndices); const SEGYSegmentInfo & firstSegmentInfo = firstLastSegmentInfos.first, & lastSegmentInfo = firstLastSegmentInfos.second; - if (firstSegmentInfo.m_binInfoStart.m_inlineNumber != lastSegmentInfo.m_binInfoStart.m_inlineNumber) + if (BinInfoPrimaryKeyValue(primaryKey, firstSegmentInfo.m_binInfoStart) != BinInfoPrimaryKeyValue(primaryKey, lastSegmentInfo.m_binInfoStart)) { - int inlineNunberDelta = lastSegmentInfo.m_binInfoStart.m_inlineNumber - firstSegmentInfo.m_binInfoStart.m_inlineNumber; - int crosslineNunberDelta = lastSegmentInfo.m_binInfoStart.m_crosslineNumber - firstSegmentInfo.m_binInfoStart.m_crosslineNumber; + int primaryNunberDelta = BinInfoPrimaryKeyValue(primaryKey, lastSegmentInfo.m_binInfoStart) - BinInfoPrimaryKeyValue(primaryKey, firstSegmentInfo.m_binInfoStart); + int secondaryNunberDelta = BinInfoSecondaryKeyValue(primaryKey, lastSegmentInfo.m_binInfoStart) - BinInfoSecondaryKeyValue(primaryKey, firstSegmentInfo.m_binInfoStart); + + double offset[2] = { secondarySpacing[0] * secondaryNunberDelta, + secondarySpacing[1] * secondaryNunberDelta }; + + primarySpacing[0] = (lastSegmentInfo.m_binInfoStart.m_ensembleXCoordinate - firstSegmentInfo.m_binInfoStart.m_ensembleXCoordinate - offset[0]) / primaryNunberDelta; + primarySpacing[1] = (lastSegmentInfo.m_binInfoStart.m_ensembleYCoordinate - firstSegmentInfo.m_binInfoStart.m_ensembleYCoordinate - offset[1]) / primaryNunberDelta; + } + else + { + if (fileInfo.Is2D()) + { + // Headwave convention is 1, 0 for 2D inline spacing + primarySpacing[0] = ConvertDistanceInverse(segyMeasurementSystem, 1.0); + primarySpacing[1] = ConvertDistanceInverse(segyMeasurementSystem, 0.0); + } + else + { + // make square voxels + primarySpacing[0] = secondarySpacing[1]; + primarySpacing[1] = -secondarySpacing[0]; + } + } - double offset[2] = { crosslineSpacing[0] * crosslineNunberDelta, - crosslineSpacing[1] * crosslineNunberDelta }; + double + inlineSpacing[2] = {}, + crosslineSpacing[2] = {}; - inlineSpacing[0] = (lastSegmentInfo.m_binInfoStart.m_ensembleXCoordinate - firstSegmentInfo.m_binInfoStart.m_ensembleXCoordinate - offset[0]) / inlineNunberDelta; - inlineSpacing[1] = (lastSegmentInfo.m_binInfoStart.m_ensembleYCoordinate - firstSegmentInfo.m_binInfoStart.m_ensembleYCoordinate - offset[1]) / inlineNunberDelta; + if (primaryKey == PrimaryKeyValue::CrosslineNumber) + { + // map inline to secondary, crossline to primary + inlineSpacing[0] = secondarySpacing[0]; + inlineSpacing[1] = secondarySpacing[1]; + crosslineSpacing[0] = primarySpacing[0]; + crosslineSpacing[1] = primarySpacing[1]; } else { - // make square voxels - inlineSpacing[0] = crosslineSpacing[1]; - inlineSpacing[1] = -crosslineSpacing[0]; + // map inline to primary, crossline to secondary + inlineSpacing[0] = primarySpacing[0]; + inlineSpacing[1] = primarySpacing[1]; + crosslineSpacing[0] = secondarySpacing[0]; + crosslineSpacing[1] = secondarySpacing[1]; } if (crosslineSpacing[0] == 0.0 && crosslineSpacing[1] == 1.0 && inlineSpacing[0] == 0.0) @@ -2132,11 +2192,10 @@ findFirstTrace(TraceDataManager& traceDataManager, int primaryKey, int secondary } int64_t -findFirstTrace(TraceDataManager& traceDataManager, const SEGYSegmentInfo& segment, int secondaryKey, SEGYFileInfo const& fileInfo, OpenVDS::Error error) +findFirstTrace(TraceDataManager& traceDataManager, const SEGYSegmentInfo& segment, int secondaryKey, SEGYFileInfo const& fileInfo, PrimaryKeyValue primaryKey, OpenVDS::Error error) { - // TODO do the info start and stop reflect the start and stop for reverse-sorted inlines, or are they simply min and max? - const int secondaryStart = segment.m_binInfoStart.m_crosslineNumber; - const int secondaryStop = segment.m_binInfoStop.m_crosslineNumber; + const int secondaryStart = BinInfoSecondaryKeyValue(primaryKey, segment.m_binInfoStart); + const int secondaryStop = BinInfoSecondaryKeyValue(primaryKey, segment.m_binInfoStop); return findFirstTrace(traceDataManager, segment.m_primaryKey, secondaryKey, fileInfo, segment.m_traceStart, segment.m_traceStop, secondaryStart, secondaryStop, error); } @@ -2196,9 +2255,17 @@ TraceIndex2DToEnsembleNumber(TraceInfo2DManager * pTraceInfo2DManager, int trace } int -SecondaryKeyDimension(const SEGYFileInfo& fileInfo) +SecondaryKeyDimension(const SEGYFileInfo& fileInfo, PrimaryKeyValue primaryKey) { - if (fileInfo.m_segyType == SEGY::SEGYType::Prestack || fileInfo.m_segyType == SEGY::SEGYType::Prestack2D || fileInfo.m_segyType == SEGY::SEGYType::PrestackOffsetSorted) + if ((fileInfo.m_segyType == SEGY::SEGYType::Prestack && primaryKey != PrimaryKeyValue::CrosslineNumber) || fileInfo.m_segyType == SEGY::SEGYType::Prestack2D || fileInfo.m_segyType == SEGY::SEGYType::PrestackOffsetSorted) + { + return 2; + } + if (fileInfo.m_segyType == SEGY::SEGYType::Prestack && primaryKey == PrimaryKeyValue::CrosslineNumber) + { + return 3; + } + if (fileInfo.m_segyType == SEGY::SEGYType::Poststack && primaryKey == PrimaryKeyValue::CrosslineNumber) { return 2; } @@ -2206,13 +2273,21 @@ SecondaryKeyDimension(const SEGYFileInfo& fileInfo) } int -PrimaryKeyDimension(const SEGYFileInfo& fileInfo) +PrimaryKeyDimension(const SEGYFileInfo& fileInfo, PrimaryKeyValue primaryKey) { - if (fileInfo.m_segyType == SEGY::SEGYType::Prestack || fileInfo.m_segyType == SEGY::SEGYType::Prestack2D || fileInfo.m_segyType == SEGY::SEGYType::PrestackOffsetSorted) + if (fileInfo.Is4D() || fileInfo.m_segyType == SEGY::SEGYType::Prestack2D) { // Prestack2D doesn't really have a primary key dimension, but we'll use the same one as for Prestack as sort of a placeholder. + if (primaryKey == PrimaryKeyValue::CrosslineNumber) + { + return 2; + } return 3; } + if (primaryKey == PrimaryKeyValue::CrosslineNumber) + { + return 1; + } return 2; } @@ -2435,6 +2510,8 @@ main(int argc, char* argv[]) TraceSpacingByOffset traceSpacingByOffset = TraceSpacingByOffset::Auto; std::string traceSpacingByOffsetString; + PrimaryKeyValue primaryKeyValue = PrimaryKeyValue::InlineNumber; + const std::string supportedAzimuthTypes("Azimuth (from trace header field) (default), OffsetXY (computed from OffsetX and OffsetY header fields)"); const std::string supportedAzimuthUnits("Radians, Degrees (default)"); const std::string supportedTraceSpacingTypes("Off, On, Auto (default)"); @@ -2599,6 +2676,20 @@ main(int argc, char* argv[]) ResolveAlias(primaryKey); ResolveAlias(secondaryKey); + // set the enum value for primary key + if (primaryKey == "inlinenumber") + { + primaryKeyValue = PrimaryKeyValue::InlineNumber; + } + else if (primaryKey == "crosslinenumber") + { + primaryKeyValue = PrimaryKeyValue::CrosslineNumber; + } + else + { + primaryKeyValue = PrimaryKeyValue::Other; + } + SEGY::SEGYType segyType = SEGY::SEGYType::Poststack; if (isOffsetSorted) @@ -2802,11 +2893,12 @@ main(int argc, char* argv[]) fileInfo.m_segyType = segyType; const auto + binInfoCrosslineFieldIndex = fileInfo.Is2D() ? secondaryKey.c_str() : "crosslinenumber", // for 2D need to borrow the BinInfo crossline field for ensemble number xcoordHeaderFieldIndex = fileInfo.Is2D() ? "sourcexcoordinate" : "ensemblexcoordinate", ycoordHeaderFieldIndex = fileInfo.Is2D() ? "sourceycoordinate" : "ensembleycoordinate"; SEGYBinInfoHeaderFields - binInfoHeaderFields(g_traceHeaderFields[primaryKey], g_traceHeaderFields[secondaryKey], g_traceHeaderFields["coordinatescale"], g_traceHeaderFields[xcoordHeaderFieldIndex], g_traceHeaderFields[ycoordHeaderFieldIndex], scale); + binInfoHeaderFields(g_traceHeaderFields["inlinenumber"], g_traceHeaderFields[binInfoCrosslineFieldIndex], g_traceHeaderFields["coordinatescale"], g_traceHeaderFields[xcoordHeaderFieldIndex], g_traceHeaderFields[ycoordHeaderFieldIndex], scale); // Scan the file if '--scan' was passed or we're uploading but no fileInfo file was specified if (scan || fileInfoFileNames.empty()) @@ -3162,9 +3254,10 @@ main(int argc, char* argv[]) // Create channel descriptors std::vector channelDescriptors = createChannelDescriptors(fileInfo, valueRange, offsetInfo, attributeName, attributeUnit, isAzimuth, isMutes); - if (segyType == SEGY::SEGYType::Poststack || segyType == SEGY::SEGYType::Prestack || fileInfo.IsOffsetSorted()) + if (primaryKeyValue == PrimaryKeyValue::InlineNumber || primaryKeyValue == PrimaryKeyValue::CrosslineNumber) { - createSurveyCoordinateSystemMetadata(fileInfo, segyMeasurementSystem, crsWkt, metadataContainer); + // only create the lattice metadata if the primary key is Inline or Crossline, else we may not have Inline/Crossline bin data + createSurveyCoordinateSystemMetadata(fileInfo, segyMeasurementSystem, crsWkt, metadataContainer, primaryKeyValue); } create2DTraceInformationMetadata(is2D, traceInfo2DManager.get(), metadataContainer, error); @@ -3308,6 +3401,10 @@ main(int argc, char* argv[]) traceDataManagers.emplace_back(dataViewManagers.back(), 128, traceByteSize, fileInfo.m_traceCounts[fileIndex]); } + const int + primaryKeyDimension = PrimaryKeyDimension(fileInfo, primaryKeyValue), + secondaryKeyDimension = SecondaryKeyDimension(fileInfo, primaryKeyValue); + std::vector chunkInfos; chunkInfos.resize(amplitudeAccessor->GetChunkCount()); std::vector dataRequests; @@ -3322,8 +3419,8 @@ main(int argc, char* argv[]) OpenVDS::Error traceIndexError; - chunkInfo.secondaryKeyStart = fileInfo.Is2D() ? TraceIndex2DToEnsembleNumber(traceInfo2DManager.get(), chunkInfo.min[SecondaryKeyDimension(fileInfo)], traceIndexError) : (int)floorf(layout->GetAxisDescriptor(SecondaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.min[SecondaryKeyDimension(fileInfo)]) + 0.5f); - chunkInfo.secondaryKeyStop = fileInfo.Is2D() ? TraceIndex2DToEnsembleNumber(traceInfo2DManager.get(), chunkInfo.max[SecondaryKeyDimension(fileInfo)] - 1, traceIndexError) : (int)floorf(layout->GetAxisDescriptor(SecondaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.max[SecondaryKeyDimension(fileInfo)] - 1) + 0.5f); + chunkInfo.secondaryKeyStart = fileInfo.Is2D() ? TraceIndex2DToEnsembleNumber(traceInfo2DManager.get(), chunkInfo.min[secondaryKeyDimension], traceIndexError) : (int)floorf(layout->GetAxisDescriptor(secondaryKeyDimension).SampleIndexToCoordinate(chunkInfo.min[secondaryKeyDimension]) + 0.5f); + chunkInfo.secondaryKeyStop = fileInfo.Is2D() ? TraceIndex2DToEnsembleNumber(traceInfo2DManager.get(), chunkInfo.max[secondaryKeyDimension] - 1, traceIndexError) : (int)floorf(layout->GetAxisDescriptor(secondaryKeyDimension).SampleIndexToCoordinate(chunkInfo.max[secondaryKeyDimension] - 1) + 0.5f); if (fileInfo.Is2D() && traceIndexError.code != 0) { @@ -3331,8 +3428,8 @@ main(int argc, char* argv[]) break; } - chunkInfo.primaryKeyStart = fileInfo.Is2D() ? 0 : (int)floorf(layout->GetAxisDescriptor(PrimaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.min[PrimaryKeyDimension(fileInfo)]) + 0.5f); - chunkInfo.primaryKeyStop = fileInfo.Is2D() ? 0 : (int)floorf(layout->GetAxisDescriptor(PrimaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.max[PrimaryKeyDimension(fileInfo)] - 1) + 0.5f); + chunkInfo.primaryKeyStart = fileInfo.Is2D() ? 0 : (int)floorf(layout->GetAxisDescriptor(primaryKeyDimension).SampleIndexToCoordinate(chunkInfo.min[primaryKeyDimension]) + 0.5f); + chunkInfo.primaryKeyStop = fileInfo.Is2D() ? 0 : (int)floorf(layout->GetAxisDescriptor(primaryKeyDimension).SampleIndexToCoordinate(chunkInfo.max[primaryKeyDimension] - 1) + 0.5f); // For each input file, find the lower/upper segments and then add data requests to that file's traceDataManager @@ -3556,7 +3653,7 @@ main(int argc, char* argv[]) } else { - firstTrace = findFirstTrace(traceDataManager, *segment, chunkInfo.secondaryKeyStart, fileInfo, error); + firstTrace = findFirstTrace(traceDataManager, *segment, chunkInfo.secondaryKeyStart, fileInfo, primaryKeyValue, error); if (error.code) { OpenVDS::printWarning(jsonOutput, "IO", "Failed when reading data", fmt::format("{}", error.code), error.string); @@ -3568,11 +3665,17 @@ main(int argc, char* argv[]) tertiaryIndex = 0, currentSecondaryKey = chunkInfo.secondaryKeyStart; - auto - gatherSpacing = CalculateGatherSpacing(fileInfo, fold, gatherOffsetValues, traceDataManager, traceSpacingByOffset, firstTrace, jsonOutput); + std::unique_ptr gatherSpacing; for (int64_t trace = firstTrace; trace <= segment->m_traceStop; trace++, tertiaryIndex++) { + if (!static_cast(gatherSpacing)) + { + // get the first GatherSpacing + // (do it here instead of before the loop to handle the case where 'firstTrace' is past the segment) + gatherSpacing = CalculateGatherSpacing(fileInfo, fold, gatherOffsetValues, traceDataManager, traceSpacingByOffset, firstTrace, jsonOutput); + } + const char* header = traceDataManager.getTraceData(trace, error); if (error.code) { @@ -3616,12 +3719,12 @@ main(int argc, char* argv[]) } else { - primaryIndex = layout->GetAxisDescriptor(PrimaryKeyDimension(fileInfo)).CoordinateToSampleIndex((float)segment->m_primaryKey); - secondaryIndex = layout->GetAxisDescriptor(SecondaryKeyDimension(fileInfo)).CoordinateToSampleIndex((float)secondaryTest); + primaryIndex = layout->GetAxisDescriptor(primaryKeyDimension).CoordinateToSampleIndex((float)segment->m_primaryKey); + secondaryIndex = layout->GetAxisDescriptor(secondaryKeyDimension).CoordinateToSampleIndex((float)secondaryTest); } - assert(primaryIndex >= chunkInfo.min[PrimaryKeyDimension(fileInfo)] && primaryIndex < chunkInfo.max[PrimaryKeyDimension(fileInfo)]); - assert(secondaryIndex >= chunkInfo.min[SecondaryKeyDimension(fileInfo)] && secondaryIndex < chunkInfo.max[SecondaryKeyDimension(fileInfo)]); + assert(primaryIndex >= chunkInfo.min[primaryKeyDimension] && primaryIndex < chunkInfo.max[primaryKeyDimension]); + assert(secondaryIndex >= chunkInfo.min[secondaryKeyDimension] && secondaryIndex < chunkInfo.max[secondaryKeyDimension]); if (fileInfo.Is4D()) { @@ -3656,29 +3759,33 @@ main(int argc, char* argv[]) continue; } + const int + targetPrimaryIndex = primaryKeyValue == PrimaryKeyValue::CrosslineNumber ? secondaryIndex : primaryIndex, + targetSecondaryIndex = primaryKeyValue == PrimaryKeyValue::CrosslineNumber ? primaryIndex : secondaryIndex; + { - const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, amplitudePitch); + const int targetOffset = VoxelIndexToDataIndex(fileInfo, targetPrimaryIndex, targetSecondaryIndex, tertiaryIndex, chunkInfo.min, amplitudePitch); copySamples(data, fileInfo.m_dataSampleFormatCode, fileInfo.m_headerEndianness, &reinterpret_cast(amplitudeBuffer)[targetOffset], chunkInfo.sampleStart, chunkInfo.sampleCount); } if (traceFlagBuffer) { - const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, traceFlagPitch); + const int targetOffset = VoxelIndexToDataIndex(fileInfo, targetPrimaryIndex, targetSecondaryIndex, tertiaryIndex, chunkInfo.min, traceFlagPitch); reinterpret_cast(traceFlagBuffer)[targetOffset] = true; } if (segyTraceHeaderBuffer) { - const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, segyTraceHeaderPitch); + const int targetOffset = VoxelIndexToDataIndex(fileInfo, targetPrimaryIndex, targetSecondaryIndex, tertiaryIndex, chunkInfo.min, segyTraceHeaderPitch); memcpy(&reinterpret_cast(segyTraceHeaderBuffer)[targetOffset], header, SEGY::TraceHeaderSize); } if (offsetBuffer) { - const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, offsetPitch); + const int targetOffset = VoxelIndexToDataIndex(fileInfo, targetPrimaryIndex, targetSecondaryIndex, tertiaryIndex, chunkInfo.min, offsetPitch); const auto traceOffset = ConvertDistance(segyMeasurementSystem, static_cast(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness))); @@ -3712,7 +3819,7 @@ main(int argc, char* argv[]) azimuth = fmod(450.0f - azimuth, 360.0f); } - const int targetOffset = VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, azimuthPitch); + const int targetOffset = VoxelIndexToDataIndex(fileInfo, targetPrimaryIndex, targetSecondaryIndex, tertiaryIndex, chunkInfo.min, azimuthPitch); static_cast(azimuthBuffer)[targetOffset] = azimuth; } @@ -3725,7 +3832,7 @@ main(int argc, char* argv[]) // Need to multiply by 2 the result of VoxelIndexToDataIndex, because it gives the two-component offset, and we want the plain offset into a uint16_t buffer const int - targetOffset = 2 * VoxelIndexToDataIndex(fileInfo, primaryIndex, secondaryIndex, tertiaryIndex, chunkInfo.min, mutePitch); + targetOffset = 2 * VoxelIndexToDataIndex(fileInfo, targetPrimaryIndex, targetSecondaryIndex, tertiaryIndex, chunkInfo.min, mutePitch); static_cast(muteBuffer)[targetOffset] = muteStartTime; static_cast(muteBuffer)[targetOffset + 1] = muteEndTime; -- GitLab From 7aae76a21bada5b30262809ed97f3d37e1718699 Mon Sep 17 00:00:00 2001 From: Jim King Date: Thu, 14 Oct 2021 12:42:58 -0500 Subject: [PATCH 37/46] fix test --- tests/tools/SEGYImport/pytests/test_crossline_sorted.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_crossline_sorted.py b/tests/tools/SEGYImport/pytests/test_crossline_sorted.py index 4ffb9c44..45847efe 100644 --- a/tests/tools/SEGYImport/pytests/test_crossline_sorted.py +++ b/tests/tools/SEGYImport/pytests/test_crossline_sorted.py @@ -233,14 +233,12 @@ def test_dimensions_and_produce_status(crossline_output_vds, additional_args = [ "--header-field", "InlineNumber=17:4", - "--header-field", "CrosslineNumber=17:4", + "--header-field", "CrosslineNumber=21:4", "--header-field", "EnsembleXCoordinateByteLocation=73:4", "--header-field", "EnsembleYCoordinateByteLocation=77:4" ] if dimensions == 4: additional_args.append("--prestack") - # Prestack file has lots of empty traces? Need to ignore it. - additional_args.append("--ignore-warnings") ex = construct_crossline_executor(segy_filename, crossline_output_vds, additional_args) @@ -253,8 +251,6 @@ def test_dimensions_and_produce_status(crossline_output_vds, layout = openvds.getLayout(handle) access_manager = openvds.getAccessManager(handle) - sample_channel_name = layout.getChannelName(0) - # check produce status if dimensions == 4: -- GitLab From e446eec4e638e338be48257cc2e2048613385b76 Mon Sep 17 00:00:00 2001 From: Jim King Date: Thu, 14 Oct 2021 15:55:47 -0500 Subject: [PATCH 38/46] fix test --- .../pytests/test_crossline_sorted.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_crossline_sorted.py b/tests/tools/SEGYImport/pytests/test_crossline_sorted.py index 45847efe..9cf4bc2e 100644 --- a/tests/tools/SEGYImport/pytests/test_crossline_sorted.py +++ b/tests/tools/SEGYImport/pytests/test_crossline_sorted.py @@ -207,15 +207,15 @@ prestack_axes = [ ] poststack_survey_vectors = [ - (76290.81610169, 949398.18641265), - (-15.5, 19.609375), - (19.60459195, 15.49621929) + (76276.3367, 949408.127), + (-15.496337, 19.612673), + (19.610, 15.48960) ] prestack_survey_vectors = [ - (530620.06457521, 5789005.97294581), - (-15.51, 19.625), - (19.63167672, 15.51527674) + (530768.667, 5788901.760), + (-15.5040, 19.611520), + (19.555556, 15.573333) ] poststack_segy_filename = os.path.join(platform_integration_test_data_dir(), "ImporterTests", @@ -234,8 +234,8 @@ def test_dimensions_and_produce_status(crossline_output_vds, additional_args = [ "--header-field", "InlineNumber=17:4", "--header-field", "CrosslineNumber=21:4", - "--header-field", "EnsembleXCoordinateByteLocation=73:4", - "--header-field", "EnsembleYCoordinateByteLocation=77:4" + "--header-field", "EnsembleXCoordinate=73:4", + "--header-field", "EnsembleYCoordinate=77:4" ] if dimensions == 4: additional_args.append("--prestack") @@ -288,9 +288,9 @@ def test_dimensions_and_produce_status(crossline_output_vds, # check survey coordinate system vectors origin, inline_spacing, crossline_spacing = get_scs_metadata_vectors(layout) - assert origin[0] == scs_vectors[0][0] - assert origin[1] == scs_vectors[0][1] - assert inline_spacing[0] == scs_vectors[1][0] - assert inline_spacing[1] == scs_vectors[1][1] - assert crossline_spacing[0] == scs_vectors[2][0] - assert crossline_spacing[1] == scs_vectors[2][1] + assert origin[0] == pytest.approx(scs_vectors[0][0]) + assert origin[1] == pytest.approx(scs_vectors[0][1]) + assert inline_spacing[0] == pytest.approx(scs_vectors[1][0]) + assert inline_spacing[1] == pytest.approx(scs_vectors[1][1]) + assert crossline_spacing[0] == pytest.approx(scs_vectors[2][0]) + assert crossline_spacing[1] == pytest.approx(scs_vectors[2][1]) -- GitLab From 48f2511cae017e45876217de801978ce8c1ab730 Mon Sep 17 00:00:00 2001 From: Jim King Date: Tue, 26 Oct 2021 18:29:01 -0500 Subject: [PATCH 39/46] fix test --- tests/tools/SEGYImport/pytests/test_azimuth_channel.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_azimuth_channel.py b/tests/tools/SEGYImport/pytests/test_azimuth_channel.py index 841e9ae1..4b435bb2 100644 --- a/tests/tools/SEGYImport/pytests/test_azimuth_channel.py +++ b/tests/tools/SEGYImport/pytests/test_azimuth_channel.py @@ -35,7 +35,6 @@ def azimuth_from_azimuth_executor(azimuth_degrees_segy, output_vds) -> Tuple[Imp ex.add_args(["--azimuth-type", "azimuth"]) ex.add_args(["--azimuth-unit", "degrees"]) ex.add_arg("--prestack") - ex.add_arg("--order-by-offset=false") ex.add_args(["--vdsfile", output_vds.filename]) # Need to ignore warnings because this data is only one segment @@ -56,7 +55,6 @@ def azimuth_from_offset_xy_executor(azimuth_offset_xy_segy, output_vds) -> Tuple ex.add_arg("--azimuth") ex.add_args(["--azimuth-type", "offsetxy"]) ex.add_arg("--prestack") - ex.add_arg("--order-by-offset=false") ex.add_args(["--vdsfile", output_vds.filename]) ex.add_arg(azimuth_offset_xy_segy) -- GitLab From dc170ab9a4892792227d18e8c3979939e9d3f344 Mon Sep 17 00:00:00 2001 From: Jim King Date: Tue, 26 Oct 2021 18:29:55 -0500 Subject: [PATCH 40/46] re-order chunks consistent with input data order when importing crossline-sorted SEGY --- tools/SEGYImport/SEGYImport.cpp | 81 +++++++++++++++++++++++++++++---- 1 file changed, 73 insertions(+), 8 deletions(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index a158fceb..b8edd17f 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -56,6 +56,7 @@ #include #include #include +#include #if defined(WIN32) #undef WIN32_LEAN_AND_MEAN // avoid warnings if defined on command line @@ -2458,7 +2459,7 @@ main(int argc, char* argv[]) bool is_tty = isatty(fileno(stdout)); #endif //auto start_time = std::chrono::high_resolution_clock::now(); - cxxopts::Options options("SEGYImport", "SEGYImport - A tool to scan and import a SEG-Y file to a volume data store (VDS)\n\nUse -H or see online documentation for connection string paramters:\nhttp://osdu.pages.community.opengroup.org/platform/domain-data-mgmt-services/seismic/open-vds/connection.html\n"); + cxxopts::Options options("SEGYImport", "SEGYImport - A tool to scan and import a SEG-Y file to a volume data store (VDS)\n\nUse -H or see online documentation for connection string parameters:\nhttp://osdu.pages.community.opengroup.org/platform/domain-data-mgmt-services/seismic/open-vds/connection.html\n"); options.positional_help(""); std::string headerFormatFileName; @@ -2509,6 +2510,7 @@ main(int argc, char* argv[]) TraceSpacingByOffset traceSpacingByOffset = TraceSpacingByOffset::Auto; std::string traceSpacingByOffsetString; + bool isDisableCrosslineReordering = false; PrimaryKeyValue primaryKeyValue = PrimaryKeyValue::InlineNumber; @@ -2568,9 +2570,7 @@ main(int argc, char* argv[]) options.add_option("", "", "azimuth-unit", std::string("Azimuth unit. Supported azimuth units are: ") + supportedAzimuthUnits + ".", cxxopts::value(azimuthUnitString), ""); options.add_option("", "", "azimuth-scale", "Azimuth scale factor. Trace header field Azimuth values will be multiplied by this factor.", cxxopts::value(azimuthScaleFactor), ""); options.add_option("", "", "respace-gathers", std::string("Respace traces in prestack gathers by Offset trace header field. Supported options are: ") + supportedTraceSpacingTypes + ".", cxxopts::value(traceSpacingByOffsetString), ""); - - // TODO remove traceOrderByOffset option - options.add_option("", "", "order-by-offset", "Order traces within a gather by offset.", cxxopts::value(traceOrderByOffset), ""); + options.add_option("", "", "disable-crossline-input-reorder", "Disable VDS chunk reordering for crossline-sorted SEGY. This option will probably reduce performance.", cxxopts::value(isDisableCrosslineReordering), ""); options.add_option("", "h", "help", "Print this help information", cxxopts::value(help), ""); options.add_option("", "H", "help-connection", "Print help information about the connection string", cxxopts::value(helpConnection), ""); @@ -3382,6 +3382,13 @@ main(int argc, char* argv[]) lowerUpperSegmentIndices; }; + // chunk information for iterating crossline-sorted data in input order (instead of default chunk order) + std::array + dimChunkCounts = { 0, 0, 0, 0 }, + dimChunkPitches = { 1, 1, 1, 1 }; + const auto + dimensionality = axisDescriptors.size(); + // limit DataViewManager's memory use to 1.5 sets of brick inlines const int64_t dvmMemoryLimit = 3LL * (writeDimensionGroup == OpenVDS::DimensionsND::Dimensions_01 ? 1 : brickSize) * axisDescriptors[1].GetNumSamples() * fileInfo.TraceByteSize() / 2LL; @@ -3414,6 +3421,25 @@ main(int argc, char* argv[]) auto &chunkInfo = chunkInfos[chunk]; amplitudeAccessor->GetChunkMinMax(chunk, chunkInfo.min, chunkInfo.max); + if (primaryKeyValue == PrimaryKeyValue::CrosslineNumber) + { + for (auto i = 0; i < dimChunkCounts.size(); ++i) + { + bool isZero = true; + for (auto j = 0; isZero && j < dimChunkCounts.size(); ++j) + { + if (j != i) + { + isZero = chunkInfo.min[j] == 0; + } + } + if (isZero) + { + ++dimChunkCounts[i]; + } + } + } + chunkInfo.sampleStart = chunkInfo.min[0]; chunkInfo.sampleCount = chunkInfo.max[0] - chunkInfo.min[0]; @@ -3507,9 +3533,14 @@ main(int argc, char* argv[]) } } - for (int64_t chunk = 0; chunk < amplitudeAccessor->GetChunkCount() && error.code == 0; chunk++) + for (size_t i = 1; i < dimChunkCounts.size(); ++i) { - int new_percentage = int(double(chunk) / amplitudeAccessor->GetChunkCount() * 100); + dimChunkPitches[i] = dimChunkPitches[i - 1] * dimChunkCounts[i - 1]; + } + + for (int64_t chunkSequence = 0; chunkSequence < amplitudeAccessor->GetChunkCount() && error.code == 0; chunkSequence++) + { + int new_percentage = int(double(chunkSequence) / amplitudeAccessor->GetChunkCount() * 100); if (!jsonOutput && is_tty && percentage != new_percentage) { percentage = new_percentage; @@ -3530,13 +3561,47 @@ main(int argc, char* argv[]) } } + auto + chunk = chunkSequence; + + if (primaryKeyValue == PrimaryKeyValue::CrosslineNumber && (dimensionality == 3 || dimensionality == 4) && !isDisableCrosslineReordering) + { + // recompute the chunk number for the crossline-sorted chunk corresponding to the current sequence number + + std::array chunkCoords = { 0, 0, 0, 0 }; + if (dimensionality == 3) + { + auto dv = std::div(chunkSequence, dimChunkCounts[0]); + chunkCoords[0] = dv.rem; + dv = std::div(dv.quot, dimChunkCounts[2]); // 2 not 1 because we're working in crossline-major space + chunkCoords[1] = dv.quot; + chunkCoords[2] = dv.rem; + } + else + { + auto dv = std::div(chunkSequence, dimChunkCounts[0]); + chunkCoords[0] = dv.rem; + dv = std::div(dv.quot, dimChunkCounts[1]); + chunkCoords[1] = dv.rem; + dv = std::div(dv.quot, dimChunkCounts[3]); // 3 not 2 because we're working in crossline-major space + chunkCoords[2] = dv.quot; + chunkCoords[3] = dv.rem; + } + + chunk = 0; + for (size_t i = 0; i < chunkCoords.size(); ++i) + { + chunk += chunkCoords[i] * dimChunkPitches[i]; + } + } + auto &chunkInfo = chunkInfos[chunk]; // if we've crossed to a new inline then trim the trace page cache if (chunk > 0) { - // TODO alternate version for offset sorted + // TODO alternate versions for offset sorted and crossline sorted const auto& previousChunkInfo = chunkInfos[chunk - 1]; @@ -3649,7 +3714,7 @@ main(int argc, char* argv[]) { // For unbinned gathers the secondary key is the 1-based index of the trace, so to get the // first trace we convert the index to 0-based and add that to the segment's start trace. - firstTrace = segment->m_traceStart + (chunkInfo.secondaryKeyStart - 1); + firstTrace = segment->m_traceStart + (chunkInfo.secondaryKeyStart - 1L); } else { -- GitLab From 982cf64df06f19baf1f508e0a2ac8c15db57ee46 Mon Sep 17 00:00:00 2001 From: Jim King Date: Tue, 26 Oct 2021 18:30:46 -0500 Subject: [PATCH 41/46] add crossline-sorted test for large-ish Volve SEGY file --- .../pytests/segyimport_test_config.py | 5 ++++ .../pytests/test_crossline_sorted.py | 28 ++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/tests/tools/SEGYImport/pytests/segyimport_test_config.py b/tests/tools/SEGYImport/pytests/segyimport_test_config.py index fdcd8148..9d3c9af8 100644 --- a/tests/tools/SEGYImport/pytests/segyimport_test_config.py +++ b/tests/tools/SEGYImport/pytests/segyimport_test_config.py @@ -75,3 +75,8 @@ def segyimport_test_data_dir() -> str: def platform_integration_test_data_dir() -> str: return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration") + + +@pytest.fixture +def volve_data_dir() -> str: + return "D:\\SEGY\\Equinor\\Volve" diff --git a/tests/tools/SEGYImport/pytests/test_crossline_sorted.py b/tests/tools/SEGYImport/pytests/test_crossline_sorted.py index 9cf4bc2e..b77f8590 100644 --- a/tests/tools/SEGYImport/pytests/test_crossline_sorted.py +++ b/tests/tools/SEGYImport/pytests/test_crossline_sorted.py @@ -6,7 +6,7 @@ import pytest import os from segyimport_test_config import ImportExecutor, TempVDSGuard, segyimport_test_data_dir, teleport_test_data_dir, \ - platform_integration_test_data_dir + platform_integration_test_data_dir, volve_data_dir def construct_crossline_executor(segy_filename: str, output_vds: TempVDSGuard, @@ -43,6 +43,11 @@ def teleport_crossline_sorted_segy(segyimport_test_data_dir) -> str: return os.path.join(segyimport_test_data_dir, "CrosslineSorted", "ST0202R08_TIME_crossline_sorted.segy") +@pytest.fixture +def volve_crossline_sorted_segy(volve_data_dir) -> str: + return os.path.join(volve_data_dir, "ST0202", "Stacks", "ST0202R08_PS_PSDM_FULL_OFFSET_PP_TIME.MIG_FIN.POST_STACK.3D.JS-017534_crossline_sorted.segy") + + @pytest.fixture def teleport_crossline_executor(teleport_crossline_sorted_segy, crossline_output_vds)\ -> Tuple[ImportExecutor, TempVDSGuard]: @@ -142,6 +147,7 @@ def test_samples_comparison(teleport_crossline_executor, teleport_conventional_e result = xl_ex.run() + s = xl_ex.command_line() assert result == 0, xl_ex.output() assert Path(xl_output_vds.filename).exists() @@ -294,3 +300,23 @@ def test_dimensions_and_produce_status(crossline_output_vds, assert inline_spacing[1] == pytest.approx(scs_vectors[1][1]) assert crossline_spacing[0] == pytest.approx(scs_vectors[2][0]) assert crossline_spacing[1] == pytest.approx(scs_vectors[2][1]) + + +@pytest.mark.parametrize("disable_crossline_ordering", [False, True]) +def test_volve_crossline_sorted(volve_crossline_sorted_segy, crossline_output_vds, disable_crossline_ordering: bool): + """ + Executes SEGYImport on a large-ish crossline-sorted poststack SEGY from Volve. + + This test is mostly meant to compare execution time with and without the crossline input + re-ordering, so the test itself does minimal checks. + """ + additional_args = [] + if disable_crossline_ordering: + additional_args.append("--disable-crossline-input-reorder") + + ex = construct_crossline_executor(volve_crossline_sorted_segy, crossline_output_vds, additional_args) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(crossline_output_vds.filename).exists() -- GitLab From 04a4c2a30ae7326cb365ea24ab46e393852c8e10 Mon Sep 17 00:00:00 2001 From: Jim King Date: Tue, 26 Oct 2021 18:31:23 -0500 Subject: [PATCH 42/46] prototype of chunk ordering algorithm --- .../pytests/crossline_chunk_order.py | 287 ++++++++++++++++++ 1 file changed, 287 insertions(+) create mode 100644 tests/tools/SEGYImport/pytests/crossline_chunk_order.py diff --git a/tests/tools/SEGYImport/pytests/crossline_chunk_order.py b/tests/tools/SEGYImport/pytests/crossline_chunk_order.py new file mode 100644 index 00000000..f87e8e8a --- /dev/null +++ b/tests/tools/SEGYImport/pytests/crossline_chunk_order.py @@ -0,0 +1,287 @@ +""" +Testing/prototyping code for ordering chunks for crossline-sorted data. +""" +import math +import time +from contextlib import redirect_stdout +from timeit import timeit + +import openvds + + +def create_3d_vds(): + layout_descriptor = openvds.VolumeDataLayoutDescriptor(openvds.VolumeDataLayoutDescriptor.BrickSize.BrickSize_64, + 0, 0, 4, + openvds.VolumeDataLayoutDescriptor.LODLevels.LODLevels_None, + openvds.VolumeDataLayoutDescriptor.Options.Options_None) + compression_method = openvds.CompressionMethod(0) + compression_tolerance = 0.01 + + sample_samples = 500 + crossline_samples = 800 + inline_samples = 350 + + axis_descriptors = [ + openvds.VolumeDataAxisDescriptor(sample_samples, openvds.KnownAxisNames.sample(), openvds.KnownUnitNames.millisecond(), 0.0, + (sample_samples - 1) * 4.0), + openvds.VolumeDataAxisDescriptor(crossline_samples, openvds.KnownAxisNames.crossline(), openvds.KnownUnitNames.unitless(), 2000.0, + 2000.0 + (crossline_samples - 1) * 4), + openvds.VolumeDataAxisDescriptor(inline_samples, openvds.KnownAxisNames.inline(), openvds.KnownUnitNames.unitless(), 1000.0, + 1000.0 + (inline_samples - 1) * 2), + ] + channel_descriptors = [openvds.VolumeDataChannelDescriptor(openvds.VolumeDataChannelDescriptor.Format.Format_R32, + openvds.VolumeDataChannelDescriptor.Components.Components_1, + "Amplitude", openvds.KnownUnitNames.millisecond(), + -20000.0, 20000.0) + ] + + metaData = openvds.MetadataContainer() + metaData.setMetadataDoubleVector2(openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name, (1234.0, 4321.0)) + + vds = openvds.create("C:\\temp\\SEGY\\t\\crossline_test.vds", "", layout_descriptor, axis_descriptors, channel_descriptors, metaData, + compression_method, compression_tolerance) + + return vds + + +def create_4d_vds(): + layout_descriptor = openvds.VolumeDataLayoutDescriptor(openvds.VolumeDataLayoutDescriptor.BrickSize.BrickSize_64, + 0, 0, 4, + openvds.VolumeDataLayoutDescriptor.LODLevels.LODLevels_None, + openvds.VolumeDataLayoutDescriptor.Options.Options_None) + compression_method = openvds.CompressionMethod(0) + compression_tolerance = 0.01 + + sample_samples = 500 + trace_samples = 100 + crossline_samples = 800 + inline_samples = 350 + + axis_descriptors = [ + openvds.VolumeDataAxisDescriptor(sample_samples, openvds.KnownAxisNames.sample(), openvds.KnownUnitNames.millisecond(), 0.0, + (sample_samples - 1) * 4.0), + openvds.VolumeDataAxisDescriptor(trace_samples, "Trace (offset)", openvds.KnownUnitNames.unitless(), 1.0, + trace_samples + 1.0), + openvds.VolumeDataAxisDescriptor(crossline_samples, openvds.KnownAxisNames.crossline(), openvds.KnownUnitNames.unitless(), 2000.0, + 2000.0 + (crossline_samples - 1) * 4), + openvds.VolumeDataAxisDescriptor(inline_samples, openvds.KnownAxisNames.inline(), openvds.KnownUnitNames.unitless(), 1000.0, + 1000.0 + (inline_samples - 1) * 2), + ] + channel_descriptors = [openvds.VolumeDataChannelDescriptor(openvds.VolumeDataChannelDescriptor.Format.Format_R32, + openvds.VolumeDataChannelDescriptor.Components.Components_1, + "Amplitude", openvds.KnownUnitNames.millisecond(), + -20000.0, 20000.0) + ] + + metaData = openvds.MetadataContainer() + metaData.setMetadataDoubleVector2(openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name, (1234.0, 4321.0)) + + vds = openvds.create("C:\\temp\\SEGY\\t\\crossline_test.vds", "", layout_descriptor, axis_descriptors, channel_descriptors, metaData, + compression_method, compression_tolerance) + + return vds + + +def create_vds_and_iterate_crossline_chunks_3d(): + vds = create_3d_vds() + + manager = openvds.getAccessManager(vds) + accessor = manager.createVolumeDataPageAccessor(openvds.DimensionsND.Dimensions_012, 0, 0, 8, + openvds.IVolumeDataAccessManager.AccessMode.AccessMode_Create, 1024) + + sample_axis_chunk_count = 0 + crossline_axis_chunk_count = 0 + inline_axis_chunk_count = 0 + + for chunk_num in range(accessor.getChunkCount()): + chunk_min, _ = accessor.getChunkMinMax(chunk_num) + if chunk_min[0] == 0 and chunk_min[1] == 0: + inline_axis_chunk_count += 1 + if chunk_min[0] == 0 and chunk_min[2] == 0: + crossline_axis_chunk_count += 1 + if chunk_min[1] == 0 and chunk_min[2] == 0: + sample_axis_chunk_count += 1 + + print() + print("Gen 1 3D VDS chunk iteration") + print() + print(f"sample axis chunk count: {sample_axis_chunk_count}") + print(f"crossline axis chunk count: {crossline_axis_chunk_count}") + print(f"inline axis chunk count: {inline_axis_chunk_count}") + print(f"total chunks: {accessor.getChunkCount()}") + + chunks_visited = set() + + for chunk_sequence in range(accessor.getChunkCount()): + # convert sequence number to chunk number for crossline-sorted input + + # xl_chunk_coord = chunk_sequence // (inline_axis_chunk_count * sample_axis_chunk_count) + # il_chunk_coord = (chunk_sequence % (inline_axis_chunk_count * sample_axis_chunk_count)) // sample_axis_chunk_count + # sample_chunk_coord = chunk_sequence % sample_axis_chunk_count + + working, sample_chunk_coord = divmod(chunk_sequence, sample_axis_chunk_count) + xl_chunk_coord, il_chunk_coord = divmod(working, inline_axis_chunk_count) + + chunk_num = il_chunk_coord * crossline_axis_chunk_count * sample_axis_chunk_count + xl_chunk_coord * sample_axis_chunk_count + sample_chunk_coord + chunk_min, _ = accessor.getChunkMinMax(chunk_num) + print(f"{chunk_sequence:3} {chunk_num:3} ({chunk_min[0]:3}, {chunk_min[1]:3}, {chunk_min[2]:3})") + chunks_visited.add(chunk_num) + + print() + print(f"Visited chunk count: {len(chunks_visited)}") + + +def create_vds_and_iterate_crossline_chunks_4d(): + vds = create_4d_vds() + + manager = openvds.getAccessManager(vds) + accessor = manager.createVolumeDataPageAccessor(openvds.DimensionsND.Dimensions_012, 0, 0, 8, + openvds.IVolumeDataAccessManager.AccessMode.AccessMode_Create, 1024) + + sample_axis_chunk_count = 0 + trace_axis_chunk_count = 0 + crossline_axis_chunk_count = 0 + inline_axis_chunk_count = 0 + + for chunk_num in range(accessor.getChunkCount()): + chunk_min, _ = accessor.getChunkMinMax(chunk_num) + if chunk_min[0] == 0 and chunk_min[1] == 0 and chunk_min[2] == 0: + inline_axis_chunk_count += 1 + if chunk_min[0] == 0 and chunk_min[1] == 0 and chunk_min[3] == 0: + crossline_axis_chunk_count += 1 + if chunk_min[0] == 0 and chunk_min[2] == 0 and chunk_min[3] == 0: + trace_axis_chunk_count += 1 + if chunk_min[1] == 0 and chunk_min[2] == 0 and chunk_min[3] == 0: + sample_axis_chunk_count += 1 + + print() + print("Gen 1 4D VDS chunk iteration") + print() + print(f"sample axis chunk count: {sample_axis_chunk_count}") + print(f"trace axis chunk count: {trace_axis_chunk_count}") + print(f"crossline axis chunk count: {crossline_axis_chunk_count}") + print(f"inline axis chunk count: {inline_axis_chunk_count}") + print(f"total chunks: {accessor.getChunkCount()}") + + chunks_visited = set() + + for chunk_sequence in range(accessor.getChunkCount()): + # convert sequence number to chunk number for 4D crossline-sorted input + + working, sample_chunk_coord = divmod(chunk_sequence, sample_axis_chunk_count) + working, trace_chunk_coord = divmod(working, trace_axis_chunk_count) + xl_chunk_coord, il_chunk_coord = divmod(working, inline_axis_chunk_count) + + chunk_num = il_chunk_coord * crossline_axis_chunk_count * trace_axis_chunk_count * sample_axis_chunk_count\ + + xl_chunk_coord * trace_axis_chunk_count * sample_axis_chunk_count\ + + trace_chunk_coord * sample_axis_chunk_count\ + + sample_chunk_coord + chunk_min, _ = accessor.getChunkMinMax(chunk_num) + print(f"{chunk_sequence:5} {chunk_num:5} ({chunk_min[0]:3}, {chunk_min[1]:3}, {chunk_min[2]:3}, {chunk_min[3]:3})") + chunks_visited.add(chunk_num) + + print() + print(f"Visited chunk count: {len(chunks_visited)}") + + +def iterate_crossline_chunks_universal(vds_handle): + layout = openvds.getLayout(vds_handle) + dimensionality = layout.getDimensionality() + assert dimensionality == 3 or dimensionality == 4 + + manager = openvds.getAccessManager(vds_handle) + accessor = manager.createVolumeDataPageAccessor(openvds.DimensionsND.Dimensions_012, 0, 0, 8, + openvds.IVolumeDataAccessManager.AccessMode.AccessMode_Create, 1024) + + dim_chunk_counts = [0, 0, 0, 0] + + for chunk_num in range(accessor.getChunkCount()): + chunk_min, _ = accessor.getChunkMinMax(chunk_num) + for i in range(len(dim_chunk_counts)): + is_zero = True + j = 0 + while is_zero and j < len(dim_chunk_counts): + if j != i: + is_zero = chunk_min[j] == 0 + j += 1 + if is_zero: + dim_chunk_counts[i] += 1 + + for i in range(len(dim_chunk_counts)): + print(f"dim{i} axis chunk count: {dim_chunk_counts[i]}") + print() + print(f"total chunks: accessor: {accessor.getChunkCount()} counts product: {math.prod(dim_chunk_counts)}") + + pitches = [1, 1, 1, 1] + for i in range(1, len(dim_chunk_counts)): + pitches[i] = pitches[i - 1] * dim_chunk_counts[i - 1] + + chunks_visited = set() + + for chunk_sequence in range(accessor.getChunkCount()): + # convert sequence number to chunk number for 4D crossline-sorted input + + chunk_coords = [0, 0, 0, 0] + if dimensionality == 3: + working, chunk_coords[0] = divmod(chunk_sequence, dim_chunk_counts[0]) + chunk_coords[1], chunk_coords[2] = divmod(working, dim_chunk_counts[2]) # 2 not 1 because we're working in crossline-major space + elif dimensionality == 4: + working, chunk_coords[0] = divmod(chunk_sequence, dim_chunk_counts[0]) + working, chunk_coords[1] = divmod(working, dim_chunk_counts[1]) + chunk_coords[2], chunk_coords[3] = divmod(working, dim_chunk_counts[3]) # 3 not 2 because we're working in crossline-major space + else: + raise ValueError(f"Invalid dimensionality: {dimensionality}") + + chunk_num = 0 + for i in range(len(chunk_coords)): + chunk_num += chunk_coords[i] * pitches[i] + + chunk_min, _ = accessor.getChunkMinMax(chunk_num) + print( + f"{chunk_sequence:5} {chunk_num:5} ({chunk_min[0]:3}, {chunk_min[1]:3}, {chunk_min[2]:3}, {chunk_min[3]:3})") + chunks_visited.add(chunk_num) + + print() + print(f"Visited chunk count: {len(chunks_visited)}") + + +def create_3d_vds_and_iterate_universal(): + vds = create_3d_vds() + + print() + print("Gen 2 (universal) 3D VDS chunk iteration") + print() + + iterate_crossline_chunks_universal(vds) + + +def create_4d_vds_and_iterate_universal(): + vds = create_4d_vds() + + print() + print("Gen 2 (universal) 4D VDS chunk iteration") + print() + + iterate_crossline_chunks_universal(vds) + + +if __name__ == "__main__": + is_poststack = False + + if is_poststack: + create_vds_and_iterate_crossline_chunks_3d() + create_3d_vds_and_iterate_universal() + else: + with open("C:\\temp\\SEGY\\t\\chunk_order.txt", "w") as f: + with redirect_stdout(f): + start = time.perf_counter() + create_vds_and_iterate_crossline_chunks_4d() + stop1 = time.perf_counter() + create_4d_vds_and_iterate_universal() + stop2 = time.perf_counter() + + print() + print(f"Gen 1 elapsed: {stop1 - start}") + print(f"Gen 2 elapsed: {stop2 - stop1}") -- GitLab From 5324d36d51268cc3f3c7bd673d8845b19cd28b6a Mon Sep 17 00:00:00 2001 From: Jim King Date: Wed, 27 Oct 2021 11:57:04 -0500 Subject: [PATCH 43/46] fix comments --- tools/SEGYImport/SEGYImport.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index b8edd17f..cb47cd4c 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -3597,12 +3597,9 @@ main(int argc, char* argv[]) auto &chunkInfo = chunkInfos[chunk]; - // if we've crossed to a new inline then trim the trace page cache + // if we've crossed to a new primary key range then trim the trace page cache if (chunk > 0) { - - // TODO alternate versions for offset sorted and crossline sorted - const auto& previousChunkInfo = chunkInfos[chunk - 1]; for (size_t chunkFileIndex = 0; chunkFileIndex < dataProviders.size(); ++chunkFileIndex) @@ -3613,12 +3610,12 @@ main(int argc, char* argv[]) auto currentIndexIter = chunkInfo.lowerUpperSegmentIndices.find(chunkFileIndex); if (currentIndexIter != chunkInfo.lowerUpperSegmentIndices.end()) { - // This file is active in both the current and previous chunks. Check to see if we've progressed to a new set of inlines. + // This file is active in both the current and previous chunks. Check to see if we've progressed to a new set of primary keys. auto previousLowerSegmentIndex = std::get<0>(prevIndexIter->second); auto currentLowerSegmentIndex = std::get<0>(currentIndexIter->second); if (currentLowerSegmentIndex > previousLowerSegmentIndex) { - // we've progressed to a new set of inlines; remove earlier pages from the cache + // we've progressed to a new set of primary keys; remove earlier pages from the cache traceDataManagers[chunkFileIndex].retirePagesBefore(fileInfo.m_segmentInfoLists[chunkFileIndex][currentLowerSegmentIndex].m_traceStart); } } -- GitLab From 0e1264847d187e75df432bc558ea1c0d35d88456 Mon Sep 17 00:00:00 2001 From: Jim King Date: Wed, 27 Oct 2021 16:26:04 -0500 Subject: [PATCH 44/46] fix compile warnings/errors on Linux --- tools/SEGYImport/SEGYImport.cpp | 75 ++++++++++++------------- tools/SEGYImport/TraceInfo2DManager.cpp | 2 +- 2 files changed, 36 insertions(+), 41 deletions(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index cb47cd4c..9f5fd270 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -65,22 +65,6 @@ #include #include -constexpr double METERS_PER_FOOT = 0.3048; - -enum class TraceSpacingByOffset -{ - Off = 0, - On = 1, - Auto = 2 -}; - -enum class PrimaryKeyValue -{ - Other = 0, - InlineNumber = 1, - CrosslineNumber = 2 -}; - int64_t GetTotalSystemMemory() { MEMORYSTATUSEX status; @@ -98,8 +82,26 @@ int64_t GetTotalSystemMemory() long page_size = sysconf(_SC_PAGE_SIZE); return int64_t(pages) * int64_t(page_size); } + +#include #endif +constexpr double METERS_PER_FOOT = 0.3048; + +enum class TraceSpacingByOffset +{ + Off = 0, + On = 1, + Auto = 2 +}; + +enum class PrimaryKeyValue +{ + Other = 0, + InlineNumber = 1, + CrosslineNumber = 2 +}; + inline char asciitolower(char in) { if (in <= 'Z' && in >= 'A') return in - ('Z' - 'z'); @@ -222,7 +224,7 @@ SerializeSEGYHeaderField(SEGY::HeaderField const& headerField) } Json::Value -SerializeSEGYFileInfo(SEGYFileInfo const& fileInfo, const int fileIndex) +SerializeSEGYFileInfo(SEGYFileInfo const& fileInfo, const size_t fileIndex) { Json::Value jsonFileInfo; @@ -618,7 +620,7 @@ getOrderedSegmentListIndices(SEGYFileInfo const& fileInfo, size_t& globalTotalSe if (fileInfo.IsOffsetSorted()) { - for (int i = 0; i < fileInfo.m_segmentInfoListsByOffset.size(); ++i) + for (int i = 0; i < static_cast(fileInfo.m_segmentInfoListsByOffset.size()); ++i) { orderedListIndices.push_back(i); for (const auto& entry : fileInfo.m_segmentInfoListsByOffset[i]) @@ -1031,8 +1033,6 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg if (traceSpacingByOffset != TraceSpacingByOffset::Off) { // Determine if offset values are suitable for computed respacing: Values are monotonically increasing - bool - isSane = false; size_t numSorted = 0; @@ -1160,7 +1160,7 @@ analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo c providerSegments; int64_t primaryKeyTraceCount = 0; - for (int fileIndex = 0; fileIndex < fileInfo.m_segmentInfoListsByOffset.size(); ++fileIndex) + for (size_t fileIndex = 0; fileIndex < fileInfo.m_segmentInfoListsByOffset.size(); ++fileIndex) { const auto& offsetSegments = fileInfo.m_segmentInfoListsByOffset[fileIndex]; @@ -2295,7 +2295,7 @@ PrimaryKeyDimension(const SEGYFileInfo& fileInfo, PrimaryKeyValue primaryKey) int findChannelDescriptorIndex(const std::string& channelName, const std::vector& channelDescriptors) { - for (int index = 0; index < channelDescriptors.size(); ++index) + for (int index = 0; index < static_cast(channelDescriptors.size()); ++index) { if (channelName == channelDescriptors[index].GetName()) { @@ -2357,8 +2357,6 @@ CalculateGatherSpacing(const SEGYFileInfo& fileInfo, const int fold, const std:: gatherOffsets.push_back(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness)); // read traces and stuff offset into a vector while primaryKey/secondaryKey match - int64_t - trace = firstTrace + 1; for (int64_t trace = firstTrace + 1; trace < traceDataManager.fileTraceCount() && trace < firstTrace + fold; ++trace) { header = traceDataManager.getTraceData(trace, error); @@ -2397,12 +2395,12 @@ CalculateGatherSpacing(const SEGYFileInfo& fileInfo, const int fold, const std:: } // map traces to tertiary indices while we have room for dead traces and we have more input traces - while (hasRoom() && gatherIndex < gatherOffsets.size()) + while (hasRoom() && gatherIndex < static_cast(gatherOffsets.size())) { // process all the traces whose offset matches the current offset const auto currentOffset = offsetAtIndex(offsetIndex); - while (hasRoom() && gatherIndex < gatherOffsets.size() && gatherOffsets[gatherIndex] == currentOffset) + while (hasRoom() && gatherIndex < static_cast(gatherOffsets.size()) && gatherOffsets[gatherIndex] == currentOffset) { traceIndices[firstTrace + gatherIndex] = tertiaryIndex; ++gatherIndex; @@ -2413,7 +2411,7 @@ CalculateGatherSpacing(const SEGYFileInfo& fileInfo, const int fold, const std:: ++offsetIndex; // try to advance to tertiary index so that the next trace's offset is placed near-ish to where it might be expected - if (gatherIndex < gatherOffsets.size() && tertiaryIndex <= offsetIndex) + if (gatherIndex < static_cast(gatherOffsets.size()) && tertiaryIndex <= offsetIndex) { while (hasRoom() && gatherOffsets[gatherIndex] != offsetAtIndex(offsetIndex)) { @@ -2436,7 +2434,7 @@ CalculateGatherSpacing(const SEGYFileInfo& fileInfo, const int fold, const std:: } // map remaining traces to remaining indices - while (gatherIndex < gatherOffsets.size()) + while (gatherIndex < static_cast(gatherOffsets.size())) { traceIndices[firstTrace + gatherIndex] = tertiaryIndex; ++gatherIndex; @@ -2492,8 +2490,6 @@ main(int argc, char* argv[]) bool prestack = false; bool is2D = false; bool isOffsetSorted = false; - // TODO remove traceOrderByOffset - bool traceOrderByOffset = true; bool jsonOutput = false; bool help = false; bool helpConnection = false; @@ -2932,7 +2928,7 @@ main(int argc, char* argv[]) // If we are in scan mode we serialize the result of the file scan either to a fileInfo file (if specified) or to stdout and exit if (scan) { - for (int fileIndex = 0; fileIndex < fileNames.size(); ++fileIndex) + for (size_t fileIndex = 0; fileIndex < fileNames.size(); ++fileIndex) { Json::Value jsonFileInfo = SerializeSEGYFileInfo(fileInfo, fileIndex); @@ -3010,7 +3006,7 @@ main(int argc, char* argv[]) OpenVDS::Error error; - for (int fileIndex = 0; fileIndex < fileInfoFileNames.size(); ++fileIndex) + for (size_t fileIndex = 0; fileIndex < fileInfoFileNames.size(); ++fileIndex) { const auto & fileInfoFileName = fileInfoFileNames[fileIndex]; @@ -3423,10 +3419,10 @@ main(int argc, char* argv[]) if (primaryKeyValue == PrimaryKeyValue::CrosslineNumber) { - for (auto i = 0; i < dimChunkCounts.size(); ++i) + for (size_t i = 0; i < dimChunkCounts.size(); ++i) { bool isZero = true; - for (auto j = 0; isZero && j < dimChunkCounts.size(); ++j) + for (size_t j = 0; isZero && j < dimChunkCounts.size(); ++j) { if (j != i) { @@ -3669,13 +3665,12 @@ main(int argc, char* argv[]) void* azimuthBuffer = azimuthPage ? azimuthPage->GetWritableBuffer(azimuthPitch) : nullptr; void* muteBuffer = mutePage ? mutePage->GetWritableBuffer(mutePitch) : nullptr; - const int pitchCheckDimension = fileInfo.IsOffsetSorted() ? 2 : 1; assert(amplitudePitch[0] == 1); - assert(!traceFlagBuffer || traceFlagPitch[pitchCheckDimension] == 1); - assert(!segyTraceHeaderBuffer || segyTraceHeaderPitch[pitchCheckDimension] == SEGY::TraceHeaderSize); - assert(!offsetBuffer || offsetPitch[pitchCheckDimension] == 1); - assert(!azimuthBuffer || azimuthPitch[pitchCheckDimension] == 1); - assert(!muteBuffer || mutePitch[pitchCheckDimension] == 1); + assert(!traceFlagBuffer || traceFlagPitch[fileInfo.IsOffsetSorted() ? 2 : 1] == 1); + assert(!segyTraceHeaderBuffer || segyTraceHeaderPitch[fileInfo.IsOffsetSorted() ? 2 : 1] == SEGY::TraceHeaderSize); + assert(!offsetBuffer || offsetPitch[fileInfo.IsOffsetSorted() ? 2 : 1] == 1); + assert(!azimuthBuffer || azimuthPitch[fileInfo.IsOffsetSorted() ? 2 : 1] == 1); + assert(!muteBuffer || mutePitch[fileInfo.IsOffsetSorted() ? 2 : 1] == 1); const auto segmentInfoListsSize = fileInfo.IsOffsetSorted() ? fileInfo.m_segmentInfoListsByOffset.size() : fileInfo.m_segmentInfoLists.size(); diff --git a/tools/SEGYImport/TraceInfo2DManager.cpp b/tools/SEGYImport/TraceInfo2DManager.cpp index 3ce5f514..9dfcf1ad 100644 --- a/tools/SEGYImport/TraceInfo2DManager.cpp +++ b/tools/SEGYImport/TraceInfo2DManager.cpp @@ -63,7 +63,7 @@ int TraceInfo2DManagerStore::GetIndexOfEnsembleNumber(int ensembleNumber) void TraceInfo2DManagerStore::BuildEnsembleNumberMap() { m_ensembleToIndex.clear(); - for (int index = 0; index < m_traceInfo.size(); ++index) + for (int index = 0; index < static_cast(m_traceInfo.size()); ++index) { m_ensembleToIndex.insert(std::make_pair(m_traceInfo[index].ensembleNumber, index)); } -- GitLab From 3d0a889f6993bce952eaa0bc28482d90199ccabf Mon Sep 17 00:00:00 2001 From: Jim King Date: Wed, 27 Oct 2021 16:36:03 -0500 Subject: [PATCH 45/46] fix Windows compiler warning --- tools/SEGYImport/SEGYImport.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 9f5fd270..0d3c9acc 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -1156,7 +1156,7 @@ analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo c // 1. Find all the segments in all files that match the primary key // 2. Get a total trace count of all the segments; pass trace count to value range estimator - std::map>> + std::map>> providerSegments; int64_t primaryKeyTraceCount = 0; -- GitLab From 88ead15296fd7c031743f3e000e9b25b27ff51e5 Mon Sep 17 00:00:00 2001 From: Jim King Date: Thu, 4 Nov 2021 14:14:53 -0500 Subject: [PATCH 46/46] add warning message for offset-sorted SEGY containing traces with duplicate key combos --- .../SEGYImport/pytests/test_offset_sorted.py | 52 +++++++++++++++++-- tools/SEGYImport/SEGYImport.cpp | 13 +++++ 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/tests/tools/SEGYImport/pytests/test_offset_sorted.py b/tests/tools/SEGYImport/pytests/test_offset_sorted.py index 7a05e4b3..077e2b68 100644 --- a/tests/tools/SEGYImport/pytests/test_offset_sorted.py +++ b/tests/tools/SEGYImport/pytests/test_offset_sorted.py @@ -20,18 +20,24 @@ def output_scan() -> TempScanFileGuard: @pytest.fixture def offset_sorted_segy() -> str: - return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "CrosslineSorted", "ST0202.segy") + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "OffsetSorted", "ST0202.segy") + + +@pytest.fixture +def offset_sorted_deduped_segy() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "OffsetSorted", + "ST0202_deduped.segy") @pytest.fixture def offset_sorted_scan() -> str: - return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "CrosslineSorted", + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "OffsetSorted", "ST0202.segy.scan.json") @pytest.fixture def conventional_sorted_segy() -> str: - return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "CrosslineSorted", + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "OffsetSorted", "ST0202_conventional.segy") @@ -55,6 +61,21 @@ def offset_sorted_executor(offset_sorted_segy, output_vds) -> Tuple[ImportExecut return ex, output_vds +@pytest.fixture +def offset_sorted_deduped_executor(offset_sorted_deduped_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Fixture to setup an ImportExecutor with common options for de-duped version of offset-sorted SEGY""" + ex = ImportExecutor() + + ex.add_args(["--header-field", "offset=177:4"]) + + ex.add_arg("--offset-sorted") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(offset_sorted_deduped_segy) + + return ex, output_vds + + @pytest.fixture def conventional_sorted_executor(conventional_sorted_segy, conventional_output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: """Fixture to setup an ImportExecutor with common options for conventionally sorted SEGY""" @@ -343,3 +364,28 @@ def test_with_scan_file(offset_sorted_segy, offset_sorted_scan, output_vds): access_manager = openvds.getAccessManager(handle) assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_023) != openvds.VDSProduceStatus.Unavailable + + +def test_dupe_warning(offset_sorted_executor): + ex, output_vds = offset_sorted_executor + + s = ex.command_line() + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + # check that the output contains the warning message for traces with duplicate offset, inline, crossline key combos + assert "duplicate key combinations" in ex.output() + + +def test_dupe_no_warning(offset_sorted_deduped_executor): + ex, output_vds = offset_sorted_deduped_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + # check that the output contains the warning message for traces with duplicate offset, inline, crossline key combos + assert "duplicate key combinations" not in ex.output() diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index 0d3c9acc..bc0ea3f7 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -2490,6 +2490,7 @@ main(int argc, char* argv[]) bool prestack = false; bool is2D = false; bool isOffsetSorted = false; + bool isOffsetSortedDupeKeyWarned = false; bool jsonOutput = false; bool help = false; bool helpConnection = false; @@ -3751,6 +3752,18 @@ main(int argc, char* argv[]) break; } + if (fileInfo.IsOffsetSorted() && !isOffsetSortedDupeKeyWarned && trace > firstTrace && secondaryTest == currentSecondaryKey) + { + if (!jsonOutput && is_tty) + { + std::cout << std::endl; + } + auto + message = "This offset-sorted SEGY has traces with duplicate key combinations of Offset, Inline, and Crossline. Only one of the traces with duplicate key values will be written to the output VDS."; + OpenVDS::printWarning(jsonOutput, "SEGY", message); + isOffsetSortedDupeKeyWarned = true; + } + if (secondaryTest != currentSecondaryKey) { // we've progressed to a new secondary key, so reset the tertiary (gather) index -- GitLab