diff --git a/3rdparty/.gitignore b/3rdparty/.gitignore index e8d3639a6484fade6d7ec3d4eb53b35f0cf15d24..2e418824dd913e49e44a14e4d722635f7437ea54 100644 --- a/3rdparty/.gitignore +++ b/3rdparty/.gitignore @@ -18,3 +18,5 @@ libressl* dms* cmakerc* !dms-git +azure-sdk-for-cpp* +libxml2* diff --git a/src/SEGYUtils/SEGYFileInfo.cpp b/src/SEGYUtils/SEGYFileInfo.cpp index 6edcb3640e584e53a5feb90cb9e74f0c8d0af440..99f02f6c0e764167ae24614bd437958f6ddd6288 100644 --- a/src/SEGYUtils/SEGYFileInfo.cpp +++ b/src/SEGYUtils/SEGYFileInfo.cpp @@ -32,7 +32,7 @@ using namespace SEGY; bool SEGYFileInfo::Is4D() const { - return m_segyType == SEGY::SEGYType::Prestack; + return m_segyType == SEGY::SEGYType::Prestack || m_segyType == SEGY::SEGYType::PrestackOffsetSorted; } bool @@ -45,7 +45,19 @@ bool SEGYFileInfo::HasGatherOffset() const { // TODO unbinned gathers may be angle gathers? - return m_segyType == SEGY::SEGYType::Prestack || IsUnbinned(); + return m_segyType == SEGYType::Prestack || m_segyType == SEGYType::Prestack2D || IsOffsetSorted() || IsUnbinned(); +} + +bool +SEGYFileInfo::Is2D() const +{ + return m_segyType == SEGYType::Poststack2D || m_segyType == SEGYType::Prestack2D; +} + +bool +SEGYFileInfo::IsOffsetSorted() const +{ + return m_segyType == SEGYType::PrestackOffsetSorted; } SEGYBinInfo @@ -93,8 +105,15 @@ SEGYFileInfo::StaticGetUniqueID() } bool -SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Error &error, HeaderField const &primaryKeyHeaderField, HeaderField const &secondaryKeyHeaderField, SEGY::HeaderField const &startTimeHeaderField, SEGYBinInfoHeaderFields const &binInfoHeaderFields) +SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Error &error, HeaderField const &primaryKeyHeaderField, HeaderField const &secondaryKeyHeaderField, SEGY::HeaderField const &startTimeHeaderField, SEGY::HeaderField const& offsetHeaderField, SEGYBinInfoHeaderFields const &binInfoHeaderFields) { + std::function + readOffset = [](const char* traceHeader) { 
return -1; }; + if (IsOffsetSorted()) + { + readOffset = [offsetHeaderField, this](const char* traceHeader) { return ReadFieldFromHeader(traceHeader, offsetHeaderField, m_headerEndianness); }; + } + char textualFileHeader[TextualFileHeaderSize]; char binaryFileHeader[BinaryFileHeaderSize]; char traceHeader[TraceHeaderSize]; @@ -142,14 +161,6 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro for (const auto& dataProvider : dataProviders) { - m_segmentInfoLists.emplace_back(); - m_traceCounts.emplace_back(); - - auto& - segmentInfos = m_segmentInfoLists.back(); - auto& - traceCount = m_traceCounts.back(); - int64_t fileSize = dataProvider.Size(error); if (error.code != 0) @@ -170,6 +181,32 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro return false; } + int + currentOffset = readOffset(traceHeader); + + std::function + pushSegmentInfo; + + if (IsOffsetSorted()) + { + m_segmentInfoListsByOffset.emplace_back(); + auto& + offsetMap = m_segmentInfoListsByOffset.back(); + offsetMap.emplace(currentOffset, std::vector()); + + pushSegmentInfo = [this](int offset, const SEGYSegmentInfo& segmentInfo) { m_segmentInfoListsByOffset.back()[offset].push_back(segmentInfo); }; + } + else + { + m_segmentInfoLists.emplace_back(); + + pushSegmentInfo = [this](int offset, const SEGYSegmentInfo& segmentInfo) { m_segmentInfoLists.back().push_back(segmentInfo); }; + } + + m_traceCounts.emplace_back(); + auto& + traceCount = m_traceCounts.back(); + // If the sample count is not set in the binary header we try to find it from the first trace header if (m_sampleCount == 0) { @@ -182,7 +219,7 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro m_sampleIntervalMilliseconds = ReadFieldFromHeader(traceHeader, TraceHeader::SampleIntervalHeaderField, m_headerEndianness) / 1000.0; } - int64_t traceDataSize = (fileSize - TextualFileHeaderSize - BinaryFileHeaderSize); + const int64_t traceDataSize = (fileSize - TextualFileHeaderSize - 
BinaryFileHeaderSize); traceCount = traceDataSize / TraceByteSize(); @@ -203,7 +240,7 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro SEGYBinInfo outsideBinInfo; - int primaryKey = ReadFieldFromHeader(traceHeader, primaryKeyHeaderField, m_headerEndianness), nextPrimaryKey = 0; + int primaryKey = Is2D() ? 0 : ReadFieldFromHeader(traceHeader, primaryKeyHeaderField, m_headerEndianness), nextPrimaryKey = 0; SEGYSegmentInfo segmentInfo(primaryKey, 0, readBinInfoFromHeader(traceHeader, binInfoHeaderFields, m_headerEndianness, 1)); @@ -211,6 +248,10 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro int readCount = 1; + int + testOffset = 0, + nextOffset = 0; + while (segmentInfo.m_traceStop != lastTrace) { dataProvider.Read(traceHeader, TextualFileHeaderSize + BinaryFileHeaderSize + trace * TraceByteSize(), TraceHeaderSize, error); @@ -221,9 +262,10 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro } readCount++; - int primaryKey = ReadFieldFromHeader(traceHeader, primaryKeyHeaderField, m_headerEndianness); + primaryKey = Is2D() ? 
0 : ReadFieldFromHeader(traceHeader, primaryKeyHeaderField, m_headerEndianness); + testOffset = readOffset(traceHeader); - if (primaryKey == segmentInfo.m_primaryKey) // expand current segment if the primary key matches + if (primaryKey == segmentInfo.m_primaryKey && (!IsOffsetSorted() || testOffset == currentOffset)) // expand current segment if the primary key matches { assert(trace > segmentInfo.m_traceStop); segmentInfo.m_traceStop = trace; @@ -235,17 +277,21 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro outsideTrace = trace; outsideBinInfo = readBinInfoFromHeader(traceHeader, binInfoHeaderFields, m_headerEndianness, 1); nextPrimaryKey = primaryKey; + nextOffset = testOffset; } if (outsideTrace == segmentInfo.m_traceStop + 1) // current segment is finished { - segmentInfos.push_back(segmentInfo); - int64_t segmentLength = segmentInfo.m_traceStop - segmentInfo.m_traceStart + 1; + pushSegmentInfo(currentOffset, segmentInfo); + + const int64_t + segmentLength = segmentInfo.m_traceStop - segmentInfo.m_traceStart + 1; // start a new segment segmentInfo = SEGYSegmentInfo(nextPrimaryKey, outsideTrace, outsideBinInfo); trace = std::min(lastTrace, outsideTrace + segmentLength); outsideTrace = 0, jump = 1; + currentOffset = nextOffset; } else if (outsideTrace == 0) // looking for a trace outside the current segment { @@ -264,7 +310,7 @@ SEGYFileInfo::Scan(const std::vector& dataProviders, OpenVDS::Erro } // final segment in this file is finished - segmentInfos.push_back(segmentInfo); + pushSegmentInfo(currentOffset, segmentInfo); } return true; diff --git a/src/SEGYUtils/SEGYUtils/SEGY.h b/src/SEGYUtils/SEGYUtils/SEGY.h index 37038f6322ccb938f4a8e79da4b958774d8a4d2a..b1ba23740a8223b168630873bb6de2cf95ea8c7f 100644 --- a/src/SEGYUtils/SEGYUtils/SEGY.h +++ b/src/SEGYUtils/SEGYUtils/SEGY.h @@ -55,6 +55,16 @@ struct HeaderField HeaderField(int byteLocation, FieldWidth fieldWidth) : byteLocation(byteLocation), fieldWidth(fieldWidth) {} bool Defined() 
const { return byteLocation != 0; } + + bool operator==(const HeaderField& hf) const + { + return byteLocation == hf.byteLocation && fieldWidth == hf.fieldWidth; + } + + bool operator!=(const HeaderField& hf) const + { + return !(*this == hf); + } }; namespace BinaryHeader @@ -194,6 +204,11 @@ static const HeaderField InlineNumberHeaderField(189, FieldWidth::FourByte); static const HeaderField CrosslineNumberHeaderField(193, FieldWidth::FourByte); static const HeaderField ReceiverHeaderField(13, FieldWidth::FourByte); static const HeaderField OffsetHeaderField(37, FieldWidth::FourByte); +static const HeaderField OffsetXHeaderField(97, FieldWidth::TwoByte); +static const HeaderField OffsetYHeaderField(95, FieldWidth::TwoByte); +static const HeaderField Azimuth(61, FieldWidth::FourByte); +static const HeaderField MuteStartTime(111, FieldWidth::TwoByte); +static const HeaderField MuteEndTime(113, FieldWidth::TwoByte); } // end namespace TraceHeader @@ -308,6 +323,22 @@ enum class SampleUnits Meters = 2 }; +enum class AzimuthUnits +{ + Radians = 0, + Degrees = 1, + MinValue = Radians, + MaxValue = Degrees +}; + +enum class AzimuthType +{ + Azimuth = 0, + OffsetXY = 1, + MinValue = Azimuth, + MaxValue = OffsetXY +}; + enum class SEGYType { Poststack = 0, @@ -317,6 +348,7 @@ enum class SEGYType CDPGathers = 4, ShotGathers = 5, ReceiverGathers = 6, + PrestackOffsetSorted = 7 }; OPENVDS_EXPORT bool IsSEGYTypeUnbinned(SEGYType segyType); diff --git a/src/SEGYUtils/SEGYUtils/SEGYFileInfo.h b/src/SEGYUtils/SEGYUtils/SEGYFileInfo.h index 333715e6e6f45ef5c3ce5d70e006f76168f5e98d..0bbae4cfceac3e5c3ff6c3fbf32f3c587f0b0750 100644 --- a/src/SEGYUtils/SEGYUtils/SEGYFileInfo.h +++ b/src/SEGYUtils/SEGYUtils/SEGYFileInfo.h @@ -93,6 +93,11 @@ struct SEGYFileInfo std::vector> m_segmentInfoLists; + // vector of per-file offset-vector maps for SEGYType::PrestackOffsetSorted + std::vector>> + m_segmentInfoListsByOffset; + + SEGY::HeaderField m_primaryKey, m_secondaryKey; @@ -108,7 +113,7 @@ 
struct SEGYFileInfo OPENVDS_EXPORT int TraceByteSize() const; - OPENVDS_EXPORT bool Scan(const std::vector& dataProviders, OpenVDS::Error &error, SEGY::HeaderField const &primaryKeyHeaderField, SEGY::HeaderField const &secondaryKeyHeaderField = SEGY::HeaderField(), SEGY::HeaderField const &startTimeHeaderField = SEGY::TraceHeader::StartTimeHeaderField, SEGYBinInfoHeaderFields const &binInfoHeaderFields = SEGYBinInfoHeaderFields::StandardHeaderFields()); + OPENVDS_EXPORT bool Scan(const std::vector& dataProviders, OpenVDS::Error &error, SEGY::HeaderField const &primaryKeyHeaderField, SEGY::HeaderField const &secondaryKeyHeaderField = SEGY::HeaderField(), SEGY::HeaderField const &startTimeHeaderField = SEGY::TraceHeader::StartTimeHeaderField, SEGY::HeaderField const& offsetHeaderField = SEGY::TraceHeader::OffsetHeaderField, SEGYBinInfoHeaderFields const &binInfoHeaderFields = SEGYBinInfoHeaderFields::StandardHeaderFields()); OPENVDS_EXPORT SEGYBinInfo readBinInfoFromHeader(const char* header, SEGYBinInfoHeaderFields const& headerFields, SEGY::Endianness endianness, int segmentTraceIndex) const; @@ -117,6 +122,10 @@ struct SEGYFileInfo OPENVDS_EXPORT bool IsUnbinned() const; OPENVDS_EXPORT bool HasGatherOffset() const; + + OPENVDS_EXPORT bool Is2D() const; + + OPENVDS_EXPORT bool IsOffsetSorted() const; }; #endif // SEGY_FILE_INFO_H diff --git a/src/SEGYUtils/SEGYUtils/TraceDataManager.h b/src/SEGYUtils/SEGYUtils/TraceDataManager.h index 7cfc7b634e18e4328ce32fb65322db5cda41c6a6..3bb073d01c76da55a359080e7f1036bea1f27d3e 100644 --- a/src/SEGYUtils/SEGYUtils/TraceDataManager.h +++ b/src/SEGYUtils/SEGYUtils/TraceDataManager.h @@ -141,6 +141,12 @@ public: m_dataViewManager->retireAllDataViews(); } + int64_t + fileTraceCount() const + { + return m_numTraces; + } + private: std::shared_ptr m_dataViewManager; diff --git a/tests/tools/SEGYImport/pytests/.gitignore b/tests/tools/SEGYImport/pytests/.gitignore new file mode 100644 index 
0000000000000000000000000000000000000000..a348e5040c3d2ac0cee5da2f21942aa68f106669 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/.gitignore @@ -0,0 +1 @@ +/__pycache__/ diff --git a/tests/tools/SEGYImport/pytests/crossline_chunk_order.py b/tests/tools/SEGYImport/pytests/crossline_chunk_order.py new file mode 100644 index 0000000000000000000000000000000000000000..f87e8e8aa8195e38dcd576d84f9f48473d4e0c46 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/crossline_chunk_order.py @@ -0,0 +1,287 @@ +""" +Testing/prototyping code for ordering chunks for crossline-sorted data. +""" +import math +import time +from contextlib import redirect_stdout +from timeit import timeit + +import openvds + + +def create_3d_vds(): + layout_descriptor = openvds.VolumeDataLayoutDescriptor(openvds.VolumeDataLayoutDescriptor.BrickSize.BrickSize_64, + 0, 0, 4, + openvds.VolumeDataLayoutDescriptor.LODLevels.LODLevels_None, + openvds.VolumeDataLayoutDescriptor.Options.Options_None) + compression_method = openvds.CompressionMethod(0) + compression_tolerance = 0.01 + + sample_samples = 500 + crossline_samples = 800 + inline_samples = 350 + + axis_descriptors = [ + openvds.VolumeDataAxisDescriptor(sample_samples, openvds.KnownAxisNames.sample(), openvds.KnownUnitNames.millisecond(), 0.0, + (sample_samples - 1) * 4.0), + openvds.VolumeDataAxisDescriptor(crossline_samples, openvds.KnownAxisNames.crossline(), openvds.KnownUnitNames.unitless(), 2000.0, + 2000.0 + (crossline_samples - 1) * 4), + openvds.VolumeDataAxisDescriptor(inline_samples, openvds.KnownAxisNames.inline(), openvds.KnownUnitNames.unitless(), 1000.0, + 1000.0 + (inline_samples - 1) * 2), + ] + channel_descriptors = [openvds.VolumeDataChannelDescriptor(openvds.VolumeDataChannelDescriptor.Format.Format_R32, + openvds.VolumeDataChannelDescriptor.Components.Components_1, + "Amplitude", openvds.KnownUnitNames.millisecond(), + -20000.0, 20000.0) + ] + + metaData = openvds.MetadataContainer() + 
metaData.setMetadataDoubleVector2(openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name, (1234.0, 4321.0)) + + vds = openvds.create("C:\\temp\\SEGY\\t\\crossline_test.vds", "", layout_descriptor, axis_descriptors, channel_descriptors, metaData, + compression_method, compression_tolerance) + + return vds + + +def create_4d_vds(): + layout_descriptor = openvds.VolumeDataLayoutDescriptor(openvds.VolumeDataLayoutDescriptor.BrickSize.BrickSize_64, + 0, 0, 4, + openvds.VolumeDataLayoutDescriptor.LODLevels.LODLevels_None, + openvds.VolumeDataLayoutDescriptor.Options.Options_None) + compression_method = openvds.CompressionMethod(0) + compression_tolerance = 0.01 + + sample_samples = 500 + trace_samples = 100 + crossline_samples = 800 + inline_samples = 350 + + axis_descriptors = [ + openvds.VolumeDataAxisDescriptor(sample_samples, openvds.KnownAxisNames.sample(), openvds.KnownUnitNames.millisecond(), 0.0, + (sample_samples - 1) * 4.0), + openvds.VolumeDataAxisDescriptor(trace_samples, "Trace (offset)", openvds.KnownUnitNames.unitless(), 1.0, + trace_samples + 1.0), + openvds.VolumeDataAxisDescriptor(crossline_samples, openvds.KnownAxisNames.crossline(), openvds.KnownUnitNames.unitless(), 2000.0, + 2000.0 + (crossline_samples - 1) * 4), + openvds.VolumeDataAxisDescriptor(inline_samples, openvds.KnownAxisNames.inline(), openvds.KnownUnitNames.unitless(), 1000.0, + 1000.0 + (inline_samples - 1) * 2), + ] + channel_descriptors = [openvds.VolumeDataChannelDescriptor(openvds.VolumeDataChannelDescriptor.Format.Format_R32, + openvds.VolumeDataChannelDescriptor.Components.Components_1, + "Amplitude", openvds.KnownUnitNames.millisecond(), + -20000.0, 20000.0) + ] + + metaData = openvds.MetadataContainer() + metaData.setMetadataDoubleVector2(openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name, (1234.0, 4321.0)) + + vds = 
openvds.create("C:\\temp\\SEGY\\t\\crossline_test.vds", "", layout_descriptor, axis_descriptors, channel_descriptors, metaData, + compression_method, compression_tolerance) + + return vds + + +def create_vds_and_iterate_crossline_chunks_3d(): + vds = create_3d_vds() + + manager = openvds.getAccessManager(vds) + accessor = manager.createVolumeDataPageAccessor(openvds.DimensionsND.Dimensions_012, 0, 0, 8, + openvds.IVolumeDataAccessManager.AccessMode.AccessMode_Create, 1024) + + sample_axis_chunk_count = 0 + crossline_axis_chunk_count = 0 + inline_axis_chunk_count = 0 + + for chunk_num in range(accessor.getChunkCount()): + chunk_min, _ = accessor.getChunkMinMax(chunk_num) + if chunk_min[0] == 0 and chunk_min[1] == 0: + inline_axis_chunk_count += 1 + if chunk_min[0] == 0 and chunk_min[2] == 0: + crossline_axis_chunk_count += 1 + if chunk_min[1] == 0 and chunk_min[2] == 0: + sample_axis_chunk_count += 1 + + print() + print("Gen 1 3D VDS chunk iteration") + print() + print(f"sample axis chunk count: {sample_axis_chunk_count}") + print(f"crossline axis chunk count: {crossline_axis_chunk_count}") + print(f"inline axis chunk count: {inline_axis_chunk_count}") + print(f"total chunks: {accessor.getChunkCount()}") + + chunks_visited = set() + + for chunk_sequence in range(accessor.getChunkCount()): + # convert sequence number to chunk number for crossline-sorted input + + # xl_chunk_coord = chunk_sequence // (inline_axis_chunk_count * sample_axis_chunk_count) + # il_chunk_coord = (chunk_sequence % (inline_axis_chunk_count * sample_axis_chunk_count)) // sample_axis_chunk_count + # sample_chunk_coord = chunk_sequence % sample_axis_chunk_count + + working, sample_chunk_coord = divmod(chunk_sequence, sample_axis_chunk_count) + xl_chunk_coord, il_chunk_coord = divmod(working, inline_axis_chunk_count) + + chunk_num = il_chunk_coord * crossline_axis_chunk_count * sample_axis_chunk_count + xl_chunk_coord * sample_axis_chunk_count + sample_chunk_coord + chunk_min, _ = 
accessor.getChunkMinMax(chunk_num) + print(f"{chunk_sequence:3} {chunk_num:3} ({chunk_min[0]:3}, {chunk_min[1]:3}, {chunk_min[2]:3})") + chunks_visited.add(chunk_num) + + print() + print(f"Visited chunk count: {len(chunks_visited)}") + + +def create_vds_and_iterate_crossline_chunks_4d(): + vds = create_4d_vds() + + manager = openvds.getAccessManager(vds) + accessor = manager.createVolumeDataPageAccessor(openvds.DimensionsND.Dimensions_012, 0, 0, 8, + openvds.IVolumeDataAccessManager.AccessMode.AccessMode_Create, 1024) + + sample_axis_chunk_count = 0 + trace_axis_chunk_count = 0 + crossline_axis_chunk_count = 0 + inline_axis_chunk_count = 0 + + for chunk_num in range(accessor.getChunkCount()): + chunk_min, _ = accessor.getChunkMinMax(chunk_num) + if chunk_min[0] == 0 and chunk_min[1] == 0 and chunk_min[2] == 0: + inline_axis_chunk_count += 1 + if chunk_min[0] == 0 and chunk_min[1] == 0 and chunk_min[3] == 0: + crossline_axis_chunk_count += 1 + if chunk_min[0] == 0 and chunk_min[2] == 0 and chunk_min[3] == 0: + trace_axis_chunk_count += 1 + if chunk_min[1] == 0 and chunk_min[2] == 0 and chunk_min[3] == 0: + sample_axis_chunk_count += 1 + + print() + print("Gen 1 4D VDS chunk iteration") + print() + print(f"sample axis chunk count: {sample_axis_chunk_count}") + print(f"trace axis chunk count: {trace_axis_chunk_count}") + print(f"crossline axis chunk count: {crossline_axis_chunk_count}") + print(f"inline axis chunk count: {inline_axis_chunk_count}") + print(f"total chunks: {accessor.getChunkCount()}") + + chunks_visited = set() + + for chunk_sequence in range(accessor.getChunkCount()): + # convert sequence number to chunk number for 4D crossline-sorted input + + working, sample_chunk_coord = divmod(chunk_sequence, sample_axis_chunk_count) + working, trace_chunk_coord = divmod(working, trace_axis_chunk_count) + xl_chunk_coord, il_chunk_coord = divmod(working, inline_axis_chunk_count) + + chunk_num = il_chunk_coord * crossline_axis_chunk_count * trace_axis_chunk_count * 
sample_axis_chunk_count\ + + xl_chunk_coord * trace_axis_chunk_count * sample_axis_chunk_count\ + + trace_chunk_coord * sample_axis_chunk_count\ + + sample_chunk_coord + chunk_min, _ = accessor.getChunkMinMax(chunk_num) + print(f"{chunk_sequence:5} {chunk_num:5} ({chunk_min[0]:3}, {chunk_min[1]:3}, {chunk_min[2]:3}, {chunk_min[3]:3})") + chunks_visited.add(chunk_num) + + print() + print(f"Visited chunk count: {len(chunks_visited)}") + + +def iterate_crossline_chunks_universal(vds_handle): + layout = openvds.getLayout(vds_handle) + dimensionality = layout.getDimensionality() + assert dimensionality == 3 or dimensionality == 4 + + manager = openvds.getAccessManager(vds_handle) + accessor = manager.createVolumeDataPageAccessor(openvds.DimensionsND.Dimensions_012, 0, 0, 8, + openvds.IVolumeDataAccessManager.AccessMode.AccessMode_Create, 1024) + + dim_chunk_counts = [0, 0, 0, 0] + + for chunk_num in range(accessor.getChunkCount()): + chunk_min, _ = accessor.getChunkMinMax(chunk_num) + for i in range(len(dim_chunk_counts)): + is_zero = True + j = 0 + while is_zero and j < len(dim_chunk_counts): + if j != i: + is_zero = chunk_min[j] == 0 + j += 1 + if is_zero: + dim_chunk_counts[i] += 1 + + for i in range(len(dim_chunk_counts)): + print(f"dim{i} axis chunk count: {dim_chunk_counts[i]}") + print() + print(f"total chunks: accessor: {accessor.getChunkCount()} counts product: {math.prod(dim_chunk_counts)}") + + pitches = [1, 1, 1, 1] + for i in range(1, len(dim_chunk_counts)): + pitches[i] = pitches[i - 1] * dim_chunk_counts[i - 1] + + chunks_visited = set() + + for chunk_sequence in range(accessor.getChunkCount()): + # convert sequence number to chunk number for 4D crossline-sorted input + + chunk_coords = [0, 0, 0, 0] + if dimensionality == 3: + working, chunk_coords[0] = divmod(chunk_sequence, dim_chunk_counts[0]) + chunk_coords[1], chunk_coords[2] = divmod(working, dim_chunk_counts[2]) # 2 not 1 because we're working in crossline-major space + elif dimensionality == 4: + 
working, chunk_coords[0] = divmod(chunk_sequence, dim_chunk_counts[0]) + working, chunk_coords[1] = divmod(working, dim_chunk_counts[1]) + chunk_coords[2], chunk_coords[3] = divmod(working, dim_chunk_counts[3]) # 3 not 2 because we're working in crossline-major space + else: + raise ValueError(f"Invalid dimensionality: {dimensionality}") + + chunk_num = 0 + for i in range(len(chunk_coords)): + chunk_num += chunk_coords[i] * pitches[i] + + chunk_min, _ = accessor.getChunkMinMax(chunk_num) + print( + f"{chunk_sequence:5} {chunk_num:5} ({chunk_min[0]:3}, {chunk_min[1]:3}, {chunk_min[2]:3}, {chunk_min[3]:3})") + chunks_visited.add(chunk_num) + + print() + print(f"Visited chunk count: {len(chunks_visited)}") + + +def create_3d_vds_and_iterate_universal(): + vds = create_3d_vds() + + print() + print("Gen 2 (universal) 3D VDS chunk iteration") + print() + + iterate_crossline_chunks_universal(vds) + + +def create_4d_vds_and_iterate_universal(): + vds = create_4d_vds() + + print() + print("Gen 2 (universal) 4D VDS chunk iteration") + print() + + iterate_crossline_chunks_universal(vds) + + +if __name__ == "__main__": + is_poststack = False + + if is_poststack: + create_vds_and_iterate_crossline_chunks_3d() + create_3d_vds_and_iterate_universal() + else: + with open("C:\\temp\\SEGY\\t\\chunk_order.txt", "w") as f: + with redirect_stdout(f): + start = time.perf_counter() + create_vds_and_iterate_crossline_chunks_4d() + stop1 = time.perf_counter() + create_4d_vds_and_iterate_universal() + stop2 = time.perf_counter() + + print() + print(f"Gen 1 elapsed: {stop1 - start}") + print(f"Gen 2 elapsed: {stop2 - stop1}") diff --git a/tests/tools/SEGYImport/pytests/segyimport_test_config.py b/tests/tools/SEGYImport/pytests/segyimport_test_config.py new file mode 100644 index 0000000000000000000000000000000000000000..9d3c9af8b69af7c9aad328ae31846cd7270cca26 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/segyimport_test_config.py @@ -0,0 +1,82 @@ +""" +Global configuration information 
for SEGYImport pytests +""" +import os +import subprocess +import tempfile +import weakref +import pytest +from typing import List + +test_data_dir = "c:\\temp\\SEGY\\RegressionTestData" + +_temp_dir = None + + +class ImportExecutor: + def __init__(self): + import_exe = os.getenv("SEGYIMPORT_EXECUTABLE") + if not import_exe: + raise EnvironmentError("SEGYIMPORT_EXECUTABLE environment variable is not set") + self.args = [import_exe] + self.run_result = None + + def add_arg(self, more_arg: str): + self.args.append(more_arg) + + def add_args(self, more_args: List[str]): + self.args.extend(more_args) + + def run(self) -> int: + self.run_result = subprocess.run(self.args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + return self.run_result.returncode + + def output(self) -> str: + if self.run_result: + return str(self.run_result.stdout) + return "" + + def command_line(self) -> str: + """Convenience method to return a string showing the command and arguments""" + return " ".join(self.args) + pass + + +class TempFileGuard: + def __init__(self, base_name: str, extension: str): + global _temp_dir + if not _temp_dir or not _temp_dir(): + self.temp_dir = tempfile.TemporaryDirectory() + _temp_dir = weakref.ref(self.temp_dir) + else: + self.temp_dir = _temp_dir() + self.filename = os.path.join(self.temp_dir.name, base_name + extension) + + +class TempVDSGuard(TempFileGuard): + def __init__(self, base_name="import_test"): + super().__init__(base_name, ".vds") + + +class TempScanFileGuard(TempFileGuard): + def __init__(self, base_name="scan_test"): + super().__init__(base_name, ".scan.json") + + +@pytest.fixture +def teleport_test_data_dir() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "Teleport") + + +@pytest.fixture +def segyimport_test_data_dir() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest") + + +def platform_integration_test_data_dir() -> str: + return 
os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration") + + +@pytest.fixture +def volve_data_dir() -> str: + return "D:\\SEGY\\Equinor\\Volve" diff --git a/tests/tools/SEGYImport/pytests/test_2d.py b/tests/tools/SEGYImport/pytests/test_2d.py new file mode 100644 index 0000000000000000000000000000000000000000..ec3caeb7490f70f906b6be727d78c701e804d76e --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_2d.py @@ -0,0 +1,284 @@ +import os +from pathlib import Path + +import pytest +import openvds + +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard + + +@pytest.fixture +def output_vds() -> TempVDSGuard: + return TempVDSGuard("import2d_test") + + +@pytest.fixture +def poststack_2d_segy() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "PICEANCE-2D", + "A_raw_migr_stack_FF04_03.segy") + + +@pytest.fixture +def prestack_2d_segy() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "PICEANCE-2D", + "PostMigration_CDP_NMO_FF04_03_031314.sgy") + + +def test_2d_poststack_volume_info(poststack_2d_segy, output_vds): + ex = ImportExecutor() + + ex.add_arg("--2d") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(poststack_2d_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + assert layout.getDimensionality() == 2 + assert layout.getChannelCount() == 3 + assert layout.getChannelName(0) == "Amplitude" + assert layout.getChannelIndex("Trace") > 0 + assert layout.getChannelIndex("SEGYTraceHeader") > 0 + + descriptor = layout.getAxisDescriptor(0) + assert descriptor.name == "Sample" + assert descriptor.unit == "ms" + assert descriptor.coordinateMin == 0.0 + assert descriptor.coordinateMax == 5000.0 + assert descriptor.coordinateStep == 2.0 + + descriptor = layout.getAxisDescriptor(1) + assert 
descriptor.name == "CDP" + assert descriptor.unit == "unitless" + assert descriptor.coordinateMin == 1.0 + assert descriptor.coordinateMax == 1977.0 + assert descriptor.coordinateStep == 1.0 + + assert layout.getChannelValueRangeMin(0) == -302681.9375 + assert layout.getChannelValueRangeMax(0) == 303745.375 + + # check lattice metadata is NOT there + assert not layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name) + assert not layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().name) + assert not layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().name) + + # check 2D trace info metadata is present + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.tracePositions().category, + openvds.KnownMetadata.tracePositions().name) + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.traceVerticalOffsets().category, + openvds.KnownMetadata.traceVerticalOffsets().name) + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.energySourcePointNumbers().category, + openvds.KnownMetadata.energySourcePointNumbers().name) + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.ensembleNumbers().category, + openvds.KnownMetadata.ensembleNumbers().name) + + +def test_2d_poststack_read(poststack_2d_segy, output_vds): + # run importer + # open VDS + # read VDS (entire thing at once) + # check that each trace is non-zero + + ex = ImportExecutor() + + ex.add_arg("--2d") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(poststack_2d_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") 
as handle: + layout = openvds.getLayout(handle) + access_manager = openvds.getAccessManager(handle) + dim0_size = layout.getDimensionNumSamples(0) + dim1_size = layout.getDimensionNumSamples(1) + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + + request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), + (dim0_size, dim1_size, 1, 1, 1, 1), + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_01) + trace_flag_request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), + (1, dim1_size, 1, 1, 1, 1), + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_01) + + data = request.data.reshape(dim1_size, dim0_size) + trace_flag_data = trace_flag_request.data + + for dim1 in range(dim1_size): + total = 0 + for dim0 in range(dim0_size): + total += abs(data[dim1, dim0]) + + if trace_flag_data[dim1] == 0: + assert total == 0.0, f"dead trace at {dim1}" + else: + assert total > 0.0, f"trace at {dim1}" + + +def test_2d_prestack_volume_info(prestack_2d_segy, output_vds): + # run importer + # open VDS + # check dimensionality (3), channel count (4), channel names + # check 3 axis descriptors + # check sample value range + # check lattice metadata is NOT there + # check 2D trace info metadata + + ex = ImportExecutor() + + ex.add_arg("--2d") + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(prestack_2d_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + assert layout.getDimensionality() == 3 + assert layout.getChannelCount() == 4 + assert layout.getChannelName(0) == "Amplitude" + assert layout.getChannelIndex("Offset") > 0 + assert layout.getChannelIndex("Trace") > 0 + assert 
layout.getChannelIndex("SEGYTraceHeader") > 0 + + descriptor = layout.getAxisDescriptor(0) + assert descriptor.name == "Sample" + assert descriptor.unit == "ms" + assert descriptor.coordinateMin == 0.0 + assert descriptor.coordinateMax == 5000.0 + assert descriptor.coordinateStep == 2.0 + + descriptor = layout.getAxisDescriptor(1) + assert descriptor.name == "Trace (offset)" + assert descriptor.unit == "unitless" + assert descriptor.coordinateMin == 1.0 + assert descriptor.coordinateMax == 64.0 + assert descriptor.coordinateStep == 1.0 + + descriptor = layout.getAxisDescriptor(2) + assert descriptor.name == "CDP" + assert descriptor.unit == "unitless" + assert descriptor.coordinateMin == 1.0 + assert descriptor.coordinateMax == 1977.0 + assert descriptor.coordinateStep == 1.0 + + assert layout.getChannelValueRangeMin(0) == -420799.125 + assert layout.getChannelValueRangeMax(0) == 421478.375 + + # check lattice metadata is NOT there + assert not layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name) + assert not layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().name) + assert not layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().name) + + # check 2D trace info metadata is present + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.tracePositions().category, + openvds.KnownMetadata.tracePositions().name) + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.traceVerticalOffsets().category, + openvds.KnownMetadata.traceVerticalOffsets().name) + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.energySourcePointNumbers().category, + 
openvds.KnownMetadata.energySourcePointNumbers().name) + assert layout.isMetadataBLOBAvailable(openvds.KnownMetadata.ensembleNumbers().category, + openvds.KnownMetadata.ensembleNumbers().name) + + +def test_2d_prestack_read(prestack_2d_segy, output_vds): + # run importer + # open VDS + # read VDS (entire thing at once) + # check that each trace is non-zero + + ex = ImportExecutor() + + ex.add_arg("--2d") + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(prestack_2d_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + access_manager = openvds.getAccessManager(handle) + dim0_size = layout.getDimensionNumSamples(0) + dim1_size = layout.getDimensionNumSamples(1) + dim2_size = layout.getDimensionNumSamples(2) + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + offset_channel = layout.getChannelIndex("Offset") + assert offset_channel > 0 + + request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), + (dim0_size, dim1_size, dim2_size, 1, 1, 1), + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_012) + offset_request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), + (1, dim1_size, dim2_size, 1, 1, 1), + channel=offset_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_012) + trace_flag_request = access_manager.requestVolumeSubset((0, 0, 0, 0, 0, 0), + (1, dim1_size, dim2_size, 1, 1, 1), + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_012) + + data = request.data.reshape(dim2_size, dim1_size, dim0_size) + offset_data = offset_request.data.reshape(dim2_size, dim1_size) + trace_flag_data = 
trace_flag_request.data.reshape(dim2_size, dim1_size) + + for dim2 in range(dim2_size): + for dim1 in range(dim1_size): + trace_flag = trace_flag_data[dim2, dim1] + trace_offset = offset_data[dim2, dim1] + ensemble_number = layout.getAxisDescriptor(2).sampleIndexToCoordinate(dim2) + + total = 0 + for dim0 in range(dim0_size): + total += abs(data[dim2, dim1, dim0]) + + message = f"trace at dim1 {dim1} dim2 {dim2} offset {trace_offset} CDP {ensemble_number}" + + if trace_flag == 0: + assert total == 0.0, f"dead {message}" + else: + assert total > 0.0, message diff --git a/tests/tools/SEGYImport/pytests/test_attribute_name.py b/tests/tools/SEGYImport/pytests/test_attribute_name.py new file mode 100644 index 0000000000000000000000000000000000000000..c79f36117f9c36f3e4806f7cd83a321841832a6d --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_attribute_name.py @@ -0,0 +1,113 @@ +import os +from pathlib import Path + +import pytest +import openvds + +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard + + +@pytest.fixture +def poststack_segy() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "Teleport", "Teleport_Trim", + "3D_Stack", "ST0202R08_TIME.segy") + + +@pytest.fixture +def output_vds() -> TempVDSGuard: + return TempVDSGuard() + + +def test_default_attribute_and_unit(poststack_segy, output_vds): + ex = ImportExecutor() + + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(poststack_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + descriptor = layout.getChannelDescriptor(0) + assert descriptor.name == "Amplitude" + assert descriptor.unit == "" + + +def test_custom_attribute_name(poststack_segy, output_vds): + custom_attribute_name = "Vrms" + + ex = ImportExecutor() + + ex.add_args(["--vdsfile", output_vds.filename]) + 
ex.add_args(["--attribute-name", custom_attribute_name]) + + ex.add_arg(poststack_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + descriptor = layout.getChannelDescriptor(0) + assert descriptor.name == custom_attribute_name + assert descriptor.unit == "" + + +def test_custom_attribute_unit(poststack_segy, output_vds): + custom_attribute_unit = "Hz" + + ex = ImportExecutor() + + ex.add_args(["--vdsfile", output_vds.filename]) + ex.add_args(["--attribute-unit", custom_attribute_unit]) + + ex.add_arg(poststack_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + descriptor = layout.getChannelDescriptor(0) + assert descriptor.name == "Amplitude" + assert descriptor.unit == custom_attribute_unit + + +def test_invalid_attribute_name(poststack_segy, output_vds): + custom_attribute_name = "JulesVerne" + + ex = ImportExecutor() + + ex.add_args(["--vdsfile", output_vds.filename]) + ex.add_args(["--attribute-name", custom_attribute_name]) + + ex.add_arg(poststack_segy) + + result = ex.run() + + assert result > 0, ex.output() + assert not Path(output_vds.filename).exists() + + +def test_invalid_attribute_unit(poststack_segy, output_vds): + custom_attribute_unit = "furlong/fortnight" + + ex = ImportExecutor() + + ex.add_args(["--vdsfile", output_vds.filename]) + ex.add_args(["--attribute-unit", custom_attribute_unit]) + + ex.add_arg(poststack_segy) + + result = ex.run() + + assert result > 0, ex.output() + assert not Path(output_vds.filename).exists() diff --git a/tests/tools/SEGYImport/pytests/test_azimuth_channel.py b/tests/tools/SEGYImport/pytests/test_azimuth_channel.py new file mode 100644 index 
0000000000000000000000000000000000000000..4b435bb2a1fe58c593ea181ca500a7d91715896e --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_azimuth_channel.py @@ -0,0 +1,254 @@ +import os +from pathlib import Path +from typing import Tuple + +import pytest +import openvds +import numpy as np + +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard + + +@pytest.fixture +def output_vds() -> TempVDSGuard: + return TempVDSGuard("import_mutes_test") + + +@pytest.fixture +def azimuth_degrees_segy() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "WAZ", "waz_trial.segy") + + +@pytest.fixture +def azimuth_offset_xy_segy() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "Teleport", "Teleport_Trim", + "waz", "CG3_090008B32871_subset.segy") + + +@pytest.fixture +def azimuth_from_azimuth_executor(azimuth_degrees_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Fixture to setup an ImportExecutor with options for SEGY with azimuth trace header field""" + ex = ImportExecutor() + + ex.add_args(["--header-field", "azimuth=25:4"]) + ex.add_arg("--azimuth") + ex.add_args(["--azimuth-type", "azimuth"]) + ex.add_args(["--azimuth-unit", "degrees"]) + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", output_vds.filename]) + + # Need to ignore warnings because this data is only one segment + ex.add_arg("--ignore-warnings") + + ex.add_arg(azimuth_degrees_segy) + + return ex, output_vds + + +@pytest.fixture +def azimuth_from_offset_xy_executor(azimuth_offset_xy_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Fixture to setup an ImportExecutor with common options for SEGY with mutes""" + ex = ImportExecutor() + + ex.add_args(["--header-field", "offsetx=97:2"]) + ex.add_args(["--header-field", "offsety=95:2"]) + ex.add_arg("--azimuth") + ex.add_args(["--azimuth-type", "offsetxy"]) + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", output_vds.filename]) + 
+ ex.add_arg(azimuth_offset_xy_segy) + + return ex, output_vds + + +def test_azimuth_channel_descriptor(azimuth_from_azimuth_executor): + ex, output_vds = azimuth_from_azimuth_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + mutes_channel = layout.getChannelIndex("Azimuth") + assert mutes_channel > 0 + + +def test_azimuth_range_from_azimuth(azimuth_from_azimuth_executor): + ex, output_vds = azimuth_from_azimuth_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + access_manager = openvds.getAccessManager(handle) + layout = openvds.getLayout(handle) + + # check axis dimensions + + assert layout.getDimensionMin(0) == 0 + assert layout.getDimensionMax(0) == 3100 + assert layout.getDimensionNumSamples(0) == 1551 + + assert layout.getDimensionMin(1) == 1 + assert layout.getDimensionMax(1) == 107 + assert layout.getDimensionNumSamples(1) == 107 + + assert layout.getDimensionMin(2) == 495 + assert layout.getDimensionMax(2) == 2050 + assert layout.getDimensionNumSamples(2) == 1556 + + assert layout.getDimensionMin(3) == 1000 + assert layout.getDimensionMax(3) == 1000 + assert layout.getDimensionNumSamples(3) == 1 + + azimuth_channel = layout.getChannelIndex("Azimuth") + assert azimuth_channel > 0 + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + + dim2_size = layout.getDimensionNumSamples(2) + dim1_size = layout.getDimensionNumSamples(1) + + for dim3 in range(layout.getDimensionNumSamples(3)): + # read one inline of trace flags and azimuths + + voxel_min = (0, 0, 0, dim3, 0, 0) + trace_voxel_max = (1, dim1_size, dim2_size, dim3 + 1, 1, 1) + + trace_flag_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=trace_channel, + 
format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_012) + azimuth_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=azimuth_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_012) + + trace_data = trace_flag_request.data.reshape(dim2_size, dim1_size) + azimuth_data = azimuth_request.data.reshape(dim2_size, dim1_size) + + for dim2 in range(dim2_size): + for dim1 in range(dim1_size): + if trace_data[dim2, dim1] > 0: + azimuth_value = azimuth_data[dim2, dim1] + assert 1.0 <= azimuth_value <= 107.0,\ + f"azimuth value {azimuth_value} dim1 {dim1} dim2 {dim2} dim3 {dim3}" + + +def test_azimuth_range_from_offset_xy(azimuth_from_offset_xy_executor): + ex, output_vds = azimuth_from_offset_xy_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + access_manager = openvds.getAccessManager(handle) + layout = openvds.getLayout(handle) + + # check axis dimensions + + assert layout.getDimensionMin(0) == 0 + assert layout.getDimensionMax(0) == 7000 + assert layout.getDimensionNumSamples(0) == 1751 + + assert layout.getDimensionMin(1) == 1 + assert layout.getDimensionMax(1) == 280 + assert layout.getDimensionNumSamples(1) == 280 + + assert layout.getDimensionMin(2) == 4715 + assert layout.getDimensionMax(2) == 4715 + assert layout.getDimensionNumSamples(2) == 1 + + assert layout.getDimensionMin(3) == 6117 + assert layout.getDimensionMax(3) == 6163 + assert layout.getDimensionNumSamples(3) == 47 + + azimuth_channel = layout.getChannelIndex("Azimuth") + assert azimuth_channel > 0 + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + + dim2_size = layout.getDimensionNumSamples(2) + dim1_size = layout.getDimensionNumSamples(1) + + for dim3 in range(layout.getDimensionNumSamples(3)): + # 
read one inline of trace flags and azimuths + + voxel_min = (0, 0, 0, dim3, 0, 0) + trace_voxel_max = (1, dim1_size, dim2_size, dim3 + 1, 1, 1) + + trace_flag_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_012) + azimuth_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=azimuth_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_012) + + trace_data = trace_flag_request.data.reshape(dim2_size, dim1_size) + azimuth_data = azimuth_request.data.reshape(dim2_size, dim1_size) + + for dim2 in range(dim2_size): + for dim1 in range(dim1_size): + if trace_data[dim2, dim1] > 0: + azimuth_value = azimuth_data[dim2, dim1] + assert 3.691 <= azimuth_value <= 356.31, \ + f"azimuth value {azimuth_value} dim1 {dim1} dim2 {dim2} dim3 {dim3}" + + +def test_samples_with_azimuth(azimuth_from_azimuth_executor): + """Tests that samples can be read when the azimuth channel is enabled""" + ex, output_vds = azimuth_from_azimuth_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + access_manager = openvds.getAccessManager(handle) + layout = openvds.getLayout(handle) + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + + # we're not going to read azimuths, but we require that the azimuth channel is present + azimuth_channel = layout.getChannelIndex("Azimuth") + assert azimuth_channel > 0 + + for dim3 in range(layout.getDimensionNumSamples(3)): + for dim2 in range(layout.getDimensionNumSamples(2)): + voxel_min = (0, 0, dim2, dim3, 0, 0) + voxel_max = (layout.getDimensionNumSamples(0), layout.getDimensionNumSamples(1), dim2 + 1, dim3 + 1, 1, 1) + trace_voxel_max = (1, 
layout.getDimensionNumSamples(1), dim2 + 1, dim3 + 1, 1, 1) + + sample_request = access_manager.requestVolumeSubset(voxel_min, + voxel_max, + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32) + trace_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8) + + sample_data = sample_request.data.reshape(layout.getDimensionNumSamples(1), + layout.getDimensionNumSamples(0)) + for dim1 in range(layout.getDimensionNumSamples(1)): + if trace_request.data[dim1] > 0: + total = np.sum(np.abs(sample_data[dim1, :])) + assert total > 0.0, f"zero-data trace at dim1 {dim1} dim2 {dim2} dim3 {dim3}" diff --git a/tests/tools/SEGYImport/pytests/test_crossline_sorted.py b/tests/tools/SEGYImport/pytests/test_crossline_sorted.py new file mode 100644 index 0000000000000000000000000000000000000000..b77f859007490fdf7d9a3b0452a9323c5532bfb1 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_crossline_sorted.py @@ -0,0 +1,322 @@ +from pathlib import Path +from typing import List, Tuple + +import openvds +import pytest +import os + +from segyimport_test_config import ImportExecutor, TempVDSGuard, segyimport_test_data_dir, teleport_test_data_dir, \ + platform_integration_test_data_dir, volve_data_dir + + +def construct_crossline_executor(segy_filename: str, output_vds: TempVDSGuard, + additional_args: List[str] = None) -> ImportExecutor: + ex = ImportExecutor() + + ex.add_args(["--primary-key", "CrosslineNumber"]) + ex.add_args(["--secondary-key", "InlineNumber"]) + ex.add_args(["--vdsfile", output_vds.filename]) + if additional_args: + ex.add_args(additional_args) + ex.add_arg(segy_filename) + + return ex + + +@pytest.fixture +def crossline_output_vds() -> TempVDSGuard: + return TempVDSGuard("import_crossline_test") + + +@pytest.fixture +def conventional_output_vds() -> TempVDSGuard: + return TempVDSGuard("import_conventional_test") + + +@pytest.fixture +def 
teleport_conventional_segy(teleport_test_data_dir) -> str: + return os.path.join(teleport_test_data_dir, "Teleport_Trim", "3D_Stack", "ST0202R08_TIME.segy") + + +@pytest.fixture +def teleport_crossline_sorted_segy(segyimport_test_data_dir) -> str: + return os.path.join(segyimport_test_data_dir, "CrosslineSorted", "ST0202R08_TIME_crossline_sorted.segy") + + +@pytest.fixture +def volve_crossline_sorted_segy(volve_data_dir) -> str: + return os.path.join(volve_data_dir, "ST0202", "Stacks", "ST0202R08_PS_PSDM_FULL_OFFSET_PP_TIME.MIG_FIN.POST_STACK.3D.JS-017534_crossline_sorted.segy") + + +@pytest.fixture +def teleport_crossline_executor(teleport_crossline_sorted_segy, crossline_output_vds)\ + -> Tuple[ImportExecutor, TempVDSGuard]: + ex = construct_crossline_executor(teleport_crossline_sorted_segy, crossline_output_vds) + return ex, crossline_output_vds + + +@pytest.fixture +def teleport_conventional_executor(teleport_conventional_segy, conventional_output_vds)\ + -> Tuple[ImportExecutor, TempVDSGuard]: + ex = ImportExecutor() + + ex.add_args(["--vdsfile", conventional_output_vds.filename]) + ex.add_arg(teleport_conventional_segy) + + return ex, conventional_output_vds + + +def get_scs_metadata_vectors(layout: openvds.VolumeDataLayout) -> Tuple[Tuple[float, float], Tuple[float, float], + Tuple[float, float]]: + """Convenience method to retrieve survey coordinate system vectors from VDS metadata""" + + origin = layout.getMetadataDoubleVector2(openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name) + inline_spacing = layout.getMetadataDoubleVector2( + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().name) + crossline_spacing = layout.getMetadataDoubleVector2( + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().name) + + return origin, 
inline_spacing, crossline_spacing + + +def test_coordinate_system(teleport_crossline_executor): + """ + Examine the survey coordinate system metadata for a crossline-sorted SEGY. + + Note that these survey coordinate system values should be the same as the values examined in + test_coordinate_system_comparison, but we'll check explicit values here to ensure that the importer + is producing expected values (and not just two sets of incorrect but identical values). + """ + ex, output_vds = teleport_crossline_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + origin, inline_spacing, crossline_spacing = get_scs_metadata_vectors(layout) + + assert origin[0] == pytest.approx(431990.979266) + assert origin[1] == pytest.approx(6348549.7101) + assert inline_spacing[0] == pytest.approx(3.02145) + assert inline_spacing[1] == pytest.approx(12.13037) + assert crossline_spacing[0] == pytest.approx(-12.12871) + assert crossline_spacing[1] == pytest.approx(3.024143) + + +def test_coordinate_system_comparison(teleport_crossline_executor, teleport_conventional_executor): + """ + Compare the survey coordinate system metadata for a crossline-sorted SEGY with the same data stored in a + conventionally-sorted SEGY. 
+ """ + xl_ex, xl_output_vds = teleport_crossline_executor + + result = xl_ex.run() + + assert result == 0, xl_ex.output() + assert Path(xl_output_vds.filename).exists() + + conv_ex, conv_output_vds = teleport_conventional_executor + + result = conv_ex.run() + + assert result == 0, conv_ex.output() + assert Path(conv_output_vds.filename).exists() + + with openvds.open(xl_output_vds.filename, "") as xl_handle,\ + openvds.open(conv_output_vds.filename, "") as conv_handle: + xl_layout = openvds.getLayout(xl_handle) + conv_layout = openvds.getLayout(conv_handle) + + xl_origin, xl_inline, xl_crossline = get_scs_metadata_vectors(xl_layout) + conv_origin, conv_inline, conv_crossline = get_scs_metadata_vectors(conv_layout) + + assert xl_origin == conv_origin + assert xl_inline == conv_inline + assert xl_crossline == conv_crossline + + +def test_samples_comparison(teleport_crossline_executor, teleport_conventional_executor): + xl_ex, xl_output_vds = teleport_crossline_executor + + result = xl_ex.run() + + s = xl_ex.command_line() + assert result == 0, xl_ex.output() + assert Path(xl_output_vds.filename).exists() + + conv_ex, conv_vds = teleport_conventional_executor + + result = conv_ex.run() + + assert result == 0, conv_ex.output() + assert Path(conv_vds.filename).exists() + + with openvds.open(xl_output_vds.filename, "") as handle_xl, openvds.open(conv_vds.filename, "") as handle_conv: + access_manager_xl = openvds.getAccessManager(handle_xl) + layout_xl = openvds.getLayout(handle_xl) + + access_manager_conv = openvds.getAccessManager(handle_conv) + layout_conv = openvds.getLayout(handle_conv) + + num_z = layout_xl.getDimensionNumSamples(0) + num_crossline = layout_xl.getDimensionNumSamples(1) + num_inline = layout_xl.getDimensionNumSamples(2) + + assert num_z == layout_conv.getDimensionNumSamples(0) + assert num_crossline == layout_conv.getDimensionNumSamples(1) + assert num_inline == layout_conv.getDimensionNumSamples(2) + + for inline_index in range(num_inline): + 
voxel_min = (0, 0, inline_index, 0, 0, 0) + voxel_max = (num_z, num_crossline, inline_index + 1, 1, 1, 1) + + request = access_manager_xl.requestVolumeSubset(voxel_min, voxel_max, + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_012) + request_conv = access_manager_conv.requestVolumeSubset(voxel_min, voxel_max, + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_012) + + assert len(request.data) == num_z * num_crossline + assert len(request_conv.data) == num_z * num_crossline + + sample_data = request.data.reshape(num_crossline, num_z) + sample_data_conv = request_conv.data.reshape(num_crossline, num_z) + + for crossline_index in range(num_crossline): + for sample_index in range(num_z): + assert sample_data[crossline_index, sample_index] == sample_data_conv[crossline_index, sample_index],\ + f"sample index {sample_index} xl index {crossline_index} il index {inline_index}" + + +poststack_axes = [ + (1477, "Sample", "ms", 0.0, 5904.0), + (101, "Crossline", "unitless", 2000.0, 2100.0), + (101, "Inline", "unitless", 1000.0, 1100.0), + () +] + +prestack_axes = [ + (1477, "Sample", "ms", 0.0, 5904.0), + (60, "Trace (offset)", "unitless", 1.0, 60.0), + (10, "Crossline", "unitless", 2031.0, 2040.0), + (101, "Inline", "unitless", 1000.0, 1100.0) +] + +poststack_survey_vectors = [ + (76276.3367, 949408.127), + (-15.496337, 19.612673), + (19.610, 15.48960) +] + +prestack_survey_vectors = [ + (530768.667, 5788901.760), + (-15.5040, 19.611520), + (19.555556, 15.573333) +] + +poststack_segy_filename = os.path.join(platform_integration_test_data_dir(), "ImporterTests", + "poststack_Xl-inc_In-inc.segy") +prestack_segy_filename = os.path.join(platform_integration_test_data_dir(), "ImporterTests", + "prestack_Xl-inc_In-inc_2031-2040.segy") + + +@pytest.mark.parametrize("test_params", [(poststack_segy_filename, 3, poststack_axes, 
poststack_survey_vectors), + (prestack_segy_filename, 4, prestack_axes, prestack_survey_vectors)]) +def test_dimensions_and_produce_status(crossline_output_vds, + test_params: Tuple[str, int, List[Tuple[int, str, str, float, float]], + List[Tuple[float, float]]]): + segy_filename, dimensions, axes, scs_vectors = test_params + + additional_args = [ + "--header-field", "InlineNumber=17:4", + "--header-field", "CrosslineNumber=21:4", + "--header-field", "EnsembleXCoordinate=73:4", + "--header-field", "EnsembleYCoordinate=77:4" + ] + if dimensions == 4: + additional_args.append("--prestack") + + ex = construct_crossline_executor(segy_filename, crossline_output_vds, additional_args) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(crossline_output_vds.filename).exists() + + with openvds.open(crossline_output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + access_manager = openvds.getAccessManager(handle) + + # check produce status + + if dimensions == 4: + # Dim_013 should be available/remapped + assert access_manager.getVDSProduceStatus( + openvds.DimensionsND.Dimensions_013) != openvds.VDSProduceStatus.Unavailable + + # These dimensions (and others) should be unavailable + assert access_manager.getVDSProduceStatus( + openvds.DimensionsND.Dimensions_01) == openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus( + openvds.DimensionsND.Dimensions_012) == openvds.VDSProduceStatus.Unavailable + elif dimensions == 3: + # Dim_012 should be available/remapped + assert access_manager.getVDSProduceStatus( + openvds.DimensionsND.Dimensions_012) != openvds.VDSProduceStatus.Unavailable + + # These dimensions (and others) should be unavailable + assert access_manager.getVDSProduceStatus( + openvds.DimensionsND.Dimensions_01) == openvds.VDSProduceStatus.Unavailable + else: + assert False, f"Invalid number of VDS dimensions: {dimensions}" + + # check axes + + for dim in range(dimensions): + descriptor = 
layout.getAxisDescriptor(dim) + axis_props = axes[dim] + assert descriptor.getNumSamples() == axis_props[0] + assert descriptor.getName() == axis_props[1] + assert descriptor.getUnit() == axis_props[2] + assert descriptor.getCoordinateMin() == axis_props[3] + assert descriptor.getCoordinateMax() == axis_props[4] + + # check survey coordinate system vectors + + origin, inline_spacing, crossline_spacing = get_scs_metadata_vectors(layout) + assert origin[0] == pytest.approx(scs_vectors[0][0]) + assert origin[1] == pytest.approx(scs_vectors[0][1]) + assert inline_spacing[0] == pytest.approx(scs_vectors[1][0]) + assert inline_spacing[1] == pytest.approx(scs_vectors[1][1]) + assert crossline_spacing[0] == pytest.approx(scs_vectors[2][0]) + assert crossline_spacing[1] == pytest.approx(scs_vectors[2][1]) + + +@pytest.mark.parametrize("disable_crossline_ordering", [False, True]) +def test_volve_crossline_sorted(volve_crossline_sorted_segy, crossline_output_vds, disable_crossline_ordering: bool): + """ + Executes SEGYImport on a large-ish crossline-sorted poststack SEGY from Volve. + + This test is mostly meant to compare execution time with and without the crossline input + re-ordering, so the test itself does minimal checks. 
+ """ + additional_args = [] + if disable_crossline_ordering: + additional_args.append("--disable-crossline-input-reorder") + + ex = construct_crossline_executor(volve_crossline_sorted_segy, crossline_output_vds, additional_args) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(crossline_output_vds.filename).exists() diff --git a/tests/tools/SEGYImport/pytests/test_multi_file_scan.py b/tests/tools/SEGYImport/pytests/test_multi_file_scan.py new file mode 100644 index 0000000000000000000000000000000000000000..4b8126f977b88b5d1d20fd08fa137d7284b0fe3a --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_multi_file_scan.py @@ -0,0 +1,128 @@ +import os +from pathlib import Path +from typing import List + +import pytest +import openvds + +from segyimport_test_config import test_data_dir, ImportExecutor, TempScanFileGuard, TempVDSGuard + + +@pytest.fixture +def multi_file_input_base_name() -> str: + return "ST0202R08_Gather_Time_pt" + + +@pytest.fixture +def multi_file_input_parts_count() -> int: + # the multi-file SEGY consists of this many files + return 11 + + +@pytest.fixture +def multi_file_input_dir() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "MultiFile", "ST0202R08_Gather_Time") + + +@pytest.fixture +def multi_file_input_glob(multi_file_input_dir) -> str: + return os.path.join(multi_file_input_dir, "ST0202R08_Gather_Time_pt??.segy") + + +@pytest.fixture +def multi_file_input_files(multi_file_input_parts_count, multi_file_input_base_name, multi_file_input_dir) -> List[str]: + filenames = [] + for i in range(1, multi_file_input_parts_count + 1): + input_name = f"{multi_file_input_base_name}{i:02}.segy" + filenames.append(os.path.join(multi_file_input_dir, input_name)) + return filenames + + +@pytest.fixture +def multi_file_scan_file_info_files(multi_file_input_parts_count, multi_file_input_base_name) -> List[TempScanFileGuard]: + """File info filenames to be output when using --scan""" + guards = 
[] + for i in range(1, multi_file_input_parts_count + 1): + file_info_base_name = f"{multi_file_input_base_name}{i:02}.segy" + guards.append(TempScanFileGuard(file_info_base_name)) + return guards + + +@pytest.fixture +def multi_file_input_file_info_files(multi_file_input_parts_count, multi_file_input_base_name, multi_file_input_dir) -> List[str]: + """File info filenames to be used when importing""" + filenames = [] + for i in range(1, multi_file_input_parts_count + 1): + file_info_name = f"{multi_file_input_base_name}{i:02}.segy.scan.json" + filenames.append(os.path.join(multi_file_input_dir, file_info_name)) + return filenames + + +def test_multi_file_scan_one_file_info(multi_file_input_glob): + """ + Tests --scan with multiple input SEGY files, but only one file info file specified. + """ + file_info_guard = TempScanFileGuard("multi_single_test") + + ex = ImportExecutor() + ex.add_args(["--prestack", "--scan", "--file-info", file_info_guard.filename, multi_file_input_glob]) + result = ex.run() + + # import should have failed + assert result > 0, ex.output() + + # error message should contain this + assert "Different number of input SEG-Y file names and file-info file names".lower() in ex.output().lower() + + +def test_multi_file_scan(multi_file_input_files, multi_file_scan_file_info_files): + ex = ImportExecutor() + ex.add_args(["--prestack", "--scan"]) + + for scan_file_guard in multi_file_scan_file_info_files: + ex.add_args(["--file-info", scan_file_guard.filename]) + + # ensure the output file doesn't exist + if Path(scan_file_guard.filename).exists(): + os.remove(scan_file_guard.filename) + + ex.add_args(multi_file_input_files) + + result = ex.run() + + # import should have succeeded + assert result == 0, ex.output() + + # output files should exist + for scan_guard in multi_file_scan_file_info_files: + assert Path(scan_guard.filename).exists() + + +def test_multi_file_import_with_file_infos(multi_file_input_files, multi_file_input_file_info_files): + ex 
= ImportExecutor() + ex.add_arg("--prestack") + + vds_guard = TempVDSGuard("import_test") + ex.add_args(["--vdsfile", vds_guard.filename]) + + for filename in multi_file_input_file_info_files: + ex.add_args(["--file-info", filename]) + + ex.add_args(multi_file_input_files) + + result = ex.run() + + # import should have succeeded + assert result == 0, ex.output() + + # output file should exist + assert Path(vds_guard.filename).exists() + + # check dimensions of VDS + with openvds.open(vds_guard.filename, "") as handle: + layout = openvds.getLayout(handle) + assert layout.dimensionality == 4 + assert layout.numSamples[0] == 851 + assert layout.numSamples[1] == 100 + assert layout.numSamples[2] == 71 + assert layout.numSamples[3] == 21 diff --git a/tests/tools/SEGYImport/pytests/test_mutes_channel.py b/tests/tools/SEGYImport/pytests/test_mutes_channel.py new file mode 100644 index 0000000000000000000000000000000000000000..a0d95e976f43c643ae979d10f6ff1271ab8ef879 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_mutes_channel.py @@ -0,0 +1,99 @@ +import os +from pathlib import Path +from typing import Tuple + +import pytest +import openvds + +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard + + +@pytest.fixture +def output_vds() -> TempVDSGuard: + return TempVDSGuard("import_mutes_test") + + +@pytest.fixture +def mutes_segy() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "Mutes", + "ST0202R08_PS_PrSDM_CIP_gathers_in_PP_Time_modified_subset.segy") + + +@pytest.fixture +def mutes_executor(mutes_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Fixture to setup an ImportExecutor with common options for SEGY with mutes""" + ex = ImportExecutor() + + ex.add_arg("--mute") + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(mutes_segy) + + return ex, output_vds + + +def test_mutes_channel_descriptor(mutes_executor): + ex, output_vds = 
mutes_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + mutes_channel = layout.getChannelIndex("Mute") + assert mutes_channel > 0 + + +def test_mutes_read(mutes_executor): + ex, output_vds = mutes_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + access_manager = openvds.getAccessManager(handle) + layout = openvds.getLayout(handle) + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + + mutes_channel = layout.getChannelIndex("Mute") + assert mutes_channel > 0 + + inline_index = layout.getDimensionNumSamples(3) // 2 + dim2_size = layout.getDimensionNumSamples(2) + dim1_size = layout.getDimensionNumSamples(1) + dim0_size = layout.getDimensionNumSamples(0) + + for crossline_index in range(dim2_size): + # read one gather of samples and trace flags and mutes + + voxel_min = (0, 0, crossline_index, inline_index, 0, 0) + # voxel_max = (dim0_size, dim1_size, crossline_index + 1, inline_index + 1, 1, 1) + trace_voxel_max = (1, dim1_size, crossline_index + 1, inline_index + 1, 1, 1) + + # request = access_manager.requestVolumeSubset(voxel_min, + # voxel_max, + # channel=0, + # format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + # dimensionsND=openvds.DimensionsND.Dimensions_012) + trace_flag_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_012) + mutes_request = access_manager.requestVolumeSubset(voxel_min, + trace_voxel_max, + channel=mutes_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U16, + dimensionsND=openvds.DimensionsND.Dimensions_012) + + for trace_index in 
range(dim1_size): + if trace_flag_request.data[trace_index] > 0: + assert 0 <= mutes_request.data[trace_index * 2] < mutes_request.data[trace_index * 2 + 1] diff --git a/tests/tools/SEGYImport/pytests/test_offset_sorted.py b/tests/tools/SEGYImport/pytests/test_offset_sorted.py new file mode 100644 index 0000000000000000000000000000000000000000..077e2b68d190217bba69efb6994d737f172c43a0 --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_offset_sorted.py @@ -0,0 +1,391 @@ +import os +from pathlib import Path +from typing import Tuple + +import pytest +import openvds + +from segyimport_test_config import test_data_dir, ImportExecutor, TempVDSGuard, TempScanFileGuard + + +@pytest.fixture +def output_vds() -> TempVDSGuard: + return TempVDSGuard("import_offset_sorted_test") + + +@pytest.fixture +def output_scan() -> TempScanFileGuard: + return TempScanFileGuard("ST0202.segy.test.scan.json") + + +@pytest.fixture +def offset_sorted_segy() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "OffsetSorted", "ST0202.segy") + + +@pytest.fixture +def offset_sorted_deduped_segy() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "OffsetSorted", + "ST0202_deduped.segy") + + +@pytest.fixture +def offset_sorted_scan() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "OffsetSorted", + "ST0202.segy.scan.json") + + +@pytest.fixture +def conventional_sorted_segy() -> str: + return os.path.join(test_data_dir, "Plugins", "ImportPlugins", "SEGYUnittest", "OffsetSorted", + "ST0202_conventional.segy") + + +@pytest.fixture +def conventional_output_vds() -> TempVDSGuard: + return TempVDSGuard("import_conventional_sorted_test") + + +@pytest.fixture +def offset_sorted_executor(offset_sorted_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Fixture to setup an ImportExecutor with common options for offset-sorted SEGY""" + ex = ImportExecutor() + + 
ex.add_args(["--header-field", "offset=177:4"]) + + ex.add_arg("--offset-sorted") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(offset_sorted_segy) + + return ex, output_vds + + +@pytest.fixture +def offset_sorted_deduped_executor(offset_sorted_deduped_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Fixture to setup an ImportExecutor with common options for de-duped version of offset-sorted SEGY""" + ex = ImportExecutor() + + ex.add_args(["--header-field", "offset=177:4"]) + + ex.add_arg("--offset-sorted") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(offset_sorted_deduped_segy) + + return ex, output_vds + + +@pytest.fixture +def conventional_sorted_executor(conventional_sorted_segy, conventional_output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Fixture to setup an ImportExecutor with common options for conventionally sorted SEGY""" + ex = ImportExecutor() + + ex.add_args(["--header-field", "offset=177:4"]) + + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", conventional_output_vds.filename]) + + ex.add_arg(conventional_sorted_segy) + + return ex, conventional_output_vds + + +def test_produce_status(offset_sorted_executor): + ex, output_vds = offset_sorted_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + access_manager = openvds.getAccessManager(handle) + + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_023) != openvds.VDSProduceStatus.Unavailable + + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_02) == openvds.VDSProduceStatus.Unavailable + + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_01) == openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_03) == openvds.VDSProduceStatus.Unavailable + assert 
access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_012) == openvds.VDSProduceStatus.Unavailable + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_013) == openvds.VDSProduceStatus.Unavailable + + +def test_survey_coordinate_system(offset_sorted_executor): + ex, output_vds = offset_sorted_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + # verify that coordinate system metadata exists + assert layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name) + assert layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().name) + assert layout.isMetadataDoubleVector2Available( + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().name) + + # verify coordinate system component values + value = layout.getMetadataDoubleVector2( + openvds.KnownMetadata.surveyCoordinateSystemOrigin().category, + openvds.KnownMetadata.surveyCoordinateSystemOrigin().name) + assert value[0] == pytest.approx(431961.93, abs=0.05) + assert value[1] == pytest.approx(6348554.9, abs=0.05) + + value = layout.getMetadataDoubleVector2( + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemInlineSpacing().name) + assert value[0] == pytest.approx(6.05, abs=0.05) + assert value[1] == pytest.approx(24.26, abs=0.05) + + value = layout.getMetadataDoubleVector2( + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().category, + openvds.KnownMetadata.surveyCoordinateSystemCrosslineSpacing().name) + assert value[0] == 
pytest.approx(-12.13, abs=0.05) + assert value[1] == pytest.approx(3.02, abs=0.05) + + +def test_axes(offset_sorted_executor): + ex, output_vds = offset_sorted_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + + assert 4 == layout.getDimensionality() + + assert 1876 == layout.getDimensionNumSamples(0) + assert 0.0 == layout.getDimensionMin(0) + assert 7500.0 == layout.getDimensionMax(0) + + assert 65 == layout.getDimensionNumSamples(1) + assert 1.0 == layout.getDimensionMin(1) + assert 65.0 == layout.getDimensionMax(1) + + assert 160 == layout.getDimensionNumSamples(2) + assert 1961.0 == layout.getDimensionMin(2) + assert 2120.0 == layout.getDimensionMax(2) + + assert 40 == layout.getDimensionNumSamples(3) + assert 4982.0 == layout.getDimensionMin(3) + assert 5021.0 == layout.getDimensionMax(3) + + assert pytest.approx(-0.137178, abs=1.0e-05) == layout.getChannelValueRangeMin(0) + assert pytest.approx(0.141104, abs=1.0e-05) == layout.getChannelValueRangeMax(0) + + +def test_read_gather(offset_sorted_executor): + ex, output_vds = offset_sorted_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + layout = openvds.getLayout(handle) + access_manager = openvds.getAccessManager(handle) + dim0_size = layout.getDimensionNumSamples(0) + dim1_size = layout.getDimensionNumSamples(1) + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0 + + inline_index = layout.getDimensionNumSamples(3) // 2 + crossline_index = layout.getDimensionNumSamples(2) // 2 + + request = access_manager.requestVolumeSubset((0, 0, crossline_index, inline_index, 0, 0), + (dim0_size, dim1_size, crossline_index + 1, inline_index + 1, 1, 1), + channel=0, + 
format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_023) + trace_flag_request = access_manager.requestVolumeSubset((0, 0, crossline_index, inline_index, 0, 0), + (1, dim1_size, crossline_index + 1, inline_index + 1, 1, 1), + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_023) + + data = request.data.reshape(dim1_size, dim0_size) + trace_flag_data = trace_flag_request.data + + for dim1 in range(dim1_size): + total = 0 + for dim0 in range(dim0_size): + total += abs(data[dim1, dim0]) + + if trace_flag_data[dim1] == 0: + assert total == 0.0, f"dead trace at {dim1}" + else: + assert total > 0.0, f"trace at {dim1}" + + +@pytest.mark.skip(reason="This test is really inefficient and takes ~100 minutes to run") +def test_compare_with_conventional_sorted(offset_sorted_executor, conventional_sorted_executor): + ex, output_vds = offset_sorted_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + conventional_ex, conventional_vds = conventional_sorted_executor + + result = conventional_ex.run() + + assert result == 0, conventional_ex.output() + assert Path(conventional_vds.filename).exists() + + with openvds.open(output_vds.filename, "") as handle: + with openvds.open(conventional_vds.filename, "") as handle_conv: + access_manager = openvds.getAccessManager(handle) + layout = openvds.getLayout(handle) + + access_manager_conv = openvds.getAccessManager(handle_conv) + layout_conv = openvds.getLayout(handle_conv) + + num_z = layout.getDimensionNumSamples(0) + fold = layout.getDimensionNumSamples(1) + num_crossline = layout.getDimensionNumSamples(2) + num_inline = layout.getDimensionNumSamples(3) + + assert num_z == layout_conv.getDimensionNumSamples(0) + assert fold == layout_conv.getDimensionNumSamples(1) + assert num_crossline == layout_conv.getDimensionNumSamples(2) + assert 
num_inline == layout_conv.getDimensionNumSamples(3) + + trace_channel = layout.getChannelIndex("Trace") + trace_channel_conv = layout_conv.getChannelIndex("Trace") + offset_channel = layout.getChannelIndex("Offset") + offset_channel_conv = layout_conv.getChannelIndex("Offset") + + assert trace_channel > 0 + assert trace_channel_conv > 0 + assert offset_channel > 0 + assert offset_channel_conv > 0 + + for inline_index in range(num_inline): + for crossline_index in range(num_crossline): + voxel_min = (0, 0, crossline_index, inline_index, 0, 0) + voxel_max = (num_z, fold, crossline_index + 1, inline_index + 1, 1, 1) + trace_voxel_max = (1, fold, crossline_index + 1, inline_index + 1, 1, 1) + + request = access_manager.requestVolumeSubset(voxel_min, voxel_max, + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_023) + request_conv = access_manager_conv.requestVolumeSubset(voxel_min, voxel_max, + channel=0, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_012) + + trace_flag_request = access_manager.requestVolumeSubset(voxel_min, trace_voxel_max, + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_023) + trace_flag_request_conv = access_manager_conv.requestVolumeSubset(voxel_min, trace_voxel_max, + channel=trace_channel_conv, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_012) + + offset_request = access_manager.requestVolumeSubset(voxel_min, trace_voxel_max, + channel=offset_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + dimensionsND=openvds.DimensionsND.Dimensions_023) + offset_request_conv = access_manager_conv.requestVolumeSubset(voxel_min, trace_voxel_max, + channel=offset_channel_conv, + format=openvds.VolumeDataChannelDescriptor.Format.Format_R32, + 
dimensionsND=openvds.DimensionsND.Dimensions_012) + + sample_data = request.data.reshape(fold, num_z) + sample_data_conv = request_conv.data.reshape(fold, num_z) + + for trace_index in range(fold): + assert trace_flag_request.data[trace_index] == trace_flag_request_conv.data[trace_index],\ + f"trace index {trace_index} xl index {crossline_index} il index {inline_index}" + assert offset_request.data[trace_index] == offset_request_conv.data[trace_index],\ + f"trace index {trace_index} xl index {crossline_index} il index {inline_index}" + + if trace_flag_request.data[trace_index] != 0: + for sample_index in range(num_z): + assert sample_data[trace_index, sample_index] == sample_data_conv[trace_index, sample_index],\ + f"sample index {sample_index} trace index {trace_index} xl index {crossline_index} il index {inline_index}" + + +def test_create_scan_file(offset_sorted_segy, output_scan): + ex = ImportExecutor() + + ex.add_args(["--header-field", "offset=177:4"]) + + ex.add_arg("--offset-sorted") + ex.add_arg("--scan") + ex.add_args(["--file-info", output_scan.filename]) + + ex.add_arg(offset_sorted_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_scan.filename).exists() + + # might be nice to load the scan file do some light verification of contents... 
+ + +def test_with_scan_file(offset_sorted_segy, offset_sorted_scan, output_vds): + ex = ImportExecutor() + + ex.add_args(["--header-field", "offset=177:4"]) + + ex.add_arg("--offset-sorted") + ex.add_args(["--file-info", offset_sorted_scan]) + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(offset_sorted_segy) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + # check produce status as a quick test for output VDS validity + with openvds.open(output_vds.filename, "") as handle: + access_manager = openvds.getAccessManager(handle) + + assert access_manager.getVDSProduceStatus(openvds.DimensionsND.Dimensions_023) != openvds.VDSProduceStatus.Unavailable + + +def test_dupe_warning(offset_sorted_executor): + ex, output_vds = offset_sorted_executor + + s = ex.command_line() + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + # check that the output contains the warning message for traces with duplicate offset, inline, crossline key combos + assert "duplicate key combinations" in ex.output() + + +def test_dupe_no_warning(offset_sorted_deduped_executor): + ex, output_vds = offset_sorted_deduped_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + # check that the output contains the warning message for traces with duplicate offset, inline, crossline key combos + assert "duplicate key combinations" not in ex.output() diff --git a/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py b/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py new file mode 100644 index 0000000000000000000000000000000000000000..a7f56a7fa3f28c4cd60eeb2ea5ed7c13602275be --- /dev/null +++ b/tests/tools/SEGYImport/pytests/test_prestack_gather_spacing.py @@ -0,0 +1,198 @@ +import os +from pathlib import Path +from typing import Tuple, Union + +import pytest +import openvds + +from segyimport_test_config 
import test_data_dir, ImportExecutor, TempVDSGuard + + +def construct_respace_executor(output_vds: TempVDSGuard, segy_name: str, respace_setting: Union[str, None]) -> ImportExecutor: + ex = ImportExecutor() + + if respace_setting: + ex.add_args(["--respace-gathers", respace_setting]) + + ex.add_arg("--prestack") + ex.add_args(["--vdsfile", output_vds.filename]) + + ex.add_arg(segy_name) + + return ex + + +@pytest.fixture +def output_vds() -> TempVDSGuard: + return TempVDSGuard("import_mutes_test") + + +@pytest.fixture +def prestack_segy() -> str: + return os.path.join(test_data_dir, "HeadwavePlatform", "PlatformIntegration", "Teleport", "Teleport_Trim", + "3D_Prestack", "ST0202R08_Gather_Time.segy") + + +@pytest.fixture +def off_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Setup an ImportExecutor with respacing set to Off""" + ex = construct_respace_executor(output_vds, prestack_segy, "Off") + return ex, output_vds + + +@pytest.fixture +def invalid_executor(prestack_segy, output_vds) -> Tuple[ImportExecutor, TempVDSGuard]: + """Setup an ImportExecutor with respacing set to an invalid value""" + ex = construct_respace_executor(output_vds, prestack_segy, "Partial") + return ex, output_vds + + +def get_gathers_stats(vds_filename: str) -> Tuple[int, int, int, int, int, int, int]: + """ + Read all the gathers in a single inline and return stats on how many gathers have dead traces in various + regions of the gather. 
+ + :return tuple of ( + total gathers, + number of gathers with dead traces only at front of gather, + number of gathers with dead traces only within the gather, + number of gathers with dead traces only at end of gather, + number of gathers with dead traces in multiple places, + number of gathers with no dead traces + number of consecutive duplicate offset values within a gather + ) + """ + with openvds.open(vds_filename, "") as handle: + layout = openvds.getLayout(handle) + access_manager = openvds.getAccessManager(handle) + + trace_channel = layout.getChannelIndex("Trace") + assert trace_channel > 0, "Trace channel not found" + + offset_channel = layout.getChannelIndex("Offset") + assert offset_channel > 0, "Offset channel not found" + + # only read one inline; make it the middle inline + inline_index = layout.getDimensionNumSamples(3) // 2 + dim2_size = layout.getDimensionNumSamples(2) + dim1_size = layout.getDimensionNumSamples(1) + + # stats counters + front_only = 0 + middle_only = 0 + end_only = 0 + mixed = 0 + no_dead = 0 + duplicate_offsets = 0 + + for crossline_index in range(dim2_size): + trace_flag_request = access_manager.requestVolumeSubset((0, 0, crossline_index, inline_index, 0, 0), + (1, dim1_size, crossline_index + 1, + inline_index + 1, 1, 1), + channel=trace_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_012) + + offset_request = access_manager.requestVolumeSubset((0, 0, crossline_index, inline_index, 0, 0), + (1, dim1_size, crossline_index + 1, inline_index + 1, 1, + 1), + channel=offset_channel, + format=openvds.VolumeDataChannelDescriptor.Format.Format_U8, + dimensionsND=openvds.DimensionsND.Dimensions_012) + + assert dim1_size == trace_flag_request.data.shape[0] + assert dim1_size == offset_request.data.shape[0] + + # analyze trace flag and offset data to figure out which stats counters to bump + + dead_trace_ranges = [] + i = 0 + while i < dim1_size: + if 
trace_flag_request.data[i] == 0: + start = i + i += 1 + while i < dim1_size and trace_flag_request.data[i] == 0: + i += 1 + end = i - 1 + dead_trace_ranges.append((start, end)) + else: + i += 1 + + if len(dead_trace_ranges) == 0: + no_dead += 1 + elif len(dead_trace_ranges) == 1: + # either front, middle, end + start, stop = dead_trace_ranges[0] + if start == 0: + front_only += 1 + elif stop == trace_flag_request.data.shape[0] - 1: + end_only += 1 + else: + middle_only += 1 + else: + mixed += 1 + + # Find any traces with duplicate offset values within the gather + for i in range(1, offset_request.data.shape[0]): + if offset_request.data[i] == offset_request.data[i - 1]: + duplicate_offsets += 1 + + return dim2_size, front_only, middle_only, end_only, mixed, no_dead, duplicate_offsets + + +def test_gather_spacing_invalid_arg(invalid_executor): + ex, output_vds = invalid_executor + + result = ex.run() + + assert result != 0, ex.output() + assert "unknown --respace-gathers option" in ex.output().lower() + + +@pytest.mark.parametrize("respace_option", [None, "Auto", "On"]) +def test_gather_spacing_with_on_variations(prestack_segy, output_vds, respace_option): + """ + Parameterized test for the different ways to execute the importer that all result in having gather + respacing turned On. 
+ """ + ex = construct_respace_executor(output_vds, prestack_segy, respace_option) + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + total, leading_only, middle_only, trailing_only, mixed, no_dead, duplicate_offsets =\ + get_gathers_stats(output_vds.filename) + + assert total == 71 + assert middle_only == 23 + assert mixed == 0 + assert trailing_only == 0 + assert leading_only == 0 + assert no_dead == 48 + + # There traces with duplicate offsets in this data, and the importer must preserve all those traces + assert duplicate_offsets == 40 + + +def test_gather_spacing_off(off_executor): + ex, output_vds = off_executor + + result = ex.run() + + assert result == 0, ex.output() + assert Path(output_vds.filename).exists() + + total, leading_only, middle_only, trailing_only, mixed, no_dead, duplicate_offsets =\ + get_gathers_stats(output_vds.filename) + + assert total == 71 + assert middle_only == 0 + assert mixed == 0 + assert trailing_only == 23 + assert leading_only == 0 + assert no_dead == 48 + + # There traces with duplicate offsets in this data, and the importer must preserve all those traces + assert duplicate_offsets == 40 diff --git a/tools/SEGYImport/CMakeLists.txt b/tools/SEGYImport/CMakeLists.txt index b09f076de9cbbb92c470c8bbfdc765e579ad0edf..71496143174a90f123fc7a6f06b75631fa4b30fb 100644 --- a/tools/SEGYImport/CMakeLists.txt +++ b/tools/SEGYImport/CMakeLists.txt @@ -1,5 +1,7 @@ add_executable(SEGYImport SEGYImport.cpp + TraceInfo2DManager.cpp + TraceInfo2DManager.h ) target_link_libraries(SEGYImport PRIVATE Threads::Threads openvds segyutils jsoncpp_lib_static fmt::fmt) diff --git a/tools/SEGYImport/SEGYImport.cpp b/tools/SEGYImport/SEGYImport.cpp index f45f094a396555c9d76b44ec09eb9d2b0bb48c17..bc0ea3f707e636fb1b95a57a565783eae8efb88f 100644 --- a/tools/SEGYImport/SEGYImport.cpp +++ b/tools/SEGYImport/SEGYImport.cpp @@ -21,6 +21,7 @@ #include "VDS/Hash.h" #include #include +#include 
"TraceInfo2DManager.h" #include #include @@ -41,6 +42,9 @@ #include #include +#define _USE_MATH_DEFINES +#include + #include #include #include @@ -51,6 +55,8 @@ #include #include +#include +#include #if defined(WIN32) #undef WIN32_LEAN_AND_MEAN // avoid warnings if defined on command line @@ -76,8 +82,26 @@ int64_t GetTotalSystemMemory() long page_size = sysconf(_SC_PAGE_SIZE); return int64_t(pages) * int64_t(page_size); } + +#include #endif +constexpr double METERS_PER_FOOT = 0.3048; + +enum class TraceSpacingByOffset +{ + Off = 0, + On = 1, + Auto = 2 +}; + +enum class PrimaryKeyValue +{ + Other = 0, + InlineNumber = 1, + CrosslineNumber = 2 +}; + inline char asciitolower(char in) { if (in <= 'Z' && in >= 'A') return in - ('Z' - 'z'); @@ -200,7 +224,7 @@ SerializeSEGYHeaderField(SEGY::HeaderField const& headerField) } Json::Value -SerializeSEGYFileInfo(SEGYFileInfo const& fileInfo, const int fileIndex) +SerializeSEGYFileInfo(SEGYFileInfo const& fileInfo, const size_t fileIndex) { Json::Value jsonFileInfo; @@ -215,15 +239,38 @@ SerializeSEGYFileInfo(SEGYFileInfo const& fileInfo, const int fileIndex) jsonFileInfo["primaryKey"] = SerializeSEGYHeaderField(fileInfo.m_primaryKey); jsonFileInfo["secondaryKey"] = SerializeSEGYHeaderField(fileInfo.m_secondaryKey); - Json::Value - jsonSegmentInfoArray(Json::ValueType::arrayValue); - - for (auto const& segmentInfo : fileInfo.m_segmentInfoLists[fileIndex]) + if (fileInfo.m_segyType == SEGY::SEGYType::PrestackOffsetSorted) { - jsonSegmentInfoArray.append(SerializeSEGYSegmentInfo(segmentInfo)); + Json::Value + jsonOffsetMap; + + for (const auto& entry : fileInfo.m_segmentInfoListsByOffset[fileIndex]) + { + Json::Value + jsonSegmentInfoArray(Json::ValueType::arrayValue); + + for (auto const& segmentInfo : entry.second) + { + jsonSegmentInfoArray.append(SerializeSEGYSegmentInfo(segmentInfo)); + } + + jsonOffsetMap[std::to_string(entry.first)] = jsonSegmentInfoArray; + } + + jsonFileInfo["segmentInfo"] = jsonOffsetMap; } + else + 
{ + Json::Value + jsonSegmentInfoArray(Json::ValueType::arrayValue); + + for (auto const& segmentInfo : fileInfo.m_segmentInfoLists[fileIndex]) + { + jsonSegmentInfoArray.append(SerializeSEGYSegmentInfo(segmentInfo)); + } - jsonFileInfo["segmentInfo"] = jsonSegmentInfoArray; + jsonFileInfo["segmentInfo"] = jsonSegmentInfoArray; + } return jsonFileInfo; } @@ -251,7 +298,12 @@ g_traceHeaderFields = { "inlinenumber", SEGY::TraceHeader::InlineNumberHeaderField }, { "crosslinenumber", SEGY::TraceHeader::CrosslineNumberHeaderField }, { "receiver", SEGY::TraceHeader::ReceiverHeaderField }, - { "offset", SEGY::TraceHeader::OffsetHeaderField } + { "offset", SEGY::TraceHeader::OffsetHeaderField }, + { "offsetx", SEGY::TraceHeader::OffsetXHeaderField }, + { "offsety", SEGY::TraceHeader::OffsetYHeaderField }, + { "azimuth", SEGY::TraceHeader::Azimuth }, + { "mutestarttime", SEGY::TraceHeader::MuteStartTime }, + { "muteendtime", SEGY::TraceHeader::MuteEndTime }, }; std::map @@ -564,29 +616,56 @@ getOrderedSegmentListIndices(SEGYFileInfo const& fileInfo, size_t& globalTotalSe std::vector orderedListIndices; - size_t - longestList = 0; globalTotalSegments = 0; - for (size_t i = 0; i < fileInfo.m_segmentInfoLists.size(); ++i) + + if (fileInfo.IsOffsetSorted()) { - orderedListIndices.push_back(static_cast(i)); - globalTotalSegments += fileInfo.m_segmentInfoLists[i].size(); - if (fileInfo.m_segmentInfoLists[i].size() > fileInfo.m_segmentInfoLists[longestList].size()) + for (int i = 0; i < static_cast(fileInfo.m_segmentInfoListsByOffset.size()); ++i) { - longestList = i; + orderedListIndices.push_back(i); + for (const auto& entry : fileInfo.m_segmentInfoListsByOffset[i]) + { + globalTotalSegments += entry.second.size(); + } } + + // don't need to check for ascending/descending primary key because the map will have offsets (keys) sorted in ascending order + + auto + comparator = [&](int i1, int i2) + { + const auto + & v1 = fileInfo.m_segmentInfoListsByOffset[i1], + & v2 = 
fileInfo.m_segmentInfoListsByOffset[i2]; + return (*v1.begin()).first < (*v2.begin()).first; + }; + std::sort(orderedListIndices.begin(), orderedListIndices.end(), comparator); } - const bool - isAscending = fileInfo.m_segmentInfoLists[longestList].front().m_binInfoStart.m_inlineNumber <= fileInfo.m_segmentInfoLists[longestList].back().m_binInfoStart.m_inlineNumber; - auto - comparator = [&](int i1, int i2) + else { - const auto - & v1 = fileInfo.m_segmentInfoLists[i1], - & v2 = fileInfo.m_segmentInfoLists[i2]; - return isAscending ? v1.front().m_binInfoStart.m_inlineNumber < v2.front().m_binInfoStart.m_inlineNumber : v2.front().m_binInfoStart.m_inlineNumber < v1.front().m_binInfoStart.m_inlineNumber; - }; - std::sort(orderedListIndices.begin(), orderedListIndices.end(), comparator); + size_t + longestList = 0; + for (size_t i = 0; i < fileInfo.m_segmentInfoLists.size(); ++i) + { + orderedListIndices.push_back(static_cast(i)); + globalTotalSegments += fileInfo.m_segmentInfoLists[i].size(); + if (fileInfo.m_segmentInfoLists[i].size() > fileInfo.m_segmentInfoLists[longestList].size()) + { + longestList = i; + } + } + const bool + isAscending = fileInfo.m_segmentInfoLists[longestList].front().m_binInfoStart.m_inlineNumber <= fileInfo.m_segmentInfoLists[longestList].back().m_binInfoStart.m_inlineNumber; + auto + comparator = [&](int i1, int i2) + { + const auto + & v1 = fileInfo.m_segmentInfoLists[i1], + & v2 = fileInfo.m_segmentInfoLists[i2]; + return isAscending ? 
v1.front().m_binInfoStart.m_inlineNumber < v2.front().m_binInfoStart.m_inlineNumber : v2.front().m_binInfoStart.m_inlineNumber < v1.front().m_binInfoStart.m_inlineNumber; + }; + std::sort(orderedListIndices.begin(), orderedListIndices.end(), comparator); + } return orderedListIndices; } @@ -594,6 +673,8 @@ getOrderedSegmentListIndices(SEGYFileInfo const& fileInfo, size_t& globalTotalSe SEGYSegmentInfo const& findRepresentativeSegment(SEGYFileInfo const& fileInfo, int& primaryStep, int& bestListIndex) { + assert(!fileInfo.IsOffsetSorted()); + // Since we give more weight to segments near the center of the data we need a sorted index of segment lists so that we can // traverse the lists in data order, instead of the arbitrary order given by the filename ordering. size_t @@ -660,6 +741,97 @@ findRepresentativeSegment(SEGYFileInfo const& fileInfo, int& primaryStep, int& b return fileInfo.m_segmentInfoLists[bestListIndex][bestIndex]; } +// Analog of findRepresentativeSegment for offset-sorted prestack +int +findRepresentativePrimaryKey(SEGYFileInfo const& fileInfo, int& primaryStep) +{ + // If primary keys are reverse-sorted in the file the structures we build below will hide that. + // For the computations we're doing in this method it's OK that we're not preserving the original order. 
+ + // scan segment infos to get primary key values and total trace count per primary key + std::map + primaryKeyLengths; + for (const auto& offsetMap : fileInfo.m_segmentInfoListsByOffset) + { + for (const auto& entry : offsetMap) + { + for (const auto& segmentInfo : entry.second) + { + const auto + segmentLength = segmentInfo.m_traceStop - segmentInfo.m_traceStart + 1; + const auto + newSum = segmentLength + primaryKeyLengths[segmentInfo.m_primaryKey]; + primaryKeyLengths[segmentInfo.m_primaryKey] = newSum; + } + } + } + + // determine primary step + primaryStep = 0; + int + segmentPrimaryStep = 0, + previousPrimaryKey = 0, + firstPrimaryKey = 0, + lastPrimaryKey = 0; + bool + firstSegment = true; + for (const auto& entry : primaryKeyLengths) + { + if (firstSegment) + { + firstSegment = false; + previousPrimaryKey = entry.first; + firstPrimaryKey = entry.first; + lastPrimaryKey = entry.first; + } + else + { + lastPrimaryKey = entry.first; + + // Updating the primary step with the step for the previous segment intentionally ignores the step of the last segment since it can be anomalous + if (segmentPrimaryStep && (!primaryStep || std::abs(segmentPrimaryStep) < std::abs(primaryStep))) + { + primaryStep = segmentPrimaryStep; + } + + segmentPrimaryStep = entry.first - previousPrimaryKey; + previousPrimaryKey = entry.first; + } + } + + // If the primary step couldn't be determined, set it to the last step or 1 + primaryStep = primaryStep ? 
primaryStep : std::max(segmentPrimaryStep, 1); + + float + bestScore = 0.0f; + int + bestPrimaryKey = 0; + + for (const auto& entry : primaryKeyLengths) + { + const auto& + numTraces = entry.second; + + // index of this segment within the entirety of segments from all input files + const auto + factor = abs(static_cast(entry.first - firstPrimaryKey) / static_cast(lastPrimaryKey - firstPrimaryKey)); + + const auto + multiplier = 1.5f - abs(factor - 0.5f); // give 50% more importance to a segment in the middle of the dataset + + const auto + score = static_cast(numTraces) * multiplier; + + if (score > bestScore) + { + bestScore = score; + bestPrimaryKey = entry.first; + } + } + + return bestPrimaryKey; +} + void copySamples(const void* data, SEGY::BinaryHeader::DataSampleFormatCode dataSampleFormatCode, SEGY::Endianness endianness, float* target, int sampleStart, int sampleCount) { @@ -700,19 +872,57 @@ copySamples(const void* data, SEGY::BinaryHeader::DataSampleFormatCode dataSampl } } +void +updateValueRangeHeaps(const SEGYFileInfo& fileInfo, const int heapSizeMax, std::vector& minHeap, std::vector& maxHeap, const void * data, std::vector& sampleBuffer) +{ + if (fileInfo.m_dataSampleFormatCode == SEGY::BinaryHeader::DataSampleFormatCode::IBMFloat || fileInfo.m_dataSampleFormatCode == SEGY::BinaryHeader::DataSampleFormatCode::IEEEFloat) + { + sampleBuffer.resize(fileInfo.m_sampleCount); + copySamples(data, fileInfo.m_dataSampleFormatCode, fileInfo.m_headerEndianness, sampleBuffer.data(), 0, fileInfo.m_sampleCount); + + for (int sample = 0; sample < fileInfo.m_sampleCount; sample++) + { + if (int(minHeap.size()) < heapSizeMax) + { + minHeap.push_back(sampleBuffer[sample]); + std::push_heap(minHeap.begin(), minHeap.end(), std::less()); + } + else if (sampleBuffer[sample] < minHeap[0]) + { + std::pop_heap(minHeap.begin(), minHeap.end(), std::less()); + minHeap.back() = sampleBuffer[sample]; + std::push_heap(minHeap.begin(), minHeap.end(), std::less()); + } + + if 
(int(maxHeap.size()) < heapSizeMax) + { + maxHeap.push_back(sampleBuffer[sample]); + std::push_heap(maxHeap.begin(), maxHeap.end(), std::greater()); + } + else if (sampleBuffer[sample] > maxHeap[0]) + { + std::pop_heap(maxHeap.begin(), maxHeap.end(), std::greater()); + maxHeap.back() = sampleBuffer[sample]; + std::push_heap(maxHeap.begin(), maxHeap.end(), std::greater()); + } + } + } +} + bool -analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSegmentInfo const& segmentInfo, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, const SEGY::SEGYType segyType, int& offsetStart, int& offsetEnd, int& offsetStep, bool jsonOutput, OpenVDS::Error& error) +analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSegmentInfo const& segmentInfo, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, const SEGY::SEGYType segyType, TraceInfo2DManager * pTraceInfo2DManager, TraceSpacingByOffset& traceSpacingByOffset, std::vector& offsetValues, bool jsonOutput, OpenVDS::Error& error) { assert(segmentInfo.m_traceStop >= segmentInfo.m_traceStart && "A valid segment info should always have a stop trace greater or equal to the start trace"); + assert(pTraceInfo2DManager != nullptr); + + pTraceInfo2DManager->Clear(); bool success = true; valueRange = OpenVDS::FloatRange(0.0f, 1.0f); secondaryStep = 0; fold = 1; - offsetStart = 0; - offsetEnd = 0; - offsetStep = 0; + offsetValues.clear(); const int traceByteSize = fileInfo.TraceByteSize(); @@ -729,16 +939,12 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg maxHeap.reserve(heapSizeMax); // Allocate sample buffer for converting samples to float - std::unique_ptr sampleBuffer(new float[fileInfo.m_sampleCount]); - float* samples = sampleBuffer.get(); + std::vector sampleBuffer(fileInfo.m_sampleCount); // Determine fold and secondary step int gatherSecondaryKey = 0, gatherFold = 0, 
gatherSecondaryStep = 0; - bool - hasPreviousGatherOffset = false; - int - previousGatherOffset = 0; + std::map> gatherOffsetValues; for (int64_t trace = segmentInfo.m_traceStart; trace <= segmentInfo.m_traceStop; trace++) { @@ -747,7 +953,7 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg traceBufferStart = trace; traceBufferSize = (segmentInfo.m_traceStop - trace + 1) < 1000 ? int(segmentInfo.m_traceStop - trace + 1) : 1000; - buffer.reset(new char[traceByteSize * traceBufferSize]); + buffer.reset(new char[static_cast(traceByteSize) * traceBufferSize]); int64_t offset = SEGY::TextualFileHeaderSize + SEGY::BinaryFileHeaderSize + traceByteSize * traceBufferStart; success = dataProvider.Read(buffer.get(), offset, traceByteSize * traceBufferSize, error); @@ -760,7 +966,7 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg const void *header = buffer.get() + traceByteSize * (trace - traceBufferStart); const void *data = buffer.get() + traceByteSize * (trace - traceBufferStart) + SEGY::TraceHeaderSize; - int tracePrimaryKey = SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness); + int tracePrimaryKey = fileInfo.Is2D() ? 0 : SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness); int traceSecondaryKey = fileInfo.IsUnbinned() ? 
static_cast(trace - segmentInfo.m_traceStart + 1) : SEGY::ReadFieldFromHeader(header, fileInfo.m_secondaryKey, fileInfo.m_headerEndianness); if(tracePrimaryKey != segmentInfo.m_primaryKey) @@ -769,6 +975,8 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg continue; } + pTraceInfo2DManager->AddTraceInfo(static_cast(header)); + if(gatherFold > 0 && traceSecondaryKey == gatherSecondaryKey) { gatherFold++; @@ -792,73 +1000,84 @@ analyzeSegment(DataProvider &dataProvider, SEGYFileInfo const& fileInfo, SEGYSeg if (fileInfo.HasGatherOffset()) { - auto + const auto thisOffset = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness); - if (hasPreviousGatherOffset) - { - offsetStart = std::min(offsetStart, thisOffset); - offsetEnd = std::max(offsetEnd, thisOffset); - if (thisOffset != previousGatherOffset) - { - offsetStep = std::min(offsetStep, std::abs(thisOffset - previousGatherOffset)); - } - } - else + gatherOffsetValues[gatherSecondaryKey].push_back(thisOffset); + } + + // Update value range + updateValueRangeHeaps(fileInfo, heapSizeMax, minHeap, maxHeap, data, sampleBuffer); + } + + if (fileInfo.HasGatherOffset() && !fileInfo.IsUnbinned() && !gatherOffsetValues.empty()) + { + // use the longest gather in the segment to populate the list of offset values + + // find the longest gather + int + longestSecondaryKey = (*gatherOffsetValues.begin()).first; + + for (const auto& entry : gatherOffsetValues) + { + if (entry.second.size() > gatherOffsetValues[longestSecondaryKey].size()) { - offsetStart = thisOffset; - offsetEnd = thisOffset; - offsetStep = INT32_MAX; - hasPreviousGatherOffset = true; + longestSecondaryKey = entry.first; } - previousGatherOffset = thisOffset; } - // Update value range - if (fileInfo.m_dataSampleFormatCode == SEGY::BinaryHeader::DataSampleFormatCode::IBMFloat || fileInfo.m_dataSampleFormatCode == SEGY::BinaryHeader::DataSampleFormatCode::IEEEFloat) + // copy the longest 
gather's offset values to the method arg + const auto + & longestGatherValues = gatherOffsetValues[longestSecondaryKey]; + offsetValues.assign(longestGatherValues.begin(), longestGatherValues.end()); + + if (traceSpacingByOffset != TraceSpacingByOffset::Off) { - copySamples(data, fileInfo.m_dataSampleFormatCode, fileInfo.m_headerEndianness, samples, 0, fileInfo.m_sampleCount); + // Determine if offset values are suitable for computed respacing: Values are monotonically increasing + size_t + numSorted = 0; - for (int sample = 0; sample < fileInfo.m_sampleCount; sample++) + // check each gather to see if offsets are ordered within every gather + for (const auto& entry : gatherOffsetValues) { - if (int(minHeap.size()) < heapSizeMax) + const auto + & offsets = entry.second; + if (std::is_sorted(offsets.begin(), offsets.end())) { - minHeap.push_back(samples[sample]); - std::push_heap(minHeap.begin(), minHeap.end(), std::less()); + ++numSorted; } - else if (samples[sample] < minHeap[0]) + else { - std::pop_heap(minHeap.begin(), minHeap.end(), std::less()); - minHeap.back() = samples[sample]; - std::push_heap(minHeap.begin(), minHeap.end(), std::less()); + // if a gather is unordered then we're done + break; } + } - if (int(maxHeap.size()) < heapSizeMax) - { - maxHeap.push_back(samples[sample]); - std::push_heap(maxHeap.begin(), maxHeap.end(), std::greater()); - } - else if (samples[sample] > maxHeap[0]) + if (numSorted == gatherOffsetValues.size()) + { + // we can do respacing + if (traceSpacingByOffset == TraceSpacingByOffset::Auto) { - std::pop_heap(maxHeap.begin(), maxHeap.end(), std::greater()); - maxHeap.back() = samples[sample]; - std::push_heap(maxHeap.begin(), maxHeap.end(), std::greater()); + // change the parameter to On to indicate that respacing is in effect + traceSpacingByOffset = TraceSpacingByOffset::On; } } - } - } + else if (traceSpacingByOffset == TraceSpacingByOffset::Auto) + { + // the offset spacing doesn't conform to our needs for respacing, so 
force the setting from Auto to Off
+        traceSpacingByOffset = TraceSpacingByOffset::Off;
+      }
+      else
+      {
+        // traceSpacingByOffset is set to On, but detected offset values aren't compatible with the respacing scheme
 
-  if (fileInfo.HasGatherOffset() && !fileInfo.IsUnbinned())
-  {
-    // check that offset start/end/step is consistent
-    if (offsetStart + (fold - 1) * offsetStep != offsetEnd)
-    {
-      const auto
-        msgFormat = "The detected gather offset start/end/step of '{0}/{1}/{2}' is not consistent with the detected fold of '{3}'. This usually indicates using the wrong header format for the input dataset.\n.";
-      error.string = fmt::format(msgFormat, offsetStart, offsetEnd, offsetStep, fold);
-      error.code = -1;
-      return false;
+        auto msg = "The detected gather offset values are not monotonically increasing. This may indicate an incorrect header format for the trace header Offset field and/or an incorrect option value for --respace-gathers.";
+        OpenVDS::printError(jsonOutput, "analyzeSegment", msg);
+        error.code = -1;
+        return false;
+      }
+    }
+  }
 
   // If the secondary step couldn't be determined, set it to the last step or 1
@@ -882,134 +1101,539 @@
   return success;
 }
 
+// Analog of analyzeSegment for offset-sorted SEGY
 bool
-createSEGYMetadata(DataProvider &dataProvider, SEGYFileInfo const &fileInfo, OpenVDS::MetadataContainer& metadataContainer, SEGY::BinaryHeader::MeasurementSystem &measurementSystem, OpenVDS::Error& error)
+analyzePrimaryKey(const std::vector& dataProviders, SEGYFileInfo const& fileInfo, const int primaryKey, float valueRangePercentile, OpenVDS::FloatRange& valueRange, int& fold, int& secondaryStep, std::vector& offsetValues, bool jsonOutput, OpenVDS::Error& error)
 {
-  std::vector textHeader(SEGY::TextualFileHeaderSize);
-  std::vector binaryHeader(SEGY::BinaryFileHeaderSize);
+  assert(fileInfo.IsOffsetSorted());
 
-  // Read headers
-  bool success = dataProvider.Read(textHeader.data(), 
0, SEGY::TextualFileHeaderSize, error) && - dataProvider.Read(binaryHeader.data(), SEGY::TextualFileHeaderSize, SEGY::BinaryFileHeaderSize, error); + // TODO what kind of sanity checking should we do on offset-sorted segments? + //if (segmentInfo.m_traceStop < segmentInfo.m_traceStart) + //{ + // m_errorString = "Invalid parameters, a valid segment info should always have a stop trace greater than or equal to the start trace"; + // VDSImportSEGY::pLoggingInterface->LogError("VDSImportSEGY::analyzeSegment", m_errorString); + // return false; + //} - if (!success) return false; + bool + success = true; - // Create metadata - metadataContainer.SetMetadataBLOB("SEGY", "TextHeader", textHeader); + valueRange = OpenVDS::FloatRange(0.0f, 1.0f); + secondaryStep = 0; + fold = 1; + offsetValues.clear(); - metadataContainer.SetMetadataBLOB("SEGY", "BinaryHeader", binaryHeader); + if (fileInfo.m_segmentInfoListsByOffset.empty() || fileInfo.m_segmentInfoListsByOffset.front().empty()) + { + // no data to analyze + return true; + } - metadataContainer.SetMetadataInt("SEGY", "Endianness", int(fileInfo.m_headerEndianness)); - metadataContainer.SetMetadataInt("SEGY", "DataSampleFormatCode", int(fileInfo.m_dataSampleFormatCode)); + // get all offset values from all files + std::set + globalOffsets; + for (const auto& offsetSegments : fileInfo.m_segmentInfoListsByOffset) + { + for (const auto& entry : offsetSegments) + { + globalOffsets.insert(entry.first); + } + } - measurementSystem = SEGY::BinaryHeader::MeasurementSystem(SEGY::ReadFieldFromHeader(binaryHeader.data(), SEGY::BinaryHeader::MeasurementSystemHeaderField, fileInfo.m_headerEndianness)); - return success; -} + fold = static_cast(globalOffsets.size()); -void -createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryHeader::MeasurementSystem measurementSystem, std::string const &crsWkt, OpenVDS::MetadataContainer& metadataContainer) -{ - if (fileInfo.m_segmentInfoLists.empty()) return; + 
offsetValues.assign(globalOffsets.begin(), globalOffsets.end()); - double inlineSpacing[2] = { 0, 0 }; - double crosslineSpacing[2] = { 0, 0 }; + const int32_t + traceByteSize = fileInfo.TraceByteSize(); - size_t - globalTotalSegments; - auto - orderedListIndices = getOrderedSegmentListIndices(fileInfo, globalTotalSegments); + int64_t + traceBufferStart = 0, + traceBufferSize = 0; - // Determine crossline spacing - int countedCrosslineSpacings = 0; + std::vector + buffer; - for (size_t listIndex : orderedListIndices) + // 1. Find all the segments in all files that match the primary key + // 2. Get a total trace count of all the segments; pass trace count to value range estimator + std::map>> + providerSegments; + int64_t + primaryKeyTraceCount = 0; + for (size_t fileIndex = 0; fileIndex < fileInfo.m_segmentInfoListsByOffset.size(); ++fileIndex) { - const auto - & segmentInfoList = fileInfo.m_segmentInfoLists[listIndex]; - - for (auto const& segmentInfo : segmentInfoList) + const auto& + offsetSegments = fileInfo.m_segmentInfoListsByOffset[fileIndex]; + for (const auto& entry : offsetSegments) { - int crosslineCount = segmentInfo.m_binInfoStop.m_crosslineNumber - segmentInfo.m_binInfoStart.m_crosslineNumber; + for (const auto& segmentInfo : entry.second) + { + if (segmentInfo.m_primaryKey == primaryKey) + { + providerSegments[fileIndex].push_back(std::cref(segmentInfo)); + primaryKeyTraceCount += segmentInfo.m_traceStop - segmentInfo.m_traceStart + 1; + } + } + } + } - if (crosslineCount == 0 || segmentInfo.m_binInfoStart.m_inlineNumber != segmentInfo.m_binInfoStop.m_inlineNumber) continue; + // Create min/max heaps for determining value range + int heapSizeMax = int(((100.0f - valueRangePercentile) / 100.0f) * primaryKeyTraceCount * fileInfo.m_sampleCount / 2) + 1; - double segmentCrosslineSpacing[3]; + std::vector minHeap, maxHeap; - segmentCrosslineSpacing[0] = (segmentInfo.m_binInfoStop.m_ensembleXCoordinate - segmentInfo.m_binInfoStart.m_ensembleXCoordinate) / 
crosslineCount; - segmentCrosslineSpacing[1] = (segmentInfo.m_binInfoStop.m_ensembleYCoordinate - segmentInfo.m_binInfoStart.m_ensembleYCoordinate) / crosslineCount; + minHeap.reserve(heapSizeMax); + maxHeap.reserve(heapSizeMax); - crosslineSpacing[0] += segmentCrosslineSpacing[0]; - crosslineSpacing[1] += segmentCrosslineSpacing[1]; + // Allocate sample buffer for converting samples to float + std::vector sampleBuffer(fileInfo.m_sampleCount); - countedCrosslineSpacings++; - } - } + // For secondary step need to scan all traces in all segments to get a complete (hopefully) list of secondary keys, then figure out start/end/step. + // We need to get all secondary keys before doing any analysis because we may not encounter them in least-to-greatest order in the offset-sorted segments. + std::set + allSecondaryKeys; - if (countedCrosslineSpacings > 0) + for (const auto& entry : providerSegments) { - crosslineSpacing[0] /= countedCrosslineSpacings; - crosslineSpacing[1] /= countedCrosslineSpacings; - } - else - { - crosslineSpacing[0] = 0; - crosslineSpacing[1] = 1; - } + auto& + dataProvider = dataProviders[entry.first]; + auto& + primaryKeySegments = entry.second; - // Determine inline spacing - SEGYSegmentInfo const& firstSegmentInfo = fileInfo.m_segmentInfoLists[orderedListIndices.front()].front(); - SEGYSegmentInfo const& lastSegmentInfo = fileInfo.m_segmentInfoLists[orderedListIndices.back()].back(); + for (const auto segmentInfoRef : primaryKeySegments) + { + const auto& + segmentInfo = segmentInfoRef.get(); + for (int64_t trace = segmentInfo.m_traceStart; trace <= segmentInfo.m_traceStop; trace++) + { + if (trace - traceBufferStart >= traceBufferSize) + { + traceBufferStart = trace; + traceBufferSize = (segmentInfo.m_traceStop - trace + 1) < 1000 ? 
int(segmentInfo.m_traceStop - trace + 1) : 1000; - if (firstSegmentInfo.m_binInfoStart.m_inlineNumber != lastSegmentInfo.m_binInfoStart.m_inlineNumber) - { - int inlineNunberDelta = lastSegmentInfo.m_binInfoStart.m_inlineNumber - firstSegmentInfo.m_binInfoStart.m_inlineNumber; - int crosslineNunberDelta = lastSegmentInfo.m_binInfoStart.m_crosslineNumber - firstSegmentInfo.m_binInfoStart.m_crosslineNumber; + buffer.resize(traceByteSize * traceBufferSize); - double offset[2] = { crosslineSpacing[0] * crosslineNunberDelta, - crosslineSpacing[1] * crosslineNunberDelta }; + const int64_t + offset = SEGY::TextualFileHeaderSize + SEGY::BinaryFileHeaderSize + traceByteSize * traceBufferStart; - inlineSpacing[0] = (lastSegmentInfo.m_binInfoStart.m_ensembleXCoordinate - firstSegmentInfo.m_binInfoStart.m_ensembleXCoordinate - offset[0]) / inlineNunberDelta; - inlineSpacing[1] = (lastSegmentInfo.m_binInfoStart.m_ensembleYCoordinate - firstSegmentInfo.m_binInfoStart.m_ensembleYCoordinate - offset[1]) / inlineNunberDelta; - } - else - { - // make square voxels - inlineSpacing[0] = crosslineSpacing[1]; - inlineSpacing[1] = -crosslineSpacing[0]; - } + OpenVDS::Error + readError; - // Determine origin - double origin[2]; + success = dataProvider.Read(buffer.data(), offset, (int32_t)(traceByteSize * traceBufferSize), readError); - origin[0] = firstSegmentInfo.m_binInfoStart.m_ensembleXCoordinate; - origin[1] = firstSegmentInfo.m_binInfoStart.m_ensembleYCoordinate; + if (!success) + { + OpenVDS::printError(jsonOutput, "analyzePrimaryKey", error.string); + break; + } + } - origin[0] -= inlineSpacing[0] * firstSegmentInfo.m_binInfoStart.m_inlineNumber; - origin[1] -= inlineSpacing[1] * firstSegmentInfo.m_binInfoStart.m_inlineNumber; + const void + * header = buffer.data() + traceByteSize * (trace - traceBufferStart), + * data = buffer.data() + traceByteSize * (trace - traceBufferStart) + SEGY::TraceHeaderSize; - origin[0] -= crosslineSpacing[0] * 
firstSegmentInfo.m_binInfoStart.m_crosslineNumber; - origin[1] -= crosslineSpacing[1] * firstSegmentInfo.m_binInfoStart.m_crosslineNumber; + const int + tracePrimaryKey = SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness), + traceSecondaryKey = SEGY::ReadFieldFromHeader(header, fileInfo.m_secondaryKey, fileInfo.m_headerEndianness); - // Set coordinate system - metadataContainer.SetMetadataDoubleVector2(LATTICE_CATEGORY, LATTICE_ORIGIN, OpenVDS::DoubleVector2(origin[0], origin[1])); - metadataContainer.SetMetadataDoubleVector2(LATTICE_CATEGORY, LATTICE_INLINE_SPACING, OpenVDS::DoubleVector2(inlineSpacing[0], inlineSpacing[1])); - metadataContainer.SetMetadataDoubleVector2(LATTICE_CATEGORY, LATTICE_CROSSLINE_SPACING, OpenVDS::DoubleVector2(crosslineSpacing[0], crosslineSpacing[1])); + allSecondaryKeys.insert(traceSecondaryKey); - if(!crsWkt.empty()) + if (tracePrimaryKey != segmentInfo.m_primaryKey) + { + auto + warnString = fmt::format("Warning: trace {} has a primary key that doesn't match with the segment. 
This trace will be ignored.", segmentInfo.m_traceStart + trace); + OpenVDS::printWarning(jsonOutput, "analyzePrimaryKey", warnString); + continue; + } + + // Update value range + updateValueRangeHeaps(fileInfo, heapSizeMax, minHeap, maxHeap, data, sampleBuffer); + } + } + } + + // Determine the secondary step + if (allSecondaryKeys.size() <= 1) { - metadataContainer.SetMetadataString(LATTICE_CATEGORY, CRS_WKT, crsWkt); + secondaryStep = 1; } - if(measurementSystem == SEGY::BinaryHeader::MeasurementSystem::Meters) + else if (allSecondaryKeys.size() == 2) { - metadataContainer.SetMetadataString(LATTICE_CATEGORY, LATTICE_UNIT, KNOWNMETADATA_UNIT_METER); + secondaryStep = *allSecondaryKeys.rbegin() - *allSecondaryKeys.begin(); } - else if(measurementSystem == SEGY::BinaryHeader::MeasurementSystem::Feet) + else { - metadataContainer.SetMetadataString(LATTICE_CATEGORY, LATTICE_UNIT, KNOWNMETADATA_UNIT_FOOT); + auto + secondaryIter = allSecondaryKeys.begin(); + auto + previousSecondary = *secondaryIter; + auto + workingStep = 0; + secondaryStep = 0; + while (++secondaryIter != allSecondaryKeys.end()) + { + // Updating the secondary step with the previously computed step intentionally ignores the step of the final secondary key since it can be anomalous + if (workingStep && (!secondaryStep || std::abs(workingStep) < std::abs(secondaryStep))) + { + secondaryStep = workingStep; + } + + workingStep = *secondaryIter - previousSecondary; + previousSecondary = *secondaryIter; + } + + // If the secondary step couldn't be determined, set it to the last step or 1 + secondaryStep = secondaryStep ? 
secondaryStep : std::max(workingStep, 1); } -} -///////////////////////////////////////////////////////////////////////////// + // Set value range + if (!minHeap.empty()) + { + assert(!maxHeap.empty()); + + if (minHeap[0] != maxHeap[0]) + { + valueRange = OpenVDS::FloatRange(minHeap[0], maxHeap[0]); + } + else + { + valueRange = OpenVDS::FloatRange(minHeap[0], minHeap[0] + 1.0f); + } + } + + return success; +} + +std::pair +findFirstLastInlineSegmentInfos(const SEGYFileInfo& fileInfo, const std::vector& orderedListIndices) +{ + if (fileInfo.IsOffsetSorted()) + { + // Find least/greatest inline numbers + // Create synthetic segment infos for least/greatest inline using least/greatest crossline numbers on those inlines + // If inline is ascending: return (least-inline segment info, greatest-inline segment info) + // else: return (greatest-inline segment info, least-inline segment info) + + int + minInline = INT32_MAX, + maxInline = -INT32_MAX; + const std::vector + * pLongestList = nullptr; + for (const auto& offsetMap : fileInfo.m_segmentInfoListsByOffset) + { + for (const auto& entry : offsetMap) + { + if (pLongestList == nullptr || (entry.second.size() > pLongestList->size())) + { + pLongestList = &entry.second; + } + for (const auto& segmentInfo : entry.second) + { + minInline = std::min(minInline, segmentInfo.m_primaryKey); + maxInline = std::max(maxInline, segmentInfo.m_primaryKey); + } + } + } + + SEGYSegmentInfo + minSegmentInfo{}, + maxSegmentInfo{}; + minSegmentInfo.m_primaryKey = minInline; + minSegmentInfo.m_binInfoStart = { minInline, INT32_MAX, 0.0, 0.0 }; + minSegmentInfo.m_binInfoStop = { minInline, -INT32_MAX, 0.0, 0.0 }; + maxSegmentInfo.m_primaryKey = maxInline; + maxSegmentInfo.m_binInfoStart = { maxInline, INT32_MAX, 0.0, 0.0 }; + maxSegmentInfo.m_binInfoStop = { maxInline, -INT32_MAX, 0.0, 0.0 }; + + for (const auto& offsetMap : fileInfo.m_segmentInfoListsByOffset) + { + for (const auto& entry : offsetMap) + { + for (const auto& segmentInfo : 
entry.second) + { + if (segmentInfo.m_primaryKey == minInline) + { + if (segmentInfo.m_binInfoStart.m_crosslineNumber < minSegmentInfo.m_binInfoStart.m_crosslineNumber) + { + minSegmentInfo.m_binInfoStart = segmentInfo.m_binInfoStart; + } + if (segmentInfo.m_binInfoStop.m_crosslineNumber > minSegmentInfo.m_binInfoStop.m_crosslineNumber) + { + minSegmentInfo.m_binInfoStop = segmentInfo.m_binInfoStop; + } + } + if (segmentInfo.m_primaryKey == maxInline) + { + if (segmentInfo.m_binInfoStart.m_crosslineNumber < maxSegmentInfo.m_binInfoStart.m_crosslineNumber) + { + maxSegmentInfo.m_binInfoStart = segmentInfo.m_binInfoStart; + } + if (segmentInfo.m_binInfoStop.m_crosslineNumber > maxSegmentInfo.m_binInfoStop.m_crosslineNumber) + { + maxSegmentInfo.m_binInfoStop = segmentInfo.m_binInfoStop; + } + } + } + } + } + + const bool + isAscending = pLongestList->front().m_primaryKey <= pLongestList->back().m_primaryKey; + + if (isAscending) + { + return { minSegmentInfo, maxSegmentInfo }; + } + return { maxSegmentInfo, minSegmentInfo }; + } + + // else it's not offset-sorted + return { std::cref(fileInfo.m_segmentInfoLists[orderedListIndices.front()].front()), std::cref(fileInfo.m_segmentInfoLists[orderedListIndices.back()].back()) }; +} + +int +BinInfoPrimaryKeyValue(PrimaryKeyValue primaryKey, const SEGYBinInfo& binInfo) +{ + if (primaryKey == PrimaryKeyValue::CrosslineNumber) + { + return binInfo.m_crosslineNumber; + } + return binInfo.m_inlineNumber; +} + +int +BinInfoSecondaryKeyValue(PrimaryKeyValue primaryKey, const SEGYBinInfo& binInfo) +{ + if (primaryKey == PrimaryKeyValue::CrosslineNumber) + { + return binInfo.m_inlineNumber; + } + return binInfo.m_crosslineNumber; +} + +double +ConvertDistance(SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, double distance) +{ + // convert SEGY distance to meters + if (segyMeasurementSystem == SEGY::BinaryHeader::MeasurementSystem::Feet) + { + return distance * METERS_PER_FOOT; + } + + // if measurement system field in 
header is Meters or Unknown then return the value unchanged + return distance; +} + +float +ConvertDistance(SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, float distance) +{ + return static_cast(ConvertDistance(segyMeasurementSystem, static_cast(distance))); +} + +double +ConvertDistanceInverse(SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, double distance) +{ + // convert meters to SEGY distance + if (segyMeasurementSystem == SEGY::BinaryHeader::MeasurementSystem::Feet) + { + return distance / METERS_PER_FOOT; + } + + // if measurement system field in header is Meters or Unknown then return the value unchanged + return distance; +} + +bool +createSEGYMetadata(DataProvider &dataProvider, SEGYFileInfo const &fileInfo, OpenVDS::MetadataContainer& metadataContainer, SEGY::BinaryHeader::MeasurementSystem &measurementSystem, OpenVDS::Error& error) +{ + std::vector textHeader(SEGY::TextualFileHeaderSize); + std::vector binaryHeader(SEGY::BinaryFileHeaderSize); + + // Read headers + bool success = dataProvider.Read(textHeader.data(), 0, SEGY::TextualFileHeaderSize, error) && + dataProvider.Read(binaryHeader.data(), SEGY::TextualFileHeaderSize, SEGY::BinaryFileHeaderSize, error); + + if (!success) return false; + + // Create metadata + metadataContainer.SetMetadataBLOB("SEGY", "TextHeader", textHeader); + + metadataContainer.SetMetadataBLOB("SEGY", "BinaryHeader", binaryHeader); + + metadataContainer.SetMetadataInt("SEGY", "Endianness", int(fileInfo.m_headerEndianness)); + metadataContainer.SetMetadataInt("SEGY", "DataSampleFormatCode", int(fileInfo.m_dataSampleFormatCode)); + + measurementSystem = SEGY::BinaryHeader::MeasurementSystem(SEGY::ReadFieldFromHeader(binaryHeader.data(), SEGY::BinaryHeader::MeasurementSystemHeaderField, fileInfo.m_headerEndianness)); + return success; +} + +void +createSurveyCoordinateSystemMetadata(SEGYFileInfo const& fileInfo, SEGY::BinaryHeader::MeasurementSystem segyMeasurementSystem, std::string const &crsWkt, 
OpenVDS::MetadataContainer& metadataContainer, PrimaryKeyValue primaryKey) +{ + if (fileInfo.m_segmentInfoLists.empty() && fileInfo.m_segmentInfoListsByOffset.empty()) return; + + if (fileInfo.Is2D()) return; + + double secondarySpacing[2] = { 0, 0 }; + + size_t + globalTotalSegments; + auto + orderedListIndices = getOrderedSegmentListIndices(fileInfo, globalTotalSegments); + + // Determine secondary spacing + int countedSecondarySpacings = 0; + + auto + secondaryUpdater = [&primaryKey, &countedSecondarySpacings, &secondarySpacing](const std::vector& segmentInfoList) + { + for (auto const& segmentInfo : segmentInfoList) + { + const auto + secondaryCount = BinInfoSecondaryKeyValue(primaryKey, segmentInfo.m_binInfoStop) - BinInfoSecondaryKeyValue(primaryKey, segmentInfo.m_binInfoStart); + + if (secondaryCount == 0 || BinInfoPrimaryKeyValue(primaryKey, segmentInfo.m_binInfoStart) != BinInfoPrimaryKeyValue(primaryKey, segmentInfo.m_binInfoStop)) continue; + + double segmentSecondarySpacing[3]; + + segmentSecondarySpacing[0] = (segmentInfo.m_binInfoStop.m_ensembleXCoordinate - segmentInfo.m_binInfoStart.m_ensembleXCoordinate) / secondaryCount; + segmentSecondarySpacing[1] = (segmentInfo.m_binInfoStop.m_ensembleYCoordinate - segmentInfo.m_binInfoStart.m_ensembleYCoordinate) / secondaryCount; + + secondarySpacing[0] += segmentSecondarySpacing[0]; + secondarySpacing[1] += segmentSecondarySpacing[1]; + + countedSecondarySpacings++; + } + }; + + for (size_t listIndex : orderedListIndices) + { + if (fileInfo.IsOffsetSorted()) + { + for (const auto& offsetSegmentMap : fileInfo.m_segmentInfoListsByOffset) + { + for (const auto& entry : offsetSegmentMap) + { + secondaryUpdater(entry.second); + } + } + } + else + { + secondaryUpdater(fileInfo.m_segmentInfoLists[listIndex]); + } + } + + if (countedSecondarySpacings > 0) + { + secondarySpacing[0] /= countedSecondarySpacings; + secondarySpacing[1] /= countedSecondarySpacings; + } + else + { + secondarySpacing[0] = 0; + 
secondarySpacing[1] = 1; + } + + // Determine primary spacing + + double primarySpacing[2] = { 0, 0 }; + + const auto + firstLastSegmentInfos = findFirstLastInlineSegmentInfos(fileInfo, orderedListIndices); + const SEGYSegmentInfo + & firstSegmentInfo = firstLastSegmentInfos.first, + & lastSegmentInfo = firstLastSegmentInfos.second; + + if (BinInfoPrimaryKeyValue(primaryKey, firstSegmentInfo.m_binInfoStart) != BinInfoPrimaryKeyValue(primaryKey, lastSegmentInfo.m_binInfoStart)) + { + int primaryNunberDelta = BinInfoPrimaryKeyValue(primaryKey, lastSegmentInfo.m_binInfoStart) - BinInfoPrimaryKeyValue(primaryKey, firstSegmentInfo.m_binInfoStart); + int secondaryNunberDelta = BinInfoSecondaryKeyValue(primaryKey, lastSegmentInfo.m_binInfoStart) - BinInfoSecondaryKeyValue(primaryKey, firstSegmentInfo.m_binInfoStart); + + double offset[2] = { secondarySpacing[0] * secondaryNunberDelta, + secondarySpacing[1] * secondaryNunberDelta }; + + primarySpacing[0] = (lastSegmentInfo.m_binInfoStart.m_ensembleXCoordinate - firstSegmentInfo.m_binInfoStart.m_ensembleXCoordinate - offset[0]) / primaryNunberDelta; + primarySpacing[1] = (lastSegmentInfo.m_binInfoStart.m_ensembleYCoordinate - firstSegmentInfo.m_binInfoStart.m_ensembleYCoordinate - offset[1]) / primaryNunberDelta; + } + else + { + if (fileInfo.Is2D()) + { + // Headwave convention is 1, 0 for 2D inline spacing + primarySpacing[0] = ConvertDistanceInverse(segyMeasurementSystem, 1.0); + primarySpacing[1] = ConvertDistanceInverse(segyMeasurementSystem, 0.0); + } + else + { + // make square voxels + primarySpacing[0] = secondarySpacing[1]; + primarySpacing[1] = -secondarySpacing[0]; + } + } + + double + inlineSpacing[2] = {}, + crosslineSpacing[2] = {}; + + if (primaryKey == PrimaryKeyValue::CrosslineNumber) + { + // map inline to secondary, crossline to primary + inlineSpacing[0] = secondarySpacing[0]; + inlineSpacing[1] = secondarySpacing[1]; + crosslineSpacing[0] = primarySpacing[0]; + crosslineSpacing[1] = primarySpacing[1]; 
+ } + else + { + // map inline to primary, crossline to secondary + inlineSpacing[0] = primarySpacing[0]; + inlineSpacing[1] = primarySpacing[1]; + crosslineSpacing[0] = secondarySpacing[0]; + crosslineSpacing[1] = secondarySpacing[1]; + } + + if (crosslineSpacing[0] == 0.0 && crosslineSpacing[1] == 1.0 && inlineSpacing[0] == 0.0) + { + // Deal with Headwave quirk: If crossline spacing is (0, 1) (i.e. only one crossline) and the inline spacing is (0, anything) then Headwave + // won't display sample data. In this case we'll change the crossline spacing to (-1, 0) (which seems to be how the Headwave SEGY importer + // handles this case). + crosslineSpacing[0] = -1; + crosslineSpacing[1] = 0; + } + + // Determine origin + double origin[2]; + + origin[0] = firstSegmentInfo.m_binInfoStart.m_ensembleXCoordinate; + origin[1] = firstSegmentInfo.m_binInfoStart.m_ensembleYCoordinate; + + origin[0] -= inlineSpacing[0] * firstSegmentInfo.m_binInfoStart.m_inlineNumber; + origin[1] -= inlineSpacing[1] * firstSegmentInfo.m_binInfoStart.m_inlineNumber; + + origin[0] -= crosslineSpacing[0] * firstSegmentInfo.m_binInfoStart.m_crosslineNumber; + origin[1] -= crosslineSpacing[1] * firstSegmentInfo.m_binInfoStart.m_crosslineNumber; + + // Set coordinate system + metadataContainer.SetMetadataDoubleVector2(LATTICE_CATEGORY, LATTICE_ORIGIN, OpenVDS::DoubleVector2(ConvertDistance(segyMeasurementSystem, origin[0]), ConvertDistance(segyMeasurementSystem, origin[1]))); + metadataContainer.SetMetadataDoubleVector2(LATTICE_CATEGORY, LATTICE_INLINE_SPACING, OpenVDS::DoubleVector2(ConvertDistance(segyMeasurementSystem, inlineSpacing[0]), ConvertDistance(segyMeasurementSystem, inlineSpacing[1]))); + metadataContainer.SetMetadataDoubleVector2(LATTICE_CATEGORY, LATTICE_CROSSLINE_SPACING, OpenVDS::DoubleVector2(ConvertDistance(segyMeasurementSystem, crosslineSpacing[0]), ConvertDistance(segyMeasurementSystem, crosslineSpacing[1]))); + + if(!crsWkt.empty()) + { + 
metadataContainer.SetMetadataString(LATTICE_CATEGORY, CRS_WKT, crsWkt); + } + if(segyMeasurementSystem == SEGY::BinaryHeader::MeasurementSystem::Meters) + { + metadataContainer.SetMetadataString(LATTICE_CATEGORY, LATTICE_UNIT, KNOWNMETADATA_UNIT_METER); + } + else if(segyMeasurementSystem == SEGY::BinaryHeader::MeasurementSystem::Feet) + { + metadataContainer.SetMetadataString(LATTICE_CATEGORY, LATTICE_UNIT, KNOWNMETADATA_UNIT_FOOT); + } +} + +///////////////////////////////////////////////////////////////////////////// bool createImportInformationMetadata(const std::vector &dataProviders, OpenVDS::MetadataContainer& metadataContainer, OpenVDS::Error &error) { @@ -1079,9 +1703,57 @@ createImportInformationMetadata(const std::vector &dataProviders, } bool -parseSEGYFileInfoFile(DataProvider &dataProvider, SEGYFileInfo& fileInfo, OpenVDS::Error &error) +create2DTraceInformationMetadata(const bool is2D, TraceInfo2DManager * pTraceInfo2DManager, OpenVDS::MetadataContainer& metadataContainer, OpenVDS::Error& error) { - int64_t fileSize = dataProvider.Size(error); + if (!is2D || pTraceInfo2DManager == nullptr || pTraceInfo2DManager->Count() == 0) + { + metadataContainer.ClearMetadata(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_TRACEPOSITIONS); + metadataContainer.ClearMetadata(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_TRACEVERTICALOFFSETS); + metadataContainer.ClearMetadata(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_ENERGYSOURCEPOINTNUMBERS); + metadataContainer.ClearMetadata(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_ENSEMBLENUMBERS); + return true; + } + + // create KNOWNMETADATA_TRACECOORDINATES metadata items: KNOWNMETADATA_TRACEPOSITIONS, KNOWNMETADATA_TRACEVERTICALOFFSETS, KNOWNMETADATA_ENERGYSOURCEPOINTNUMBERS, KNOWNMETADATA_ENSEMBLENUMBERS + + const auto + nItems = pTraceInfo2DManager->Count(); + + std::vector + tracePositions, + verticalOffsets; + std::vector + espNumbers, + ensembleNumbers; + + tracePositions.reserve(nItems * 2); + 
verticalOffsets.reserve(nItems); + espNumbers.reserve(nItems); + ensembleNumbers.reserve(nItems); + + for (int i = 0; i < nItems; ++i) + { + const auto + & item = pTraceInfo2DManager->Get(i); + tracePositions.push_back(item.x); + tracePositions.push_back(item.y); + verticalOffsets.push_back(item.startTime); + espNumbers.push_back(item.energySourcePointNumber); + ensembleNumbers.push_back(item.ensembleNumber); + } + + metadataContainer.SetMetadataBLOB(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_TRACEPOSITIONS, tracePositions); + metadataContainer.SetMetadataBLOB(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_TRACEVERTICALOFFSETS, verticalOffsets); + metadataContainer.SetMetadataBLOB(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_ENERGYSOURCEPOINTNUMBERS, espNumbers); + metadataContainer.SetMetadataBLOB(KNOWNMETADATA_TRACECOORDINATES, KNOWNMETADATA_ENSEMBLENUMBERS, ensembleNumbers); + + return true; +} + +bool +parseSEGYFileInfoFile(const DataProvider& dataProvider, SEGYFileInfo& fileInfo, const size_t fileIndex, const bool isValidateHeader, OpenVDS::Error& error) +{ + const int64_t fileSize = dataProvider.Size(error); if (error.code != 0) { @@ -1129,24 +1801,80 @@ parseSEGYFileInfoFile(DataProvider &dataProvider, SEGYFileInfo& fileInfo, OpenVD return false; } - fileInfo.m_persistentID = strtoull(jsonFileInfo["persistentID"].asCString(), nullptr, 16); - fileInfo.m_headerEndianness = EndiannessFromJson(jsonFileInfo["headerEndianness"]); - fileInfo.m_dataSampleFormatCode = SEGY::BinaryHeader::DataSampleFormatCode(jsonFileInfo["dataSampleFormatCode"].asInt()); - fileInfo.m_sampleCount = jsonFileInfo["sampleCount"].asInt(); - fileInfo.m_startTimeMilliseconds = jsonFileInfo["startTime"].asDouble(); - fileInfo.m_sampleIntervalMilliseconds = jsonFileInfo["sampleInterval"].asDouble(); - fileInfo.m_traceCounts.clear(); - fileInfo.m_traceCounts.push_back(jsonFileInfo["traceCount"].asInt64()); - fileInfo.m_primaryKey = HeaderFieldFromJson(jsonFileInfo["primaryKey"]); - 
fileInfo.m_secondaryKey = HeaderFieldFromJson(jsonFileInfo["secondaryKey"]); + if (isValidateHeader) + { + const bool + isValid = + fileInfo.m_persistentID == strtoull(jsonFileInfo["persistentID"].asCString(), nullptr, 16) && + fileInfo.m_headerEndianness == EndiannessFromJson(jsonFileInfo["headerEndianness"]) && + fileInfo.m_dataSampleFormatCode == SEGY::BinaryHeader::DataSampleFormatCode(jsonFileInfo["dataSampleFormatCode"].asInt()) && + fileInfo.m_sampleCount == jsonFileInfo["sampleCount"].asInt() && + fileInfo.m_sampleIntervalMilliseconds == jsonFileInfo["sampleInterval"].asDouble() && + fileInfo.m_primaryKey == HeaderFieldFromJson(jsonFileInfo["primaryKey"]) && + fileInfo.m_secondaryKey == HeaderFieldFromJson(jsonFileInfo["secondaryKey"]); + if (!isValid) + { + error.string = "SEGY header data in JSON scan file does not match existing SEGY header data"; + error.code = -1; + return false; + } + } + else + { + fileInfo.m_persistentID = strtoull(jsonFileInfo["persistentID"].asCString(), nullptr, 16); + fileInfo.m_headerEndianness = EndiannessFromJson(jsonFileInfo["headerEndianness"]); + fileInfo.m_dataSampleFormatCode = SEGY::BinaryHeader::DataSampleFormatCode(jsonFileInfo["dataSampleFormatCode"].asInt()); + fileInfo.m_sampleCount = jsonFileInfo["sampleCount"].asInt(); + fileInfo.m_startTimeMilliseconds = jsonFileInfo["startTime"].asDouble(); + fileInfo.m_sampleIntervalMilliseconds = jsonFileInfo["sampleInterval"].asDouble(); + fileInfo.m_traceCounts.clear(); + fileInfo.m_traceCounts.push_back(jsonFileInfo["traceCount"].asInt64()); + fileInfo.m_primaryKey = HeaderFieldFromJson(jsonFileInfo["primaryKey"]); + fileInfo.m_secondaryKey = HeaderFieldFromJson(jsonFileInfo["secondaryKey"]); + } - fileInfo.m_segmentInfoLists.clear(); - fileInfo.m_segmentInfoLists.emplace_back(); - auto& - segmentInfo = fileInfo.m_segmentInfoLists.back(); - for (Json::Value jsonSegmentInfo : jsonFileInfo["segmentInfo"]) + if (fileInfo.m_traceCounts.size() <= fileIndex) + { + 
fileInfo.m_traceCounts.resize(fileIndex + 1); + } + fileInfo.m_traceCounts[fileIndex] = jsonFileInfo["traceCount"].asInt64(); + + if (fileInfo.m_segyType == SEGY::SEGYType::PrestackOffsetSorted) + { + if (fileInfo.m_segmentInfoListsByOffset.size() <= fileIndex) + { + fileInfo.m_segmentInfoListsByOffset.resize(fileIndex + 1); + } + auto& + offsetMap = fileInfo.m_segmentInfoListsByOffset[fileIndex]; + offsetMap.clear(); + + const auto& + jsonSegmentInfo = jsonFileInfo["segmentInfo"]; + for (Json::ValueConstIterator iter = jsonSegmentInfo.begin(); iter != jsonSegmentInfo.end(); ++iter) + { + const auto + offsetValue = std::stoi(iter.key().asString()); + auto& + segmentInfoList = offsetMap[offsetValue]; + for (const auto& jsonSegmentInfo : *iter) + { + segmentInfoList.push_back(segmentInfoFromJson(jsonSegmentInfo)); + } + } + } + else { - segmentInfo.push_back(segmentInfoFromJson(jsonSegmentInfo)); + if (fileInfo.m_segmentInfoLists.size() <= fileIndex) + { + fileInfo.m_segmentInfoLists.resize(fileIndex + 1); + } + auto& + segmentInfo = fileInfo.m_segmentInfoLists[fileIndex]; + for (Json::Value jsonSegmentInfo : jsonFileInfo["segmentInfo"]) + { + segmentInfo.push_back(segmentInfoFromJson(jsonSegmentInfo)); + } } } catch (Json::Exception &e) @@ -1178,7 +1906,24 @@ createAxisDescriptors(SEGYFileInfo const& fileInfo, SEGY::SampleUnits sampleUnit axisDescriptors.emplace_back(fileInfo.m_sampleCount, KNOWNMETADATA_SURVEYCOORDINATE_INLINECROSSLINE_AXISNAME_SAMPLE, sampleUnit, (float)fileInfo.m_startTimeMilliseconds, (float)fileInfo.m_startTimeMilliseconds + (fileInfo.m_sampleCount - 1) * (float)fileInfo.m_sampleIntervalMilliseconds); - if (fileInfo.IsUnbinned()) + if (fileInfo.Is2D()) + { + assert(fileInfo.m_segmentInfoLists.size() == 1 && fileInfo.m_segmentInfoLists[0].size() == 1); + + if (fold > 1) + { + axisDescriptors.emplace_back(fold, VDS_DIMENSION_TRACE_NAME(VDS_DIMENSION_TRACE_SORT_OFFSET), KNOWNMETADATA_UNIT_UNITLESS, 1.0f, static_cast(fold)); + } + + const auto + & 
segmentInfo = fileInfo.m_segmentInfoLists[0][0]; + + // the secondary key axis will be 1..N + const auto + secondaryKeyCount = segmentInfo.m_binInfoStop.m_crosslineNumber - segmentInfo.m_binInfoStart.m_crosslineNumber + 1; + axisDescriptors.emplace_back(secondaryKeyCount, VDS_DIMENSION_CDP_NAME, KNOWNMETADATA_UNIT_UNITLESS, 1.0f, static_cast(secondaryKeyCount)); + } + else if (fileInfo.IsUnbinned()) { int maxTraceNumber = 0; @@ -1229,24 +1974,58 @@ createAxisDescriptors(SEGYFileInfo const& fileInfo, SEGY::SampleUnits sampleUnit axisDescriptors.emplace_back(fold, VDS_DIMENSION_TRACE_NAME(VDS_DIMENSION_TRACE_SORT_OFFSET), KNOWNMETADATA_UNIT_UNITLESS, 1.0f, static_cast(fold)); } - int minInline = fileInfo.m_segmentInfoLists[0][0].m_binInfoStart.m_inlineNumber, - minCrossline = fileInfo.m_segmentInfoLists[0][0].m_binInfoStart.m_crosslineNumber, - maxInline = minInline, - maxCrossline = minCrossline; + int + minInline, + minCrossline, + maxInline, + maxCrossline; - for (const auto& segmentInfoList : fileInfo.m_segmentInfoLists) + auto updateMinMaxes = [&minInline, &minCrossline, &maxInline, &maxCrossline](const SEGYSegmentInfo& segmentInfo) { - for (const auto& segmentInfo : segmentInfoList) + minInline = std::min(minInline, segmentInfo.m_binInfoStart.m_inlineNumber); + minInline = std::min(minInline, segmentInfo.m_binInfoStop.m_inlineNumber); + maxInline = std::max(maxInline, segmentInfo.m_binInfoStart.m_inlineNumber); + maxInline = std::max(maxInline, segmentInfo.m_binInfoStop.m_inlineNumber); + + minCrossline = std::min(minCrossline, segmentInfo.m_binInfoStart.m_crosslineNumber); + minCrossline = std::min(minCrossline, segmentInfo.m_binInfoStop.m_crosslineNumber); + maxCrossline = std::max(maxCrossline, segmentInfo.m_binInfoStart.m_crosslineNumber); + maxCrossline = std::max(maxCrossline, segmentInfo.m_binInfoStop.m_crosslineNumber); + }; + + if (fileInfo.IsOffsetSorted()) + { + const auto& + firstSegment = 
fileInfo.m_segmentInfoListsByOffset.front().begin()->second.front(); + minInline = firstSegment.m_binInfoStart.m_inlineNumber; + minCrossline = firstSegment.m_binInfoStart.m_crosslineNumber; + maxInline = minInline; + maxCrossline = minCrossline; + + for (const auto& offsetSegments : fileInfo.m_segmentInfoListsByOffset) { - minInline = std::min(minInline, segmentInfo.m_binInfoStart.m_inlineNumber); - minInline = std::min(minInline, segmentInfo.m_binInfoStop.m_inlineNumber); - maxInline = std::max(maxInline, segmentInfo.m_binInfoStart.m_inlineNumber); - maxInline = std::max(maxInline, segmentInfo.m_binInfoStop.m_inlineNumber); + for (const auto& entry : offsetSegments) + { + for (const auto& segmentInfo : entry.second) + { + updateMinMaxes(segmentInfo); + } + } + } + } + else + { + minInline = fileInfo.m_segmentInfoLists[0][0].m_binInfoStart.m_inlineNumber; + minCrossline = fileInfo.m_segmentInfoLists[0][0].m_binInfoStart.m_crosslineNumber; + maxInline = minInline; + maxCrossline = minCrossline; - minCrossline = std::min(minCrossline, segmentInfo.m_binInfoStart.m_crosslineNumber); - minCrossline = std::min(minCrossline, segmentInfo.m_binInfoStop.m_crosslineNumber); - maxCrossline = std::max(maxCrossline, segmentInfo.m_binInfoStart.m_crosslineNumber); - maxCrossline = std::max(maxCrossline, segmentInfo.m_binInfoStop.m_crosslineNumber); + for (const auto& segmentInfoList : fileInfo.m_segmentInfoLists) + { + for (const auto& segmentInfo : segmentInfoList) + { + updateMinMaxes(segmentInfo); + } } } @@ -1267,22 +2046,22 @@ createAxisDescriptors(SEGYFileInfo const& fileInfo, SEGY::SampleUnits sampleUnit struct OffsetChannelInfo { - int offsetStart; - int offsetEnd; - int offsetStep; + float offsetStart; + float offsetEnd; + float offsetStep; bool hasOffset; - OffsetChannelInfo(bool has, int start, int end, int step) : offsetStart(start), offsetEnd(end), offsetStep(step), hasOffset(has) {} + OffsetChannelInfo(bool has, float start, float end, float step) : 
offsetStart(start), offsetEnd(end), offsetStep(step), hasOffset(has) {} }; std::vector -createChannelDescriptors(SEGYFileInfo const& fileInfo, OpenVDS::FloatRange const& valueRange, const OffsetChannelInfo& offsetInfo) +createChannelDescriptors(SEGYFileInfo const& fileInfo, OpenVDS::FloatRange const& valueRange, const OffsetChannelInfo& offsetInfo, const std::string& attributeName, const std::string& attributeUnit, bool isAzimuthEnabled, bool isMuteEnabled) { std::vector channelDescriptors; // Primary channel - channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_R32, OpenVDS::VolumeDataChannelDescriptor::Components_1, AMPLITUDE_ATTRIBUTE_NAME, "", valueRange.Min, valueRange.Max); + channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_R32, OpenVDS::VolumeDataChannelDescriptor::Components_1, attributeName.c_str(), attributeUnit.c_str(), valueRange.Min, valueRange.Max); // Trace defined flag channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_U8, OpenVDS::VolumeDataChannelDescriptor::Components_1, "Trace", "", 0.0f, 1.0f, OpenVDS::VolumeDataMapping::PerTrace, OpenVDS::VolumeDataChannelDescriptor::DiscreteData); @@ -1293,11 +2072,21 @@ createChannelDescriptors(SEGYFileInfo const& fileInfo, OpenVDS::FloatRange const if (offsetInfo.hasOffset) { // offset channel - channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_R32, OpenVDS::VolumeDataChannelDescriptor::Components_1, "Offset", KNOWNMETADATA_UNIT_METER, static_cast(offsetInfo.offsetStart), static_cast(offsetInfo.offsetEnd),OpenVDS::VolumeDataMapping::PerTrace, OpenVDS::VolumeDataChannelDescriptor::NoLossyCompression); + channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_R32, OpenVDS::VolumeDataChannelDescriptor::Components_1, "Offset", KNOWNMETADATA_UNIT_METER, offsetInfo.offsetStart, offsetInfo.offsetEnd, OpenVDS::VolumeDataMapping::PerTrace, 
OpenVDS::VolumeDataChannelDescriptor::NoLossyCompression); // TODO channels for other gather types - "Angle", "Vrms", "Frequency" } + if (isAzimuthEnabled) + { + channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_R32, OpenVDS::VolumeDataChannelDescriptor::Components_1, "Azimuth", "deg", FLT_MIN, FLT_MAX, OpenVDS::VolumeDataMapping::PerTrace, OpenVDS::VolumeDataChannelDescriptor::NoLossyCompression); + } + + if (isMuteEnabled) + { + channelDescriptors.emplace_back(OpenVDS::VolumeDataChannelDescriptor::Format_U16, OpenVDS::VolumeDataChannelDescriptor::Components_2, "Mute", KNOWNMETADATA_UNIT_MILLISECOND, 0.0f, 65535.0f, OpenVDS::VolumeDataMapping::PerTrace, OpenVDS::VolumeDataChannelDescriptor::NoLossyCompression); + } + return channelDescriptors; } @@ -1404,19 +2193,80 @@ findFirstTrace(TraceDataManager& traceDataManager, int primaryKey, int secondary } int64_t -findFirstTrace(TraceDataManager& traceDataManager, const SEGYSegmentInfo& segment, int secondaryKey, SEGYFileInfo const& fileInfo, OpenVDS::Error error) +findFirstTrace(TraceDataManager& traceDataManager, const SEGYSegmentInfo& segment, int secondaryKey, SEGYFileInfo const& fileInfo, PrimaryKeyValue primaryKey, OpenVDS::Error error) { - // TODO do the info start and stop reflect the start and stop for reverse-sorted inlines, or are they simply min and max? 
- const int secondaryStart = segment.m_binInfoStart.m_crosslineNumber; - const int secondaryStop = segment.m_binInfoStop.m_crosslineNumber; + const int secondaryStart = BinInfoSecondaryKeyValue(primaryKey, segment.m_binInfoStart); + const int secondaryStop = BinInfoSecondaryKeyValue(primaryKey, segment.m_binInfoStop); return findFirstTrace(traceDataManager, segment.m_primaryKey, secondaryKey, fileInfo, segment.m_traceStart, segment.m_traceStop, secondaryStart, secondaryStop, error); } int -SecondaryKeyDimension(const SEGYFileInfo& fileInfo) +VoxelIndexToDataIndex(const SEGYFileInfo& fileInfo, const int primaryIndex, const int secondaryIndex, const int tertiaryIndex, const int chunkMin[6], const int pitch[6]) { + // The given voxel position consists of the indices for sample, trace (offset), secondary key, primary key. Which of those indices is actually used will depend on the SEGY type. + + // In all cases we assume the sample index is 0, and we won't use it when computing the data index + + if (fileInfo.Is2D()) + { + if (fileInfo.HasGatherOffset()) + { + // use indices for trace, secondary key + return (secondaryIndex - chunkMin[2]) * pitch[2] + (tertiaryIndex - chunkMin[1]) * pitch[1]; + } + // else use index for secondary key + return (secondaryIndex - chunkMin[1]) * pitch[1]; + } + if (fileInfo.Is4D()) + { + // use all the indices + return (primaryIndex - chunkMin[3]) * pitch[3] + (secondaryIndex - chunkMin[2]) * pitch[2] + (tertiaryIndex - chunkMin[1]) * pitch[1]; + } + + // else it's 3D poststack; use the indices for secondary key, primary key + return (primaryIndex - chunkMin[2]) * pitch[2] + (secondaryIndex - chunkMin[1]) * pitch[1]; +} + +int +TraceIndex2DToEnsembleNumber(TraceInfo2DManager * pTraceInfo2DManager, int traceIndex, OpenVDS::Error & error) +{ + assert(pTraceInfo2DManager != nullptr); + + error = {}; + + if (pTraceInfo2DManager == nullptr) + { + error.code = 1; + error.string = "TraceIndex2DToEnsembleNumber: 2D trace information is missing"; + 
return 0; + } + + assert(traceIndex >= 0); + assert(traceIndex < pTraceInfo2DManager->Count()); + if (traceIndex < 0 || traceIndex >= pTraceInfo2DManager->Count()) + { + error.code = 1; + error.string = "TraceIndex2DToEnsembleNumber: Requested trace index is missing from 2D trace info"; + return 0; + } + + return pTraceInfo2DManager->Get(traceIndex).ensembleNumber; +} + +int +SecondaryKeyDimension(const SEGYFileInfo& fileInfo, PrimaryKeyValue primaryKey) +{ + if ((fileInfo.m_segyType == SEGY::SEGYType::Prestack && primaryKey != PrimaryKeyValue::CrosslineNumber) || fileInfo.m_segyType == SEGY::SEGYType::Prestack2D || fileInfo.m_segyType == SEGY::SEGYType::PrestackOffsetSorted) + { + return 2; + } + if (fileInfo.m_segyType == SEGY::SEGYType::Prestack && primaryKey == PrimaryKeyValue::CrosslineNumber) + { + return 3; + } + if (fileInfo.m_segyType == SEGY::SEGYType::Poststack && primaryKey == PrimaryKeyValue::CrosslineNumber) { return 2; } @@ -1424,14 +2274,180 @@ SecondaryKeyDimension(const SEGYFileInfo& fileInfo) } int -PrimaryKeyDimension(const SEGYFileInfo& fileInfo) +PrimaryKeyDimension(const SEGYFileInfo& fileInfo, PrimaryKeyValue primaryKey) { - if (fileInfo.Is4D()) + if (fileInfo.Is4D() || fileInfo.m_segyType == SEGY::SEGYType::Prestack2D) { + // Prestack2D doesn't really have a primary key dimension, but we'll use the same one as for Prestack as sort of a placeholder. 
+ if (primaryKey == PrimaryKeyValue::CrosslineNumber) + { + return 2; + } return 3; } + if (primaryKey == PrimaryKeyValue::CrosslineNumber) + { + return 1; + } return 2; } + +int +findChannelDescriptorIndex(const std::string& channelName, const std::vector& channelDescriptors) +{ + for (int index = 0; index < static_cast(channelDescriptors.size()); ++index) + { + if (channelName == channelDescriptors[index].GetName()) + { + return index; + } + } + + // not found + return -1; +} + +class GatherSpacing +{ +public: + GatherSpacing(int64_t firstTrace) : m_isRespace(false), m_firstTrace(firstTrace) {} + + GatherSpacing(int64_t firstTrace, std::map traceIndices) : m_isRespace(true), m_firstTrace(firstTrace), m_traceIndices(std::move(traceIndices)) {} + + int GetTertiaryIndex(int64_t traceNumber) + { + if (m_isRespace) + { + assert(traceNumber >= m_firstTrace && traceNumber < m_firstTrace + static_cast(m_traceIndices.size())); + return m_traceIndices[traceNumber]; + } + + assert(traceNumber >= m_firstTrace); + return static_cast(traceNumber - m_firstTrace); + } + +private: + bool m_isRespace; + int64_t m_firstTrace; + std::map + m_traceIndices; +}; + +std::unique_ptr +CalculateGatherSpacing(const SEGYFileInfo& fileInfo, const int fold, const std::vector& globalOffsetValues, TraceDataManager& traceDataManager, TraceSpacingByOffset traceSpacingByOffset, int64_t firstTrace, bool jsonOutput) +{ + if (fileInfo.Is4D() && traceSpacingByOffset != TraceSpacingByOffset::Off) + { + OpenVDS::Error + error; + const char + * header = traceDataManager.getTraceData(firstTrace, error); + if (error.code != 0) + { + return std::unique_ptr(new GatherSpacing(firstTrace)); + } + + const auto + primaryKey = SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness), + secondaryKey = SEGY::ReadFieldFromHeader(header, fileInfo.m_secondaryKey, fileInfo.m_headerEndianness); + + std::vector + gatherOffsets; + gatherOffsets.reserve(fold); + 
gatherOffsets.push_back(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness)); + + // read traces and stuff offset into a vector while primaryKey/secondaryKey match + for (int64_t trace = firstTrace + 1; trace < traceDataManager.fileTraceCount() && trace < firstTrace + fold; ++trace) + { + header = traceDataManager.getTraceData(trace, error); + if (error.code != 0) + { + break; + } + const auto + testPrimary = SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness), + testSecondary = SEGY::ReadFieldFromHeader(header, fileInfo.m_secondaryKey, fileInfo.m_headerEndianness); + if (testPrimary != primaryKey || testSecondary != secondaryKey) + { + break; + } + + gatherOffsets.push_back(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness)); + } + + // apply respace algorithm + int + gatherIndex = 0, + offsetIndex = 0, + tertiaryIndex = 0; + std::map + traceIndices; + auto + hasRoom = [&fold, &tertiaryIndex, &gatherOffsets, &gatherIndex]() { return (fold - tertiaryIndex) - (gatherOffsets.size() - gatherIndex) > 0; }; + auto + offsetAtIndex = [&globalOffsetValues](int index) { return globalOffsetValues[index]; }; + + // skip tertiary indices before the first trace's offset + while (hasRoom() && offsetIndex < fold && offsetAtIndex(offsetIndex) != gatherOffsets[0]) + { + ++offsetIndex; + ++tertiaryIndex; + } + + // map traces to tertiary indices while we have room for dead traces and we have more input traces + while (hasRoom() && gatherIndex < static_cast(gatherOffsets.size())) + { + // process all the traces whose offset matches the current offset + const auto + currentOffset = offsetAtIndex(offsetIndex); + while (hasRoom() && gatherIndex < static_cast(gatherOffsets.size()) && gatherOffsets[gatherIndex] == currentOffset) + { + traceIndices[firstTrace + gatherIndex] = tertiaryIndex; + ++gatherIndex; + ++tertiaryIndex; + } + + // advance to the next offset + 
++offsetIndex; + + // try to advance to tertiary index so that the next trace's offset is placed near-ish to where it might be expected + if (gatherIndex < static_cast(gatherOffsets.size()) && tertiaryIndex <= offsetIndex) + { + while (hasRoom() && gatherOffsets[gatherIndex] != offsetAtIndex(offsetIndex)) + { + ++tertiaryIndex; + ++offsetIndex; + + // hasRoom() will effectively take care of bounds-checking tertiaryIndex, but we need to explicitly check offsetIndex + if (offsetIndex >= fold) + { + // if the trace's offset doesn't match any expected offset values then this is unexpected and may indicate a data problem + const auto + warnString = fmt::format("Warning: Unknown trace header Offset value {} found at primary key {}, secondary key {}. This may indicate corrupt data or an incorrect header format.", gatherOffsets[gatherIndex], primaryKey, secondaryKey); + OpenVDS::printWarning(jsonOutput, "Data", warnString); + + // return a no-respacing GatherSpacing + return std::unique_ptr(new GatherSpacing(firstTrace)); + } + } + } + } + + // map remaining traces to remaining indices + while (gatherIndex < static_cast(gatherOffsets.size())) + { + traceIndices[firstTrace + gatherIndex] = tertiaryIndex; + ++gatherIndex; + ++tertiaryIndex; + } + + return std::unique_ptr(new GatherSpacing(firstTrace, traceIndices)); + } + + // else return a GatherSpacing that doesn't alter the spacing + return std::unique_ptr(new GatherSpacing(firstTrace)); +} + int main(int argc, char* argv[]) { @@ -1441,13 +2457,13 @@ main(int argc, char* argv[]) bool is_tty = isatty(fileno(stdout)); #endif //auto start_time = std::chrono::high_resolution_clock::now(); - cxxopts::Options options("SEGYImport", "SEGYImport - A tool to scan and import a SEG-Y file to a volume data store (VDS)\n\nUse -H or see online documentation for connection string paramters:\nhttp://osdu.pages.community.opengroup.org/platform/domain-data-mgmt-services/seismic/open-vds/connection.html\n"); + cxxopts::Options 
options("SEGYImport", "SEGYImport - A tool to scan and import a SEG-Y file to a volume data store (VDS)\n\nUse -H or see online documentation for connection string parameters:\nhttp://osdu.pages.community.opengroup.org/platform/domain-data-mgmt-services/seismic/open-vds/connection.html\n"); options.positional_help(""); std::string headerFormatFileName; std::vector headerFields; - std::string primaryKey = "InlineNumber"; - std::string secondaryKey = "CrosslineNumber"; + std::string primaryKey; + std::string secondaryKey; std::string sampleUnit; std::string crsWkt; double scale = 0; @@ -1457,7 +2473,7 @@ main(int argc, char* argv[]) double sampleStart = 0; bool littleEndian = false; bool scan = false; - std::string fileInfoFileName; + std::vector fileInfoFileNames; int brickSize = 64; int margin = 0; bool force = false; @@ -1472,26 +2488,52 @@ main(int argc, char* argv[]) bool uniqueID = false; bool disablePersistentID = false; bool prestack = false; - bool traceOrderByOffset = true; + bool is2D = false; + bool isOffsetSorted = false; + bool isOffsetSortedDupeKeyWarned = false; bool jsonOutput = false; bool help = false; bool helpConnection = false; bool version = false; + std::string attributeName = AMPLITUDE_ATTRIBUTE_NAME; + std::string attributeUnit; + bool isMutes = false; + bool isAzimuth = false; + SEGY::AzimuthType azimuthType = SEGY::AzimuthType::Azimuth; + SEGY::AzimuthUnits azimuthUnit = SEGY::AzimuthUnits::Degrees; + float azimuthScaleFactor = 1.0f; + std::string azimuthTypeString; + std::string azimuthUnitString; + + TraceSpacingByOffset traceSpacingByOffset = TraceSpacingByOffset::Auto; + std::string traceSpacingByOffsetString; + bool isDisableCrosslineReordering = false; + + PrimaryKeyValue primaryKeyValue = PrimaryKeyValue::InlineNumber; + + const std::string supportedAzimuthTypes("Azimuth (from trace header field) (default), OffsetXY (computed from OffsetX and OffsetY header fields)"); + const std::string supportedAzimuthUnits("Radians, Degrees 
(default)"); + const std::string supportedTraceSpacingTypes("Off, On, Auto (default)"); + + // default key names used if not supplied by user + std::string defaultPrimaryKey = "InlineNumber"; + std::string defaultSecondaryKey = "CrosslineNumber"; + std::string supportedCompressionMethods = "None"; - if(OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::Wavelet)) supportedCompressionMethods += ", Wavelet"; - if(OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::RLE)) supportedCompressionMethods += ", RLE"; - if(OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::Zip)) supportedCompressionMethods += ", Zip"; - if(OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::WaveletNormalizeBlock)) supportedCompressionMethods += ", WaveletNormalizeBlock"; - if(OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::WaveletLossless)) supportedCompressionMethods += ", WaveletLossless"; - if(OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::WaveletNormalizeBlockLossless)) supportedCompressionMethods += ", WaveletNormalizeBlockLossless"; + if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::Wavelet)) supportedCompressionMethods += ", Wavelet"; + if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::RLE)) supportedCompressionMethods += ", RLE"; + if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::Zip)) supportedCompressionMethods += ", Zip"; + if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::WaveletNormalizeBlock)) supportedCompressionMethods += ", WaveletNormalizeBlock"; + if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::WaveletLossless)) supportedCompressionMethods += ", WaveletLossless"; + if (OpenVDS::IsCompressionMethodSupported(OpenVDS::CompressionMethod::WaveletNormalizeBlockLossless)) supportedCompressionMethods += ", WaveletNormalizeBlockLossless"; std::vector fileNames; options.add_option("", "", 
"header-format", "A JSON file defining the header format for the input SEG-Y file. The expected format is a dictonary of strings (field names) to pairs (byte position, field width) where field width can be \"TwoByte\" or \"FourByte\". Additionally, an \"Endianness\" key can be specified as \"BigEndian\" or \"LittleEndian\".", cxxopts::value(headerFormatFileName), ""); options.add_option("", "", "header-field", "A single definition of a header field. The expected format is a \"fieldname=offset:width\" where the \":width\" is optional. Its also possible to specify range: \"fieldname=begin-end\". Multiple header-fields is specified by providing multiple --header-field arguments.", cxxopts::value>(headerFields), "header_name=offset:width"); - options.add_option("", "p", "primary-key", "The name of the trace header field to use as the primary key.", cxxopts::value(primaryKey)->default_value("Inline"), ""); - options.add_option("", "s", "secondary-key", "The name of the trace header field to use as the secondary key.", cxxopts::value(secondaryKey)->default_value("Crossline"), ""); + options.add_option("", "p", "primary-key", "The name of the trace header field to use as the primary key.", cxxopts::value(primaryKey), ""); + options.add_option("", "s", "secondary-key", "The name of the trace header field to use as the secondary key.", cxxopts::value(secondaryKey), ""); options.add_option("", "", "prestack", "Import binned prestack data (PSTM/PSDM gathers).", cxxopts::value(prestack), ""); options.add_option("", "", "scale", "If a scale override (floating point) is given, it is used to scale the coordinates in the header instead of determining the scale factor from the coordinate scale trace header field.", cxxopts::value(scale), ""); options.add_option("", "", "sample-unit", "A sample unit of 'ms' is used for datasets in the time domain (default), while a sample unit of 'm' or 'ft' is used for datasets in the depth domain", cxxopts::value(sampleUnit), ""); @@ -1499,9 
+2541,9 @@ main(int argc, char* argv[]) options.add_option("", "", "crs-wkt", "A coordinate reference system in well-known text format can optionally be provided", cxxopts::value(crsWkt), ""); options.add_option("", "l", "little-endian", "Force little-endian trace headers.", cxxopts::value(littleEndian), ""); options.add_option("", "", "scan", "Generate a JSON file containing information about the input SEG-Y file.", cxxopts::value(scan), ""); - options.add_option("", "i", "file-info", "A JSON file (generated by the --scan option) containing information about the input SEG-Y file.", cxxopts::value(fileInfoFileName), ""); + options.add_option("", "i", "file-info", "A JSON file (generated by the --scan option) containing information about an input SEG-Y file.", cxxopts::value>(fileInfoFileNames), ""); options.add_option("", "b", "brick-size", "The brick size for the volume data store.", cxxopts::value(brickSize), ""); - options.add_option("", "", "margin", "The margin size (overlap) of the bricks.", cxxopts::value(margin), ""); + options.add_option("", "", "margin", "The margin size (overlap) of the bricks.", cxxopts::value(margin), ""); options.add_option("", "f", "force", "Continue on upload error.", cxxopts::value(force), ""); options.add_option("", "", "ignore-warnings", "Ignore warnings about import parameters.", cxxopts::value(ignoreWarnings), ""); options.add_option("", "", "compression-method", std::string("Compression method. 
Supported compression methods are: ") + supportedCompressionMethods + ".", cxxopts::value(compressionMethodString), ""); @@ -1515,7 +2557,17 @@ main(int argc, char* argv[]) options.add_option("", "", "uniqueID", "Generate a new globally unique ID when scanning the input SEG-Y file.", cxxopts::value(uniqueID), ""); options.add_option("", "", "disable-persistentID", "Disable the persistentID usage, placing the VDS directly into the url location.", cxxopts::value(disablePersistentID), ""); options.add_option("", "", "json-output", "Enable json output.", cxxopts::value(jsonOutput), ""); - // TODO add option for turning off traceOrderByOffset + options.add_option("", "", "attribute-name", "The name of the primary VDS channel. The name may be Amplitude (default), Attribute, Depth, Probability, Time, Vavg, Vint, or Vrms", cxxopts::value(attributeName)->default_value(AMPLITUDE_ATTRIBUTE_NAME), ""); + options.add_option("", "", "attribute-unit", "The units of the primary VDS channel. The unit name may be blank (default), ft, ft/s, Hz, m, m/s, ms, or s", cxxopts::value(attributeUnit), ""); + options.add_option("", "", "2d", "Import 2D data.", cxxopts::value(is2D), ""); + options.add_option("", "", "offset-sorted", "Import prestack data sorted by trace header Offset value.", cxxopts::value(isOffsetSorted), ""); + options.add_option("", "", "mute", "Enable Mutes channel in output VDS.", cxxopts::value(isMutes), ""); + options.add_option("", "", "azimuth", "Enable Azimuth channel in output VDS.", cxxopts::value(isAzimuth), ""); + options.add_option("", "", "azimuth-type", std::string("Azimuth type. Supported azimuth types are: ") + supportedAzimuthTypes + ".", cxxopts::value(azimuthTypeString), ""); + options.add_option("", "", "azimuth-unit", std::string("Azimuth unit. Supported azimuth units are: ") + supportedAzimuthUnits + ".", cxxopts::value(azimuthUnitString), ""); + options.add_option("", "", "azimuth-scale", "Azimuth scale factor. 
Trace header field Azimuth values will be multiplied by this factor.", cxxopts::value(azimuthScaleFactor), ""); + options.add_option("", "", "respace-gathers", std::string("Respace traces in prestack gathers by Offset trace header field. Supported options are: ") + supportedTraceSpacingTypes + ".", cxxopts::value(traceSpacingByOffsetString), ""); + options.add_option("", "", "disable-crossline-input-reorder", "Disable VDS chunk reordering for crossline-sorted SEGY. This option will probably reduce performance.", cxxopts::value(isDisableCrosslineReordering), ""); options.add_option("", "h", "help", "Print this help information", cxxopts::value(help), ""); options.add_option("", "H", "help-connection", "Print help information about the connection string", cxxopts::value(helpConnection), ""); @@ -1537,12 +2589,18 @@ main(int argc, char* argv[]) overrideBrickSize = result.count("brick-size") != 0; overrideMargin = result.count("margin") != 0; } - catch (cxxopts::OptionParseException &e) + catch (cxxopts::OptionParseException& e) { OpenVDS::printError(jsonOutput, "Args", e.what()); return EXIT_FAILURE; } + if (!fileInfoFileNames.empty() && fileInfoFileNames.size() != fileNames.size()) + { + OpenVDS::printError(jsonOutput, "Args", "Different number of input SEG-Y file names and file-info file names. 
Use multiple --file-info arguments to specify a file-info file for each input SEG-Y file."); + return EXIT_FAILURE; + } + if (help) { OpenVDS::printInfo(jsonOutput, "Args", options.help()); @@ -1565,47 +2623,90 @@ main(int argc, char* argv[]) std::transform(compressionMethodString.begin(), compressionMethodString.end(), compressionMethodString.begin(), asciitolower); - if(compressionMethodString.empty()) compressionMethod = OpenVDS::CompressionMethod::None; - else if(compressionMethodString == "none") compressionMethod = OpenVDS::CompressionMethod::None; - else if(compressionMethodString == "wavelet") compressionMethod = OpenVDS::CompressionMethod::Wavelet; - else if(compressionMethodString == "rle") compressionMethod = OpenVDS::CompressionMethod::RLE; - else if(compressionMethodString == "zip") compressionMethod = OpenVDS::CompressionMethod::Zip; - else if(compressionMethodString == "waveletnormalizeblock") compressionMethod = OpenVDS::CompressionMethod::WaveletNormalizeBlock; - else if(compressionMethodString == "waveletlossless") compressionMethod = OpenVDS::CompressionMethod::WaveletLossless; - else if(compressionMethodString == "waveletnormalizeblocklossless") compressionMethod = OpenVDS::CompressionMethod::WaveletNormalizeBlockLossless; + if (compressionMethodString.empty()) compressionMethod = OpenVDS::CompressionMethod::None; + else if (compressionMethodString == "none") compressionMethod = OpenVDS::CompressionMethod::None; + else if (compressionMethodString == "wavelet") compressionMethod = OpenVDS::CompressionMethod::Wavelet; + else if (compressionMethodString == "rle") compressionMethod = OpenVDS::CompressionMethod::RLE; + else if (compressionMethodString == "zip") compressionMethod = OpenVDS::CompressionMethod::Zip; + else if (compressionMethodString == "waveletnormalizeblock") compressionMethod = OpenVDS::CompressionMethod::WaveletNormalizeBlock; + else if (compressionMethodString == "waveletlossless") compressionMethod = 
OpenVDS::CompressionMethod::WaveletLossless; + else if (compressionMethodString == "waveletnormalizeblocklossless") compressionMethod = OpenVDS::CompressionMethod::WaveletNormalizeBlockLossless; else { OpenVDS::printError(jsonOutput, "CompressionMethod", "Unknown compression method", compressionMethodString); return EXIT_FAILURE; } - if(!OpenVDS::IsCompressionMethodSupported(compressionMethod)) + if (!OpenVDS::IsCompressionMethodSupported(compressionMethod)) { OpenVDS::printError(jsonOutput, "CompressionMethod", "Unsupported compression method", compressionMethodString); return EXIT_FAILURE; } - if(compressionMethod == OpenVDS::CompressionMethod::Wavelet || compressionMethod == OpenVDS::CompressionMethod::WaveletNormalizeBlock || compressionMethod == OpenVDS::CompressionMethod::WaveletLossless || compressionMethod == OpenVDS::CompressionMethod::WaveletNormalizeBlockLossless) + if (compressionMethod == OpenVDS::CompressionMethod::Wavelet || compressionMethod == OpenVDS::CompressionMethod::WaveletNormalizeBlock || compressionMethod == OpenVDS::CompressionMethod::WaveletLossless || compressionMethod == OpenVDS::CompressionMethod::WaveletNormalizeBlockLossless) { - if(!overrideBrickSize) + if (!overrideBrickSize) { brickSize = 128; } - if(!overrideMargin) + if (!overrideMargin) { margin = 4; } } + if (is2D) + { + defaultSecondaryKey = "EnsembleNumber"; + } + + if (primaryKey.empty()) + { + primaryKey = defaultPrimaryKey; + } + if (secondaryKey.empty()) + { + secondaryKey = defaultSecondaryKey; + } + // get the canonical field name for the primary and secondary key ResolveAlias(primaryKey); ResolveAlias(secondaryKey); + // set the enum value for primary key + if (primaryKey == "inlinenumber") + { + primaryKeyValue = PrimaryKeyValue::InlineNumber; + } + else if (primaryKey == "crosslinenumber") + { + primaryKeyValue = PrimaryKeyValue::CrosslineNumber; + } + else + { + primaryKeyValue = PrimaryKeyValue::Other; + } + SEGY::SEGYType segyType = SEGY::SEGYType::Poststack; - 
if (primaryKey == "inlinenumber" || primaryKey == "crosslinenumber" ) + if (isOffsetSorted) + { + segyType = SEGY::SEGYType::PrestackOffsetSorted; + } + else if (is2D) + { + if (prestack) + { + segyType = SEGY::SEGYType::Prestack2D; + } + else + { + segyType = SEGY::SEGYType::Poststack2D; + } + } + else if (primaryKey == "inlinenumber" || primaryKey == "crosslinenumber") { - if(prestack) + if (prestack) { segyType = SEGY::SEGYType::Prestack; } @@ -1648,13 +2749,13 @@ main(int argc, char* argv[]) if (singleConnection) inputConnection = urlConnection; - if(uniqueID && !persistentID.empty()) + if (uniqueID && !persistentID.empty()) { OpenVDS::printError(jsonOutput, "Args", "--uniqueID does not make sense when the persistentID is specified"); return EXIT_FAILURE; } - - if(disablePersistentID && !persistentID.empty()) + + if (disablePersistentID && !persistentID.empty()) { OpenVDS::printError(jsonOutput, "Args", "--disable-PersistentID does not make sense when the persistentID is specified"); return EXIT_FAILURE; @@ -1692,6 +2793,64 @@ main(int argc, char* argv[]) } } + if (!azimuthTypeString.empty()) + { + std::transform(azimuthTypeString.begin(), azimuthTypeString.end(), azimuthTypeString.begin(), asciitolower); + if (azimuthTypeString == "azimuth") + { + azimuthType = SEGY::AzimuthType::Azimuth; + } + else if (azimuthTypeString == "offsetxy") + { + azimuthType = SEGY::AzimuthType::OffsetXY; + } + else + { + OpenVDS::printError(jsonOutput, "Args", fmt::format("Unknown Azimuth type '{}'", azimuthTypeString)); + return EXIT_FAILURE; + } + } + + if (!azimuthUnitString.empty()) + { + std::transform(azimuthUnitString.begin(), azimuthUnitString.end(), azimuthUnitString.begin(), asciitolower); + if (azimuthUnitString == "radians") + { + azimuthUnit = SEGY::AzimuthUnits::Radians; + } + else if (azimuthUnitString == "degrees") + { + azimuthUnit = SEGY::AzimuthUnits::Degrees; + } + else + { + OpenVDS::printError(jsonOutput, "Args", fmt::format("Unknown Azimuth unit '{}'", 
azimuthUnitString)); + return EXIT_FAILURE; + } + } + + if (!traceSpacingByOffsetString.empty()) + { + std::transform(traceSpacingByOffsetString.begin(), traceSpacingByOffsetString.end(), traceSpacingByOffsetString.begin(), asciitolower); + if (traceSpacingByOffsetString == "off") + { + traceSpacingByOffset = TraceSpacingByOffset::Off; + } + else if (traceSpacingByOffsetString == "on") + { + traceSpacingByOffset = TraceSpacingByOffset::On; + } + else if (traceSpacingByOffsetString == "auto") + { + traceSpacingByOffset = TraceSpacingByOffset::Auto; + } + else + { + OpenVDS::printError(jsonOutput, "Args", fmt::format("Unknown --respace-gathers option '{}'", traceSpacingByOffsetString)); + return EXIT_FAILURE; + } + } + SEGY::HeaderField primaryKeyHeaderField, secondaryKeyHeaderField; @@ -1712,10 +2871,8 @@ main(int argc, char* argv[]) } SEGY::HeaderField - startTimeHeaderField = g_traceHeaderFields["starttime"]; - - SEGYBinInfoHeaderFields - binInfoHeaderFields(g_traceHeaderFields[primaryKey], g_traceHeaderFields[secondaryKey], g_traceHeaderFields["coordinatescale"], g_traceHeaderFields["ensemblexcoordinate"], g_traceHeaderFields["ensembleycoordinate"], scale); + startTimeHeaderField = g_traceHeaderFields["starttime"], + offsetHeaderField = g_traceHeaderFields["offset"]; OpenVDS::Error error; @@ -1732,15 +2889,23 @@ main(int argc, char* argv[]) fileInfo(headerEndianness); fileInfo.m_segyType = segyType; + const auto + binInfoCrosslineFieldIndex = fileInfo.Is2D() ? secondaryKey.c_str() : "crosslinenumber", // for 2D need to borrow the BinInfo crossline field for ensemble number + xcoordHeaderFieldIndex = fileInfo.Is2D() ? "sourcexcoordinate" : "ensemblexcoordinate", + ycoordHeaderFieldIndex = fileInfo.Is2D() ? 
"sourceycoordinate" : "ensembleycoordinate"; + + SEGYBinInfoHeaderFields + binInfoHeaderFields(g_traceHeaderFields["inlinenumber"], g_traceHeaderFields[binInfoCrosslineFieldIndex], g_traceHeaderFields["coordinatescale"], g_traceHeaderFields[xcoordHeaderFieldIndex], g_traceHeaderFields[ycoordHeaderFieldIndex], scale); + // Scan the file if '--scan' was passed or we're uploading but no fileInfo file was specified - if(scan || fileInfoFileName.empty()) + if (scan || fileInfoFileNames.empty()) { - if(!uniqueID) + if (!uniqueID) { OpenVDS::HashCombiner hash; - for(std::string const &fileName : fileNames) + for (std::string const& fileName : fileNames) { hash.Add(fileName); } @@ -1748,7 +2913,7 @@ main(int argc, char* argv[]) fileInfo.m_persistentID = OpenVDS::HashCombiner(hash); } - bool success = fileInfo.Scan(dataProviders, error, primaryKeyHeaderField, secondaryKeyHeaderField, startTimeHeaderField, binInfoHeaderFields); + bool success = fileInfo.Scan(dataProviders, error, primaryKeyHeaderField, secondaryKeyHeaderField, startTimeHeaderField, offsetHeaderField, binInfoHeaderFields); if (!success) { @@ -1756,106 +2921,115 @@ main(int argc, char* argv[]) return EXIT_FAILURE; } - if(overrideSampleStart) + if (overrideSampleStart) { fileInfo.m_startTimeMilliseconds = sampleStart; } // If we are in scan mode we serialize the result of the file scan either to a fileInfo file (if specified) or to stdout and exit - if(scan) + if (scan) { - // TODO if we have multiple input files we need to serialize multiple scan files - Json::Value jsonFileInfo = SerializeSEGYFileInfo(fileInfo, 0); - - Json::StreamWriterBuilder wbuilder; - wbuilder["indentation"] = " "; - std::string document = Json::writeString(wbuilder, jsonFileInfo); - - if (fileInfoFileName.empty()) - { - fmt::print(stdout, "{}", document); - } - else + for (size_t fileIndex = 0; fileIndex < fileNames.size(); ++fileIndex) { - OpenVDS::Error - error; + Json::Value jsonFileInfo = SerializeSEGYFileInfo(fileInfo, fileIndex); 
+ + Json::StreamWriterBuilder wbuilder; + wbuilder["indentation"] = " "; + std::string document = Json::writeString(wbuilder, jsonFileInfo); - if (OpenVDS::IsSupportedProtocol(fileInfoFileName)) + if (fileInfoFileNames.empty()) { - std::string dirname; - std::string basename; - std::string parameters; - splitUrl(fileInfoFileName, dirname, basename, parameters, error); - if (error.code) - { - OpenVDS::printError(jsonOutput, "IO", "Failed to creating IOManager for", fileInfoFileName, error.string); - return EXIT_FAILURE; - } - std::string scanUrl = dirname + parameters; - std::unique_ptr ioManager(OpenVDS::IOManager::CreateIOManager(scanUrl, urlConnection, OpenVDS::IOManager::ReadWrite, error)); - if (error.code) - { - OpenVDS::printError(jsonOutput, "IO", "Failed to creating IOManager for", fileInfoFileName, error.string); - return EXIT_FAILURE; - } - auto shared_data = std::make_shared>(); - shared_data->insert(shared_data->end(), document.begin(), document.end()); - auto req = ioManager->WriteObject(basename, "", "text/plain", {}, shared_data, {}); - req->WaitForFinish(error); - if (error.code) - { - OpenVDS::printError(jsonOutput, "IO", "Failed to write", fileInfoFileName, error.string); - return EXIT_FAILURE; - } + fmt::print(stdout, "{}", document); } else { - OpenVDS::File - fileInfoFile; - - fileInfoFile.Open(fileInfoFileName.c_str(), true, false, true, error); + OpenVDS::Error + error; + const auto + & fileInfoFileName = fileInfoFileNames[fileIndex]; - if (error.code != 0) + if (OpenVDS::IsSupportedProtocol(fileInfoFileName)) { - OpenVDS::printError(jsonOutput, "IO", "Could not create file info file", fileInfoFileName); - return EXIT_FAILURE; + std::string dirname; + std::string basename; + std::string parameters; + splitUrl(fileInfoFileName, dirname, basename, parameters, error); + if (error.code) + { + OpenVDS::printError(jsonOutput, "IO", "Failed to creating IOManager for", fileInfoFileName, error.string); + return EXIT_FAILURE; + } + std::string scanUrl 
= dirname + parameters; + std::unique_ptr ioManager(OpenVDS::IOManager::CreateIOManager(scanUrl, urlConnection, OpenVDS::IOManager::ReadWrite, error)); + if (error.code) + { + OpenVDS::printError(jsonOutput, "IO", "Failed to creating IOManager for", fileInfoFileName, error.string); + return EXIT_FAILURE; + } + auto shared_data = std::make_shared>(); + shared_data->insert(shared_data->end(), document.begin(), document.end()); + auto req = ioManager->WriteObject(basename, "", "text/plain", {}, shared_data, {}); + req->WaitForFinish(error); + if (error.code) + { + OpenVDS::printError(jsonOutput, "IO", "Failed to write", fileInfoFileName, error.string); + return EXIT_FAILURE; + } } + else + { + OpenVDS::File + fileInfoFile; - fileInfoFile.Write(document.data(), 0, (int32_t)document.size(), error); + fileInfoFile.Open(fileInfoFileName, true, false, true, error); - if (error.code != 0) - { - OpenVDS::printError(jsonOutput, "IO", "Could not write file info to file", fileInfoFileName); - return EXIT_FAILURE; + if (error.code != 0) + { + OpenVDS::printError(jsonOutput, "IO", "Could not create file info file", fileInfoFileName); + return EXIT_FAILURE; + } + + fileInfoFile.Write(document.data(), 0, (int32_t)document.size(), error); + + if (error.code != 0) + { + OpenVDS::printError(jsonOutput, "IO", "Could not write file info to file", fileInfoFileName); + return EXIT_FAILURE; + } } } - } return EXIT_SUCCESS; } } - else if (!fileInfoFileName.empty()) + else if (!fileInfoFileNames.empty()) { OpenVDS::Error error; - DataProvider fileInfoDataProvider = CreateDataProvider(fileInfoFileName, inputConnection, error); - - if (error.code != 0) + for (size_t fileIndex = 0; fileIndex < fileInfoFileNames.size(); ++fileIndex) { - OpenVDS::printError(jsonOutput, "IO", "Could not create data provider for", fileInfoFileName, error.string); - return EXIT_FAILURE; - } + const auto + & fileInfoFileName = fileInfoFileNames[fileIndex]; - bool success = parseSEGYFileInfoFile(fileInfoDataProvider, 
fileInfo, error); + DataProvider fileInfoDataProvider = CreateDataProvider(fileInfoFileName, inputConnection, error); - if (!success) - { - OpenVDS::printError(jsonOutput, "FileInfo", "Parse SEGYFileInfo", fileInfoFileName, error.string); - return EXIT_FAILURE; + if (error.code != 0) + { + OpenVDS::printError(jsonOutput, "IO", "Could not create data provider for", fileInfoFileName, error.string); + return EXIT_FAILURE; + } + + bool success = parseSEGYFileInfoFile(fileInfoDataProvider, fileInfo, fileIndex, fileIndex != 0, error); + + if (!success) + { + OpenVDS::printError(jsonOutput, "FileInfo", "Parse SEGYFileInfo", fileInfoFileName, error.string); + return EXIT_FAILURE; + } } - if(overrideSampleStart) + if (overrideSampleStart) { fileInfo.m_startTimeMilliseconds = sampleStart; } @@ -1873,24 +3047,51 @@ main(int argc, char* argv[]) // Check for only a single segment - if(fileInfo.m_segmentInfoLists.size() == 1 && fileInfo.m_segmentInfoLists[0].size() == 1) + const bool isOneSegment = fileInfo.IsOffsetSorted() + ? fileInfo.m_segmentInfoListsByOffset.size() == 1 && fileInfo.m_segmentInfoListsByOffset[0].size() == 1 + : fileInfo.m_segmentInfoLists.size() == 1 && fileInfo.m_segmentInfoLists[0].size() == 1; + + if (!is2D && isOneSegment) { - OpenVDS::printWarning_with_condition_fatal(jsonOutput, !ignoreWarnings, "SegmentInfoList", "Warning: There is only one segment, either this is (as of now unsupported) 2D data or this usually indicates using the wrong header format for the input dataset.", "Use --ignore-warnings to force the import to go ahead."); + OpenVDS::printWarning_with_condition_fatal(jsonOutput, !ignoreWarnings, "SegmentInfoList", "Warning: There is only one segment, either this is 2D data or this usually indicates using the wrong header format for the input dataset.", "Use --2d for 2D data. 
Use --ignore-warnings to force the import to go ahead."); } // Determine value range, fold and primary/secondary step OpenVDS::FloatRange valueRange; int fold = 1, primaryStep = 1, secondaryStep = 1; - int offsetStart, offsetEnd, offsetStep; + std::vector gatherOffsetValues; const float valueRangePercentile = 99.5f; // 99.5f is the same default as Petrel uses. - int fileIndex; - auto representativeSegment = findRepresentativeSegment(fileInfo, primaryStep, fileIndex); - analyzeSegment(dataProviders[fileIndex], fileInfo, representativeSegment, valueRangePercentile, valueRange, fold, secondaryStep, segyType, offsetStart, offsetEnd, offsetStep, jsonOutput, error); + // Create the appropriate TraceInfo2DManager here so that it can be called during AnalzyeSegment. + // + // Note: Piggybacking the gathering of 2D trace info onto AnalyzeSegment is possible because + // a) 2D SEGY has exactly one segment, and b) AnalyzeSegment reads every trace in the segment its + // analyzing. If (b) is no longer true because we've changed the analysis strategy then something + // will need to take the place of this piggybacking. + // + auto traceInfo2DManager = fileInfo.Is2D() + ? 
std::unique_ptr(new TraceInfo2DManagerImpl(fileInfo.m_headerEndianness, scale, g_traceHeaderFields["coordinatescale"], g_traceHeaderFields["sourcexcoordinate"], g_traceHeaderFields["sourceycoordinate"], g_traceHeaderFields["starttime"], g_traceHeaderFields["energysourcepointnumber"], g_traceHeaderFields["ensemblenumber"])) + : std::unique_ptr(new TraceInfo2DManager()); - if (error.code != 0) + bool + analyzeResult = false; + if (fileInfo.IsOffsetSorted()) + { + const auto + primaryKey = findRepresentativePrimaryKey(fileInfo, primaryStep); + analyzeResult = analyzePrimaryKey(dataProviders, fileInfo, primaryKey, valueRangePercentile, valueRange, fold, secondaryStep, gatherOffsetValues, jsonOutput, error); + } + else + { + int fileIndex; + const auto& representativeSegment = findRepresentativeSegment(fileInfo, primaryStep, fileIndex); + assert(fileIndex < dataProviders.size()); + analyzeResult = analyzeSegment(dataProviders[fileIndex], fileInfo, representativeSegment, valueRangePercentile, valueRange, fold, secondaryStep, segyType, traceInfo2DManager.get(), traceSpacingByOffset, gatherOffsetValues, jsonOutput, error); + } + + if (!analyzeResult || error.code != 0) { OpenVDS::printError(jsonOutput, "SEGY", error.string); return EXIT_FAILURE; @@ -1904,7 +3105,7 @@ main(int argc, char* argv[]) } else if (segyType == SEGY::SEGYType::Poststack || segyType == SEGY::SEGYType::Poststack2D) { - if(fold > 1) + if (fold > 1) { OpenVDS::printError(jsonOutput, "SEGY", fmt::format("Detected a fold of '{0}', this usually indicates using the wrong header format or primary key for the input dataset or that the input data is binned prestack data (PSTM/PSDM gathers) in which case the --prestack option should be used.", fold)); return EXIT_FAILURE; @@ -1946,15 +3147,15 @@ main(int argc, char* argv[]) SEGY::SampleUnits sampleUnits; - if(sampleUnit.empty() || sampleUnit == "ms" || sampleUnit == "millisecond" || sampleUnit == "milliseconds") + if (sampleUnit.empty() || sampleUnit == "ms" || 
sampleUnit == "millisecond" || sampleUnit == "milliseconds") { sampleUnits = SEGY::SampleUnits::Milliseconds; } - else if(sampleUnit == "m" || sampleUnit == "meter" || sampleUnit == "meters") + else if (sampleUnit == "m" || sampleUnit == "meter" || sampleUnit == "meters") { sampleUnits = SEGY::SampleUnits::Meters; } - else if(sampleUnit == "ft" || sampleUnit == "foot" || sampleUnit == "feet") + else if (sampleUnit == "ft" || sampleUnit == "foot" || sampleUnit == "feet") { sampleUnits = SEGY::SampleUnits::Feet; } @@ -1964,6 +3165,28 @@ main(int argc, char* argv[]) return EXIT_FAILURE; } + if (!(attributeName == DEFAULT_ATTRIBUTE_NAME || attributeName == AMPLITUDE_ATTRIBUTE_NAME || attributeName == DEPTH_ATTRIBUTE_NAME + || attributeName == PROBABILITY_ATTRIBUTE_NAME || attributeName == TIME_ATTRIBUTE_NAME || attributeName == AVERAGE_VELOCITY_ATTRIBUTE_NAME + || attributeName == INTERVAL_VELOCITY_ATTRIBUTE_NAME || attributeName == RMS_VELOCITY_ATTRIBUTE_NAME)) + { + const auto msg = fmt::format("Unknown attribute name: {}, legal names are '{}', '{}', '{}', '{}', '{}', '{}', '{}', or '{}'\n", + attributeName, DEFAULT_ATTRIBUTE_NAME, AMPLITUDE_ATTRIBUTE_NAME, DEPTH_ATTRIBUTE_NAME, PROBABILITY_ATTRIBUTE_NAME, TIME_ATTRIBUTE_NAME, AVERAGE_VELOCITY_ATTRIBUTE_NAME, + INTERVAL_VELOCITY_ATTRIBUTE_NAME, RMS_VELOCITY_ATTRIBUTE_NAME); + OpenVDS::printError(jsonOutput, "Args", msg); + return EXIT_FAILURE; + } + + if (!(attributeUnit.empty() || attributeUnit == KNOWNMETADATA_UNIT_FOOT || attributeUnit == KNOWNMETADATA_UNIT_FEET_PER_SECOND + || attributeUnit == "Hz" || attributeUnit == KNOWNMETADATA_UNIT_METER || attributeUnit == KNOWNMETADATA_UNIT_METERS_PER_SECOND + || attributeUnit == KNOWNMETADATA_UNIT_MILLISECOND || attributeUnit == KNOWNMETADATA_UNIT_SECOND)) + { + const auto msg = fmt::format("Unknown attribute unit: {}, legal units are blank (no units), '{}', '{}', '{}', '{}', '{}', '{}', or '{}'\n", + attributeUnit, KNOWNMETADATA_UNIT_FOOT, 
KNOWNMETADATA_UNIT_FEET_PER_SECOND, "Hz", KNOWNMETADATA_UNIT_METER, KNOWNMETADATA_UNIT_METERS_PER_SECOND, + KNOWNMETADATA_UNIT_MILLISECOND, KNOWNMETADATA_UNIT_SECOND); + OpenVDS::printError(jsonOutput, "Args", msg); + return EXIT_FAILURE; + } + // Create axis descriptors std::vector axisDescriptors = createAxisDescriptors(fileInfo, sampleUnits, fold, primaryStep, secondaryStep); @@ -1987,9 +3210,6 @@ main(int argc, char* argv[]) OpenVDS::printWarning_with_condition_fatal(jsonOutput, !ignoreWarnings, "SEGY", msg, fatal_msg); } - // Create channel descriptors - std::vector channelDescriptors = createChannelDescriptors(fileInfo, valueRange, OffsetChannelInfo(fileInfo.HasGatherOffset(), offsetStart, offsetEnd, offsetStep)); - // Create metadata OpenVDS::MetadataContainer metadataContainer; @@ -2003,10 +3223,10 @@ main(int argc, char* argv[]) } SEGY::BinaryHeader::MeasurementSystem - measurementSystem; + segyMeasurementSystem; // get SEGY metadata from first file - createSEGYMetadata(dataProviders[0], fileInfo, metadataContainer, measurementSystem, error); + createSEGYMetadata(dataProviders[0], fileInfo, metadataContainer, segyMeasurementSystem, error); if (error.code != 0) { @@ -2014,9 +3234,35 @@ main(int argc, char* argv[]) return EXIT_FAILURE; } - if (segyType == SEGY::SEGYType::Poststack || segyType == SEGY::SEGYType::Prestack) + int offsetStart = 0, offsetEnd = 0; + + if (!gatherOffsetValues.empty()) + { + // use minmax_element because the offset values are not necessarily sorted (and use 1 for the offset step because the values aren't necessarily spaced consistently) + const auto + minMax = std::minmax_element(gatherOffsetValues.begin(), gatherOffsetValues.end()); + offsetStart = *minMax.first, + offsetEnd = *minMax.second; + } + + OffsetChannelInfo + offsetInfo(fileInfo.HasGatherOffset(), ConvertDistance(segyMeasurementSystem, static_cast(offsetStart)), ConvertDistance(segyMeasurementSystem, static_cast(offsetEnd)), ConvertDistance(segyMeasurementSystem, 1.0f)); 
+ + // Create channel descriptors + std::vector channelDescriptors = createChannelDescriptors(fileInfo, valueRange, offsetInfo, attributeName, attributeUnit, isAzimuth, isMutes); + + if (primaryKeyValue == PrimaryKeyValue::InlineNumber || primaryKeyValue == PrimaryKeyValue::CrosslineNumber) { - createSurveyCoordinateSystemMetadata(fileInfo, measurementSystem, crsWkt, metadataContainer); + // only create the lattice metadata if the primary key is Inline or Crossline, else we may not have Inline/Crossline bin data + createSurveyCoordinateSystemMetadata(fileInfo, segyMeasurementSystem, crsWkt, metadataContainer, primaryKeyValue); + } + + create2DTraceInformationMetadata(is2D, traceInfo2DManager.get(), metadataContainer, error); + + if (error.code != 0) + { + OpenVDS::printError(jsonOutput, "Metadata", error.string); + return EXIT_FAILURE; } OpenVDS::Error createError; @@ -2059,7 +3305,7 @@ main(int argc, char* argv[]) OpenVDS::DimensionsND writeDimensionGroup = OpenVDS::DimensionsND::Dimensions_012; - if (IsSEGYTypeUnbinned(segyType)) + if (IsSEGYTypeUnbinned(segyType) || segyType == SEGY::SEGYType::Poststack2D) { writeDimensionGroup = OpenVDS::DimensionsND::Dimensions_01; } @@ -2067,11 +3313,49 @@ main(int argc, char* argv[]) { writeDimensionGroup = OpenVDS::DimensionsND::Dimensions_013; } + else if (fileInfo.IsOffsetSorted()) + { + writeDimensionGroup = OpenVDS::DimensionsND::Dimensions_023; + } + + int offsetChannelIndex = 0, azimuthChannelIndex = 0, muteChannelIndex = 0; + + if (fileInfo.HasGatherOffset()) + { + offsetChannelIndex = findChannelDescriptorIndex("Offset", channelDescriptors); + if (offsetChannelIndex < 0) + { + OpenVDS::printError(jsonOutput, "VDS", "Could not find VDS channel descriptor for Offset"); + return EXIT_FAILURE; + } + } + + if (isAzimuth) + { + azimuthChannelIndex = findChannelDescriptorIndex("Azimuth", channelDescriptors); + if (azimuthChannelIndex < 0) + { + OpenVDS::printError(jsonOutput, "VDS", "Could not find VDS channel descriptor 
for Azimuth"); + return EXIT_FAILURE; + } + } + + if (isMutes) + { + muteChannelIndex = findChannelDescriptorIndex("Mute", channelDescriptors); + if (muteChannelIndex < 0) + { + OpenVDS::printError(jsonOutput, "VDS", "Could not find VDS channel descriptor for Mute"); + return EXIT_FAILURE; + } + } auto amplitudeAccessor = accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, 0, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create); auto traceFlagAccessor = accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, 1, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create); auto segyTraceHeaderAccessor = accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, 2, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create); - auto offsetAccessor = fileInfo.HasGatherOffset() ? accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, 3, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create) : nullptr; + auto offsetAccessor = fileInfo.HasGatherOffset() ? accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, offsetChannelIndex, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create) : nullptr; + auto azimuthAccessor = isAzimuth ? accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, azimuthChannelIndex, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create) : nullptr; + auto muteAccessor = isMutes ? 
accessManager.CreateVolumeDataPageAccessor(writeDimensionGroup, 0, muteChannelIndex, 8, OpenVDS::VolumeDataAccessManager::AccessMode_Create) : nullptr; int64_t traceByteSize = fileInfo.TraceByteSize(); @@ -2095,6 +3379,13 @@ main(int argc, char* argv[]) lowerUpperSegmentIndices; }; + // chunk information for iterating crossline-sorted data in input order (instead of default chunk order) + std::array + dimChunkCounts = { 0, 0, 0, 0 }, + dimChunkPitches = { 1, 1, 1, 1 }; + const auto + dimensionality = axisDescriptors.size(); + // limit DataViewManager's memory use to 1.5 sets of brick inlines const int64_t dvmMemoryLimit = 3LL * (writeDimensionGroup == OpenVDS::DimensionsND::Dimensions_01 ? 1 : brickSize) * axisDescriptors[1].GetNumSamples() * fileInfo.TraceByteSize() / 2LL; @@ -2106,12 +3397,18 @@ main(int argc, char* argv[]) const auto perFileMemoryLimit = dvmMemoryLimit / dataProviders.size(); - for (size_t fileIndex = 0; fileIndex < fileInfo.m_segmentInfoLists.size(); ++fileIndex) + assert(dataProviders.size() == (fileInfo.IsOffsetSorted() ? 
fileInfo.m_segmentInfoListsByOffset.size() : fileInfo.m_segmentInfoLists.size())); + + for (size_t fileIndex = 0; fileIndex < dataProviders.size(); ++fileIndex) { dataViewManagers.emplace_back(std::make_shared(dataProviders[fileIndex], perFileMemoryLimit)); traceDataManagers.emplace_back(dataViewManagers.back(), 128, traceByteSize, fileInfo.m_traceCounts[fileIndex]); } + const int + primaryKeyDimension = PrimaryKeyDimension(fileInfo, primaryKeyValue), + secondaryKeyDimension = SecondaryKeyDimension(fileInfo, primaryKeyValue); + std::vector chunkInfos; chunkInfos.resize(amplitudeAccessor->GetChunkCount()); std::vector dataRequests; @@ -2121,21 +3418,69 @@ main(int argc, char* argv[]) auto &chunkInfo = chunkInfos[chunk]; amplitudeAccessor->GetChunkMinMax(chunk, chunkInfo.min, chunkInfo.max); + if (primaryKeyValue == PrimaryKeyValue::CrosslineNumber) + { + for (size_t i = 0; i < dimChunkCounts.size(); ++i) + { + bool isZero = true; + for (size_t j = 0; isZero && j < dimChunkCounts.size(); ++j) + { + if (j != i) + { + isZero = chunkInfo.min[j] == 0; + } + } + if (isZero) + { + ++dimChunkCounts[i]; + } + } + } + chunkInfo.sampleStart = chunkInfo.min[0]; chunkInfo.sampleCount = chunkInfo.max[0] - chunkInfo.min[0]; - chunkInfo.secondaryKeyStart = (int)floorf(layout->GetAxisDescriptor(SecondaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.min[SecondaryKeyDimension(fileInfo)]) + 0.5f); - chunkInfo.secondaryKeyStop = (int)floorf(layout->GetAxisDescriptor(SecondaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.max[SecondaryKeyDimension(fileInfo)] - 1) + 0.5f); + OpenVDS::Error traceIndexError; + + chunkInfo.secondaryKeyStart = fileInfo.Is2D() ? TraceIndex2DToEnsembleNumber(traceInfo2DManager.get(), chunkInfo.min[secondaryKeyDimension], traceIndexError) : (int)floorf(layout->GetAxisDescriptor(secondaryKeyDimension).SampleIndexToCoordinate(chunkInfo.min[secondaryKeyDimension]) + 0.5f); + chunkInfo.secondaryKeyStop = fileInfo.Is2D() ? 
TraceIndex2DToEnsembleNumber(traceInfo2DManager.get(), chunkInfo.max[secondaryKeyDimension] - 1, traceIndexError) : (int)floorf(layout->GetAxisDescriptor(secondaryKeyDimension).SampleIndexToCoordinate(chunkInfo.max[secondaryKeyDimension] - 1) + 0.5f); + + if (fileInfo.Is2D() && traceIndexError.code != 0) + { + OpenVDS::printWarning(jsonOutput, "2DEnsembleIndex", "Could not translate trace index to ensemble number", fmt::format("{}", error.code), error.string); + break; + } - chunkInfo.primaryKeyStart = (int)floorf(layout->GetAxisDescriptor(PrimaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.min[PrimaryKeyDimension(fileInfo)]) + 0.5f); - chunkInfo.primaryKeyStop = (int)floorf(layout->GetAxisDescriptor(PrimaryKeyDimension(fileInfo)).SampleIndexToCoordinate(chunkInfo.max[PrimaryKeyDimension(fileInfo)] - 1) + 0.5f); + chunkInfo.primaryKeyStart = fileInfo.Is2D() ? 0 : (int)floorf(layout->GetAxisDescriptor(primaryKeyDimension).SampleIndexToCoordinate(chunkInfo.min[primaryKeyDimension]) + 0.5f); + chunkInfo.primaryKeyStop = fileInfo.Is2D() ? 0 : (int)floorf(layout->GetAxisDescriptor(primaryKeyDimension).SampleIndexToCoordinate(chunkInfo.max[primaryKeyDimension] - 1) + 0.5f); // For each input file, find the lower/upper segments and then add data requests to that file's traceDataManager - for (size_t fileIndex = 0; fileIndex < fileInfo.m_segmentInfoLists.size(); ++fileIndex) + const auto segmentInfoListsSize = fileInfo.IsOffsetSorted() ? fileInfo.m_segmentInfoListsByOffset.size() : fileInfo.m_segmentInfoLists.size(); + + for (size_t fileIndex = 0; fileIndex < segmentInfoListsSize; ++fileIndex) { + assert(fileInfo.IsOffsetSorted() ? chunkInfo.min[1] < gatherOffsetValues.size() : true); + + const int + offsetSortedOffsetValue = fileInfo.IsOffsetSorted() ? gatherOffsetValues[chunkInfo.min[1]] : 0; + + if (fileInfo.IsOffsetSorted()) + { + // for offset sorted chunks should only have a single offset (i.e. 
dim 02 or 023) + assert(chunkInfo.min[1] + 1 == chunkInfo.max[1]); + + // If the calculated offset value isn't present in this file's map then skip + if (fileInfo.m_segmentInfoListsByOffset[fileIndex].find(offsetSortedOffsetValue) == fileInfo.m_segmentInfoListsByOffset[fileIndex].end()) + { + continue; + } + } + auto& - segmentInfoList = fileInfo.m_segmentInfoLists[fileIndex]; + segmentInfoList = fileInfo.IsOffsetSorted() + ? fileInfo.m_segmentInfoListsByOffset[fileIndex][offsetSortedOffsetValue] + : fileInfo.m_segmentInfoLists[fileIndex]; // does this file have any segments in the primary key range? bool hasSegments; @@ -2154,7 +3499,13 @@ main(int argc, char* argv[]) lower, upper; - if (fileInfo.IsUnbinned()) + if (fileInfo.Is2D()) + { + // Hopefully 2D files always a single line + lower = segmentInfoList.begin(); + upper = segmentInfoList.end(); + } + else if (fileInfo.IsUnbinned()) { // For unbinned data we set lower and upper based on the 1-based indices instead of searching on the segment primary keys. // When we implement raw gathers mode we'll need to modify this. 
@@ -2179,9 +3530,14 @@ main(int argc, char* argv[]) } } - for (int64_t chunk = 0; chunk < amplitudeAccessor->GetChunkCount() && error.code == 0; chunk++) + for (size_t i = 1; i < dimChunkCounts.size(); ++i) + { + dimChunkPitches[i] = dimChunkPitches[i - 1] * dimChunkCounts[i - 1]; + } + + for (int64_t chunkSequence = 0; chunkSequence < amplitudeAccessor->GetChunkCount() && error.code == 0; chunkSequence++) { - int new_percentage = int(double(chunk) / amplitudeAccessor->GetChunkCount() * 100); + int new_percentage = int(double(chunkSequence) / amplitudeAccessor->GetChunkCount() * 100); if (!jsonOutput && is_tty && percentage != new_percentage) { percentage = new_percentage; @@ -2202,9 +3558,43 @@ main(int argc, char* argv[]) } } + auto + chunk = chunkSequence; + + if (primaryKeyValue == PrimaryKeyValue::CrosslineNumber && (dimensionality == 3 || dimensionality == 4) && !isDisableCrosslineReordering) + { + // recompute the chunk number for the crossline-sorted chunk corresponding to the current sequence number + + std::array chunkCoords = { 0, 0, 0, 0 }; + if (dimensionality == 3) + { + auto dv = std::div(chunkSequence, dimChunkCounts[0]); + chunkCoords[0] = dv.rem; + dv = std::div(dv.quot, dimChunkCounts[2]); // 2 not 1 because we're working in crossline-major space + chunkCoords[1] = dv.quot; + chunkCoords[2] = dv.rem; + } + else + { + auto dv = std::div(chunkSequence, dimChunkCounts[0]); + chunkCoords[0] = dv.rem; + dv = std::div(dv.quot, dimChunkCounts[1]); + chunkCoords[1] = dv.rem; + dv = std::div(dv.quot, dimChunkCounts[3]); // 3 not 2 because we're working in crossline-major space + chunkCoords[2] = dv.quot; + chunkCoords[3] = dv.rem; + } + + chunk = 0; + for (size_t i = 0; i < chunkCoords.size(); ++i) + { + chunk += chunkCoords[i] * dimChunkPitches[i]; + } + } + auto &chunkInfo = chunkInfos[chunk]; - // if we've crossed to a new inline then trim the trace page cache + // if we've crossed to a new primary key range then trim the trace page cache if (chunk > 
0) { const auto& previousChunkInfo = chunkInfos[chunk - 1]; @@ -2217,12 +3607,12 @@ main(int argc, char* argv[]) auto currentIndexIter = chunkInfo.lowerUpperSegmentIndices.find(chunkFileIndex); if (currentIndexIter != chunkInfo.lowerUpperSegmentIndices.end()) { - // This file is active in both the current and previous chunks. Check to see if we've progressed to a new set of inlines. + // This file is active in both the current and previous chunks. Check to see if we've progressed to a new set of primary keys. auto previousLowerSegmentIndex = std::get<0>(prevIndexIter->second); auto currentLowerSegmentIndex = std::get<0>(currentIndexIter->second); if (currentLowerSegmentIndex > previousLowerSegmentIndex) { - // we've progressed to a new set of inlines; remove earlier pages from the cache + // we've progressed to a new set of primary keys; remove earlier pages from the cache traceDataManagers[chunkFileIndex].retirePagesBefore(fileInfo.m_segmentInfoLists[chunkFileIndex][currentLowerSegmentIndex].m_traceStart); } } @@ -2241,6 +3631,8 @@ main(int argc, char* argv[]) OpenVDS::VolumeDataPage* traceFlagPage = nullptr; OpenVDS::VolumeDataPage* segyTraceHeaderPage = nullptr; OpenVDS::VolumeDataPage* offsetPage = nullptr; + OpenVDS::VolumeDataPage* azimuthPage = nullptr; + OpenVDS::VolumeDataPage* mutePage = nullptr; if (chunkInfo.min[0] == 0) { @@ -2250,24 +3642,40 @@ main(int argc, char* argv[]) { offsetPage = offsetAccessor->CreatePage(offsetAccessor->GetMappedChunkIndex(chunk)); } + if (azimuthAccessor != nullptr) + { + azimuthPage = azimuthAccessor->CreatePage(azimuthAccessor->GetMappedChunkIndex(chunk)); + } + if (muteAccessor != nullptr) + { + mutePage = muteAccessor->CreatePage(muteAccessor->GetMappedChunkIndex(chunk)); + } } int amplitudePitch[OpenVDS::Dimensionality_Max]; int traceFlagPitch[OpenVDS::Dimensionality_Max]; int segyTraceHeaderPitch[OpenVDS::Dimensionality_Max]; int offsetPitch[OpenVDS::Dimensionality_Max]; + int 
azimuthPitch[OpenVDS::Dimensionality_Max]; + int mutePitch[OpenVDS::Dimensionality_Max]; void* amplitudeBuffer = amplitudePage->GetWritableBuffer(amplitudePitch); void* traceFlagBuffer = traceFlagPage ? traceFlagPage->GetWritableBuffer(traceFlagPitch) : nullptr; void* segyTraceHeaderBuffer = segyTraceHeaderPage ? segyTraceHeaderPage->GetWritableBuffer(segyTraceHeaderPitch) : nullptr; void* offsetBuffer = offsetPage ? offsetPage->GetWritableBuffer(offsetPitch) : nullptr; + void* azimuthBuffer = azimuthPage ? azimuthPage->GetWritableBuffer(azimuthPitch) : nullptr; + void* muteBuffer = mutePage ? mutePage->GetWritableBuffer(mutePitch) : nullptr; assert(amplitudePitch[0] == 1); - assert(!traceFlagBuffer || traceFlagPitch[1] == 1); - assert(!segyTraceHeaderBuffer || segyTraceHeaderPitch[1] == SEGY::TraceHeaderSize); - assert(!offsetBuffer || offsetPitch[1] == 1); + assert(!traceFlagBuffer || traceFlagPitch[fileInfo.IsOffsetSorted() ? 2 : 1] == 1); + assert(!segyTraceHeaderBuffer || segyTraceHeaderPitch[fileInfo.IsOffsetSorted() ? 2 : 1] == SEGY::TraceHeaderSize); + assert(!offsetBuffer || offsetPitch[fileInfo.IsOffsetSorted() ? 2 : 1] == 1); + assert(!azimuthBuffer || azimuthPitch[fileInfo.IsOffsetSorted() ? 2 : 1] == 1); + assert(!muteBuffer || mutePitch[fileInfo.IsOffsetSorted() ? 2 : 1] == 1); - for (size_t fileIndex = 0; fileIndex < fileInfo.m_segmentInfoLists.size(); ++fileIndex) + const auto segmentInfoListsSize = fileInfo.IsOffsetSorted() ? fileInfo.m_segmentInfoListsByOffset.size() : fileInfo.m_segmentInfoLists.size(); + + for (size_t fileIndex = 0; fileIndex < segmentInfoListsSize; ++fileIndex) { auto result = chunkInfo.lowerUpperSegmentIndices.find(fileIndex); if (result == chunkInfo.lowerUpperSegmentIndices.end()) @@ -2275,11 +3683,16 @@ main(int argc, char* argv[]) continue; } + assert(fileInfo.IsOffsetSorted() ? chunkInfo.min[1] < gatherOffsetValues.size() : true); + + const int + offsetSortedOffsetValue = fileInfo.IsOffsetSorted() ? 
gatherOffsetValues[chunkInfo.min[1]] : 0; + const auto lowerSegmentIndex = std::get<0>(result->second); const auto upperSegmentIndex = std::get<1>(result->second); const auto& - segmentInfo = fileInfo.m_segmentInfoLists[fileIndex]; + segmentInfo = fileInfo.IsOffsetSorted() ? fileInfo.m_segmentInfoListsByOffset[fileIndex][offsetSortedOffsetValue] : fileInfo.m_segmentInfoLists[fileIndex]; auto& traceDataManager = traceDataManagers[fileIndex]; @@ -2294,11 +3707,11 @@ main(int argc, char* argv[]) { // For unbinned gathers the secondary key is the 1-based index of the trace, so to get the // first trace we convert the index to 0-based and add that to the segment's start trace. - firstTrace = segment->m_traceStart + (chunkInfo.secondaryKeyStart - 1); + firstTrace = segment->m_traceStart + (chunkInfo.secondaryKeyStart - 1L); } else { - firstTrace = findFirstTrace(traceDataManager, *segment, chunkInfo.secondaryKeyStart, fileInfo, error); + firstTrace = findFirstTrace(traceDataManager, *segment, chunkInfo.secondaryKeyStart, fileInfo, primaryKeyValue, error); if (error.code) { OpenVDS::printWarning(jsonOutput, "IO", "Failed when reading data", fmt::format("{}", error.code), error.string); @@ -2310,8 +3723,17 @@ main(int argc, char* argv[]) tertiaryIndex = 0, currentSecondaryKey = chunkInfo.secondaryKeyStart; + std::unique_ptr gatherSpacing; + for (int64_t trace = firstTrace; trace <= segment->m_traceStop; trace++, tertiaryIndex++) { + if (!static_cast(gatherSpacing)) + { + // get the first GatherSpacing + // (do it here instead of before the loop to handle the case where 'firstTrace' is past the segment) + gatherSpacing = CalculateGatherSpacing(fileInfo, fold, gatherOffsetValues, traceDataManager, traceSpacingByOffset, firstTrace, jsonOutput); + } + const char* header = traceDataManager.getTraceData(trace, error); if (error.code) { @@ -2321,7 +3743,7 @@ main(int argc, char* argv[]) const void* data = header + SEGY::TraceHeaderSize; - int primaryTest = 
SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness), + int primaryTest = fileInfo.Is2D() ? 0 : SEGY::ReadFieldFromHeader(header, fileInfo.m_primaryKey, fileInfo.m_headerEndianness), secondaryTest = fileInfo.IsUnbinned() ? static_cast(trace - segment->m_traceStart + 1) : SEGY::ReadFieldFromHeader(header, fileInfo.m_secondaryKey, fileInfo.m_headerEndianness); // Check if the trace is outside the secondary range and go to the next segment if it is @@ -2330,40 +3752,73 @@ main(int argc, char* argv[]) break; } + if (fileInfo.IsOffsetSorted() && !isOffsetSortedDupeKeyWarned && trace > firstTrace && secondaryTest == currentSecondaryKey) + { + if (!jsonOutput && is_tty) + { + std::cout << std::endl; + } + auto + message = "This offset-sorted SEGY has traces with duplicate key combinations of Offset, Inline, and Crossline. Only one of the traces with duplicate key values will be written to the output VDS."; + OpenVDS::printWarning(jsonOutput, "SEGY", message); + isOffsetSortedDupeKeyWarned = true; + } + if (secondaryTest != currentSecondaryKey) { // we've progressed to a new secondary key, so reset the tertiary (gather) index currentSecondaryKey = secondaryTest; tertiaryIndex = 0; + + // then get respace info for the next gather + gatherSpacing = CalculateGatherSpacing(fileInfo, fold, gatherOffsetValues, traceDataManager, traceSpacingByOffset, trace, jsonOutput); } int primaryIndex, secondaryIndex; - if (fileInfo.IsUnbinned()) + if (fileInfo.Is2D()) + { + primaryIndex = 0; + secondaryIndex = traceInfo2DManager->GetIndexOfEnsembleNumber(secondaryTest); + } + else if (fileInfo.IsUnbinned()) { primaryIndex = static_cast(segment - segmentInfo.begin()); secondaryIndex = secondaryTest - 1; } else { - primaryIndex = layout->GetAxisDescriptor(PrimaryKeyDimension(fileInfo)).CoordinateToSampleIndex((float)segment->m_primaryKey); - secondaryIndex = 
layout->GetAxisDescriptor(SecondaryKeyDimension(fileInfo)).CoordinateToSampleIndex((float)secondaryTest); + primaryIndex = layout->GetAxisDescriptor(primaryKeyDimension).CoordinateToSampleIndex((float)segment->m_primaryKey); + secondaryIndex = layout->GetAxisDescriptor(secondaryKeyDimension).CoordinateToSampleIndex((float)secondaryTest); } - assert(primaryIndex >= chunkInfo.min[PrimaryKeyDimension(fileInfo)] && primaryIndex < chunkInfo.max[PrimaryKeyDimension(fileInfo)]); - assert(secondaryIndex >= chunkInfo.min[SecondaryKeyDimension(fileInfo)] && secondaryIndex < chunkInfo.max[SecondaryKeyDimension(fileInfo)]); + assert(primaryIndex >= chunkInfo.min[primaryKeyDimension] && primaryIndex < chunkInfo.max[primaryKeyDimension]); + assert(secondaryIndex >= chunkInfo.min[secondaryKeyDimension] && secondaryIndex < chunkInfo.max[secondaryKeyDimension]); - if (fileInfo.Is4D() && traceOrderByOffset) + if (fileInfo.Is4D()) { - // recalculate tertiaryIndex from header offset value - const auto - thisOffset = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness); - tertiaryIndex = (thisOffset - offsetStart) / offsetStep; + if (fileInfo.IsOffsetSorted()) + { + // force the tertiary index to the index specified for the current chunk + tertiaryIndex = chunkInfo.min[1]; + + assert(offsetSortedOffsetValue == SEGY::ReadFieldFromHeader(header, offsetHeaderField, fileInfo.m_headerEndianness)); + } + else + { + tertiaryIndex = gatherSpacing->GetTertiaryIndex(trace); - // sanity check the new index - if (tertiaryIndex < 0 || tertiaryIndex >= fold) + // sanity check the new index + if (tertiaryIndex < 0 || tertiaryIndex >= fold) + { + continue; + } + } + + if (tertiaryIndex < chunkInfo.min[1] || tertiaryIndex >= chunkInfo.max[1]) { + // the current gather trace number is not within the request bounds continue; } } @@ -2374,66 +3829,83 @@ main(int argc, char* argv[]) continue; } + const int + targetPrimaryIndex = primaryKeyValue == 
PrimaryKeyValue::CrosslineNumber ? secondaryIndex : primaryIndex, + targetSecondaryIndex = primaryKeyValue == PrimaryKeyValue::CrosslineNumber ? primaryIndex : secondaryIndex; + { - int targetOffset; - if (fileInfo.Is4D()) - { - targetOffset = (primaryIndex - chunkInfo.min[3]) * amplitudePitch[3] + (secondaryIndex - chunkInfo.min[2]) * amplitudePitch[2] + (tertiaryIndex - chunkInfo.min[1]) * amplitudePitch[1]; - } - else - { - targetOffset = (primaryIndex - chunkInfo.min[2]) * amplitudePitch[2] + (secondaryIndex - chunkInfo.min[1]) * amplitudePitch[1]; - } + const int targetOffset = VoxelIndexToDataIndex(fileInfo, targetPrimaryIndex, targetSecondaryIndex, tertiaryIndex, chunkInfo.min, amplitudePitch); copySamples(data, fileInfo.m_dataSampleFormatCode, fileInfo.m_headerEndianness, &reinterpret_cast(amplitudeBuffer)[targetOffset], chunkInfo.sampleStart, chunkInfo.sampleCount); } if (traceFlagBuffer) { - int targetOffset; - if (fileInfo.Is4D()) - { - targetOffset = (primaryIndex - chunkInfo.min[3]) * traceFlagPitch[3] + (secondaryIndex - chunkInfo.min[2]) * traceFlagPitch[2] + (tertiaryIndex - chunkInfo.min[1]) * traceFlagPitch[1]; - } - else - { - targetOffset = (primaryIndex - chunkInfo.min[2]) * traceFlagPitch[2] + (secondaryIndex - chunkInfo.min[1]) * traceFlagPitch[1]; - } + const int targetOffset = VoxelIndexToDataIndex(fileInfo, targetPrimaryIndex, targetSecondaryIndex, tertiaryIndex, chunkInfo.min, traceFlagPitch); reinterpret_cast(traceFlagBuffer)[targetOffset] = true; } if (segyTraceHeaderBuffer) { - int targetOffset; - if (fileInfo.Is4D()) - { - targetOffset = (primaryIndex - chunkInfo.min[3]) * segyTraceHeaderPitch[3] + (secondaryIndex - chunkInfo.min[2]) * segyTraceHeaderPitch[2] + (tertiaryIndex - chunkInfo.min[1]) * segyTraceHeaderPitch[1]; - } - else - { - targetOffset = (primaryIndex - chunkInfo.min[2]) * segyTraceHeaderPitch[2] + (secondaryIndex - chunkInfo.min[1]) * segyTraceHeaderPitch[1]; - } + const int targetOffset = 
VoxelIndexToDataIndex(fileInfo, targetPrimaryIndex, targetSecondaryIndex, tertiaryIndex, chunkInfo.min, segyTraceHeaderPitch); memcpy(&reinterpret_cast(segyTraceHeaderBuffer)[targetOffset], header, SEGY::TraceHeaderSize); } if (offsetBuffer) { - // offset is only applicable to 4D? - int targetOffset; - if (fileInfo.Is4D()) + const int targetOffset = VoxelIndexToDataIndex(fileInfo, targetPrimaryIndex, targetSecondaryIndex, tertiaryIndex, chunkInfo.min, offsetPitch); + + const auto + traceOffset = ConvertDistance(segyMeasurementSystem, static_cast(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness))); + static_cast(offsetBuffer)[targetOffset] = traceOffset; + } + + if (azimuthBuffer) + { + float + azimuth; + if (azimuthType == SEGY::AzimuthType::Azimuth) { - targetOffset = (primaryIndex - chunkInfo.min[3]) * offsetPitch[3] + (secondaryIndex - chunkInfo.min[2]) * offsetPitch[2] + (tertiaryIndex - chunkInfo.min[1]) * offsetPitch[1]; + azimuth = static_cast(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["azimuth"], fileInfo.m_headerEndianness)); + + // apply azimuth scale + azimuth = static_cast(azimuth * azimuthScaleFactor); + + if (azimuthUnit == SEGY::AzimuthUnits::Radians) + { + azimuth = static_cast(azimuth * 180.0 / M_PI); + } } else { - targetOffset = (primaryIndex - chunkInfo.min[2]) * offsetPitch[2] + (secondaryIndex - chunkInfo.min[1]) * offsetPitch[1]; + auto + offsetX = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offsetx"], fileInfo.m_headerEndianness), + offsetY = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offsety"], fileInfo.m_headerEndianness); + azimuth = static_cast(atan2(offsetY, offsetX) * 180.0 / M_PI); + + // transform from unit circle azimuth to compass azimuth + azimuth = fmod(450.0f - azimuth, 360.0f); } + const int targetOffset = VoxelIndexToDataIndex(fileInfo, targetPrimaryIndex, targetSecondaryIndex, tertiaryIndex, chunkInfo.min, azimuthPitch); + + 
static_cast(azimuthBuffer)[targetOffset] = azimuth; + } + + if (muteBuffer) + { + auto + muteStartTime = static_cast(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["mutestarttime"], fileInfo.m_headerEndianness)), + muteEndTime = static_cast(SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["muteendtime"], fileInfo.m_headerEndianness)); + + // Need to multiply by 2 the result of VoxelIndexToDataIndex, because it gives the two-component offset, and we want the plain offset into a uint16_t buffer const int - traceOffset = SEGY::ReadFieldFromHeader(header, g_traceHeaderFields["offset"], fileInfo.m_headerEndianness); - reinterpret_cast(offsetBuffer)[targetOffset] = static_cast(traceOffset); + targetOffset = 2 * VoxelIndexToDataIndex(fileInfo, targetPrimaryIndex, targetSecondaryIndex, tertiaryIndex, chunkInfo.min, mutePitch); + + static_cast(muteBuffer)[targetOffset] = muteStartTime; + static_cast(muteBuffer)[targetOffset + 1] = muteEndTime; } } } @@ -2443,12 +3915,16 @@ main(int argc, char* argv[]) if (traceFlagPage) traceFlagPage->Release(); if (segyTraceHeaderPage) segyTraceHeaderPage->Release(); if (offsetPage) offsetPage->Release(); + if (azimuthPage) azimuthPage->Release(); + if (mutePage) mutePage->Release(); } amplitudeAccessor->Commit(); traceFlagAccessor->Commit(); segyTraceHeaderAccessor->Commit(); if (offsetAccessor) offsetAccessor->Commit(); + if (azimuthAccessor) azimuthAccessor->Commit(); + if (muteAccessor) muteAccessor->Commit(); dataView.reset(); traceDataManagers.clear(); diff --git a/tools/SEGYImport/TraceInfo2DManager.cpp b/tools/SEGYImport/TraceInfo2DManager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9dfcf1ad0b6ff2e7b92d90b21b1d9502919930ce --- /dev/null +++ b/tools/SEGYImport/TraceInfo2DManager.cpp @@ -0,0 +1,125 @@ +/**************************************************************************** +** Copyright 2021 The Open Group +** Copyright 2021 Bluware, Inc. 
+** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +****************************************************************************/ + +#include "TraceInfo2DManager.h" + +TraceInfo2DManagerStore::TraceInfo2DManagerStore(const TraceInfo2D* pData, int64_t count) : m_traceInfo(pData, pData + count) +{ +} + +void TraceInfo2DManagerStore::Clear() +{ + m_traceInfo.clear(); + ClearEnsembleNumberMap(); +} + +int TraceInfo2DManagerStore::Count() +{ + return static_cast(m_traceInfo.size()); +} + +const TraceInfo2D& TraceInfo2DManagerStore::Get(int traceNumber) +{ + return m_traceInfo.at(traceNumber); +} + +const TraceInfo2D& TraceInfo2DManagerStore::Front() +{ + return m_traceInfo.front(); +} + +int TraceInfo2DManagerStore::GetIndexOfEnsembleNumber(int ensembleNumber) +{ + if (m_ensembleToIndex.size() != m_traceInfo.size()) + { + BuildEnsembleNumberMap(); + } + + const auto + iter = m_ensembleToIndex.find(ensembleNumber); + assert(iter != m_ensembleToIndex.end()); + if (iter == m_ensembleToIndex.end()) + { + return 0; + } + + return iter->second; +} + +void TraceInfo2DManagerStore::BuildEnsembleNumberMap() +{ + m_ensembleToIndex.clear(); + for (int index = 0; index < static_cast(m_traceInfo.size()); ++index) + { + m_ensembleToIndex.insert(std::make_pair(m_traceInfo[index].ensembleNumber, index)); + } +} + +void TraceInfo2DManagerStore::ClearEnsembleNumberMap() +{ + m_ensembleToIndex.clear(); +} + +TraceInfo2DManagerImpl::TraceInfo2DManagerImpl(SEGY::Endianness endianness, 
double scaleOverride, const SEGY::HeaderField& coordinateScaleHeaderField, const SEGY::HeaderField& xCoordinateHeaderField, const SEGY::HeaderField& yCoordinateHeaderField, const SEGY::HeaderField& startTimeHeaderField, const SEGY::HeaderField& espnHeaderField, const SEGY::HeaderField& ensembleNumberHeaderField) + : m_endianness(endianness), m_scaleOverride(scaleOverride), m_coordinateScaleHeaderField(coordinateScaleHeaderField), m_xCoordinateHeaderField(xCoordinateHeaderField), m_yCoordinateHeaderField(yCoordinateHeaderField), m_startTimeHeaderField(startTimeHeaderField), m_espnHeaderField(espnHeaderField), m_ensembleNumberHeaderField(ensembleNumberHeaderField), m_previousEnsembleNumber(-1) +{ +} + +void TraceInfo2DManagerImpl::Clear() +{ + TraceInfo2DManagerStore::Clear(); + m_previousEnsembleNumber = -1; +} + +void TraceInfo2DManagerImpl::AddTraceInfo(const char* traceHeader) +{ + double coordScaleFactor = 1.0; + + if (m_scaleOverride == 0.0) + { + int scale = SEGY::ReadFieldFromHeader(traceHeader, m_coordinateScaleHeaderField, m_endianness); + if (scale < 0) + { + coordScaleFactor = 1.0 / static_cast(std::abs(scale)); + } + else if (scale > 0) + { + coordScaleFactor = 1.0 * static_cast(std::abs(scale)); + } + } + else + { + coordScaleFactor = m_scaleOverride; + } + + TraceInfo2D traceInfo{}; + + traceInfo.energySourcePointNumber = SEGY::ReadFieldFromHeader(traceHeader, m_espnHeaderField, m_endianness); + traceInfo.ensembleNumber = SEGY::ReadFieldFromHeader(traceHeader, m_ensembleNumberHeaderField, m_endianness); + traceInfo.x = coordScaleFactor * SEGY::ReadFieldFromHeader(traceHeader, m_xCoordinateHeaderField, m_endianness); + traceInfo.y = coordScaleFactor * SEGY::ReadFieldFromHeader(traceHeader, m_yCoordinateHeaderField, m_endianness); + traceInfo.startTime = SEGY::ReadFieldFromHeader(traceHeader, m_startTimeHeaderField, m_endianness); + + if (m_previousEnsembleNumber != traceInfo.ensembleNumber) + { + m_traceInfo.push_back(traceInfo); + 
m_previousEnsembleNumber = traceInfo.ensembleNumber; + + // any modification to trace info collection should invalidate the ensemble number mapping + ClearEnsembleNumberMap(); + } +} diff --git a/tools/SEGYImport/TraceInfo2DManager.h b/tools/SEGYImport/TraceInfo2DManager.h new file mode 100644 index 0000000000000000000000000000000000000000..0b7be871bfaeb1b4ad689b15fa4da36bc0b7b854 --- /dev/null +++ b/tools/SEGYImport/TraceInfo2DManager.h @@ -0,0 +1,100 @@ +/**************************************************************************** +** Copyright 2021 The Open Group +** Copyright 2021 Bluware, Inc. +** +** Licensed under the Apache License, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** http://www.apache.org/licenses/LICENSE-2.0 +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+****************************************************************************/ + +#pragma once + +#include "SEGYUtils/SEGYFileInfo.h" +#include + +struct TraceInfo2D +{ + int energySourcePointNumber; + int ensembleNumber; + double x; + double y; + int startTime; +}; + +class TraceInfo2DManager +{ +public: + virtual ~TraceInfo2DManager() = default; + + virtual void Clear() {} + + virtual void AddTraceInfo(const char* traceHeader) {} + + virtual int Count() { return 0; } + + virtual const TraceInfo2D& Get(int traceNumber) { return m_placeHolder; } + + virtual const TraceInfo2D& Front() { return m_placeHolder; } + + virtual int GetIndexOfEnsembleNumber(int ensembleNumber) { return 0; } + +private: + TraceInfo2D m_placeHolder{}; +}; + +class TraceInfo2DManagerStore : public TraceInfo2DManager +{ +public: + TraceInfo2DManagerStore() = default; + + TraceInfo2DManagerStore(const TraceInfo2D* pData, int64_t count); + + void Clear() override; + + int Count() override; + + const TraceInfo2D& Get(int traceNumber) override; + + const TraceInfo2D& Front() override; + + int GetIndexOfEnsembleNumber(int ensembleNumber) override; + +protected: + std::vector m_traceInfo; + std::unordered_map + m_ensembleToIndex; + + void BuildEnsembleNumberMap(); + + void ClearEnsembleNumberMap(); +}; + +class TraceInfo2DManagerImpl : public TraceInfo2DManagerStore +{ +public: + TraceInfo2DManagerImpl(SEGY::Endianness endianness, double scaleOverride, const SEGY::HeaderField& coordinateScaleHeaderField, const SEGY::HeaderField& xCoordinateHeaderField, const SEGY::HeaderField& yCoordinateHeaderField, const SEGY::HeaderField& startTimeHeaderField, const SEGY::HeaderField& espnHeaderField, const SEGY::HeaderField& ensembleNumberHeaderField); + + void Clear() override; + + void AddTraceInfo(const char* traceHeader) override; + +private: + SEGY::Endianness m_endianness; + double m_scaleOverride; + SEGY::HeaderField m_coordinateScaleHeaderField; + SEGY::HeaderField m_xCoordinateHeaderField; + 
SEGY::HeaderField m_yCoordinateHeaderField; + SEGY::HeaderField m_startTimeHeaderField; + SEGY::HeaderField m_espnHeaderField; + SEGY::HeaderField m_ensembleNumberHeaderField; + + int m_previousEnsembleNumber; +}; \ No newline at end of file