Commit 8227427d authored by Jørgen Lind
Browse files

Add TextHeader printing functionality to SEGYImport

parent 6e2da541
Pipeline #77707 passed with stages
in 20 minutes and 3 seconds
......@@ -22,6 +22,9 @@
#include <string.h>
#include <assert.h>
#include <algorithm>
#include <string>
namespace SEGY
{
......@@ -314,4 +317,94 @@ IsSEGYTypeWithGatherOffset(const SEGYType segyType)
return segyType == SEGY::SEGYType::Prestack || IsSEGYTypeUnbinned(segyType);
}
// Heuristically decides whether a text header is EBCDIC encoded.
//
// Only the first min(bufferSize, 3200) bytes of buffer are inspected. The
// header is reported as EBCDIC when it contains strictly more 0x40 bytes
// (the EBCDIC space) than 0x20 bytes (the ASCII space); a tie, including an
// empty buffer, yields false.
bool autoDetectSEGYTextHeaderIsEBCDIC(const void* buffer, size_t bufferSize)
{
  const size_t scanLength = std::min(bufferSize, size_t(3200));
  const char* const first = static_cast<const char*>(buffer);
  const char* const last = first + scanLength;

  const auto asciiSpaces = std::count(first, last, char(0x20));
  const auto ebcdicSpaces = std::count(first, last, char(0x40));

  return ebcdicSpaces > asciiSpaces;
}
// Converts an EBCDIC encoded text header to ASCII.
//
// inputBuffer / inputBufferSize    EBCDIC bytes to convert; at most 3200 bytes
//                                  are consumed.
// outputBuffer / outputBufferSize  Destination for the converted text. It must
//                                  not overlap the input. No terminating NUL is
//                                  written.
// columnWidth                      When > 0, a line break is inserted after
//                                  every columnWidth input bytes (and after the
//                                  final, possibly shorter, line) as long as
//                                  there is room in the output. When <= 0 the
//                                  bytes are converted without line breaks.
//
// Returns the number of bytes written to outputBuffer, which never exceeds
// outputBufferSize. If the output buffer is too small to hold the input plus
// the inserted line breaks, the result is truncated.
//
// EBCDIC codes without an entry in the table are converted to 0.
// The process is aborted if the two buffers overlap.
size_t convertSEGYEBCDICHeaderToASCII(const void* inputBuffer, size_t inputBufferSize, char* outputBuffer, size_t outputBufferSize, int columnWidth)
{
  static const char ebcdicToAscii[256] =
  {
    /*   0*/ 0, 0, 0, 0, 0, 0, 0, 0,
    /*   8*/ 0, 0, 0, 0, 0, '\r', 0, 0,
    /*  16*/ 0, 0, 0, 0, 0, 0, 0, 0,
    /*  24*/ 0, 0, 0, 0, 0, 0, 0, 0,
    /*  32*/ 0, 0, 0, 0, 0, '\n', 0, 0,
    /*  40*/ 0, 0, 0, 0, 0, 0, 0, 0,
    /*  48*/ 0, 0, 0, 0, 0, 0, 0, 0,
    /*  56*/ 0, 0, 0, 0, 0, 0, 0, 0,
    /*  64*/ ' ', 0, 0, 0, 0, 0, 0, 0,
    /*  72*/ 0, 0, 0, '.', '<', '(', '+', 0,
    /*  80*/ '&', 0, 0, 0, 0, 0, 0, 0,
    /*  88*/ 0, 0, '!', '$', '*', ')', ';', 0,
    /*  96*/ '-', '/', 0, 0, 0, 0, 0, 0,
    /* 104*/ 0, 0, '|', ',', '%', '_', '>', '?',
    /* 112*/ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 120*/ 0, 0, ':', '#', '@', '\'', '=', '"',
    /* 128*/ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
    /* 136*/ 'h', 'i', 0, 0, 0, 0, 0, 0,
    /* 144*/ 0, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
    /* 152*/ 'q', 'r', 0, 0, 0, 0, 0, 0,
    /* 160*/ 0, '~', 's', 't', 'u', 'v', 'w', 'x',
    /* 168*/ 'y', 'z', 0, 0, 0, 0, 0, 0,
    /* 176*/ 0, 0, 0, 0, 0, 0, 0, 0,
    /* 184*/ 0, '`', 0, 0, 0, 0, 0, 0,
    /* 192*/ '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
    /* 200*/ 'H', 'I', 0, 0, 0, 0, 0, 0,
    /* 208*/ '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
    /* 216*/ 'Q', 'R', 0, 0, 0, 0, 0, 0,
    /* 224*/ '\\', 0, 'S', 'T', 'U', 'V', 'W', 'X',
    /* 232*/ 'Y', 'Z', 0, 0, 0, 0, 0, 0,
    /* 240*/ '0', '1', '2', '3', '4', '5', '6', '7',
    /* 248*/ '8', '9', 0, 0, 0, 0, 0, 0,
  };

  const char* const input = static_cast<const char*>(inputBuffer);

  // memcpy requires disjoint ranges, so reject an overlap in either direction:
  // the output starting inside the input, or the input starting inside the output.
  const bool outputStartsInsideInput = outputBuffer >= input && outputBuffer < input + inputBufferSize;
  const bool inputStartsInsideOutput = inputBufferSize > 0 && input >= outputBuffer && input < outputBuffer + outputBufferSize;
  if (outputStartsInsideInput || inputStartsInsideOutput)
  {
    fprintf(stderr, "Overlapping input and output buffer.");
    abort();
  }

  const size_t toCopy = std::min(inputBufferSize, std::min(outputBufferSize, size_t(3200)));
  if (toCopy == 0)
  {
    // Nothing to convert; also avoids handing possibly-null pointers to memcpy.
    return 0;
  }

  size_t copied = 0;
  size_t filled = 0;
  if (columnWidth > 0)
  {
    while (copied < toCopy && filled < outputBufferSize)
    {
      // The inserted line breaks make 'filled' run ahead of 'copied', so each
      // line has to be limited by the space left in the output as well as by
      // the input that is left; otherwise the copy can run past the end of
      // outputBuffer.
      const size_t remainingInput = toCopy - copied;
      const size_t remainingOutput = outputBufferSize - filled;
      const size_t copyLineSize = std::min(size_t(columnWidth), std::min(remainingInput, remainingOutput));
      memcpy(outputBuffer + filled, input + copied, copyLineSize);
      copied += copyLineSize;
      filled += copyLineSize;
      if (filled < outputBufferSize)
      {
        // 0x25 is the EBCDIC code the table below maps to '\n'.
        outputBuffer[filled] = 0x25;
        filled++;
      }
    }
  }
  else
  {
    memcpy(outputBuffer, input, toCopy);
    filled = toCopy;
  }

  // Translate in place; this also turns the inserted 0x25 bytes into '\n'.
  std::transform(outputBuffer, outputBuffer + filled, outputBuffer,
                 [](char code) { return ebcdicToAscii[static_cast<unsigned char>(code)]; });
  return filled;
}
} // end namespace SEGY
......@@ -323,6 +323,9 @@ OPENVDS_EXPORT bool IsSEGYTypeUnbinned(SEGYType segyType);
OPENVDS_EXPORT bool IsSEGYTypeWithGatherOffset(SEGYType segyType);
OPENVDS_EXPORT bool autoDetectSEGYTextHeaderIsEBCDIC(const void* buffer, size_t bufferSize);
OPENVDS_EXPORT size_t convertSEGYEBCDICHeaderToASCII(const void* inputBuffer, size_t inputBufferSize, char* outputBuffer, size_t outputBufferSize, int columnWidth = 80);
} // end namespace SEGY
#endif
......@@ -1474,6 +1474,7 @@ main(int argc, char* argv[])
bool prestack = false;
bool traceOrderByOffset = true;
bool jsonOutput = false;
bool printSegyTextHeader = false;
bool help = false;
bool helpConnection = false;
bool version = false;
......@@ -1515,6 +1516,7 @@ main(int argc, char* argv[])
options.add_option("", "", "uniqueID", "Generate a new globally unique ID when scanning the input SEG-Y file.", cxxopts::value<bool>(uniqueID), "");
options.add_option("", "", "disable-persistentID", "Disable the persistentID usage, placing the VDS directly into the url location.", cxxopts::value<bool>(disablePersistentID), "");
options.add_option("", "", "json-output", "Enable json output.", cxxopts::value<bool>(jsonOutput), "");
options.add_option("", "", "print-text-header", "Print the text header of the input segy file and exit.", cxxopts::value<bool>(printSegyTextHeader), "");
// TODO add option for turning off traceOrderByOffset
options.add_option("", "h", "help", "Print this help information", cxxopts::value<bool>(help), "");
......@@ -1728,6 +1730,33 @@ main(int argc, char* argv[])
return EXIT_FAILURE;
}
// --print-text-header: write the text header of the first input to stdout and
// exit without importing anything.
if (printSegyTextHeader)
{
auto& dataProvider = dataProviders.front();
// Read 3200 bytes for the text header.
// NOTE(review): the second argument to Read is presumably the file offset (0 = start of file) - confirm.
std::unique_ptr<uint8_t[]> inputData(new uint8_t[3200]);
if (!dataProvider.Read(inputData.get(), 0, 3200, error))
{
OpenVDS::printError(jsonOutput, "IO", "Could not read SEGY Text header", errorFileName, error.string);
return EXIT_FAILURE;
}
std::string output;
// 3250 leaves room for the 3200 header bytes plus the line breaks the
// converter inserts at its default column width of 80 (3200 / 80 = 40).
output.resize(3250);
if (SEGY::autoDetectSEGYTextHeaderIsEBCDIC(inputData.get(), 3200))
{
auto outputSize = SEGY::convertSEGYEBCDICHeaderToASCII(inputData.get(), 3200, &output[0], output.size());
// Shrink to the number of bytes the converter actually produced.
output.resize(outputSize);
}
else
{
// Not detected as EBCDIC: pass the bytes through unchanged (no line breaks are added).
output.resize(3200);
memcpy(&output[0], inputData.get(), 3200);
}
fwrite(output.data(), 1, output.size(), stdout);
// The trailing newline goes to stderr, so stdout carries only the header text.
fprintf(stderr, "\n");
return EXIT_SUCCESS;
}
SEGYFileInfo
fileInfo(headerEndianness);
fileInfo.m_segyType = segyType;
......
add_executable(VDSInfo VDSInfo.cpp ${COMMON_DIR}/Base64/Base64.cpp)
target_link_libraries(VDSInfo PUBLIC openvds fmt::fmt jsoncpp_lib_static)
target_link_libraries(VDSInfo PUBLIC openvds segyutils fmt::fmt jsoncpp_lib_static)
setCompilerFlagsForTools(VDSInfo)
......@@ -3,6 +3,7 @@
#include <OpenVDS/OpenVDS.h>
#include <OpenVDS/VolumeDataLayout.h>
#include <SEGYUtils/SEGY.h>
#include <Base64/Base64.h>
......@@ -17,42 +18,6 @@ namespace OpenVDS
extern Json::Value SerializeChannelDescriptors(VolumeDataLayout const &volumeDataLayout);
}
// Lookup table indexed by EBCDIC byte value (0-255) giving the corresponding
// ASCII character. Codes without a mapping in this table are 0. The leading
// comment on each row is the index of the row's first entry.
const char ebcdic_to_ascii[256] =
{
/* 0*/ 0, 0, 0, 0, 0, 0, 0, 0,
/* 8*/ 0, 0, 0, 0, 0, '\r', 0, 0,
/* 16*/ 0, 0, 0, 0, 0, 0, 0, 0,
/* 24*/ 0, 0, 0, 0, 0, 0, 0, 0,
/* 32*/ 0, 0, 0, 0, 0, '\n', 0, 0,
/* 40*/ 0, 0, 0, 0, 0, 0, 0, 0,
/* 48*/ 0, 0, 0, 0, 0, 0, 0, 0,
/* 56*/ 0, 0, 0, 0, 0, 0, 0, 0,
/* 64*/ ' ', 0, 0, 0, 0, 0, 0, 0,
/* 72*/ 0, 0, 0, '.', '<', '(', '+', 0,
/* 80*/ '&', 0, 0, 0, 0, 0, 0, 0,
/* 88*/ 0, 0, '!', '$', '*', ')', ';', 0,
/* 96*/ '-', '/', 0, 0, 0, 0, 0, 0,
/* 104*/ 0, 0, '|', ',', '%', '_', '>', '?',
/* 112*/ 0, 0, 0, 0, 0, 0, 0, 0,
/* 120*/ 0, 0, ':', '#', '@', '\'', '=', '"',
/* 128*/ 0, 'a', 'b', 'c', 'd', 'e', 'f', 'g',
/* 136*/ 'h', 'i', 0, 0, 0, 0, 0, 0,
/* 144*/ 0, 'j', 'k', 'l', 'm', 'n', 'o', 'p',
/* 152*/ 'q', 'r', 0, 0, 0, 0, 0, 0,
/* 160*/ 0, '~', 's', 't', 'u', 'v', 'w', 'x',
/* 168*/ 'y', 'z', 0, 0, 0, 0, 0, 0,
/* 176*/ 0, 0, 0, 0, 0, 0, 0, 0,
/* 184*/ 0, '`', 0, 0, 0, 0, 0, 0,
/* 192*/ '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
/* 200*/ 'H', 'I', 0, 0, 0, 0, 0, 0,
/* 208*/ '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
/* 216*/ 'Q', 'R', 0, 0, 0, 0, 0, 0,
/* 224*/ '\\', 0, 'S', 'T', 'U', 'V', 'W', 'X',
/* 232*/ 'Y', 'Z', 0, 0, 0, 0, 0, 0,
/* 240*/ '0', '1', '2', '3', '4', '5', '6', '7',
/* 248*/ '8', '9', 0, 0, 0, 0, 0, 0,
};
std::string compressionStringForCompressionMethod(OpenVDS::CompressionMethod method)
{
switch (method)
......@@ -184,29 +149,18 @@ static std::string convertToString(const Json::Value &value)
return stream.str();
}
// Guesses whether blob should be decoded as EBCDIC.
//
// Returns true when more than 20% of the bytes are '@' (0x40, the EBCDIC
// space) and more than 30% of the bytes have a value above 127.
static bool autodetectDecode(const std::vector<uint8_t> &blob)
{
  const auto highByteCount = std::count_if(blob.begin(), blob.end(), [](uint8_t value) { return value > 127; });
  // '@' is 64, so it can never also be counted as a high byte.
  const auto ebcdicSpaceCount = std::count(blob.begin(), blob.end(), uint8_t('@'));

  const bool enoughSpaces = ebcdicSpaceCount > blob.size() * 0.20;
  const bool enoughHighBytes = highByteCount > blob.size() * 0.3;
  return enoughSpaces && enoughHighBytes;
}
// Replaces every byte of ebcdic, in place, with its entry in the
// ebcdic_to_ascii lookup table.
static void decodedEbcdic(std::vector<uint8_t> &ebcdic)
{
  for (uint8_t &byte : ebcdic)
  {
    byte = static_cast<uint8_t>(ebcdic_to_ascii[byte]);
  }
}
int main(int argc, char **argv)
{
cxxopts::Options options("VDSInfo", "VDSInfo - A tool for extracting info from a VDS\n\nUse -H or see online documentation for connection string paramters:\nhttp://osdu.pages.community.opengroup.org/platform/domain-data-mgmt-services/seismic/open-vds/connection.html\n");
std::string help_info = R"info(VDSInfo - A tool for extracting info from a VDS
Use -H or see online documentation for connection string paramters:
http://osdu.pages.community.opengroup.org/platform/domain-data-mgmt-services/seismic/open-vds/connection.html
Example for getting the TextHeader from the imported SEGY file autodecoded EBCDIC to ASCII:
VDSInfo --metadata-name TextHeader -b -e -w 80 s3://bluware-jorgen-dev/volve
)info";
cxxopts::Options options("VDSInfo", help_info);
options.positional_help("<url>");
std::vector<std::string> urlarg;
......@@ -403,20 +357,26 @@ int main(int argc, char **argv)
bool decodeEBCDIC = false;
if (metadataAutoDecodeEBCDIC)
{
decodeEBCDIC = autodetectDecode(vector);
decodeEBCDIC = SEGY::autoDetectSEGYTextHeaderIsEBCDIC(vector.data(), vector.size());
}
std::string output;
auto outputSize = 3200;
if (textDecodeWidth > 0)
outputSize += 3200 / textDecodeWidth + 1;
output.resize(outputSize);
if (decodeEBCDIC)
{
decodedEbcdic(vector);
auto filled = SEGY::convertSEGYEBCDICHeaderToASCII(vector.data(), vector.size(), &output[0], output.size(), textDecodeWidth);
output.resize(filled);
}
int i = 0;
while(i < int(vector.size()))
else
{
int to_copy = std::min(textDecodeWidth, int(vector.size() - i));
fwrite(vector.data() + i, 1, to_copy, stdout);
fwrite("\n", 1, 1, stdout);
i += to_copy;
output.resize(std::min(vector.size(), output.size()));
memcpy(&output[0], vector.data(), output.size());
}
fwrite(output.data(), 1, output.size(), stdout);
fprintf(stderr, "\n");
}
}
else
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment