From 3616664b987610c7aab64350eb9c8ca011fe4f90 Mon Sep 17 00:00:00 2001 From: Paal Kvamme Date: Fri, 21 May 2021 11:46:57 +0200 Subject: [PATCH] Fix bug in alignment when appending to an empty file created by the old accessor. --- Makefile | 1 + native/src/Makefile | 8 ++- native/src/impl/bulk.cpp | 30 +++++++-- native/src/runtests.sh | 1 + native/src/test/test_reopen.cpp | 92 +++++++++++++++++++++++++++- native/src/test/testdata.bat | 2 + native/windows/OpenZGY.Test.vcxproj | 4 +- testdata/EmptyOldFile.zgy.bz2 | Bin 0 -> 324 bytes 8 files changed, 129 insertions(+), 9 deletions(-) create mode 100644 testdata/EmptyOldFile.zgy.bz2 diff --git a/Makefile b/Makefile index bd3eeaf..03309f0 100644 --- a/Makefile +++ b/Makefile @@ -93,6 +93,7 @@ testscripts: testdata/Empty-v1.zgy.bz2 \ testdata/Empty-v3.zgy.bz2 \ testdata/Fancy-int8.zgy.bz2 \ + testdata/EmptyOldFile.zgy.bz2 \ $(wildcard private/refreshtoken.sh*) \ $(wildcard private/grabtoken.sh*) \ $(wildcard private/showtoken.py*) diff --git a/native/src/Makefile b/native/src/Makefile index 2dd81cd..4b357e4 100644 --- a/native/src/Makefile +++ b/native/src/Makefile @@ -104,7 +104,7 @@ tools: $(BIN_DIR)/zgycopyc $(BIN_DIR)/zgydumpc $(BIN_DIR)/bulk_inspector $(BIN_D nm: $(LIBDSO) nm -gpC $(BUILD_ROOT)/bin/libopenzgy.so | grep ' [TVW] ' | cut -d\ -f3- | sort -u -data: $(TESTDATARW)/Empty-v1.zgy $(TESTDATARW)/Empty-v3.zgy $(TESTDATARW)/Fancy-int8.zgy +data: $(TESTDATARW)/Empty-v1.zgy $(TESTDATARW)/Empty-v3.zgy $(TESTDATARW)/Fancy-int8.zgy $(TESTDATARW)/EmptyOldFile.zgy LATEX := $(wildcard /usr/bin/latex) PDFTK := $(wildcard /usr/bin/pdftk) @@ -159,6 +159,10 @@ $(TESTDATARW)/Fancy-int8.zgy: $(TESTDATA)/Fancy-int8.zgy.bz2 mkdir -p $(@D) bzip2 -dkc $^ > $@ +$(TESTDATARW)/EmptyOldFile.zgy: $(TESTDATA)/EmptyOldFile.zgy.bz2 + mkdir -p $(@D) + bzip2 -dkc $^ > $@ + $(MAIN_OBJ) $(TEST_OBJ) $(TOOL_OBJ): $(OBJ_DIR)/%.o: %.cpp Makefile | $(ALL_OBJ_DIR) $(CXX) -c -o $@ $(CPPFLAGS) $(CXXFLAGS) $< @@ -222,7 +226,7 @@ $(ALL_OBJ_DIR) $(INC_DIR): .PHONY: clean clobber clean: packages-clean $(RM) -rf $(OBJ_DIR) $(BUILDROOT)/build/temp/native - $(RM) $(TESTDATARW)/Empty-v1.zgy $(RM) $(TESTDATARW)/Empty-v3.zgy $(TESTDATARW)/Fancy-int8.zgy + $(RM) $(TESTDATARW)/Empty-v1.zgy $(RM) $(TESTDATARW)/Empty-v3.zgy $(TESTDATARW)/Fancy-int8.zgy $(TESTDATARW)/EmptyOldFile.zgy clobber: clean $(RM) -rf $(BUILDROOT)/build/deploy/native diff --git a/native/src/impl/bulk.cpp b/native/src/impl/bulk.cpp index 8b74f48..a306e00 100644 --- a/native/src/impl/bulk.cpp +++ b/native/src/impl/bulk.cpp @@ -221,6 +221,8 @@ struct WriteBrickArgPack * * There are new fields rawdata (replaces data) and brickstatus. * Also the compressor is no longer needed as compression is now done. + * But we need to know whether compression was done because that might + * affect alignment whan a new brick is allocated. */ struct WriteNowArgPack { @@ -229,16 +231,19 @@ struct WriteNowArgPack std::int64_t fileoffset; rawdata_t rawdata; BrickStatus brickstatus; + bool align; WriteNowArgPack(const std::array& brickpos_in, std::int32_t lod_in, std::int64_t fileoffset_in, const rawdata_t rawdata_in, - BrickStatus brickstatus_in) + BrickStatus brickstatus_in, + bool align_in) : brickpos(brickpos_in) , lod(lod_in) , fileoffset(fileoffset_in) , rawdata(rawdata_in) , brickstatus(brickstatus_in) + , align(align_in) { } std::string toString() const @@ -251,6 +256,8 @@ struct WriteNowArgPack ss << ", fileoffset=" << std::hex << fileoffset << std::dec; // Should use symbolic names for enums, but this is just for verbose logs. ss << ", brickstatus=" << (int)brickstatus; + if (align) + ss << ", alignment needed"; return ss.str(); } }; @@ -1305,14 +1312,14 @@ ZgyInternalBulk::_setPaddingSamples( * The operation is currently not thread safe. For cloud writes it would * not help much to allow multi threading since most writes end up being * buffered anyway. Supporting parallel writes in that scenario would - * require changes at a lower level. Also, SegmentClosedExceptopn would + * require changes at a lower level. Also, SegmentClosedException would * get really problematic to handle. * * For local writes, temporarily dropping a write lock while executing * the two xx_write() calls might help. But that requires a lot of testing. * Don't do it unless certain there will be a substantial benefit. * - * Args: brickpos, lod, fileoffset, rawdata, brickstatus. + * Args: brickpos, lod, fileoffset, rawdata, brickstatus, align. */ void ZgyInternalBulk::_writeWithRetry(const WriteNowArgPack& args) @@ -1338,6 +1345,15 @@ ZgyInternalBulk::_writeWithRetry(const WriteNowArgPack& args) _logger(2, std::stringstream() << "Allocating space at EOF: " << std::hex << fileoffset << std::dec << ")\n"); + // Normally fileoffset will be aligned already. The test is needed + // if updating a file created by the old accessor because that + // code won't add padding until the first brick is written. When + // updating ZFP-compressed files or files on the cloud those must + // have been created by OpenZGY so there is no issue. + if (args.align && !this->_file->xx_iscloud()) { + const std::int64_t alignto = this->_metadata->ih().bytesperbrick(); + fileoffset = ((fileoffset + alignto - 1) / alignto) * alignto; + } this->_file->xx_write(args.rawdata.first.get(), fileoffset, args.rawdata.second, UsageHint::Data); LookupTable::setBrickFilePosition (args.brickpos[0], args.brickpos[1], args.brickpos[2], args.lod, @@ -1414,8 +1430,14 @@ ZgyInternalBulk::_writeOneNormalBrick(const WriteBrickArgPack& args) //data->byteswap(); } // Arguments that our caller needs to be pass to _writeWithRetry() + // All normal bricks are flagged as needing alignment if on-prem. + // Technically I could have passed !compressor so that uncompressed + // bricks that were attempted compressed but didn't make it will not + // need to be aligned. But that case is too obscure to warrant extra + // testing. return std::make_shared - (args.brickpos, args.lod, args.fileoffset, rawdata, brickstatus); + (args.brickpos, args.lod, args.fileoffset, rawdata, brickstatus, + brickstatus == BrickStatus::Normal); } /** diff --git a/native/src/runtests.sh b/native/src/runtests.sh index 406d21f..87e179f 100755 --- a/native/src/runtests.sh +++ b/native/src/runtests.sh @@ -27,6 +27,7 @@ mkdir -p build/testdata bzip2 -dkc testdata/Empty-v1.zgy.bz2 > build/testdata/Empty-v1.zgy bzip2 -dkc testdata/Empty-v3.zgy.bz2 > build/testdata/Empty-v3.zgy bzip2 -dkc testdata/Fancy-int8.zgy.bz2 > build/testdata/Fancy-int8.zgy +bzip2 -dkc testdata/EmptyOldFile.zgy.bz2 > build/testdata/EmptyOldFile.zgy /bin/rm -rf build/run /bin/mkdir -p build/run diff --git a/native/src/test/test_reopen.cpp b/native/src/test/test_reopen.cpp index a55f9ba..79d1389 100644 --- a/native/src/test/test_reopen.cpp +++ b/native/src/test/test_reopen.cpp @@ -1226,7 +1226,6 @@ test_checkpoint_nofinalize() static void test_reopen_empty() { - typedef OpenZGY::IZgyWriter::size3i_t size3i_t; LocalFileAutoDelete lad("reopen_empty.zgy"); { @@ -1415,6 +1414,96 @@ test_reopen() TEST_EQUAL(runs, 5+32+12); } +/** + * Opening an empty file created by the old ZGY accessor has some + * challenges with respect to alignment. + */ +static void +test_reopen_zgypublic() +{ + typedef OpenZGY::IZgyWriter::size3i_t size3i_t; + LocalFileAutoDelete lad("reopen_zgypublic.zgy"); + std::string oldname = get_testdata("EmptyOldFile.zgy"); + if (!TEST_CHECK(copy_file(oldname, lad.name()))) + return; + + ZgyWriterArgs secondargs = ZgyWriterArgs().filename(lad.name()); + + { + if (verbose()) + std::cout << "Append, first brick all 1000\n"; + std::shared_ptr writer = + OpenZGY::IZgyWriter::reopen(secondargs); + std::vector data(64*64*64, 1000); + std::fill(data.data() + 64*64*64 - 64, data.data() + 64*64*64, 0); + writer->write(size3i_t{0,0,0}, size3i_t{64,64,64}, data.data()); + writer->close(); + } + + { + if (verbose()) + std::cout << "Append, write second brick all 2000\n"; + std::shared_ptr writer = + OpenZGY::IZgyWriter::reopen(secondargs); + // Will not be a round number if the code failed to add padding. + TEST_EQUAL(writer->filestats()->fileSize(), 64*64*64*16); + std::vector data(64*64*64, 2000); + writer->write(size3i_t{64,64,64}, size3i_t{64,64,64}, data.data()); + writer->close(); + } + + { + if (verbose()) + std::cout << "Overlap, write 3000 in some samples\n"; + std::shared_ptr writer = + OpenZGY::IZgyWriter::reopen(secondargs); + std::vector data(65*65*65, 3000); + writer->write(size3i_t{96,96,96}, size3i_t{65,65,65}, data.data()); + writer->close(); + } + + static const auto inside = [](int ii, int jj, int kk, const size3i_t& start, const size3i_t& size) { + return (ii >= start[0] && ii < start[0] + size[0] && + jj >= start[1] && jj < start[1] + size[1] && + kk >= start[2] && kk < start[2] + size[2]); + }; + static const auto expect = [](int ii, int jj, int kk) { + return (inside(ii, jj, kk, size3i_t{96,96,96}, size3i_t{65,65,65}) ? + 3000 : + inside(ii, jj, kk, size3i_t{64,64,64}, size3i_t{64,64,64}) ? + 2000 : + inside(ii, jj, kk, size3i_t{63,63,0}, size3i_t{1,1,64}) ? + 0 : + inside(ii, jj, kk, size3i_t{0,0,0}, size3i_t{64,64,64}) ? + 1000 : + 0); + }; + + { + if (verbose()) + std::cout << "Read back and check\n"; + std::shared_ptr reader = + OpenZGY::IZgyReader::open(lad.name()); + std::vector check(192*192*192, -999); + reader->read(size3i_t{0,0,0}, size3i_t{192,192,192}, check.data(), 0); + reader->close(); + for (int ii=0; ii<192; ++ii) { + for (int jj=0; jj<192; ++jj) { + for (int kk=0; kk<192; ++kk) { + float value_expect = expect(ii, jj, kk); + float value_actual = check[ii*192*192 + jj*192 + kk]; + if (std::abs(value_actual - value_expect) > 0.001) { + if (!TEST_EQUAL_FLOAT(value_actual, value_expect, 0.001)) { + std::cout << "FAIL at (" << ii << "," << jj << "," << kk << ")\n"; + return; + } + } + } + } + } + } +} + class Register_reopen { public: @@ -1432,6 +1521,7 @@ public: register_test("reopen.empty", test_reopen_empty); register_test("reopen.reopen_bad_histo", test_reopen_bad_histo); register_test("reopen.reopen", test_reopen); + register_test("reopen.zgypublic", test_reopen_zgypublic); #ifdef HAVE_SD register_test("reopen.plain_sd", test_reopen_plain_sd); #endif diff --git a/native/src/test/testdata.bat b/native/src/test/testdata.bat index 60c698c..1525f21 100644 --- a/native/src/test/testdata.bat +++ b/native/src/test/testdata.bat @@ -5,10 +5,12 @@ set OUTDIR=%1 7z x -y -o%OUTDIR% ..\..\testdata\Empty-v1.zgy.bz2 7z x -y -o%OUTDIR% ..\..\testdata\Empty-v3.zgy.bz2 7z x -y -o%OUTDIR% ..\..\testdata\Fancy-int8.zgy.bz2 +7z x -y -o%OUTDIR% ..\..\testdata\EmptyOldFile.zgy.bz2 copy /B %OUTDIR%Empty-v1.zgy+,, %OUTDIR%Empty-v1.zgy copy /B %OUTDIR%Empty-v3.zgy+,, %OUTDIR%Empty-v3.zgy copy /B %OUTDIR%Fancy-int8.zgy+,, %OUTDIR%Fancy-int8.zgy +copy /B %OUTDIR%EmptyOldFile.zgy+,, %OUTDIR%EmptyOldFile.zgy exit /b 0 diff --git a/native/windows/OpenZGY.Test.vcxproj b/native/windows/OpenZGY.Test.vcxproj index 153e234..1a21cbd 100644 --- a/native/windows/OpenZGY.Test.vcxproj +++ b/native/windows/OpenZGY.Test.vcxproj @@ -117,7 +117,7 @@ Document %(FullPath) $(SolutionDir)..\..\build\testdata\ - $(SolutionDir)..\..\build\testdata\Empty-V1.zgy;$(SolutionDir)..\..\build\testdata\Empty-V3.zgy;$(SolutionDir)..\..\build\testdata\Fancy-int8.zgy + $(SolutionDir)..\..\build\testdata\Empty-V1.zgy;$(SolutionDir)..\..\build\testdata\Empty-V3.zgy;$(SolutionDir)..\..\build\testdata\Fancy-int8.zgy;$(SolutionDir)..\..\build\testdata\EmptyOldFile.zgy false %(FullPath) $(SolutionDir)..\..\build\testdata\ $(SolutionDir)..\..\build\testdata\Empty-V1.zgy;$(SolutionDir)..\..\build\testdata\Empty-V3.zgy;$(SolutionDir)..\..\build\testdata\Fancy-int8.zgy @@ -129,4 +129,4 @@ - + \ No newline at end of file diff --git a/testdata/EmptyOldFile.zgy.bz2 b/testdata/EmptyOldFile.zgy.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..a8af2b71b390e95894e198e050fb8a506c0fd138 GIT binary patch literal 324 zcmV-K0lWS}T4*^jL0KkKSzC&cjQ|T(fB*YQzytW;Dun;w*>o;u+_gXflodcR2uz89 zf`U;2Knbt__=sw!rm5;|lhQDQOqm!%C!sMgOb}qvJtvg*rl!PsP;g`#Gy#)LLrn%j zh+zXjXvD;5002cL382xS28{p!007aT00004%4Q5S$cw4JmDAQr3W7`37s-M^omZMIoRD&3w{!>DN=M*4vb5j zduRU*6Y2Ir$N?En5lA9-*GP#>7ri~;J)2<3Ur3<4m;MWLXT`b?o`A