Commit 6da89693 authored by Paal Kvamme's avatar Paal Kvamme
Browse files

Refactoring after the previous change.

parent 43f2971e
Pipeline #50203 passed with stages
in 8 minutes and 37 seconds
......@@ -857,30 +857,11 @@ public:
// it is obviously not possible to read everything up front.
_accessor_rw.reset(new InternalZGY::ZgyInternalBulk(_fd, _meta_rw, _meta_rw, compress));
// If low resolution data is not available, mark as dirty because
// even if there has been no change to the bulk data the code should
// by default try to create lods and statistics on close.
if (_meta->ih().nlods() !=
InternalZGY::LookupTable::usableBrickLOD
(this->_meta->ih().lodsizes(),
this->_meta->ih().brickoffsets(),
this->_meta->blup().lup(),
this->_meta->blup().lupend()))
this->_dirty = true;
// If statistics are missing this also indicates that genlod needs
// to be called. Without this extra check the statistics for a
// tiny 1-brick file might not be updated. Because such a file
// always has all its (zero) lowres bricks.
if (this->_meta->ih().scnt() == 0)
this->_dirty = true;
// Also if the histogram is missing everything must be rebuilt.
// The converse is not true. The histogram range may be set
// already even if finalize was not run. With an incremental update
// the histogram cannot change. With a full rebuild it can grow.
const InternalZGY::IHistHeaderAccess& hh = this->_meta->hh();
if (hh.samplecount() == 0 || hh.minvalue() > hh.maxvalue())
// If low resolution data, statistics, or histogram are not
// available, mark as dirty because even if there has been no
// change to the bulk data the code should by default try to
// create lods and statistics on close.
if (!_meta->has_finalized_feature())
this->_dirty = true;
// Consistency checks: Only files uploaded by OpenZGY can be updated.
......@@ -1427,43 +1408,6 @@ public:
this->_dirty = false;
}
/**
 * \brief Does the file use features the old ZGY-Public doesn't understand?
 *
 * \details
 * The file is reported as having v4 features when at least one of
 * these holds:
 *
 *   - The lookup table contains compressed bricks. Compressed data
 *     cannot be read by the old library.
 *   - Low resolution data is missing, i.e. the number of usable
 *     LODs reported by LookupTable::usableBrickLOD() differs from
 *     the nlods() stored in the info header. Testing for this is
 *     trivial but the old accessor just doesn't.
 *
 * Missing histogram and/or statistics can probably be tolerated by
 * the old library, so they are deliberately not part of the test.
 *
 * Tiny files consisting of just a single brick trigger several corner
 * cases. In practice we should never see such files but ideally they
 * ought to be handled. A tiny file has no lowres bricks so it doesn't
 * need a finalize for lowres, only for statistics. The relaxed check
 * used here is meant to treat such a file as finalized in the context
 * of choosing between v3 and v4. Code that decides whether to allow
 * re-open of a compressed finalized file should treat it as not
 * finalized. Tiny files should never have an incremental rebuild.
 *
 * \return true if the file must be flagged as version 4.
 */
bool _has_v4_features()
{
  const InternalZGY::IInfoHeaderAccess& ih = this->_meta_rw->ih();
  // Number of LOD levels for which all bricks are actually present.
  const std::int32_t usable_lods = InternalZGY::LookupTable::usableBrickLOD
    (ih.lodsizes(),
     ih.brickoffsets(),
     this->_meta_rw->blup().lup(),
     this->_meta_rw->blup().lupend());
  const bool has_compression = InternalZGY::LookupTable::hasBrickCompression
    (this->_meta_rw->blup().lup(),
     this->_meta_rw->blup().lupend());
  const bool has_lowres = (ih.nlods() == usable_lods);
  return has_compression || !has_lowres;
}
/**
* \brief Flush the file to disk and close it.
*
......@@ -1483,7 +1427,7 @@ public:
return; // looks like _close_internal alrady called.
// Prevent ZGY-Public from opening the file if appropriate.
this->_meta_rw->fh().set_version(_has_v4_features() ? 4 : 3);
this->_meta_rw->fh().set_version(_meta_rw->can_old_library_read() ? 3 : 4);
if (!this->errorflag()) {
this->_meta_rw->flushMeta(this->_fd);
......
......@@ -807,14 +807,6 @@ ZgyInternalBulk::readToNewBuffer(
*
* This method must be nonvirtual because it is also called from the
* constructor.
*
* The test for pre-existing low resolution bricks attempted here is
* somewhat paranoid. Checking statistics and histogram should have
* been sufficient because if called from the constructor then
* ZgyInternalMeta::initFromReopen() will have cleared the statistics
* if lowres was missing. If called from finalize() the caller knows
* whether lowres exists or not and if not should not have asked us to
* turn it on.
*/
void
ZgyInternalBulk::trackedBricksTryEnable(bool on)
......@@ -826,17 +818,9 @@ ZgyInternalBulk::trackedBricksTryEnable(bool on)
_modified_histo.reset();
}
else if (on) {
const IHistHeaderAccess& hh = _metadata_rw->hh();
const IInfoHeaderAccess& ih = _metadata_rw->ih();
const std::int64_t nlods = static_cast<std::int64_t>(ih.lodsizes().size());
const std::int64_t havelods = LookupTable::usableBrickLOD
(ih.lodsizes(), ih.brickoffsets(),
_metadata_rw->blup().lup(), _metadata_rw->blup().lupend());
const bool complete = (havelods == nlods &&
hh.samplecount() != 0 &&
hh.minvalue() <= hh.maxvalue() &&
ih.scnt() != 0);
if (complete) {
if (_metadata_rw->can_finalize_incremental()) {
const IHistHeaderAccess& hh = _metadata_rw->hh();
const IInfoHeaderAccess& ih = _metadata_rw->ih();
_modified_bricks.resize(ih.brickoffsets().back(), 0);
_modified_stats.reset(new StatisticData
(ih.scnt(), /*inf=*/0, ih.ssum(), ih.sssq(),
......
......@@ -2234,14 +2234,7 @@ ZgyInternalMeta::initFromReopen(const ZgyInternalWriterArgs& args_in, bool compr
ih.gpiline(), ih.gpxline(),
ih.gpx(), ih.gpy());
const std::int64_t nlods = static_cast<std::int64_t>(ih.lodsizes().size());
const std::int64_t havelods = LookupTable::usableBrickLOD
(ih.lodsizes(), ih.brickoffsets(),
this->_blup->lup(), this->_blup->lupend());
const bool complete = (havelods == nlods &&
this->_hh->samplecount() != 0 &&
ih.scnt() != 0);
if (!complete) {
if (!has_finalized_feature()) {
// Clear histogram, statistics, and low resolution data because
// everything will be rebuilt on finalize. The only thing kept
// is the previous histogram range. See ZgyInternalBulk ctor.
......@@ -2268,29 +2261,22 @@ ZgyInternalMeta::initFromReopen(const ZgyInternalWriterArgs& args_in, bool compr
&this->_blup->lup(), &this->_blup->lupend());
}
if (complete && compress) {
// Disallow opening a finalized file, even if uncompressed, if the
// second open specifies compression. Because we must assume that
// the application is going to finalize in this session and that
// would not be allowed. To be really pedantic I could allow the
// compressor to be set as long as the lodcompressor is not,
// but this is getting ridiculous.
throw OpenZGY::Errors::ZgyUserError
("A finalized file cannot have compressed data appended.");
}
if (complete) {
// Disallow any write of a file that has been finalized with compressed
// low resolution bricks. Because with current rules you won't be
// allowed to re-finalize it. That counts as an update. Need to catch
// this error early. If the application is allowed to start writing but
// not finalize at the end then the file would essentially be corrupted.
if (!can_append_bulk(compress)) {
// Disallow opening a finalized file if it was or will be compressed.
// Because we must assume that the application is going to finalize
// in this session and that would not be allowed, and the attempt
// would leave the file corrupted. To be really pedantic I could
// allow the compressor to be set as long as the lodcompressor is
// not, but this is getting ridiculous.
// TODO-@@@-Medium: It should still be possible to open a compressed
// file for the sole purpose of changing annotation and world coords.
// So ideally the test should be deferred to the first write. But there
// are other problems such as also deferring the re-open segment.
if (InternalZGY::LookupTable::hasBrickCompression
(this->_blup->lup(), this->_blup->lupend()))
// That would not require a finalize. So ideally the test should be
// deferred to the first write. But there are other problems such as
// also deferring the re-open segment.
if (compress)
throw OpenZGY::Errors::ZgyUserError
("A finalized file cannot have compressed data appended.");
else
throw OpenZGY::Errors::ZgyUserError
("A finalized compressed file cannot be opened for update.");
}
......@@ -2395,4 +2381,196 @@ ZgyInternalMeta::dump(std::ostream& out, const std::string& prefix)
if (_blup) _blup->dump(out, prefix + " ");
}
/*
* Caveat: Adding bloat to this class. Find a better design?
*
* The has_xxx_feature() and can_xxx() tests will respectively
* collect information about the file and inform the caller about
* what is allowed.
*
* Initially there were just "was/is compressed" and "was finalized"
* checks but things get more complicated when a file can be finalized
* with respect to low resolution data but not statistics or vice
* versa. Or the file may be small enough to not even have low
* resolution bricks.
*
* The tests that determine the version number are similar to the tests
* that decide whether incremental builds are allowed and whether a
* compressed file may be re-opened.
*
* Why does this matter?
*
* Petrel creates an empty file that will be written to later. This
* file must not be finalized because that would prevent it from being
* later written with compressed data. And if the finalize was done
* with compression it would prevent even uncompressed writes. If no
* compression at all is involved then a finalize at this point is
* "only" a serious performance issue.
*/
/**
 * \brief Statistics and histogram are good.
 *
 * \details
 * Considered good when the histogram sample count is nonzero, the
 * statistics sample count is nonzero, and the statistical range is
 * sane (min <= max).
 *
 * The histogram limits are deliberately not consulted here. An empty
 * histogram might have limits (0,0) if never updated, and it might
 * have min < max coupled with a zero count, because even if the file
 * didn't get finalized it might need to remember what the range
 * would have been.
 *
 * \return true if both statistics and histogram look populated.
 */
bool
ZgyInternalMeta::has_stathist_feature() const
{
  // Histogram was never populated.
  if (_hh->samplecount() == 0)
    return false;
  // Statistics were never populated.
  if (_ih->scnt() == 0)
    return false;
  // Counts are fine; the statistical range must be sane as well.
  return _ih->smin() <= _ih->smax();
}
/**
* \brief Entire file is just a single brick.
*
* \details
* A survey small enough to fit inside a single brick triggers several
* corner cases. Such as never having any low resolution bricls.
*/
bool
ZgyInternalMeta::has_tiny_feature() const
{
const std::int64_t nlods = static_cast<std::int64_t>(_ih->lodsizes().size());
return nlods == 1;
}
/**
 * \brief Has low resolution. Always false for single-brick files.
 *
 * \details
 * Returns true when the file declares more than one LOD level and
 * LookupTable::usableBrickLOD() reports every one of those levels as
 * usable. This is expected to be the case once finalize has been run
 * and the low resolution bricks have been stored.
 *
 * For tiny files where all samples fit into a single brick, i.e.
 * has_tiny_feature() == true, this returns false because such a file
 * still won't have (and won't need) lowres.
 *
 * \return true if low resolution data exists and is complete.
 */
bool
ZgyInternalMeta::has_lowres_feature() const
{
  const std::int64_t declared_lods =
    static_cast<std::int64_t>(_ih->lodsizes().size());
  const std::int64_t usable_lods = LookupTable::usableBrickLOD
    (_ih->lodsizes(), _ih->brickoffsets(), _blup->lup(), _blup->lupend());
  // A single-level file has no lowres by definition.
  if (declared_lods <= 1)
    return false;
  return usable_lods == declared_lods;
}
/**
 * \brief Contains compressed bricks.
 *
 * \details
 * Forwards to LookupTable::hasBrickCompression() using the brick
 * lookup table owned by this instance.
 *
 * TODO-@@@-Low: Performance: Might need to cache the result.
 * In that case, marking the cache dirty later on is a hassle.
 *
 * \return true if the lookup table refers to any compressed brick.
 */
bool
ZgyInternalMeta::has_compression_feature() const
{
  const bool any_compressed = InternalZGY::LookupTable::hasBrickCompression
    (_blup->lup(), _blup->lupend());
  return any_compressed;
}
/**
 * \brief File is fully finalized.
 *
 * \details
 * Requires statistics and histogram to be present, and additionally
 * low resolution bricks unless the file is a single brick and so
 * doesn't need any. If this test fails we might as well remove all
 * the results of a previous finalize.
 *
 * \return true if nothing is missing from a previous finalize.
 */
bool
ZgyInternalMeta::has_finalized_feature() const
{
  // Without statistics and histogram nothing else matters.
  if (!has_stathist_feature())
    return false;
  // Lowres is only required when the file can have it.
  return has_tiny_feature() || has_lowres_feature();
}
/**
 * \brief Readable by the old library.
 *
 * \details
 * When OpenZGY saves a file that isn't finalized then it is flagged
 * as v4 because the old reader cannot handle missing low resolution
 * data. The same happens if the file contains compressed data.
 *
 * Missing histogram or statistics do not in themselves prevent the
 * old library from using the file, so they are not tested here. It
 * is assumed that application code will react properly when it sees
 * a range with min>max or count==0.
 *
 * A tiny uncompressed file that was not finalized will still be
 * usable by ZGY-Public because it never has lowres.
 *
 * The version number (3 or 4) should reflect the current state of the
 * file, not its history. So e.g. a file that was v4 only due to
 * missing low resolution data should be changed to v3 when finalized.
 * The test thus needs to be done right before closing the file.
 *
 * \return true if the file may be flagged as version 3.
 */
bool
ZgyInternalMeta::can_old_library_read() const
{
  // Compressed bricks are never readable by the old library.
  if (has_compression_feature())
    return false;
  // Otherwise lowres must be complete, or not needed at all.
  return has_tiny_feature() || has_lowres_feature();
}
/**
 * \brief Eligible for incremental finalize.
 *
 * \details
 * The test is similar to can_old_library_read(). If low resolution data
 * is missing then not only can't the file be opened by old zgy, it can't
 * be incrementally finalized either because there is no starting point.
 * The test here is stricter because even if only statistics and/or
 * histogram are bad then incremental finalize is still disallowed.
 *
 * The user can still elect to do a full finalize. In fact, that is
 * currently the default.
 *
 * Compressed files can't be finalized more than once. Nor can
 * uncompressed files if the latest reopen asked for compression.
 * Note that this function only looks at bricks already recorded in
 * the lookup table; it is not told about the compression requested
 * for the current session.
 *
 * \return true if an incremental finalize has a valid starting point.
 */
bool
ZgyInternalMeta::can_finalize_incremental() const
{
  if (has_compression_feature())
    return false;
  return has_finalized_feature();
}
/**
 * \brief Allowed to append (not necessarily update) bulk data.
 *
 * \details
 * Compressed and finalized files are not allowed to have data appended
 * to them. Not because the write itself would be a problem, but because
 * re-finalizing the file counts as an update, not an append, so that
 * would not be allowed.
 *
 * In the special single-brick case, appending is technically allowed
 * even for compressed finalized data because no low resolution bricks
 * need to be updated. This is of academic interest only (or it might
 * change the wording of error messages) because a compressed file
 * must have at least one brick of real data. And since the entire
 * file is just one brick, there are no more empty bricks. So there
 * is no space to append anything anyway.
 *
 * If the file is currently uncompressed but this current session will
 * be adding compressed data then it still needs to be considered to
 * be compressed. The caller needs to provide the will_compress flag
 * because only the accessor knows what it is planning to do.
 *
 * Caveat: Ideally the test should be made on the first write and not
 * the file reopen, because maybe the application just wanted to update
 * the metadata. Which ought to still be allowed. But that special case
 * also triggers other issues. How to prevent (for cloud access) the
 * last segment from being re-opened needlessly.
 *
 * \param will_compress True if the current session will write
 *                      compressed bricks.
 * \return true if appending bulk data is permitted.
 */
bool
ZgyInternalMeta::can_append_bulk(bool will_compress) const
{
  // No compression involved, neither on file nor planned: always ok.
  if (!has_compression_feature() && !will_compress)
    return true;
  // Single-brick files have no lowres that would need an update.
  if (has_tiny_feature())
    return true;
  // Compression is involved, so lowres must not already be in place.
  return !has_lowres_feature();
}
} // namespace
......@@ -475,6 +475,22 @@ public:
return old;
}
std::int64_t flushMeta(const std::shared_ptr<FileADT>& file);
private:
// Caveat: Adding bloat to this class. Find a better design?
// Add methods to get derived information from one or more of the headers.
// Add methods that essentially forward the request to the static class
// LookupTable. Being static, all context needed by LookupTable must be
// passed in every call. And most of what it needs comes from our headers.
bool has_stathist_feature() const; // Statistics and histogram are good.
bool has_tiny_feature() const; // Entire file is just a single brick.
bool has_lowres_feature() const; // Has low resolution. False if tiny.
bool has_compression_feature() const; // Contains compressed bricks.
public:
bool has_finalized_feature() const; // Fully finalized.
bool can_old_library_read() const; // Readable by the old library.
bool can_finalize_incremental() const; // Eligible for incremental finalize.
bool can_append_bulk(bool) const; // Allowed to append bulk data.
private:
bool _logger(int priority, const std::string& ss = std::string()) const;
bool _logger(int priority, const std::ios& ss) const;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment