Commit b7e65108 authored by Paal Kvamme
Browse files

Reduce I/O needed for incremental rebuilds.

parent 1bdd8e70
......@@ -300,7 +300,10 @@ GenLodImpl::call()
std::cout << "@ GenLod is running."
<< " Histogram range " << _histogram_range
<< "\n";
index3_t chunksize = this->_bricksize * std::int64_t(_incremental ? 2 : 2);
// Keep this in sync with GenLodC::_willneed(). If incremental is true
// it is not trivial to use anything other than one brick, which gets
// changed to one brick-column elsewhere.
index3_t chunksize = this->_bricksize * std::int64_t(_incremental ? 1 : 2);
this->_reporttotal(_willneed());
this->_report(nullptr);
this->_calculate(index3_t{0,0,0}, chunksize, this->_nlods-1);
......@@ -434,6 +437,7 @@ GenLodImpl::_calculate(const index3_t& readpos_in, const index3_t& readsize_in,
chunksize[2]};
// Size of the in-memory buffer of decimated data returned to the caller.
const std::array<std::int64_t,3> returnsize = (readsize + std::int64_t(1)) / std::int64_t(2);
const std::array<std::int64_t,3> returnpos = readpos / std::int64_t(2);
if (_verbose)
std::cout << "@" << _prefix(readlod)
......@@ -443,23 +447,24 @@ GenLodImpl::_calculate(const index3_t& readpos_in, const index3_t& readsize_in,
std::shared_ptr<const DataBuffer> data;
bool wasread = false;
if(readlod == 0) {
if (this->_isclean(readlod, readpos, readsize)) {
std::shared_ptr<DataBuffer> result = (readlod == this->_nlods-1) ? nullptr :
this->_read(readlod+1, returnpos, returnsize);
if (this->_verbose) {
std::cout << "@" << _prefix(readlod)
<< "calculate returns(lod="
<< readlod+1 << ", pos=" << returnpos
<< ", size=" << returnsize
<< ", data=" << result->toString() << " (SHORTCUT))\n";
}
return result;
}
else if(readlod == 0) {
// Fullres bricks are always read, not calculated.
data = this->_read(readlod, readpos, readsize);
wasread = true;
this->_accumulate(data);
}
else if (this->_isclean(readlod, readpos, readsize)) {
// Lowres bricks can sometimes be read if building incrementally.
// Note that the code always calculates full brick-columns to
// avoid r/m/w cycles. So if any of the 4 brick columns we
// depend on is dirty then all 4 need to be read or computed.
// And the converse: If _canread() returns true then *all*
// requested data is already on the file so there is no write back.
// Just like in the LOD 0 case.
data = this->_read(readlod, readpos, readsize);
wasread = true;
}
else {
std::array<std::int64_t,3> offsets[4] =
{{ 0, 0, 0},
......@@ -481,12 +486,6 @@ GenLodImpl::_calculate(const index3_t& readpos_in, const index3_t& readsize_in,
// Caveat for multi threading: nested loops, smaller blocks.
// Caveat for replacing with a single call: more special cases to test.
// In particular handling of crops to survey size.
//
// TODO-@@@: If one or more sub-blocks have a nonzero size and are
// not flagged as dirty then we can skip reading that sub-block.
// This means we get a read/modify/write cycle on the block we are
// about to update. That needs to be handled here. Read the old
// contents and pass it down to _paste4() as a preallocated result.
for (int ii=0; ii<4; ++ii)
hires[ii] = this->_calculate(readpos*std::int64_t(2) + offsets[ii],
......@@ -878,66 +877,81 @@ GenLodC::GenLodC(
std::int64_t
GenLodC::_willneed() const
{
// meta->lodsizes() is not available, I guess I could have captured it.
// But it is no big deal to re-compute that information.
std::int64_t total = 0; // Total number of bricks in all levels.
std::array<std::int64_t,3> bs = this->_bricksize;
std::array<std::int64_t,3> sz = this->_surveysize;
while (sz[0] > bs[0] || sz[1] > bs[1] || sz[2] > bs[2]) {
std::array<std::int64_t,3> bricks = (sz + bs - std::int64_t(1)) / bs;
total += (bricks[0] * bricks[1] * bricks[2]);
sz = (sz + std::int64_t(1)) / std::int64_t(2);
{
std::array<std::int64_t,3> bs = this->_bricksize;
std::array<std::int64_t,3> sz = this->_surveysize;
while (sz[0] > bs[0] || sz[1] > bs[1] || sz[2] > bs[2]) {
std::array<std::int64_t,3> bricks = (sz + bs - std::int64_t(1)) / bs;
total += (bricks[0] * bricks[1] * bricks[2]);
sz = (sz + std::int64_t(1)) / std::int64_t(2);
}
++total; // Loop stopped short of the last level, by definition one brick.
}
++total; // Loop stopped short of the last level, by definition one brick.
const std::int64_t full_total = total;
// TODO-@@@ This is WAY too complicated and fragile. Consider some
// kind of dry run instead. Or just accept a less accurate progress bar.
// On the positive side this is a good test to see that incremental
// rebuilds touch the data they ought to. Even though only the final
// count is checked.
// Subtract the bricks skipped because of incremental build.
// If full build then _canread() returns false for all lod>0
// and the code below, even without a short cut, would do nothing.
if (!_incremental)
return total;
sz = this->_surveysize;
std::array<std::int64_t,3> max = sz; // Limit imposed by survey size @ lod-1
for (std::int32_t lod = 1; lod < _nlods; ++lod) {
max = sz; // size at (lod-1)
sz = (sz + std::int64_t(1)) / std::int64_t(2);
const index3_t chosensize{2*bs[0], 2*bs[1], sz[2]}; // before clipping.
for (std::int64_t ii = 0; ii < sz[0]; ii += chosensize[0]) {
for (std::int64_t jj = 0; jj < sz[1]; jj += chosensize[1]) {
// Consider 4 brick-columns at (lod), i.e. what _calculate chooses.
// If any of those need to be rebuilt then rebuild all, meaning
// we need to read 16 brick-columns from lod-1.
const index3_t checkpos{ii,jj,0};
const index3_t checksize = _clipsizetosurvey(lod, checkpos, chosensize);
if (_isclean(lod, checkpos, checksize)) {
const index3_t subsize = _clipsizetosurvey
(lod-1, checkpos + checkpos, checksize + checksize);
const std::int64_t ibricks = (subsize[0] + bs[0] - 1) / bs[0];
const std::int64_t jbricks = (subsize[1] + bs[1] - 1) / bs[1];
const std::int64_t kbricks = (subsize[2] + bs[2] - 1) / bs[2];
total -= (ibricks * jbricks * kbricks);
if (lod == _nlods - 1)
total -= 1; // final brick won't be read either.
if (this->_verbose) {
std::cout << "@ lod " << lod << " pos " << checkpos
<< " is clean, saving "
<< ibricks << "*" << jbricks << "*" << kbricks
<< " bricks: "
<< " do not read lod " << lod-1
<< " pos (" << checkpos[0]*2
<< ", " << checkpos[1]*2
<< ", " << checkpos[2]*2
<< ") size (" << ibricks * bs[0]
<< ", " << jbricks * bs[1]
<< ", " << kbricks * bs[2]
<< ")\n";
// The incremental case is trickier.
// The code below might have worked for full rebuilds as well,
// but it currently assumes chunksize is one brick-column.
const std::int64_t full_total = total;
total = 0; // Start over. full_total is just for logging.
const auto countbricks = [this](const index3_t& size) {
return
((size[0] + this->_bricksize[0] - 1) / this->_bricksize[0]) *
((size[1] + this->_bricksize[1] - 1) / this->_bricksize[1]) *
((size[2] + this->_bricksize[2] - 1) / this->_bricksize[2]);
};
// How much data to be processed in each chunk, in samples?
// Needs to match chunksize in GenLodImpl::call(), and
// currently ONLY works as shown, chunksize = one brick-column.
// Other chunk sizes will need non-trivial changes below.
std::array<std::int64_t,3> chunksize
{this->_bricksize[0],
this->_bricksize[1],
this->_surveysize[2]};
// How much decimated data does this correspond to, in samples?
const index3_t cshalf
{(chunksize[0]+1)/2,
(chunksize[1]+1)/2,
(chunksize[2]+1)/2};
// Number of bricks to read or write to get these samples?
// Note that cshalf might need less than a brick, it still needs to read all.
for (std::int32_t lod = 0; lod < _nlods; ++lod) {
const std::int64_t lodfactor = std::int64_t(1) << lod;
const index3_t sz = (this->_surveysize + (lodfactor - 1)) / lodfactor;
for (std::int64_t ii = 0; ii < sz[0]; ii += chunksize[0]) {
for (std::int64_t jj = 0; jj < sz[1]; jj += chunksize[1]) {
for (std::int64_t kk = 0; kk < sz[2]; kk += chunksize[2]) {
const index3_t pos{ii,jj,kk};
const index3_t poshalf{ii/2, jj/2, kk/2}; // in lod+1
const index3_t csclip =_clipsizetosurvey(lod, pos, chunksize);
const index3_t cshalfclip =
_clipsizetosurvey(lod+1, poshalf, cshalf);
if (!_isclean(lod, pos, csclip)) {
total += countbricks(csclip);
}
else if (lod < (_nlods-1) && !_isclean(lod+1, poshalf, cshalfclip)) {
total += countbricks(cshalfclip);
// READ THIS if you want to change chunksize.
// The region checked using _isclean() should have been the
// chunksize-sized region in lod+1 that is built from, in
// part, the chunk being handled here. Because the code needs
// to know whether that lod+1 region needs to be processed.
// With chunksize exactly one brick-column the test on
// poshalf, cshalfclip is equivalent since the dirty bricks
// are tracked per brick so the code will be checking what
// it needs to. If chunksize is larger then we need to figure
// out the start and size of that lod+1 brick. Remember
// to take into account rounding up to align with bricksize,
// clipping to survey size, rounding start down to chunksize,
// and possibly something else. And possibly in a different
// order. Possibly of academic interest only because the
// chunk size of one brick-column might well be optimal
// for incremental builds.
}
}
}
......@@ -945,12 +959,8 @@ GenLodC::_willneed() const
}
if (this->_verbose) {
if (full_total == total)
std::cout << "@ Full lowres build needs " << total
<< " bricks" << std::endl;
else
std::cout << "@ Incr lowres build needs " << total
<< " of " << full_total << " bricks" << std::endl;
std::cout << "@ Incremental lowres build needs " << total
<< " of " << full_total << " bricks" << std::endl;
}
return total;
......
......@@ -432,7 +432,7 @@ do_test_reopen(const std::string& filename, TestTwiceFlags flags)
!flagset(TestTwiceFlags::step1_finalize) ? 88 :
!any_step2_write ? 0 :
!flagset(TestTwiceFlags::step2_fin_incr) ? 88 :
any_step2_write ? 46 :
any_step2_write ? -1 : // unknown
0;
ZgyWriterArgs firstargs = ZgyWriterArgs()
......@@ -622,7 +622,7 @@ do_test_reopen(const std::string& filename, TestTwiceFlags flags)
ok = TEST_EQUAL_FLOAT(actual_stat.ssq, expect_stat_ssq, 0.1) && ok;
ok = TEST_EQUAL(actual_stat.min, expect_stat_min) && ok;
ok = TEST_EQUAL(actual_stat.max, expect_stat_max) && ok;
ok = TEST_EQUAL(p.total(), expect_brickrw) && ok;
ok = (expect_brickrw < 0 || TEST_EQUAL(p.total(), expect_brickrw)) && ok;
ok = TEST_EQUAL(p.done(), p.total()) && ok;
// Not testing histogram min/max because it is rather unclear
......@@ -1394,7 +1394,7 @@ test_reopen_track_changes()
{OpenZGY::DecimationType::Average},
std::ref(p), FinalizeAction::BuildIncremental);
writer->close();
TEST_EQUAL(p.total(), 322); // difficult to verify by hand.
TEST_EQUAL(p.total(), 97); // difficult to verify by hand.
TEST_EQUAL(p.done(), p.total());
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment