Commit cd8a52b0 authored by Paal Kvamme

Refactor a function that has grown too large.

parent 172b09c9
@@ -157,6 +157,9 @@ public:
// Functions from FileUtilsSeismicStore
virtual void deleteFile(const std::string& filename, bool missing_ok) const;
virtual std::string altUrl(const std::string& filename) const;
private:
void do_write_one(const void* const data, const std::int64_t blocknum, const std::int64_t size, const bool overwrite);
void do_write_many(const void* const data, const std::int64_t blocknum, const std::int64_t size, const std::int64_t blobsize, const bool overwrite);
public:
// TODO-Low per-instance logging. This is tedious to implement
// because many of the helper classes will need to hold a logger
@@ -1300,6 +1303,7 @@ SeismicStoreFile::xx_write(const void* data, std::int64_t offset, std::int64_t s
overwrite = true;
this->_dataset->info()->getLocalOffset
(offset, size, &blocknum, &local_offset, &local_size);
// Normally we get here to overwrite blob 0, and that is ok.
// TODO-Low: This test will fail in the parallel upload case
// because local_offset and local_size refer to SDAPI blocks and
// not the larger segments that we are asked to write. local_size
@@ -1331,8 +1335,29 @@ SeismicStoreFile::xx_write(const void* data, std::int64_t offset, std::int64_t s
const std::int64_t blobsize =
(blocknum == 0 || _config->_segsize <= 0 ||_config->_segsplit <= 1) ?
size : _config->_segsize / _config->_segsplit;
const int blobcount = (size + blobsize - 1) / blobsize;
if (size <= blobsize) {
do_write_one(data, blocknum, size, overwrite);
}
else {
do_write_many(data, blocknum, size, blobsize, overwrite);
}
if (this->_config->_debug_trace)
this->_config->_debug_trace
(offset == current_eof ? "append" : "write",
size, size, 1, this->_dataset->info()->allSizes(-1));
}
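For reference, a standalone sketch (not part of this change) of the chunking arithmetic behind the decision above: blobsize is the per-blob limit, and the buffer is cut into ceil(size/blobsize) pieces, the last one possibly shorter. The segsize and segsplit values are only the typical defaults mentioned in the comments below.

#include <algorithm>
#include <cstdint>
#include <iostream>

int main()
{
  const std::int64_t segsize   = std::int64_t(1024) * 1024 * 1024; // typically 1 GB
  const std::int64_t segsplit  = 8;                                // typically 8
  const std::int64_t size      = segsize;            // bytes handed to xx_write
  const std::int64_t blobsize  = segsize / segsplit; // 128 MiB per SDAPI blob
  const std::int64_t blobcount = (size + blobsize - 1) / blobsize; // round up
  for (std::int64_t ii = 0; ii < blobcount; ++ii) {
    const std::int64_t iter_offset = ii * blobsize;
    const std::int64_t iter_size   = std::min(size - iter_offset, blobsize);
    std::cout << "blob " << ii << ": offset " << iter_offset
              << " size " << iter_size << "\n";
  }
  return 0;
}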
/**
* This is the final part of xx_write, in the case where we want to
* write just a single SDAPI blob.
*/
void
SeismicStoreFile::do_write_one(
const void* const data,
const std::int64_t blocknum,
const std::int64_t size,
const bool overwrite)
{
this->_dataset->info()->checkOnWrite(blocknum, size);
this->_dataset->dataset()->writeBlock
(static_cast<int>(blocknum), // cast checked by checkOnWrite()
@@ -1341,50 +1366,64 @@ SeismicStoreFile::xx_write(const void* data, std::int64_t offset, std::int64_t s
overwrite);
_wtimer->addBytesWritten(size);
this->_dataset->updateOnWrite(blocknum, size);
}
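The cast in the writeBlock() call above is safe only because checkOnWrite() rejects block numbers that do not fit in an int. A hypothetical illustration of that particular guard; the real check lives elsewhere and validates more than this:

#include <cstdint>
#include <limits>
#include <stdexcept>

// Hypothetical stand-in for the range check relied on above; the real
// checkOnWrite() also validates the requested size for this block number.
static void check_blocknum_fits_in_int(std::int64_t blocknum)
{
  if (blocknum < 0 || blocknum > std::numeric_limits<int>::max())
    throw std::runtime_error("block number does not fit in an int");
}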
else {
// Write one or more SDAPI segments of no more than blobsize bytes each.
//
// Possible scenarios.
//
// \li We never get here for block 0; that one cannot be split and it
// would rarely make sense anyway because block 0 is usually small.
// Calling this code anyway with blobsize == size and blobcount == 1
// should work but is needlessly roundabout and I have not checked
// that it won't hit any corner cases.
//
// \li If size <= blobsize then we might as well use the old code,
// even if the other conditions for writing in parallel pass.
// There will be just one SDAPI block and only one thread will
// be used. The short cut means slightly less code coverage
// when running unit tests with only small files. So, use
// bigger ones.
//
// \li In most cases we get segsize bytes here (typically 1 GB) and
// we are expected to write out the data in segsplit (typically 8)
// equally sized SDAPI blocks of segsize/segsplit bytes each.
//
// \li If size != segsize this is the last or
// (if -1 <= blocknum <= segsplit) only segment with bulk data.
// In that case we may end up writing fewer than segsplit SDAPI
// blocks. Possibly just one. And the last SDAPI block can be
// smaller than segsize/segsplit bytes. We can even have
// size < segsize/segsplit meaning all headers are in SDAPI
// block 0 and all data in SDAPI block 1. In the latter case
// there will be no parallelization.
//
// The last two bullets might seem to suggest we need to know what segsize
// is, but no: all this code needs to know is that it shall split the
// incoming buffer into chunks of no more than blobsize bytes.
//
// Below, we only need to run consistency checks on the first of the
// segsplit SDAPI blocks written. So we end up checking block 0 (above),
// 1, segsplit+1, ... When checking block 1 the real segment size hasn't
// been established yet so there isn't much to check. Otherwise the check
// asserts that _dataset->info()->block1size() == blobsize. There is
// always a check that blocknum fits in an int, to make the cast safe.
//
}
/**
* This is the final part of xx_write, in the case where we want to
* write two or more SDAPI blobs of no more than blobsize bytes each.
* It should work for a single blob as well, but that would be
* major overkill compared to just calling do_write_one instead.
*
* Possible scenarios.
*
* \li We never get here for block 0; that one cannot be split and it
* would rarely make sense anyway because block 0 is usually small.
* Calling this code anyway with blobsize == size and blobcount == 1
* should work but is needlessly roundabout and I have not checked
* that it won't hit any corner cases.
*
* \li If size <= blobsize then we might as well use the old code,
* even if the other conditions for writing in parallel pass.
* There will be just one SDAPI block and only one thread will
* be used. The short cut means slightly less code coverage
* when running unit tests with only small files. So, use
* bigger ones.
*
* \li In most cases we get segsize bytes here (typically 1 GB) and
* we are expected to write out the data in segsplit (typically 8)
* equally sized SDAPI blocks of segsize/segsplit bytes each.
*
* \li If size != segsize this is the last or
* (if -1 <= blocknum <= segsplit) only segment with bulk data.
* In that case we may end up writing fewer than segsplit SDAPI
* blocks. Possibly just one. And the last SDAPI block can be
* smaller than segsize/segsplit bytes. We can even have
* size < segsize/segsplit meaning all headers are in SDAPI
* block 0 and all data in SDAPI block 1. In the latter case
* there will be no parallelization.
*
* The last two bullets might seem to suggest we need to know what segsize
* is, but no: all this code needs to know is that it shall split the
* incoming buffer into chunks of no more than blobsize bytes.
*/
void
SeismicStoreFile::do_write_many(
const void* const data,
const std::int64_t blocknum,
const std::int64_t size,
const std::int64_t blobsize,
const bool overwrite)
{
// Only need to run consistency checks on the first of the
// segsplit SDAPI blocks written. So we end up checking block
// 0, 1, segsplit+1, ... When checking block <2 the real segment
// size hasn't been established yet so there isn't much to check.
// Otherwise the check asserts that _dataset->info()->block1size()
// equals blobsize. There is always a check that blocknum fits in
// an int, to make the cast safe.
this->_dataset->info()->checkOnWrite(blocknum, std::min(size, blobsize));
const int blobcount = (size + blobsize - 1) / blobsize;
for (int ii = 0; ii < blobcount; ++ii) {
const int iter_blocknum = ii + static_cast<int>(blocknum);
const std::int64_t iter_offset = ii * blobsize;
@@ -1397,6 +1436,7 @@ SeismicStoreFile::xx_write(const void* data, std::int64_t offset, std::int64_t s
_wtimer->addBytesWritten(iter_size);
}
}
// The rest of the functions, including updateOnWrite(), are unaware
// that we might have done parallel writes. Keep them in the dark.
// Otherwise there will be errors reported about writing out of sequence.
@@ -1410,11 +1450,6 @@ SeismicStoreFile::xx_write(const void* data, std::int64_t offset, std::int64_t s
this->_dataset->updateOnWrite(iter_blocknum, iter_size);
}
}
} // end of decision to split into multiple bricks.
if (this->_config->_debug_trace)
this->_config->_debug_trace
(offset == current_eof ? "append" : "write",
size, size, 1, this->_dataset->info()->allSizes(-1));
}
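The comments above motivate the refactoring: each SDAPI blob written by do_write_many is independent of the others, so a later change could upload the segsplit blobs concurrently. A purely illustrative sketch of such a loop, not part of this commit; writeOneBlob is a hypothetical stand-in for the SDAPI call and error handling is elided:

#include <algorithm>
#include <cstdint>
#include <functional>
#include <future>
#include <vector>

// Illustration only: split "size" bytes starting at SDAPI block "blocknum"
// into chunks of at most "blobsize" bytes and upload them concurrently.
static void write_blobs_in_parallel(
  const char* data, std::int64_t blocknum,
  std::int64_t size, std::int64_t blobsize,
  const std::function<void(int, const char*, std::int64_t)>& writeOneBlob)
{
  const std::int64_t blobcount = (size + blobsize - 1) / blobsize;
  std::vector<std::future<void>> tasks;
  for (std::int64_t ii = 0; ii < blobcount; ++ii) {
    const int iter_blocknum = static_cast<int>(blocknum + ii);
    const std::int64_t iter_offset = ii * blobsize;
    const std::int64_t iter_size = std::min(size - iter_offset, blobsize);
    tasks.push_back(std::async(std::launch::async, writeOneBlob,
                               iter_blocknum, data + iter_offset, iter_size));
  }
  for (auto& task : tasks)
    task.get(); // wait for, and re-throw exceptions from, every upload
}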
/**
......