Commit cd8a52b0 authored by Paal Kvamme

Refactor a function that has grown too large.

parent 172b09c9
@@ -157,6 +157,9 @@ public:
// Functions from FileUtilsSeismicStore
virtual void deleteFile(const std::string& filename, bool missing_ok) const;
virtual std::string altUrl(const std::string& filename) const;
private:
void do_write_one(const void* const data, const std::int64_t blocknum, const std::int64_t size, const bool overwrite);
void do_write_many(const void* const data, const std::int64_t blocknum, const std::int64_t size, const std::int64_t blobsize, const bool overwrite);
public:
// TODO-Low per-instance logging. This is tedious to implement
// because many of the helper classes will need to hold a logger
@@ -1300,6 +1303,7 @@ SeismicStoreFile::xx_write(const void* data, std::int64_t offset, std::int64_t s
overwrite = true;
this->_dataset->info()->getLocalOffset
(offset, size, &blocknum, &local_offset, &local_size);
// Normally we get here to overwrite blob 0, and that is ok.
// TODO-Low: This test will fail in the parallel upload case
// because local_offset and local_size refers to SDAPI blocks and
// not the larger segments that we are asked to write. local_size
@@ -1331,8 +1335,29 @@ SeismicStoreFile::xx_write(const void* data, std::int64_t offset, std::int64_t s
const std::int64_t blobsize =
(blocknum == 0 || _config->_segsize <= 0 ||_config->_segsplit <= 1) ?
size : _config->_segsize / _config->_segsplit;
const int blobcount = (size + blobsize - 1) / blobsize;
if (size <= blobsize) {
do_write_one(data, blocknum, size, overwrite);
}
else {
do_write_many(data, blocknum, size, blobsize, overwrite);
}
if (this->_config->_debug_trace)
this->_config->_debug_trace
(offset == current_eof ? "append" : "write",
size, size, 1, this->_dataset->info()->allSizes(-1));
}
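The tail of xx_write now only chooses a blob size and dispatches to one of the two helpers. Below is a minimal, stand-alone sketch of that decision; it is illustrative only, and the hypothetical sketch_dispatch function takes segsize and segsplit as plain parameters instead of reading them from _config.

#include <cstdint>
#include <iostream>

// Illustrative only: mirrors the dispatch logic in the new xx_write tail,
// with free-standing parameters instead of the SeismicStoreFile members.
static void sketch_dispatch(std::int64_t blocknum, std::int64_t size,
                            std::int64_t segsize, std::int64_t segsplit)
{
  // Block 0 (the headers) and configurations that disable splitting are
  // written as a single blob of exactly 'size' bytes.
  const std::int64_t blobsize =
    (blocknum == 0 || segsize <= 0 || segsplit <= 1) ?
    size : segsize / segsplit;
  if (size <= blobsize)
    std::cout << "do_write_one: 1 blob of " << size << " bytes\n";
  else
    std::cout << "do_write_many: blobs of <= " << blobsize << " bytes\n";
}

int main()
{
  const std::int64_t GB = 1024LL * 1024 * 1024;
  sketch_dispatch(1, GB, GB, 8);           // full segment -> do_write_many
  sketch_dispatch(0, 10000, GB, 8);        // block 0 -> do_write_one
  sketch_dispatch(9, 4 * 1024 * 1024, GB, 8); // short last segment -> do_write_one
}

With the typical configuration quoted in the comments (segsize 1 GB, segsplit 8), a full segment is split into eight 128 MB blobs via do_write_many, while block 0 and a short final segment fall through to do_write_one.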
/**
* This is the final part of xx_write, in the case where we want to
* write just a single SDAPI blob.
*/
void
SeismicStoreFile::do_write_one(
const void* const data,
const std::int64_t blocknum,
const std::int64_t size,
const bool overwrite)
{
this->_dataset->info()->checkOnWrite(blocknum, size);
this->_dataset->dataset()->writeBlock
(static_cast<int>(blocknum), // cast checked by checkOnWrite()
@@ -1341,50 +1366,64 @@ SeismicStoreFile::xx_write(const void* data, std::int64_t offset, std::int64_t s
overwrite);
_wtimer->addBytesWritten(size);
this->_dataset->updateOnWrite(blocknum, size);
}
else {
// Write one or more SDAPI segments of no more than blobsize bytes each.
//
// Possible scenarios.
//
// \li We never get here for block 0; than one cannot be split and it
// would rarely make sense anyway because block 0 is usually small.
// Calling this code anyway with blobsize == size and blobcount == 1
// should work but is needlessly roundabout and I have not checked
// that it won't hit any corner cases.
//
// \li If size <= blobsize then we might as well use the old code,
// even if the other condtions for writing in parallel pass.
// There will be just one SDAPI block and only one thread will
// be used. The short cut means slightly less code coverage
// when running unit tests with only small files. So, use
// bigger ones.
//
// \li In most cases we get segsize bytes here (typically 1 GB) and
// we are expected to write out the data in segsplit (typically 8)
// identical SDAPI blocks each of size segsize/segsplit.
//
// \li If size != segsize this is the last or
// (if -1 <= blocknum <= segsplit) only segment with bulk data.
// In that case we may end up writing fewer than segsplit SDAPI
// blocks. Possibly just one. And the last SDAPI block can be
// smaller than segsize/segsplit blocks. We can even have
// size < segsize/segsplit meaning all headers are in SDAPI
// block 0 and all data in SDAPI block 1. In the latter case
// there will be no parallelization.
//
// The last two bullets seem to suggest we need to know what segsize is,
// But no, all this code needs to know is that it shall split the
// incoming buffer into chunks of no more than blobsize bytes.
//
// Below, only need to run consistency checks on the the first of the
// segsplit SDAPI blocks written. So we end up checing block 0 (above),
// 1, segsplit+1, ... When checking block 1 the real segment size hasn't
// been established yet so there isn't much to check. Otherwise the check
// asserts that _dataset->info()->block1size() == blobsize. There is
// always a check that blocknum fits in an int. To make the cast safe.
//
/**
* This is the final part of xx_write, in the case where we want to
* write two or more SDAPI blobs of no more than blobsize bytes each.
* It should work for a single blob as well, but that would be a
* major overkill compared to just calling do_write_one instead.
*
* Possible scenarios.
*
* \li We never get here for block 0; that one cannot be split and it
* would rarely make sense anyway because block 0 is usually small.
* Calling this code anyway with blobsize == size and blobcount == 1
* should work but is needlessly roundabout and I have not checked
* that it won't hit any corner cases.
*
* \li If size <= blobsize then we might as well use the old code,
* even if the other conditions for writing in parallel pass.
* There will be just one SDAPI block and only one thread will
* be used. The short cut means slightly less code coverage
* when running unit tests with only small files. So, use
* bigger ones.
*
* \li In most cases we get segsize bytes here (typically 1 GB) and
* we are expected to write out the data in segsplit (typically 8)
* identical SDAPI blocks each of size segsize/segsplit.
*
* \li If size != segsize this is the last or
* (if -1 <= blocknum <= segsplit) only segment with bulk data.
* In that case we may end up writing fewer than segsplit SDAPI
* blocks. Possibly just one. And the last SDAPI block can be
* smaller than segsize/segsplit blocks. We can even have
* size < segsize/segsplit meaning all headers are in SDAPI
* block 0 and all data in SDAPI block 1. In the latter case
* there will be no parallelization.
*
* The last two bullets seem to suggest we need to know what segsize is.
* But no, all this code needs to know is that it shall split the
* incoming buffer into chunks of no more than blobsize bytes.
*/
void
SeismicStoreFile::do_write_many(
const void* const data,
const std::int64_t blocknum,
const std::int64_t size,
const std::int64_t blobsize,
const bool overwrite)
{
// Only need to run consistency checks on the first of the
// segsplit SDAPI blocks written. So we end up checking block
// 0, 1, segsplit+1, ... When checking block <2 the real segment
// size hasn't been established yet so there isn't much to check.
// Otherwise the check asserts that _dataset->info()->block1size()
// equals blobsize. There is always a check that blocknum fits in
// an int. To make the cast safe.
this->_dataset->info()->checkOnWrite(blocknum, std::min(size, blobsize));
const int blobcount = (size + blobsize - 1) / blobsize;
for (int ii = 0; ii < blobcount; ++ii) {
const int iter_blocknum = ii + static_cast<int>(blocknum);
const std::int64_t iter_offset = ii * blobsize;
@@ -1397,6 +1436,7 @@ SeismicStoreFile::xx_write(const void* data, std::int64_t offset, std::int64_t s
_wtimer->addBytesWritten(iter_size);
}
}
// The rest of the functions, including updateOnWrite(), are unaware
// that we might have done parallel writes. Keep them in the dark.
// Otherwise there will be errors reported about writing out of sequence.
@@ -1410,11 +1450,6 @@ SeismicStoreFile::xx_write(const void* data, std::int64_t offset, std::int64_t s
this->_dataset->updateOnWrite(iter_blocknum, iter_size);
}
}
} // end of decision to split into multiple bricks.
if (this->_config->_debug_trace)
this->_config->_debug_trace
(offset == current_eof ? "append" : "write",
size, size, 1, this->_dataset->info()->allSizes(-1));
}
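For reference, the splitting arithmetic inside do_write_many can be sketched in isolation. This is a simplified model, not the library code: the hypothetical sketch_split function only prints the per-blob block number, offset, and size that the loop derives from (blocknum, size, blobsize); the real code also calls writeBlock, checkOnWrite, and updateOnWrite.

#include <algorithm>
#include <cstdint>
#include <iostream>

// Illustrative only: the per-blob offsets and sizes carved out of one
// incoming segment, with the last blob allowed to be shorter than blobsize.
static void sketch_split(std::int64_t blocknum, std::int64_t size,
                         std::int64_t blobsize)
{
  const int blobcount = static_cast<int>((size + blobsize - 1) / blobsize);
  for (int ii = 0; ii < blobcount; ++ii) {
    const int iter_blocknum = ii + static_cast<int>(blocknum);
    const std::int64_t iter_offset = ii * blobsize;
    const std::int64_t iter_size = std::min(blobsize, size - iter_offset);
    std::cout << "blob " << iter_blocknum
              << ": offset " << iter_offset
              << ", size " << iter_size << "\n";
  }
}

int main()
{
  const std::int64_t MB = 1024 * 1024;
  // A full 1 GB segment split into blobs of 128 MB: 8 equal blobs.
  sketch_split(1, 1024 * MB, 128 * MB);
  // A short final segment of 300 MB: two 128 MB blobs and one 44 MB blob.
  sketch_split(9, 300 * MB, 128 * MB);
}

The shorter last blob matches the scenario described in the doc comment, where the final segment of a file may not fill all segsplit blocks.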
/**
...