Commit 1bd20df8 authored by Paal Kvamme's avatar Paal Kvamme
Browse files

More telemetry for bulk access, and TODO-notes.

parent a7fc33da
Pipeline #21911 passed with stages
in 4 minutes and 56 seconds
......@@ -70,6 +70,10 @@
* Apply padding, compression, byte swapping, and convert from
* DataBuffer to void* + size suitable for the file i/o layer.
*
* _writeOneConstantBrick
* Apply conversion of the constant value to a lookup table entry.
* Pass on the write request to the function that updates the lut.
*
* _writeWithRetry
*
* Allocate space on the bulk file as needed.
......@@ -140,6 +144,16 @@ namespace {
static int enable = Environment::getNumericEnv("OPENZGY_TIMERS", 0);
return enable > 0;
}
/**
 * Tells whether the multi-threaded compression loop may be used.
 *
 * The answer comes from the OPENZGY_ENABLE_COMPRESS_MT environment
 * setting, queried with 1 as the second argument (presumably the
 * value used when the variable is unset). The setting is stored in
 * a function-local static, so it is looked up only once; changing
 * the environment afterwards has no effect on this function.
 *
 * This switch exists for testing only and might be removed.
 * Applications should have no reason to reset the variable because
 * it is unlikely that they have another multi-threaded loop going
 * at the same time.
 *
 * \return true if the stored setting is greater than zero.
 */
static bool enable_compress_mt()
{
  static const int mt_setting =
    Environment::getNumericEnv("OPENZGY_ENABLE_COMPRESS_MT", 1);
  const bool is_positive = (mt_setting > 0);
  return is_positive;
}
}
struct IJK
......@@ -359,6 +373,7 @@ ZgyInternalBulk::ZgyInternalBulk(
, _loggerfn(logger ? logger : LoggerBase::standardCallback(LoggerBase::getVerboseFromEnv("OPENZGY_VERBOSE"), "openzgy-bulk:", ""))
, _ptimer_st(new SummaryPrintingTimer("writeAligned[S]"))
, _ptimer_mt(new SummaryPrintingTimer("writeAligned[M]"))
, _ststimer(new SummaryPrintingTimer("scaleToStorage"))
{
}
......@@ -1584,7 +1599,7 @@ ZgyInternalBulk::_writeAlignedRegion(
std::vector<std::shared_ptr<const WriteBrickArgPack>> const_queue(worksize);
std::vector<std::shared_ptr<const WriteNowArgPack>> normal_queue(worksize);
std::atomic<int> errors(0);
#pragma omp parallel for
#pragma omp parallel for if(enable_compress_mt())
for (std::int64_t ix = 0; ix < static_cast<std::int64_t>(worksize); ++ix) {
try {
const index3_t surveypos = work[ix]; // user's start i0,j0,k0 rounded down
......@@ -1702,8 +1717,25 @@ ZgyInternalBulk::writeRegion(
_metadata->ih().datatype()))
throw OpenZGY::Errors::ZgyUserError("Invalid data type in writeRegion");
if (!is_storage)
// TODO-Performance:
// Getting the range (for updating min/max), testing for all in-use
// samples being the same (for storing empty bricks), and downcast
// from float to storage (may skip clipping or isfinite() or both)
// are all somewhat related. The function to get the range might tell
// us whether the buffer has any NaNs or Inf or both. Depending on
// the answer the scale to storage might use short cuts. If there are
// no infinites, no nan, and range inside what the target can accept,
// the conversion might become a lot faster. Another tweak that is
// maybe less clean is to just go ahead and downcast without worrying
// about NaN/Inf and just hope they don't mess up too much.
if (!is_storage) {
// TODO-Performance: Multi-threading of _scaleDataToStorage().
// Need to figure out the cutoff where the buffer is too small and the
// overhead of OpenMP starts getting in the way.
SimpleTimer t1(*_ststimer);
data = _scaleDataToStorage(data);
}
const index3_t beg = start;
const index3_t end = beg + data->size3d();
......@@ -1758,8 +1790,14 @@ ZgyInternalBulk::writeRegion(
// This also considers padding samples. Assuming that even though
// the values are unspecified they won't be outside the value range
// of the real data plus "defaultvalue".
// TODO-Worry: Safer to only check the used part of buffer.
// Or somehow replace all padding samples with defaultvalue.
// The range is in storage values. So e.g. for int8 it will be no
// more than [-128..+127].
// TODO-Performance: Add an OpenMP loop here or in data->range()
// since the caller will always be single threaded. Ideally a new
// rangeMT() should work whether the buffer is contiguous or not.
std::pair<double, double> minmax = data->range();
if (minmax.first <= minmax.second) {
_written_sample_min = std::min(_written_sample_min, minmax.first);
......
......@@ -79,6 +79,7 @@ private:
LoggerFn _loggerfn;
std::shared_ptr<SummaryPrintingTimer> _ptimer_st;
std::shared_ptr<SummaryPrintingTimer> _ptimer_mt;
std::shared_ptr<SummaryPrintingTimer> _ststimer;
public:
ZgyInternalBulk(
......
......@@ -1054,6 +1054,10 @@ DataBufferNd<T,NDim>::s_scaleFromFloat(const DataBuffer* in,
auto dst = std::shared_ptr<dst_type>(new dst_type(src->safesize(), src->safestride()));
// TODO-Worry: Beware of non-contiguous buffers.
// It might not be a good idea to convert the padding area.
//
// TODO-Performance: If caller has already computed the value
// range, it might be able to tell us whether clipping or testing
// for infinite or both is needed.
dst_type::value_type *dst_ptr = dst->data();
const typename src_type::value_type *src_ptr = src->data();
const typename src_type::value_type *src_end = src_ptr + src->allocsize();
......
......@@ -51,7 +51,7 @@ COPY ./ ./
# Enable these to allow the "test" image to run the copytimes.sh test.
# Note that the oldtools folder has not been committed to git.
#PERFTEST# copy oldtools/ oldtools/
#PERFTEST# copy private/copytimes.sh private/copytimes.sh
#PERFTEST# copy private/copytimes*.sh private/
#PERFTEST# copy oldtools/dropcache /usr/local/bin/dropcache
#PERFTEST# RUN chmod 4555 /usr/local/bin/dropcache
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment