// Copyright 2017-2020, Schlumberger
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "genlod.h"
#include "enum.h"
#include "types.h"
#include "lodalgo.h"
#include "histogramdata.h"
#include "histogrambuilder.h"
#include "statisticdata.h"
#include "databuffer.h"
#include "arrayops.h"
#include "meta.h"
#include "bulk.h"
#include "../exception.h"

#include <memory>
#include <cstdint>
#include <array>
#include <vector>
#include <functional>
#include <iostream>
#include <tuple>
#include <cmath>     // std::isfinite
#include <algorithm> // std::min

namespace InternalZGY {
#if 0
}
#endif

using namespace ArrayOps;
using namespace Formatters;

//# TODO-Low: Several tweaks are possible but might not offer
//# much value. See a detailed discussion in doc/lowres.html.

//# Testing notes:
//#   test.black.checkLodContents(), checkStatistics(), checkHistogram()
//#   is part of a reasonably complete end to end test checking this module.
//#   A new unit test testFinalizeProgress() has been written to verify the
//#   progress mechanism. Both that the done and total end up precisely
//#   identical at the end and that the client can abort the calculation.

/**
 * Abstract class for generating low resolution bricks, histogram,
 * and statistics. At this level only define virtual methods for I/O.
 * The implementation can be used as-is when mocking the class.
 * The optional nlods parameter is only used as a consistency check.
 *
 * Note that the WeightedAverage algorithm requires a histogram to work.
 * If no histogram was provided then the current contents of the
 * accumulated histogram will be used. This is unfortunate and might
 * cause brick artifacts, especially in the first few bricks that are
 * generated. With a non-recursive algorithm (plan C), and if only
 * lod2 and above use weighted average, this is unproblematic because
 * in that case the histogram is complete by the time we need it.
 * TODO-Low consider doing an initial pass with a statistical
 * sampling of the lod0 data, only for use with weighted average.
 * There will be a minor issue with some values appearing to have zero
 * frequency, but this should not cause any trouble. (assume "1").
 *
 * Note that the WeightedAverage and AverageNon0 algorithms expect a
 * defaultstorage to use when all inputs are inf/nan or (for AverageNon0)
 * zero. Only relevant for integral types, to ensure that the default
 * is whatever will produce the value closest to 0 after conversion.
 * And since integral data can be neither inf nor nan, this is a
 * fairly academic issue. The AverageNon0 algorithm is currently not
 * used, so it isn't even clear what its desired behavior should be.
 */
GenLodBase::GenLodBase(
    const std::array<std::int64_t,3>& size,
    const std::array<std::int64_t,3>& bricksize,
    RawDataType dtype,
    const std::array<double,2>& histogram_range,
    std::int32_t nlods_in,
    const std::vector<LodAlgorithm>& decimation,
    const std::shared_ptr<HistogramData>& histogram,
    double defaultstorage,
    const std::function<bool(std::int64_t,std::int64_t)>& progress,
    bool verbose)
  : _nlods(0)
  , _total(0)
  , _done(0)
  , _surveysize(size)
  , _bricksize(bricksize)
  , _dtype(dtype)
  , _histogram_range(histogram_range) // default (-1,1)
  , _decimation(decimation.empty() ? std::vector<LodAlgorithm>{
      LodAlgorithm::LowPass, LodAlgorithm::WeightedAverage} : decimation)
  , _wa_histogram(histogram) // Might point to self._histo later.
  , _wa_defaultstorage(defaultstorage)
  , _progress(progress)
  , _verbose(verbose)
{
  // Re-compute this. Should already be known unless running a unit test.
  std::int32_t nlods = 1; // The loop stops before counting final level
  std::int64_t total = 1; // Total number of bricks in all levels.
  std::array<std::int64_t,3> bs = bricksize;
  std::array<std::int64_t,3> sz = size;
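  // Worked example: size=(256,256,256) and bricksize=(64,64,64) gives
  // 4*4*4 = 64 bricks at lod0 and 2*2*2 = 8 bricks at lod1; lod2 then
  // fits in the single brick that stops the loop. So nlods = 3 and
  // total = 64 + 8 + 1 = 73.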
  while (sz[0]>bs[0] || sz[1] > bs[1] || sz[2] > bs[2]) {
    std::array<std::int64_t,3> bricks = (sz + bs - std::int64_t(1)) / bs;
    nlods += 1;
    total += (bricks[0] * bricks[1] * bricks[2]);
    sz = (sz + std::int64_t(1)) / std::int64_t(2);
  }
  if (nlods_in > 0 && nlods_in != nlods)
    throw OpenZGY::Errors::ZgyInternalError("GenLod error in nlods computation");
  this->_nlods = nlods;
  this->_total = total;
  this->_report(nullptr);
}

/**
 * Invoke the user's progress callback if any.
 * Keep track of how many bricks we have processed. Both reads and
 * writes increment the same counter. For plan C the reads will cover
 * all blocks in lod 0 and the writes will cover all blocks in lod > 0.
 * For plan D all blocks are written, which means the computation of
 * _total done in the constructor might need to change.
 */
void
GenLodBase::_report(const DataBuffer* data)
{
  if (data) {
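    // Count whole bricks covered by this buffer, rounding up so a
    // partial brick at the survey edge still counts as one.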
    std::array<std::int64_t,3> bricks =
      (data->size3d() + this->_bricksize - std::int64_t(1)) / this->_bricksize;
    this->_done += (bricks[0] * bricks[1] * bricks[2]);
  }
  if (this->_progress && !this->_progress(_done, _total))
    throw OpenZGY::Errors::ZgyAborted("Computation of low resolution data was aborted");
}

/**
 * This is a stub that must be redefined except for low level unit tests.
 * Read a block from the ZGY file (plans B and C) or the application
 * (plan D). The lod parameter will always be 0 for plans C and D.
 * Returns a scalar buffer if all constant, else a regular 3D buffer.
 */
std::shared_ptr<DataBuffer>
GenLodBase::_read(
    std::int32_t lod,
    const std::array<std::int64_t,3>& pos,
    const std::array<std::int64_t,3>& size)
{
  std::shared_ptr<DataBuffer> result = DataBuffer::makeScalarBuffer3d
    (_wa_defaultstorage, size, _dtype);
  _report(result.get());
  return result;
}

/**
 * This is a stub that must be redefined except for low level unit tests.
 * Write a block to the ZGY file. Silently ignore writes of data that
 * is known to have been read directly from the file. For plans B and C
 * this means ignoring all writes to lod 0.
 */
void
GenLodBase::_write(
    std::int32_t lod,
    const std::array<std::int64_t,3>& pos,
    const std::shared_ptr<const DataBuffer>& data)
{
  this->_report(data.get());
}

/**
 * This is a stub that must be redefined except for low level unit tests.
 * Finalize and write the computed statistics and histogram to the file.
 */
void
GenLodBase::_savestats()
{
}

/**
 * For debugging and logging only.
 */
std::string
GenLodBase::_prefix(std::int32_t lod)
{
  return std::string(2 * (this->_nlods - 1 - lod), ' ');
}

/**
 * For debugging and logging only.
 */
std::string
GenLodBase::_format_result(const std::shared_ptr<DataBuffer>& data)
{
  return data ? data->toString() : "(null)";
}

/**
 * Abstract class for generating low resolution bricks, histogram,
 * and statistics. The inherited methods for I/O are still stubs.
 * See doc/lowres.html for details. This class implements plan C or D
 * which is good for compressed data and acceptable for uncompressed.
 * The ordering of low resolution bricks in the file will not be optimal.
 * For optimal ordering, which works only for uncompressed data, consider
 * implementing plan B in addition to the plan C already implemented.
 * The implementation can be used as-is in a unit test with mocked I/O.
 */
GenLodImpl::GenLodImpl(
    const std::array<std::int64_t,3>& size,
    const std::array<std::int64_t,3>& bricksize,
    RawDataType dtype,
    const std::array<double,2>& histogram_range,
    std::int32_t nlods_in,
    const std::vector<LodAlgorithm>& decimation,
    const std::shared_ptr<HistogramData>& histogram,
    double defaultstorage,
    const std::function<bool(std::int64_t,std::int64_t)>& progress,
    bool verbose)
  : GenLodBase(size, bricksize, dtype, histogram_range, nlods_in, decimation,
               histogram, defaultstorage, progress, verbose)
  , _stats(std::make_shared<StatisticData>())
  , _histo(std::make_shared<HistogramData>(256, histogram_range[0], histogram_range[1]))
{
  // See base constructor. Better than no histogram at all.
  // The WeightedAverage algorithm will now rely on the statistics
  // calculated so far. The exact result thus depends on the order
 * the data bricks are written in.
  if (!this->_wa_histogram || this->_wa_histogram->getcount() == 0)
    this->_wa_histogram = this->_histo;
}

/**
 * Generate and store statistics, histogram, and all low resolution
 * bricks. Works for plans C and D. If we also need an implementation
 * of plan B then this method would need to iterate over all bricks
 * and lods, and _calculate would not make any recursive calls.
 *
 * TODO-Performance: If the bulk layer is made thread safe for writing
 * it is possible to do parallel processing at this high level.
 * E.g. do this for just one level: Split into 4 sub-tasks that
 * each execute in one thread. Special handling will be needed
 * for the single highest-level brick. All 4 threads need to be
 * joined before that one can be done. With one level here and with
 * 4 threads used in _calculate() this means we will be reading with
 * 16 threads. But there are serious caveats:
 *
 * \li Using 4 threads here means there is just 1/4th of the memory
 *     available to each thread, which means requests may become smaller
 *     which means we might not see any benefit at all.
 *
 * \li The added complexity both here and in making the bulk write
 *     thread safe might not be worth the trouble.
 */
std::tuple<std::shared_ptr<StatisticData>, std::shared_ptr<HistogramData>>
GenLodImpl::call()
{
  if (this->_verbose)
    std::cout << "@ GenLod is running."
              << " Histogram range " << _histogram_range
              << "\n";
  this->_calculate(std::array<std::int64_t,3>{0,0,0}, this->_nlods-1);
  this->_savestats(); // Remove?
  return std::make_tuple(this->_stats, this->_histo);
}

template <typename T>
void
GenLodImpl::_accumulateT(const std::shared_ptr<const DataBuffer>& data_in)
{
  std::shared_ptr<const DataBufferNd<T,3>> data =
    std::dynamic_pointer_cast<const DataBufferNd<T,3>>(data_in);
  if (!data)
    return;
  // Note that we never allow the histogram to grow dynamically;
  // that was a problematic feature of the old accessor.
  HistogramBuilder hb(256, _histogram_range[0], _histogram_range[1]);
  std::int64_t len = data->size3d()[0] * data->size3d()[1] * data->size3d()[2];
  if (data->isScalar()) {
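    // A scalar buffer represents "len" identical samples. Add the
    // value once, then scale the counts (and derived statistics) by len.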
    hb.add(data->data(), data->data() + 1);
    hb *= len;
  }
  else {
    hb.add(data->data(), data->data() + len);
  }
  *this->_stats += hb.getstats();
  *this->_histo += hb.gethisto();
}

/**
 * Keep a running tally of statistics and histogram.
 */
void
GenLodImpl::_accumulate(const std::shared_ptr<const DataBuffer>& data)
{
  if (!data)
    return;
  switch (data->datatype()) {
  case RawDataType::SignedInt8:    _accumulateT<std::int8_t>(data); break;
  case RawDataType::UnsignedInt8:  _accumulateT<std::uint8_t>(data); break;
  case RawDataType::SignedInt16:   _accumulateT<std::int16_t>(data); break;
  case RawDataType::UnsignedInt16: _accumulateT<std::uint16_t>(data); break;
  case RawDataType::SignedInt32:   _accumulateT<std::int32_t>(data); break;
  case RawDataType::UnsignedInt32: _accumulateT<std::uint32_t>(data); break;
  case RawDataType::Float32:       _accumulateT<float>(data); break;
  case RawDataType::IbmFloat32:
  default: throw OpenZGY::Errors::ZgyInternalError("Unrecognized type enum");
  }
}

/**
 * Read data from the specified (readpos, readlod) and store it back.
 * The function will itself decide how much to read. But with several
 * constraints: always read full traces, and the size in i and j must be
 * 2 * bs * 2^N, where bs is the file's brick size in that dimension,
 * clipped to the survey boundaries. This might give an empty result.
 *
 * TODO-Performance: Allow the application to configure how much memory
 * we are allowed to use. Increase the block size accordingly.
 * Larger bricks might help the bulk layer to become more efficient.
 *
 * When readlod is 0 and the data was read from the ZGY file, the
 * writing part is skipped since the data is obviously there already.
 *
 * In addition to reading and writing at the readlod level, the
 * method will compute a single decimated buffer at readlod+1 and
 * return it. As with the read/write the buffer might be smaller at
 * the survey edge. Note that the caller is responsible for storing
 * the decimated data.
 *
 * Full resolution data (lod 0) will be read from file (plan C) or the
 * application (plan D). Low resolution is computed by a recursive call
 * to this function (plans C and D) or by reading the file (plan B).
 * Note that currently only plan C is fully implemented.
 *
 * For plans B and C a single call needs to be made to read the brick
 * (there is by definition just one) at the highest level of detail.
 * This will end up computing all possible low resolution bricks and
 * storing them. For plan B the caller must iterate.
 *
 * The function is also responsible for collecting statistics and
 * histogram data. Note that some of the decimation algorithms use the
 * histogram of the entire file. Ideally the histogram of the entire
 * file should be available before decimation starts but that is
 * impractical. At least make sure the histogram update is done early
 * enough and the decimation late enough that the chunk of data being
 * decimated has already been added to the histogram.
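 *
 * Illustration (plan C, nlods==3): the single call from call() is
 * _calculate(pos=(0,0,0), readlod=2). It recurses into up to 4 calls
 * at readlod==1, each of which recurses into up to 4 reads at
 * readlod==0. Writes happen on the way back up: lod0 writes are
 * silently ignored (that data is already on file), lod1 and lod2 are
 * written, and each level below the top returns one decimated buffer
 * to its caller.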
 */
std::shared_ptr<DataBuffer>
GenLodImpl::_calculate(const std::array<std::int64_t,3>& readpos_in, std::int32_t readlod)
{
  const std::int64_t lodfactor = std::int64_t(1) << readlod;
  const std::array<std::int64_t,3> surveysize =
    (this->_surveysize + (lodfactor - 1)) / lodfactor;
  const std::array<std::int64_t,3> readpos{readpos_in[0], readpos_in[1], 0};
  if (readpos[0] >= surveysize[0] || readpos[1] >= surveysize[1])
    return nullptr;
  const std::array<std::int64_t,3> readsize
    {std::min(2 * this->_bricksize[0], (surveysize[0] - readpos[0])),
     std::min(2 * this->_bricksize[1], (surveysize[1] - readpos[1])),
     surveysize[2]}; // always read full traces.
  const std::array<std::int64_t,3> writesize = (readsize + std::int64_t(1)) / std::int64_t(2);
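  // Example, assuming bricksize (64,64,64): at readlod==1 this assembles
  // up to 128x128 samples (times full trace length) of lod1 data from
  // four recursive lod0 calls that together cover up to 256x256 of the
  // full-resolution cube, writes it at lod1, and returns a buffer
  // decimated to at most 64x64 for the caller to use when building lod2.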

  if (_verbose)
    std::cout << "@" << _prefix(readlod)
              << "calculate(lod=" << readlod
              << ", pos=" << readpos
              << ", size=" << readsize << ")\n";

  std::shared_ptr<const DataBuffer> data;
  if (readlod == 0) {
    data = this->_read(readlod, readpos, readsize);
    this->_accumulate(data);
  }
  else {
    const std::array<std::int64_t,3> bs = this->_bricksize;
    std::array<std::int64_t,3> offsets[4] =
      {{    0,     0,   0},
       {    0, bs[1],   0},
       {bs[0],     0,   0},
       {bs[0], bs[1],   0}};
    std::shared_ptr<DataBuffer> hires[4]{nullptr, nullptr, nullptr, nullptr};

    // TODO-Performance: If algorithm "C" and readlod==1 it should be
    // fairly safe to parallelize this loop, using 4 threads for reading,
    // because _write() does nothing at readlod==0. Technically we
    // could also have consolidated the 4 lod0 read requests but that
    // would mean quite a bit of refactoring.
    // Worry: The assumption that _write is a no-op is important,
    // so it probably needs to be checked to avoid parallel writes.

    // Alternatively, and I suspect this will be more difficult, the code
    // might be refactored so the bulk layer sees just a single request
    // that is 4x larger. The bulk layer can then do multi threading itself.
    // The reason this is better than the first approach is that the bulk
    // layer in the cloud case might be able to consolidate more bricks.
    // On the flip side, while I plan to implement the cloud access case
    // I might not actually do the split and parallelize.

    // Or, when readlod==1, can I just make one call and skip the paste?
    // There are probably some more survey-edge cases I need to handle then.
    // Also, carefully analyze how this affects total memory usage.

    for (int ii=0; ii<4; ++ii)
      hires[ii] = this->_calculate(readpos*std::int64_t(2) + offsets[ii]*std::int64_t(2), readlod-1);
    data = this->_paste4(hires[0], hires[1], hires[2], hires[3]);
  }

  // TODO-Performance: If parallelizing above, needs to have a test
  // if (plan != "C" || readlod > 0)
  this->_write(readlod, readpos, data);

  std::shared_ptr<DataBuffer> result;
  if (readlod == this->_nlods - 1) {
    result = nullptr; // Caller will discard it anyway.
    if (this->_done != this->_total)
      throw OpenZGY::Errors::ZgyInternalError("GenLodImpl: Missing or excess data.");
  }
  else if (data->isScalar()) {
    result = DataBuffer::makeScalarBuffer3d
      (data->scalarAsDouble(), writesize, data->datatype());
  }
  else {
    // TODO-Performance: Enable parallelization (in genlod.cpp) of this call.
    // Note that for readlod==0 we might already be running in one of four
    // OpenMP threads. It is not clear whether to force OpenMP to also
    // parallelize this low level loop. // omp_set_nested(1) or better, use
    // omp_set_max_active_levels(...).
    result = this->_decimate(data, readlod+1);
    if (result->size3d() != writesize)
      throw OpenZGY::Errors::ZgyInternalError("GenLodImpl: Wrong writesize.");
  }
  if (this->_verbose)
    std::cout << "@" << _prefix(readlod)
              << "calculate returns(lod="
              << readlod+1 << ", pos=" << readpos
              << ", data=" << data->toString() << ")\n";
  return result;
}

/**
 * Return a decimated version of the input buffer with half the size
 * (rounded up) in each dimension. In total the result will be ~1/8
 * the size of the input.
 *
 * Lod refers to the level being generated. Must be >= 1.
 */
std::shared_ptr<DataBuffer>
GenLodImpl::_decimate(const std::shared_ptr<const DataBuffer>& data, std::int64_t lod)
{
  if (!data)
    return nullptr;
  if (lod < 1)
    throw OpenZGY::Errors::ZgyInternalError("GenLodImpl::_decimate must have lod >= 1");

  const std::array<std::int64_t,3> outsize =
    (data->size3d() + std::int64_t(1)) / std::int64_t(2);
  if (data->isScalar()) {
    double value = data->scalarAsDouble();
    return DataBuffer::makeScalarBuffer3d(value, outsize, data->datatype());
  }
  else {
    std::shared_ptr<DataBuffer> result =
      DataBuffer::makeDataBuffer3d(nullptr, 0, outsize, data->datatype());
    // The constructor guaranteed !_decimation.empty(), but the list might not be long enough.
    const LodAlgorithm algorithm =
      (lod <= (int)_decimation.size()) ? _decimation[lod-1] : _decimation.back();
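    // E.g. with the default decimation list {LowPass, WeightedAverage},
    // lod 1 uses LowPass and lod 2 and higher use WeightedAverage.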

    // The 4 last arguments are only used for vvArrayBasic::lod_WeightedAverage.
    // Note that the value range for the histogram should at this
    // point refer to storage and not converted values. For integral
    // types the value range will typically be the natural range of
    // int8_t or int16_t.
    // See BrickedAccessor.cpp in old code
    // TODO-Medium missing _wa_defaultstorage.
    createLod(result, data, algorithm,
              _wa_histogram->getbins(),
              _wa_histogram->getsize(),
              _wa_histogram->getmin(),
              _wa_histogram->getmax());
    return result;
  }
}

/**
 * See _paste4() for details.
 */
std::shared_ptr<DataBuffer>
GenLodImpl::_paste1(
    const std::shared_ptr<DataBuffer>& result,
    const std::shared_ptr<const DataBuffer>& more,
    std::int64_t ioff, std::int64_t joff)
{
  if (more) {
    const index3_t dstorig{0,0,0};
    const index3_t srcorig{ioff, joff, 0};
    result->copyFrom(more.get(), srcorig.data(), dstorig.data(), 0, 0);
  }
  return result;
}

/**
 * Combine 4 buffers into one. Input buffers may be null (do not
 * paste) or scalar buffers (paste a constant value). If all non-null
 * buffers are just scalars then the return from this function
 * will also be a scalar. d01 adds more data in the J direction,
 * so it starts at i=0, j>0 in the target. Similarly d10 adds
 * more in the I direction. And d11 adds data in the diagonal.
 *
 * Performance note: It is in theory possible to avoid some buffer
 * copies, this one in particular, by passing our 4 or 8 buffers
 * to the decimation algorithm instead of a combined buffer.
 * The algorithms remain just as efficient. BUT if sizes can vary,
 * or bricks can be missing, or the number of bricks differs in level 1
 * because we read directly from the file, then things can get really
 * complicated really fast.
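 *
 * Layout of the result, with ni0 = d00->size3d()[0] and
 * nj0 = d00->size3d()[1]:
 *
 *            j=0    j=nj0
 *     i=0    d00    d01
 *     i=ni0  d10    d11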
 */
std::shared_ptr<const DataBuffer>
GenLodImpl::_paste4(
    const std::shared_ptr<const DataBuffer>& d00,
    const std::shared_ptr<const DataBuffer>& d01,
    const std::shared_ptr<const DataBuffer>& d10,
    const std::shared_ptr<const DataBuffer>& d11)
{
  if (!d01 && !d10 && !d11)
    return d00; // Nothing to paste. Also works for empty or scalar.

  // Empty d00 + non-empty others is not good.
  if (!d00)
    throw OpenZGY::Errors::ZgyInternalError("GenLodImpl::_paste4() assert#1.");
  if (d01 && d01->size3d()[0] != d00->size3d()[0])
    throw OpenZGY::Errors::ZgyInternalError("GenLodImpl::_paste4() assert#2.");
  if (d10 && d10->size3d()[1] != d00->size3d()[1])
    throw OpenZGY::Errors::ZgyInternalError("GenLodImpl::_paste4() assert#3.");
  if (d01 && d10) {
    // The "diagonal" brick must exist with the right size.
    if (!d11 ||
        d11->size3d()[1] != d01->size3d()[1] ||
        d11->size3d()[0] != d10->size3d()[0])
      throw OpenZGY::Errors::ZgyInternalError("GenLodImpl::_paste4() assert#4.");
  }
  else {
    // The "diagonal" brick should not be needed in this case.
    if (d11)
      throw OpenZGY::Errors::ZgyInternalError("GenLodImpl::_paste4() assert#5.");
  }

  const std::int64_t ni = d00->size3d()[0] + (d10 ? d10->size3d()[0] : 0);
  const std::int64_t nj = d00->size3d()[1] + (d01 ? d01->size3d()[1] : 0);
  const std::int64_t nk = d00->size3d()[2];

  bool all_same = true;
  const std::array<std::shared_ptr<const DataBuffer>,4> all{d00, d01, d10, d11};
  for (const auto& e : all)
    if (all_same && e)
      if (!e->isScalar() || e->scalarAsDouble() != d00->scalarAsDouble())
        all_same = false;

  std::shared_ptr<DataBuffer> result;
  if (all_same) {
    double value = d00->scalarAsDouble();
    result = DataBuffer::makeScalarBuffer3d(value,
                                          index3_t{ni,nj,nk}, d00->datatype());
  }
  else {
    // TODO-Test: needs careful testing with weird sizes.
    result = DataBuffer::makeDataBuffer3d(nullptr, 0,
                                          index3_t{ni,nj,nk}, d00->datatype());
    _paste1(result, d00, 0, 0);
    _paste1(result, d01,                0, d00->size3d()[1]);
    _paste1(result, d10, d00->size3d()[0],                0);
    _paste1(result, d11, d00->size3d()[0], d00->size3d()[1]);
  }
  return result;
}

/**
 * Choose the histogram range to use.
 *
 * [1] For cubes stored as integral data use the entire range that can be
 * represented. Not just the possibly smaller range samples written.
 * For int8/uint8 and a 256-bin histogram this is a no-brainer because
 * having less than one possible sample value in each bin inevitably
 * leads to some empty bins even for a completely smooth distribution
 * of the input. For int16/uint16 the strategy is still workable, but
 * it would have been better to use a compromise: use a narrower range,
 * but one narrowed by an integer factor. Or sidestep the issue by
 * internally using a 64k histogram that will get trimmed down to 256
 * entries later.
 *
 * For cubes stored as float or compressed data it gets more complicated.
 *
 * The range of all sample values seen until now, i.e. everything
 * written, is passed in. The possibility of overwriting data or
 * (in the future) appending to an existing file makes this inaccurate,
 * but still probably good enough.
 *
 * [2] In the normal case the file contains a range of different
 *     samples. Map the center of the first bin of the histogram to
 *     the lowest written value and the center of the last bin of the
 *     histogram to the highest written value. If the written values
 *     include zero then adjust the range slightly to make the range
 *     zero-centric. TODO-Low implement zero-centric.
 *
 *     Note that if the distribution of sample values is perfectly
 *     uniform, the first and last bins will probably end up with
 *     lower counts than expected. This is partly because the
 *     initial mapping is to the center of the bin instead of the
 *     outer edge, and partly because of the zero-centric adjustment.
 *     This is a very minor issue compared to how bad the histogram
 *     might look if it isn't zero centric.
 *
 *     One motivation for using bin centers instead of outer edges is
 *     that it helps (but does not guarantee) smooth histograms for a
 *     float cube that contains discrete values.
 *
 * [3] If writtenrange is invalid this means that no finite samples
 *     have been written. Choose an arbitrary range in that case
 *     instead of ending up with a lot of obscure corner cases. The
 *     range needs to include defaultvalue (which for float data is
 *     zero) as there might be unwritten bricks. The range (-1,+1) is
 *     probably as good as any. Or adjust it slightly to make it zero
 *     centric. Or use (-128,+127), giving a bin width of 1 if the
 *     histogram has 256 bins.
 *
 * [4] The same applies when only zeros have been written. Use (-1,+1)
 *     or (-128,+127). All samples end up in the center bin, which is
 *     bin 128.
 *
 * [5] For robustness also use that value if some internal error
 *     occurred.
 *
 * [6] If only a single non-zero value has been written then choose a
 *     range with defaultvalue (i.e. zero) at one end and the written
 *     value at the other. Only bin 0 and 255 will have non-zero
 *     counts. Which bin contains defaultvalue and which contains the
 *     written value depends on the sign of the written value.
 *
 *     An alternative to the above would be to include 2*writtenvalue
 *     in the range, which would always map writtenvalue to bin 128
 *     just like the all-zero case does. This is arguably more
 *     consistent. But to avoid bias, writtenvalue should map to the
 *     center of a bin. Similar to the zero-centric property which maps
 *     zero to the center of a bin. So the range should run to
 *     (255.0/128.0)*writtenvalue. Or (255.0/127.0) if writtenvalue is
 *     negative. Or something else if the size of the histogram is not
 *     256. Sigh. The previous choice sounds a lot better now, right?
 *
 * TODO-Medium: Make sure the Python version and the C++ version use
 * the same algorithm. In the Python implementation this logic is in
 * HistogramData.suggestHistogramRange().
 *
 * TODO-Low: If data is being appended the code will still re-compute
 * the entire histogram. To do this, it uses the _written_sample_min/max
 * tracked while writing lod0 data. The range should be the union
 * of the data range written previously, found in the old histogram, and
 * the samples written this time around. Problem
 * is, the first write might not have bothered to finalize and thus
 * did not save this information. I probably need to "finalize" with
 * an empty histogram. Then include the existing histogram range in
 * the new one. Bear in mind that the stored range will be in user and
 * not storage values.
 *
 * TODO-Low: should the suggested range for float data include the default
 * value, which for floats is always zero? Technically the code should
 * keep track of whether all bricks have been explicitly written and
 * if not include zero. It doesn't. This "bug" is probably in the
 * "extreme nitpicking" category. Application code can remove the
 * problem by explicitly choosing the defaultvalue to use, then
 * initialize the entire file with that value.
 *
 * TODO-Test: Unit tests for all these corner cases, both C++ and
 * Python, for files containing nothing, 0, -42, +42. I can probably do
 * all those tests manually with zgycopy / zgycopyc but an automated
 * test is better.
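 *
 * Examples, following the numbered rules above as implemented below:
 *
 *     int16 cube, any written range   -> (-32768, +32767)   [1]
 *     float cube, written (2.5, 7.0)  -> (2.5, 7.0)         [2]
 *     float cube, nothing written     -> (-128, +127)       [3]
 *     float cube, only zeros written  -> (-128, +127)       [4]
 *     float cube, only +42 written    -> (0, 42)            [6]
 *     float cube, only -42 written    -> (-42, 0)           [6]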
 */
std::array<double,2>
GenLodImpl::suggestHistogramRange(
    const std::array<double,2>& writtenrange,
    RawDataType dtype)
{
  const std::array<double,2> bogus{-128,+127};
  const RawDataTypeDetails details(dtype);
  if (details.is_integer)
    return std::array<double,2>{details.lowest, details.highest}; // [1]
  else if (!std::isfinite(writtenrange[0]) ||
           !std::isfinite(writtenrange[1]) ||
           writtenrange[0] > writtenrange[1])
    return bogus; // [3], [5], nothing written or error.
  else if (writtenrange[0] < writtenrange[1])
    return writtenrange; // [2], normal case
  else if (writtenrange[0] > 0)
    return std::array<double,2>{0, writtenrange[0]}; // [6], single value
  else if (writtenrange[0] < 0)
    return std::array<double,2>{writtenrange[0], 0}; // [6], single value
  else
    return bogus; // [4], all zero
}

/**
 * Generate and store low resolution bricks, histogram, and statistics.
 * See doc/lowres.html for details. I/O is done via ZgyInternalBulk.
 * Use this class as part of finalize().
 * The implementation uses plan C, which means the full resolution data
 * will be read from the ZGY file. To implement plan D, make a derived
 * class that redefines _read() to query the client for the required full
 * resolution data. _read() must then also call _write() to store the
 * data it just received.
 */
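// Minimal usage sketch (hypothetical caller; the real client is the
// bulk layer's finalize() and may pass different arguments):
//
//   GenLodC gen(accessor, meta, lodcompressor, decimation, progress, verbose);
//   std::shared_ptr<StatisticData> stats;
//   std::shared_ptr<HistogramData> histo;
//   std::tie(stats, histo) = gen.call();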
GenLodC::GenLodC(
    const std::shared_ptr<ZgyInternalBulk>& accessor,
    const std::shared_ptr<ZgyInternalMeta>& meta,
    const compressor_t& lodcompressor,
    const std::vector<LodAlgorithm>& decimation,
    const std::function<bool(std::int64_t,std::int64_t)>& progress,
    bool verbose)
  : GenLodImpl(meta->ih().size(),
               meta->ih().bricksize(),
               meta->ih().datatype(),
               suggestHistogramRange(accessor->valueRangeWritten(),
                                     meta->ih().datatype()),
               meta->ih().nlods(),
               decimation,
               nullptr, // Will use computed histogram so far.
               meta->ih().defaultstorage(),
               progress,
               verbose)
  , _accessor(accessor)
  , _lodcompressor(lodcompressor)
{
  if (this->_verbose)
    std::cout << "@ GenLod is created."
              << " Written range   " << accessor->valueRangeWritten()
              << "\n";
}

/**
 * See base class for details.
 */
std::shared_ptr<DataBuffer>
GenLodC::_read(
    std::int32_t lod,
    const index3_t& pos,
    const index3_t& size)
{
  std::shared_ptr<DataBuffer> result = this->_accessor->readToNewBuffer
    (pos, size, lod, /*as_float=*/false, /*extra_checking*/true);
  _report(result.get());
  return result;
}

/**
 * See base class for details.
 */
void
GenLodC::_write(
    std::int32_t lod, const index3_t& pos,
    const std::shared_ptr<const DataBuffer>& data)
{
  if (lod > 0) {
    if (this->_verbose)
      std::cout << "@" << _prefix(lod)
                << "write(lod=" << lod << ", pos=" << pos
                << ", " << data->toString() << ")\n";
    // TODO-Low more const-correctness of the main API.
    auto unconst_data = std::const_pointer_cast<DataBuffer>(data);
    _accessor->writeRegion(unconst_data, pos, lod, /*is_storage=*/true, _lodcompressor);
    _report(data.get());
  }
}

void
GenLodC::_savestats()
{
  // TODO-Low: Remove this / yagni? Currently finalize() handles this.
}

} // namespace