// Copyright 2017-2021, Schlumberger
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "api.h"
#include "exception.h"
#include "safewriter.h"
#include "impl/enum.h"
#include "impl/file.h"
#include "impl/file_sd.h"
#include "impl/meta.h"
#include "impl/bulk.h"
#include "impl/databuffer.h"
#include "impl/transform.h"
#include "impl/lodalgo.h"
#include "impl/statisticdata.h"
#include "impl/histogramdata.h"
#include "impl/genlod.h"
#include "impl/compression.h"
#include "impl/guid.h"

#ifdef _MSC_VER
#pragma warning(push)
// The warning is due to me using pure virtual classes as "real" interfaces,
// having a similar inheritance scheme for interfaces and implementation.
// I believe this is safe as long as the interfaces are truly pure.
#pragma warning(disable:4250) // inherits via dominance
#endif

/**
 * \file api.cpp
 * \brief Implements the pure interfaces of the API.
 *
 * \internal TODO-Doc consider moving all doxygen comments to the interface
 * and exclude the concrete types. Unfortunately that might make the comments
 * harder to keep up to date.
 */

namespace {
  /**
   * \brief Make a fake shared_ptr from a plain unsafe pointer.
   *
   * Used when a method or a data member expects a smart pointer while
 * the application has an unsafe pointer that is guaranteed to remain
   * valid long enough. The returned smart pointer does nothing on
   * cleanup.
   *
   * CAVEAT: Using this method indicates a code smell.
   * Note: The method is duplicated in bulk.cpp, databuffer.cpp,
 * and api.cpp. There isn't much point in consolidating them.
   */
  template<typename T>
  static inline std::shared_ptr<T>
  fake_shared(T* p)
  {
    return std::shared_ptr<T>(p, [](T* p){});
  }
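
  // A minimal usage sketch (hypothetical names, not from this file):
  // wrap a caller-owned buffer so it can be passed to an API that
  // expects a shared_ptr, without transferring ownership.
  //
  //   float buf[64] = {0};
  //   std::shared_ptr<float> p = fake_shared(&buf[0]);
  //   // The no-op deleter means nothing is freed when p expires;
  //   // buf must simply outlive every copy of p.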
}

namespace OpenZGY {
  class IOContext;
}

namespace OpenZGY { namespace Impl {
#if 0
}}
#endif

/** \cond IMPL */ // Doxygen needs api.cpp, but not all of it.

/**
 * \brief Convert between internal and external data types.
 *
 * \details Thread safety:
 * Thread safe because the class only contains static methods with no state.
 *
 * \internal
 * TODO-Test: Move to a separate file to allow it to be unit tested.
 */
class EnumMapper
{
  EnumMapper() = delete;
  EnumMapper(const EnumMapper&) = delete;
  EnumMapper& operator=(const EnumMapper&) = delete;
public:
  static SampleDataType mapRawDataTypeToSampleDataType(InternalZGY::RawDataType);
  static UnitDimension  mapRawHorizontalDimensionToUnitDimension(InternalZGY::RawHorizontalDimension);
  static UnitDimension  mapRawVerticalDimensionToUnitDimension(InternalZGY::RawVerticalDimension);
  static InternalZGY::RawDataType            mapSampleDataTypeToRawDataType(SampleDataType);
  static InternalZGY::RawHorizontalDimension mapUnitDimensionToRawHorizontalDimension(UnitDimension);
  static InternalZGY::RawVerticalDimension   mapUnitDimensionToRawVerticalDimension(UnitDimension);
  static InternalZGY::ZgyInternalWriterArgs mapWriterArgs(const ZgyWriterArgs&);
  static InternalZGY::LodAlgorithm mapDecimationTypeToLodAlgorithm(DecimationType value);
  static std::vector<InternalZGY::LodAlgorithm> mapDecimationTypeToLodAlgorithm(const std::vector<DecimationType>& values);
};

/** \endcond */

/**
 * \brief High level API for reading and writing ZGY files.
 *
 * The base class contains properties common to both reader and writer.
 *
 * The constructor should logically have had a ZgyInternalMeta parameter
 * for accessing the implementation layer. But due to the way the code
 * is structured the _meta pointer needs to be set in the constructor
 * for the leaf types. The _meta pointer is guaranteed to not be empty
 * except while constructing the instance.
 *
 * Both ZgyReader and ZgyWriter need to retain a pointer to the file
 * descriptor, primarily in order to explicitly close it. Relying on the
 * shared_ptr to do this when going out of scope is dangerous because
 * of exception handling. ZgyWriter additionally needs it when flushing
 * metadata to disk, since there is no file descriptor in ZgyInternalMeta.
 *
 * Both ZgyReader and ZgyWriter need a ZgyInternalBulk pointer to do
 * bulk I/O. That instance in turn keeps private references to the
 * file descriptor and the metadata but is not supposed to expose them.
 *
 * The FileADT and ZgyInternalBulk pointers can be declared here since
 * both the reader and the writer need them, or removed from here and
 * duplicated in ZgyReader and ZgyWriter.
 *
 * Thread safety:
 * The following applies both to this class and derived types.
 * Modification may lead to a data race. The intent for this class is
 * that any calls a ZgyReader instance might make will not modify data,
 * except possibly changes done while the file is being opened.
 * To help enforce this, both const and mutable pointers are used for
 * ZgyInternalMeta and ZgyInternalBulk with the latter being empty
 * when instantiated from the ZgyReader constructor instead of ZgyWriter.
 * FUTURE: The instance holds one or more pointers to the data it
 */
class ZgyMeta: virtual public IZgyMeta
{
protected:
  /** \brief Handle to the internal metadata layer which this class wraps. */
  std::shared_ptr<const InternalZGY::ZgyInternalMeta> _meta;

public:

  virtual std::array<int64_t,3>
  size() const override
  {
    return _meta->ih().size();
  }

  virtual SampleDataType
  datatype() const override
  {
    return EnumMapper::mapRawDataTypeToSampleDataType(_meta->ih().datatype());
  }

  virtual std::array<float32_t,2>
  datarange() const override
  {
    if (_meta->ih().datatype() == InternalZGY::RawDataType::Float32)
      return std::array<float32_t,2>{_meta->ih().smin(), _meta->ih().smax()};
    else
      return _meta->ih().safe_codingrange();
  }

  virtual std::array<float32_t,2>
  raw_datarange() const override
  {
    if (_meta->ih().datatype() == InternalZGY::RawDataType::Float32)
      return std::array<float32_t,2>{_meta->ih().smin(), _meta->ih().smax()};
    else
      return _meta->ih().raw_codingrange();
  }

  virtual UnitDimension
  zunitdim() const override
  {
    return EnumMapper::mapRawVerticalDimensionToUnitDimension(_meta->ih().vdim());
  }

  virtual UnitDimension
  hunitdim() const override
  {
    return EnumMapper::mapRawHorizontalDimensionToUnitDimension(_meta->ih().hdim());
  }

  virtual std::string
  zunitname() const override
  {
    return _meta->ih().vunitname();
  }

  virtual std::string
  hunitname() const override
  {
    return _meta->ih().hunitname();
  }

  virtual float64_t
  zunitfactor() const override
  {
    return _meta->ih().vunitfactor();
  }

  virtual float64_t
  hunitfactor() const override
  {
    return _meta->ih().hunitfactor();
  }

  virtual float32_t
  zstart() const override
  {
    return _meta->ih().orig()[2];
  }

  virtual float32_t
  zinc() const override
  {
    return _meta->ih().inc()[2];
  }

  virtual std::array<float32_t,2>
  annotstart() const override
  {
    return std::array<float32_t,2>{
      _meta->ih().orig()[0],
      _meta->ih().orig()[1]};
  }

  virtual std::array<float32_t,2>
  annotinc() const override
  {
    return std::array<float32_t,2>{
      _meta->ih().inc()[0],
      _meta->ih().inc()[1]};
  }

  virtual const corners_t&
  corners() const
  {
    return _meta->ih().ocp_world();
  }

  virtual const corners_t&
  indexcorners() const
  {
    return _meta->ih().ocp_index();
  }

  virtual const corners_t&
  annotcorners() const
  {
    return _meta->ih().ocp_annot();
  }

  virtual std::array<int64_t,3>
  bricksize() const override
  {
    return _meta->ih().bricksize();
  }

  virtual std::vector<std::array<int64_t,3>>
  brickcount() const override
  {
    return _meta->ih().lodsizes();
  }

  virtual int32_t
  nlods() const override
  {
    return _meta->ih().nlods();
  }

  // Currently not needed by any client.
  //virtual std::string dataid() const override
  //{
  //  return InternalZGY::GUID(_meta->ih().dataid()).toString();
  //}

  virtual std::string verid()  const override
  {
    return InternalZGY::GUID(_meta->ih().verid()).toString();
  }

  // Currently not needed by any client.
  //virtual std::string previd() const override
  //{
  //  return InternalZGY::GUID(_meta->ih().previd()).toString();
  //}

  virtual void
  dump(std::ostream& os) const override
  {
    throw std::runtime_error("Not implemented: ZgyMeta::dump()");
  }

  virtual SampleStatistics
  statistics() const override
  {
    const InternalZGY::IInfoHeaderAccess& ih = _meta->ih();
    return SampleStatistics(ih.scnt(), ih.ssum(), ih.sssq(), ih.smin(), ih.smax());
  }

  virtual SampleHistogram
  histogram() const override
  {
    const InternalZGY::IHistHeaderAccess& hh = _meta->hh();
    if (hh.bincount() > 0 && hh.bins() != nullptr)
      return SampleHistogram(hh.samplecount(), hh.minvalue(), hh.maxvalue(),
                             std::vector<std::int64_t>(hh.bins(),
                                                       hh.bins() + hh.bincount()));
    else
      return SampleHistogram();
  }

  std::shared_ptr<const FileStatistics> filestats_nocache() const
  {
    using InternalZGY::LookupTable;
    using InternalZGY::BrickStatus;

    const std::int64_t bytesperalpha = _meta->ih().bytesperalpha();
    const std::int64_t bytesperbrick = _meta->ih().bytesperbrick();
    const std::vector<std::uint64_t>& alup = _meta->alup().lup();
    const std::vector<std::uint64_t>& blup = _meta->blup().lup();
    const std::vector<std::uint64_t>& bend = _meta->blup().lupend();

    FileStatistics result;
    result._file_version = _meta->fh().version();
    result._alpha_normal_size_per_entry = bytesperalpha;
    result._brick_normal_size_per_entry = bytesperbrick;
    // result._file_size = _fd->xx_size(); Available in ZgyReader and ZgyWriter only.

    // TODO-Low: Fix this kludge.
    // I happen to know that in V3 and V4 the headers are all stored
    // consecutively and the brick lookup table comes last.
    result._header_size = _meta->oh().bricklupoff() + _meta->oh().bricklupsize();

    const std::vector<std::array<std::int64_t,3>>& lodsizes = _meta->ih().lodsizes();
    const std::vector<std::int64_t>& alphaoffsets = _meta->ih().alphaoffsets();
    const std::vector<std::int64_t>& brickoffsets = _meta->ih().brickoffsets();
    // This would have been much simpler if I made a getBrickFilePositionByIndex.
    // And possibly moving part of this code inside class LookupTable.
    // I only need to iterate over the contents of alup, blup, bend
    // but here I need to iterate over brick position and let class
    // LookupTable convert that back to a raw index.
    std::array<std::int64_t,3> size = _meta->ih().size();
    const std::array<std::int64_t,3> bs = _meta->ih().bricksize();
    for (std::int64_t lod= 0; lod < _meta->ih().nlods(); ++lod) {
      for (std::int64_t ii = 0; ii < size[0]; ii += bs[0]) {
        for (std::int64_t jj = 0; jj < size[1]; jj += bs[1]) {
          LookupTable::LutInfo info =
            LookupTable::getAlphaFilePosition
            (ii/bs[0], jj/bs[1], lod,
             lodsizes,
             alphaoffsets, alup, /*aend,*/
             bytesperalpha);
          //std::cout<< zgydump_format(ii/bs[0], jj/bs[1], 0, lod, info) << "\n";
          switch (info.status) {
          case BrickStatus::Missing:  result._alpha_missing_count  += 1; break;
          case BrickStatus::Constant: result._alpha_constant_count += 1; break;
          case BrickStatus::Normal:
            result._alpha_normal_count   += 1;
            break;
          case BrickStatus::Compressed:
            result._alpha_compressed_count += 1;
            result._alpha_compressed_size += info.size_in_file;
            break;
          }
          for (std::int64_t kk = 0; kk < size[2]; kk += bs[2]) {
            LookupTable::LutInfo info =
              LookupTable::getBrickFilePosition
              (ii/bs[0], jj/bs[1], kk/bs[2], lod,
               lodsizes,
               brickoffsets, blup, bend,
               bytesperbrick);
            //std::cout << zgydump_format(ii/bs[0], jj/bs[1], kk/bs[2], lod, info) << "\n";
            switch (info.status) {
            case BrickStatus::Missing:  result._brick_missing_count  += 1; break;
            case BrickStatus::Constant: result._brick_constant_count += 1; break;
            case BrickStatus::Normal:
              result._brick_normal_count   += 1; break;
            case BrickStatus::Compressed:
              result._brick_compressed_count += 1;
              result._brick_compressed_size += info.size_in_file;
              break;
            }
          }
        }
      }
      size[0] = (size[0]+1)/2;
      size[1] = (size[1]+1)/2;
      size[2] = (size[2]+1)/2;
    }

    // TODO-Low: Keep track of wasted_size and padding_size.
    // Padding gets added in ZgyInternalMeta::flushMeta(). I need to
    // replicate the logic here. The alternative is to scan for the
    // lowest brick offset. But even that isn't completely reliable
    // because there might be wasted blocks between end of padding and
    // start of first block. And, do I really care at all?
    //result._padding_size = roundup(result._header_size,
    //                              result._brick_size_per_entry);
    //result._wasted_size = result._file_size - result._usedSize();

    // DERIVED INFORMATION:
    // The following could also have been generated on the fly in some
    // member function. I pre-calculate it here instead, to limit the
    // amount of code visible in the public api.h header file.

    // File size not including padding and holes.
    result._used_size =
      ((result._alpha_normal_count * result._alpha_normal_size_per_entry) + result._alpha_compressed_size +
       (result._brick_normal_count * result._brick_normal_size_per_entry) + result._brick_compressed_size +
       result._header_size);

    // Same as used_size would be if the file is/was uncompressed.
    result._used_if_uncompressed =
      (((result._alpha_normal_count + result._alpha_compressed_count) * result._alpha_normal_size_per_entry) +
       ((result._brick_normal_count + result._brick_compressed_count) * result._brick_normal_size_per_entry) +
       result._header_size);

    // Is there at least one brick flagged as compressed?
    result._is_compressed =
      (result._alpha_compressed_count + result._brick_compressed_count > 0);

    // Relative size of this possibly compressed file compared to uncompressed.
    result._compression_factor =
      (result._used_if_uncompressed > 0 ?
       result._used_size / (double)result._used_if_uncompressed :
       1);
    // Slightly different definition of compression factor.
    // Doesn't work because file_size not set yet.
    // Besides, I like the other one better.
    //result._compression_factor =
    //  (result._file_size > 0 ?
    //   result._used_size + (result._file_size-(result._used_if_uncompressed)) / (double)result._file_size:
    //   1);

    return std::shared_ptr<const FileStatistics>(new FileStatistics(result));
  }

  /**
   * This method should be overridden in ZgyReader and ZgyWriter.
   * If it is ok to have ZgyMeta be an abstract type then this
   * implementation can simply be removed.
   *
   * Calling this generic version of filestats() method will not
   * populate the file size and will not do any caching.
   */
  virtual std::shared_ptr<const FileStatistics> filestats() const override
  {
    return filestats_nocache();
  }
};

/**
 * \brief Add coordinate conversion to the concrete ZgyMeta class.
 *
 * \details Thread safety: See the base class. This specialization
 * does not add any additional concerns because all the new members
 * are const. So they are safe to call concurrently with other read
 * operations.
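 *
 * \par Example
 * A minimal sketch of the coordinate conversions, assuming "reader" is
 * an already opened IZgyReader (the variable name is illustrative):
 * \code
 *   // Round-trip the survey origin between index and world coordinates;
 *   // up to floating point rounding this is a no-op.
 *   std::array<float64_t,2> world = reader->indexToWorld({0.0, 0.0});
 *   std::array<float64_t,2> index = reader->worldToIndex(world);
 * \endcode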
 */
class ZgyMetaAndTools: public ZgyMeta, virtual public IZgyTools
{
public:
  virtual void
  transform(const corners_t& A, const corners_t& B, std::vector<std::array<float64_t,2>>& data) const override
  {
    // If this method is needed then it is fairly simple to implement.
    // Change InternalZGY::generalTransform() to accept a vector of array.
    // transform1() can then be re-implemented in terms of transform.
    throw std::runtime_error("Not implemented: ZgyMetaAndTools::transform()");
  }

  virtual  std::array<float64_t,2>
  transform1(const corners_t& A, const corners_t& B, const std::array<float64_t,2>& point) const override
  {
    float64_t x{point[0]}, y{point[1]};
    if (!InternalZGY::generalTransform
        (A[0][0], A[0][1], A[1][0], A[1][1], A[2][0], A[2][1],
         B[0][0], B[0][1], B[1][0], B[1][1], B[2][0], B[2][1],
         &x, &y, 1))
      throw Errors::ZgyInternalError("Transform is not well defined due to colinear or coincident control points");
    return std::array<float64_t,2>{x, y};
  }

  virtual std::array<float64_t,2>
  annotToIndex(const std::array<float64_t,2>& point) const override
  {
    return transform1(annotcorners(), indexcorners(), point);
  }

  virtual std::array<float64_t,2>
  annotToWorld(const std::array<float64_t,2>& point) const override
  {
    return transform1(annotcorners(), corners(), point);
  }

  virtual std::array<float64_t,2>
  indexToAnnot(const std::array<float64_t,2>& point) const override
  {
    return transform1(indexcorners(), annotcorners(), point);
  }

  virtual std::array<float64_t,2>
  indexToWorld(const std::array<float64_t,2>& point) const override
  {
    return transform1(indexcorners(), corners(), point);
  }

  virtual std::array<float64_t,2>
  worldToAnnot(const std::array<float64_t,2>& point) const override
  {
    return transform1(corners(), annotcorners(), point);
  }

  virtual std::array<float64_t,2>
  worldToIndex(const std::array<float64_t,2>& point) const override
  {
    return transform1(corners(), indexcorners(), point);
  }
};

/**
 * \brief Concrete implementation of IZgyReader.
 * \details Thread safety: See the base class.
 */
class ZgyReader : public ZgyMetaAndTools, virtual public IZgyReader
{
private:
  std::shared_ptr<InternalZGY::FileADT> _fd;
  std::shared_ptr<const InternalZGY::ZgyInternalBulk> _accessor;
  std::shared_ptr<const FileStatistics> _filestats;
  mutable std::mutex _filestats_mutex;

public:
  /**
   * \copydoc IZgyReader::open()
   */
  ZgyReader(const std::string& filename, const IOContext* iocontext)
  {
    // Note: Currently the Python version of this constructor has an
    // additional "update" parameter which, if set, causes the underlying
    // file to be opened in read/write mode. This is a kludge to facilitate
    // a stand-alone lowres generator, which was only ever used for testing.
    _fd = InternalZGY::FileFactory::instance().create(
        filename, InternalZGY::OpenMode::ReadOnly, iocontext);

    // Set the protected metadata information in the ZgyMeta base class.
    // ZgyInternalMeta does not retain the file descriptor. The file is
    // only used inside the constructor to populate the headers.
    _meta.reset(new InternalZGY::ZgyInternalMeta(this->_fd));

    // At the implementation level the bulk- and meta access are separate,
    // and the bulk accessor needs some of the meta information to work.
    // The accessor will have its own metadata pointer which it holds on to.
    // It also holds on to the file descriptor. Unlike the metadata reader
    // it is obviously not possible to read everything up front.
    _accessor.reset(new InternalZGY::ZgyInternalBulk(_fd, _meta, nullptr, false));
  }

  ~ZgyReader()
  {
    try {
      //close(); // See close() for why this may be a bad idea.
      _accessor.reset();
      _fd.reset();
    }
    catch (const std::exception& ex) {
      // We should never get here!
      // Caller should have done an explicit close() so it can handle
      // exceptions itself. Exceptions thrown from a destructor are evil.
      // Trying to catch exceptions from the two lines above might already
      // be too late. The destructor in _fd does a similar operation
      // (blind catch with logging) which makes it even less likely
      // that we get here.
      std::cerr << "ERROR closing a file opened for read: "
                << (ex.what() ? ex.what() : "(null)")
                << std::endl;
    }
  }

  /**
   * \brief Read an arbitrary region.
   *
   * The data is read into a buffer provided by the caller.
   * The method will apply conversion storage -> float if needed.
   *
   * Data is ordered inline(slowest), crossline, vertical(fastest).
   *
   * The start position refers to the specified lod level.
   * At lod 0 start + data.size can be up to the survey size.
   * At lod 1 the maximum is just half that, rounded up.
   *
   * It is valid to pass a size that includes the padding area
   * between the survey and the end of the current brick. But not
   * more. In other words, the limit for lod 0 is actually
   * reader->size() rounded up to a multiple of reader->bricksize().
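   *
   * \par Example
   * A minimal sketch, assuming "reader" is an open IZgyReader and the
   * survey is at least one 64^3 brick large (all names illustrative):
   * \code
   *   const IZgyReader::size3i_t start{0, 0, 0};
   *   const IZgyReader::size3i_t size{64, 64, 64};
   *   std::vector<float> data(64 * 64 * 64);
   *   reader->read(start, size, data.data(), 0); // lod 0 = full resolution
   * \endcode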
   */
  virtual void read(const size3i_t& start, const size3i_t& size, float* data, int lod) const override
  {
    if (!_accessor)
      throw Errors::ZgyUserError("ZGY file not open for read");
    std::shared_ptr<float> fakeshared = fake_shared(data);
    auto databuffer = std::make_shared<InternalZGY::DataBufferNd<float,3>>(fakeshared, size);
    _accessor->readToExistingBuffer(databuffer, start, lod, true);
  }

  /**
   * \brief Read an arbitrary region with no conversion.
   *
   * As the read overload with a float buffer but only works for files with
   * SampleDataType::int16 and does not scale the samples.
   */
  virtual void read(const size3i_t& start, const size3i_t& size, std::int16_t* data, int lod) const override
  {
    if (!_accessor)
      throw Errors::ZgyUserError("ZGY file not open for read");
    std::shared_ptr<std::int16_t> fakeshared = fake_shared(data);
    auto databuffer = std::make_shared<InternalZGY::DataBufferNd<std::int16_t,3>>(fakeshared, size);
    _accessor->readToExistingBuffer(databuffer, start, lod, false);
  }

  /**
   * \brief Read an arbitrary region with no conversion.
   *
   * As the read overload with a float buffer but only works for files with
   * SampleDataType::int8 and does not scale the samples.
   */
  virtual void read(const size3i_t& start, const size3i_t& size, std::int8_t* data, int lod) const override
  {
    if (!_accessor)
      throw Errors::ZgyUserError("ZGY file not open for read");
    std::shared_ptr<std::int8_t> fakeshared = fake_shared(data);
    auto databuffer = std::make_shared<InternalZGY::DataBufferNd<std::int8_t,3>>(fakeshared, size);
    _accessor->readToExistingBuffer(databuffer, start, lod, false);
  }

  /**
   * \brief Get hint about all constant region.
   *
   * Check to see if the specified region is known to have all samples
   * set to the same value. Returns a pair of (is_const, const_value).
   * If it returns is_const=true you know what the region contains;
   * otherwise you need to use the regular read() method.
   *
   * For int8 and int16 files the caller may specify whether to scale
   * the values or not. Even if unscaled the function returns the value
   * as a double.
   *
   * The function only makes inexpensive checks so it might return
   * is_const=false even if the region was in fact constant. It will not
   * make the opposite mistake. This method is only intended as a hint
   * to improve performance. The following figure might clarify how it works.
   * \image html readconst-fig1.png
   * \image latex readconst-fig1.png
   *
   * If the region requested in readconst() is exactly one ZGY brick
   * then this just checks whether that brick is known to be constant.
   * If called with a larger, arbitrary region it fails unless all
   * bricks in that region are known to be constant and all have the
   * same value. It will not tell you which brick(s) were not const.
   * If you need to know then you must loop over each ZGY brick and
   * make multiple calls to readconst().
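   *
   * \par Example
   * A minimal sketch reusing the names from the read() example above;
   * readconst() is tried first and the bulk read is skipped on success:
   * \code
   *   std::pair<bool,double> c = reader->readconst(start, size, 0, true);
   *   if (c.first) {
   *     // Every sample in the region equals c.second.
   *   } else {
   *     reader->read(start, size, data.data(), 0);
   *   }
   * \endcode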
   */
  virtual std::pair<bool,double> readconst(const size3i_t& start, const size3i_t& size, int lod, bool as_float) const override
  {
    if (!_accessor)
      throw Errors::ZgyUserError("ZGY file not open for read");
    return _accessor->readConstantValue(start, size, lod, as_float);
  }

  /**
   * \brief Close the file and release resources.
   *
   * Unlike ZgyWriter::close(), forgetting to close a file that was
   * only open for read is not a major faux pas. It can still lead to
   * problems though.
   *
   *   \li The destructor of _fd will catch and ignore any exceptions
   *       because if a destructor throws then this will normally
   *       cause the application to crash.
   *
   *   \li If a callback is used to refresh the token this will not
   *       happen in our destructor (it won't call xx_close) because
   *       it is too risky to invoke the callback this late. It might
   *       not be valid any longer. This means that if the token has
   *       expired since the last read then the close will fail.
   *       Exactly why SDAPI requires a token just to close a file is
   *       a different question. Possibly this is for removing any
   *       read locks.
   */
  void close()
  {
    if (_accessor) {
      _accessor.reset();
    }
    if (_fd) {
      _fd->xx_close();
      _fd.reset();
    }
    // Metadata remains accessible. Not sure whether this is a good idea.
  }

  /**
   * Get the file statistics of a file currently opened for read.
   * The result does not change while the file is open, so it can
   * be cached here.
   */
  virtual std::shared_ptr<const FileStatistics> filestats() const override
  {
    if (!_filestats) {
      std::shared_ptr<FileStatistics> result
        (new FileStatistics(*filestats_nocache()));
      // The base class has no _fd member so I need to set the size here.
      result->_file_size = _fd->xx_eof();
      {
        // Too bad there is no proper atomic_shared_ptr yet.
        std::lock_guard<std::mutex> lk(_filestats_mutex);
        // The cache is semantically const.
        const_cast<ZgyReader*>(this)->_filestats = result;
      }
    }
    std::lock_guard<std::mutex> lk(_filestats_mutex);
    return _filestats;
  }
};

/**
 * \brief Concrete implementation of IZgyWriter.
 *
 * \details Thread safety: This class is single threaded. IZgyWriter::open()
 * may choose to return a wrapper that serializes all access, just in case
 * the application didn't read the memo about no concurrent access.
 * See class ZgySafeWriter for details.
 *
 * Const-correctness: Most methods are non-const because writes will update
 * metadata. If not in ZgyWriter itself then in the classes it refers to.
 * The class refers to a mutable ZgyInternalBulk instance. And a mutable
 * ZgyInternalMeta instance that is actually the same as the one in the
 * base class except this one is not declared const. Now for the minor
 * code smell: Any const method (currently just errorflag) will still be
 * able to access those _rw pointers so the "const" declaration hardly
 * protects against anything. There are ways of handling this better.
 * But with just a single method affected I'll let it slide.
 */
class ZgyWriter : public ZgyMetaAndTools, virtual public IZgyWriter
{
private:
  std::shared_ptr<InternalZGY::FileADT> _fd;
  std::shared_ptr<InternalZGY::ZgyInternalBulk> _accessor_rw;
  std::shared_ptr<InternalZGY::ZgyInternalMeta> _meta_rw;
  mutable bool _dirty; // if true need LOD, stats, histogram.
  compressor_t _compressor;
  compressor_t _lodcompressor;

public:
  /**
   * \copydoc IZgyWriter::open()
   */
  ZgyWriter(const ZgyWriterArgs& args)
    : _fd()
    , _accessor_rw()
    , _dirty(false)
    , _compressor(args._compressor)
    , _lodcompressor(args._lodcompressor)
  {
    const bool compress = args._compressor || args._lodcompressor;
    // This is both pointless and expensive to support.
    if (compress && args._datatype != SampleDataType::float32)
      throw Errors::ZgyUserError("Compressed files need to be stored as float.");

    // Set the protected metadata information in the ZgyMeta base class.
    // Also store a mutable copy in the current class.
    // ZgyInternalMeta does not contain any file descriptor. This means
    // we need to hold on to _fd ourselves and provide it when it is time
    // to flush metadata to disk.
    InternalZGY::ZgyInternalWriterArgs iargs = EnumMapper::mapWriterArgs(args);
    _meta_rw.reset(new InternalZGY::ZgyInternalMeta(iargs, compress));
    _meta = _meta_rw; // in base class

    // The file creation was deferred until after the consistency checks.
    _fd = InternalZGY::FileFactory::instance().create(
        args._filename, InternalZGY::OpenMode::Truncate, args._iocontext);
    _meta_rw->flushMeta(_fd);
    // Compress or not at this level only controls alignment etc.
    // The actual compression functor is passed to each write.
    _accessor_rw.reset(new InternalZGY::ZgyInternalBulk(_fd, _meta_rw, _meta_rw, compress));
  }

  /**
   * \brief Automatically close the file when it goes out of scope.
   *
   * Application code is encouraged to close the file explicitly.
   * The destructor is just there as a fallback. Errors caught
   * in the fallback will be logged to stderr and otherwise ignored.
   */
  virtual ~ZgyWriter()
  {
    try {
      close();
    }
    catch (const Errors::ZgyError& ex) {
      std::cerr << "ERROR: Uncaught ZGY exception closing file: "
                << ex.what() << std::endl;
    }
    catch (const std::exception& ex) {
      std::cerr << "ERROR: Uncaught general exception closing file: "
                << ex.what() << std::endl;
    }
  }

  /**
   * \brief Write an arbitrary region.
   *
   * This will apply conversion float -> storage if needed.
   *
   * Data is ordered inline(slowest), crossline, vertical(fastest).
   *
   * A read/modify/write will be done if the region's start and size
   * doesn't align with bricksize. When writing to the cloud this
   * read/modify/write may incur performance and size penalties. So
   * do write brick aligned data if possible. The same applies to
   * writing compressed data where r/m/w can cause a severe
   * loss of quality.
   *
   * The start position refers to the specified lod level.
   * At lod 0 start + data.size can be up to the survey size.
   * At lod 1 the maximum is just half that, rounded up.
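   *
   * \par Example
   * A minimal sketch of a brick-aligned write, assuming "writer" is an
   * open IZgyWriter and the brick size is the default 64^3 (all names
   * illustrative):
   * \code
   *   const IZgyWriter::size3i_t start{0, 0, 0};
   *   const IZgyWriter::size3i_t size{64, 64, 64};
   *   std::vector<float> data(64 * 64 * 64, 0.0f);
   *   writer->write(start, size, data.data());
   * \endcode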
   */
  void write(const size3i_t& start, const size3i_t& size, const float* data) override
  {
    // TODO-Worry: The buffer is supposed to be copied at least once
    // before being sent down to the lowest levels. The longer I wait
    // before I do that, the higher the risk is that it isn't done.
    // Currently there is code in _writeAlignedRegion() that ensures
    // the buffer was copied. But if I do the copy too early then it
    // might be wasted effort since r/m/w or value type conversion
    // would copy it anyway.
    //
    // Why I need a copy:
    //
    //   - I need an ugly const_cast because DataBuffer isn't fully const
    //     aware. The cast is particularly dangerous because the lower
    //     levels might be tempted to do padding and byteswapping in place.
    //     If the compiler doesn't stop this due to a "const" declaration
    //     we get subtle bugs.
    //
    //   - The user's buffer is not reference counted. Which means I need
    //     to use a fake shared_ptr. If the lower levels implement delayed
    //     write and are "smart" about not always copying buffers then
    //     the buffer might be accessed after the function returns.
    std::shared_ptr<float> fakeshared = fake_shared(const_cast<float*>(data));
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<float,3>(fakeshared, size));
    _accessor_rw->writeRegion(buffer, start, 0, false, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Write an arbitrary region with no conversion.
   *
   * As the write overload with a float buffer but only works for files with
   * SampleDataType::int16 and does not scale the samples.
   */
  void write(const size3i_t& start, const size3i_t& size, const std::int16_t *data) override
  {
    std::shared_ptr<std::int16_t> fakeshared = fake_shared(const_cast<std::int16_t*>(data));
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<int16_t,3>(fakeshared, size));
    _accessor_rw->writeRegion(buffer, start, 0, true, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Write an arbitrary region with no conversion.
   *
   * As the write overload with a float buffer but only works for files with
   * SampleDataType::int8 and does not scale the samples.
   */
  void write(const size3i_t& start, const size3i_t& size, const std::int8_t* data) override
  {
    std::shared_ptr<std::int8_t> fakeshared = fake_shared(const_cast<std::int8_t*>(data));
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<std::int8_t,3>(fakeshared, size));
    _accessor_rw->writeRegion(buffer, start, 0, true, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Write all-constant data.
   *
   * Works as the corresponding write but the entire region is set
   * to the same value. So the provided data buffer needs just one
   * value, or alternatively can be passed as &scalar_value.
   *
   * Calling this method is faster than filling a buffer with constant
   * values and calling write. But it produces the exact same
   * result. This is because write will automatically detect whether
   * the input buffer is all constant.
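   *
   * \par Example
   * A minimal sketch filling a region with zeros ("writer", "start" and
   * "size" as in the write() example above):
   * \code
   *   const float zero = 0.0f;
   *   writer->writeconst(start, size, &zero);
   * \endcode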
   */
  void writeconst(const size3i_t& start, const size3i_t& size, const float* data) override
  {
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<float,3>(*data, size));
    _accessor_rw->writeRegion(buffer, start, 0, false, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Write an arbitrary region with no conversion.
   *
   * As the writeconst overload with a float buffer but only works for files with
   * SampleDataType::int16 and does not scale the samples.
   */
  void writeconst(const size3i_t& start, const size3i_t& size, const std::int16_t* data) override
  {
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<std::int16_t,3>(*data, size));
    _accessor_rw->writeRegion(buffer, start, 0, true, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Write an arbitrary region with no conversion.
   *
   * As the write overload with a float buffer but only works for files with
   * SampleDataType::int8 and does not scale the samples.
   */
  void writeconst(const size3i_t& start, const size3i_t& size, const std::int8_t* data) override
  {
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<std::int8_t,3>(*data, size));
    _accessor_rw->writeRegion(buffer, start, 0, true, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Generate low resolution data, statistics, and histogram.
   *
   * This method will be called automatically from close(), but in
   * that case it is not possible to request a progress callback.
   *
   * If the processing raises an exception the data is still marked
   * as clean. The caller can force a retry by passing force=true.
   *
   * The C++ code is very different from Python because it needs
   * an entirely different approach to be performant.
   *
   * \param decimation: Optionally override the decimation algorithms by
   *                    passing an array of DecimationType
   *                    with one entry for each level of detail. If the
   *                    array is too short then the last entry is used for
   *                    subsequent levels.
   * \param progress:   Function(done, total) called to report progress.
   *                    If it returns false the computation is aborted.
   *                    The callback implementation should not invoke any
   *                    OpenZGY methods except for any calls expressly
   *                    documented as safe for this purpose and deadlock-free.
   *                    Will be called at least once, even if there is no work.
   * \param force:      If true, generate the low resolution data even if
   *                    it appears to not be needed. Use with caution.
   *                    Especially if writing to the cloud, where data
   *                    should only be written once. Passing force=true
   *                    is currently required when creating an empty file
   *                    (no samples written at all) if the statistics and
   *                    histogram data should still be stored.
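   *
   * \par Example
   * A minimal sketch with an explicit progress callback; the decimation
   * choice is just an illustration:
   * \code
   *   writer->finalize(
   *       std::vector<DecimationType>{DecimationType::LowPass},
   *       [](std::int64_t done, std::int64_t total) {
   *         std::cerr << done << "/" << total << std::endl;
   *         return true; // returning false aborts the computation
   *       });
   * \endcode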
   */
  void finalize(const std::vector<DecimationType>& decimation,
                const std::function<bool(std::int64_t,std::int64_t)>& progress,
                bool force = false)
  {
    if (this->_dirty || force) {
      InternalZGY::DataBuffer::cleartimers(true);
      this->_dirty = false;
      std::shared_ptr<InternalZGY::StatisticData> stats;
      std::shared_ptr<InternalZGY::HistogramData> histo;
      // Note: this->_accessor._metadata == this->_meta but the former is
      // private and only used to allow ZgyInternalBulk to use some parts of
      // the metadata. This is why both _accessor and _meta are needed below.
      std::tie(stats, histo) =
        InternalZGY::GenLodC(_accessor_rw, _meta_rw, _lodcompressor,
                             EnumMapper::mapDecimationTypeToLodAlgorithm(decimation),
                             progress, /*verbose=*/false).call();

      // Statistics and histogram are accumulated in storage units
      // and we want to store user units on the file.
      // Technically I could scale in place since stats and histo
      // shouldn't be used after this so it is ok to clobber them.
      // But I will copy just in case.
      InternalZGY::StatisticData scaled_stats(*stats);
      InternalZGY::HistogramData scaled_histo(*histo);
      const std::array<double,2> factors = this->_meta->ih().storagetofloat();
      scaled_stats.scale(0, 1, factors[1], factors[0] + factors[1]);
      scaled_histo.scale(0, 1, factors[1], factors[0] + factors[1]);
      // TODO-Low: if using a 64k temporary histogram: scaled_histo.resize(256);
      // TODO-Low: Some kind of static_assert making the following cast safe.