// Copyright 2017-2020, Schlumberger
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "api.h"
#include "exception.h"
#include "safewriter.h"
// Standard library headers used throughout this file.
#include <array>
#include <cstdint>
#include <functional>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "impl/enum.h"
#include "impl/file.h"
#include "impl/file_sd.h"
#include "impl/meta.h"
#include "impl/bulk.h"
#include "impl/databuffer.h"
#include "impl/transform.h"
#include "impl/lodalgo.h"
#include "impl/statisticdata.h"
#include "impl/histogramdata.h"
#include "impl/genlod.h"
#include "impl/compression.h"

#ifdef _MSC_VER
#pragma warning(push)
// The warning is due to me using pure virtual classes as "real" interfaces,
// having a similar inheritance scheme for interfaces and implementation.
// I believe this is safe as long as the interfaces are truly pure.
#pragma warning(disable:4250) // inherits via dominance
#endif

/**
 * \file api.cpp
 * \brief Implements the pure interfaces of the API.
 *
 * \internal TODO-Doc consider moving all doxygen comments to the interface
 * and exclude the concrete types. Unfortunately that might make the comments
 * harder to keep up to date.
 */

namespace {
  /**
   * \brief Make a fake shared_ptr from a plain unsafe pointer.
   *
   * Used when a method or a data member expects a smart pointer while
   * the application has an unsafe pointer that is guaranteed to remain
   * valid long enough. The returned smart pointer does nothing on
   * cleanup.
   *
   * CAVEAT: Using this method indicates a code smell.
   * Note: The method is duplicated in bulk.cpp, databuffer.cpp,
   * and api.cpp. There isn't much point in consolidating them.
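   *
   * A minimal usage sketch (hypothetical stack buffer, for illustration
   * only):
   * \code
   *   float buf[8] = {0};
   *   // No-op deleter: "buf" must outlive the returned shared_ptr.
   *   std::shared_ptr<float> p = fake_shared(&buf[0]);
   * \endcode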
   */
  template<typename T>
  static inline std::shared_ptr<T>
  fake_shared(T* p)
  {
    return std::shared_ptr<T>(p, [](T* p){});
  }
}

namespace OpenZGY {
  class IOContext;
}

namespace OpenZGY { namespace Impl {
#if 0
}}
#endif

/** \cond IMPL */ // Doxygen needs api.cpp, but not all of it.

/**
 * \brief convert between internal and external data types.
 *
 * \details Thread safety:
 * Thread safe because the class only contains static methods with no state.
 *
 * \internal
 * TODO-Test: Move to a separate file to allow it to be unit tested.
 */
class EnumMapper
{
  EnumMapper() = delete;
  EnumMapper(const EnumMapper&) = delete;
  EnumMapper& operator=(const EnumMapper&) = delete;
public:
  static SampleDataType mapRawDataTypeToSampleDataType(InternalZGY::RawDataType);
  static UnitDimension  mapRawHorizontalDimensionToUnitDimension(InternalZGY::RawHorizontalDimension);
  static UnitDimension  mapRawVerticalDimensionToUnitDimension(InternalZGY::RawVerticalDimension);
  static InternalZGY::RawDataType            mapSampleDataTypeToRawDataType(SampleDataType);
  static InternalZGY::RawHorizontalDimension mapUnitDimensionToRawHorizontalDimension(UnitDimension);
  static InternalZGY::RawVerticalDimension   mapUnitDimensionToRawVerticalDimension(UnitDimension);
  static InternalZGY::ZgyInternalWriterArgs mapWriterArgs(const ZgyWriterArgs&);
  static InternalZGY::LodAlgorithm mapDecimationTypeToLodAlgorithm(DecimationType value);
  static std::vector<InternalZGY::LodAlgorithm> mapDecimationTypeToLodAlgorithm(const std::vector<DecimationType>& values);
};

/** \endcond */

/**
 * \brief High level API for reading and writing ZGY files.
 *
 * The base class contains properties common to both reader and writer.
 *
 * The constructor should logically have had a ZgyInternalMeta parameter
 * for accessing the implementation layer. But due to the way the code
 * is structured the _meta pointer needs to be set in the constructor
 * for the leaf types. The _meta pointer is guaranteed to not be empty
 * except while constructing the instance.
 *
 * Both ZgyReader and ZgyWriter need to retain a pointer to the file
 * descriptor. Primarily in order to explicitly close it. Relying on the
 * shared_ptr to do this when going out of scope is dangerous because
 * of exception handling. ZgyWriter additionally needs it when flushing
 * metadata to disk, since there is no file descriptor in ZgyInternalMeta.
 *
 * Both ZgyReader and ZgyWriter need a ZgyInternalBulk pointer to do
 * bulk I/O. That instance in turn keeps private references to the
 * file descriptor and the metadata but is not supposed to expose them.
 *
 * The FileADT and ZgyInternalBulk pointers can be declared here since
 * both the reader and the writer need them. Or removed from here and
 * duplicated in ZgyReader and ZgyWriter.
 *
 * Thread safety:
 * The following applies both to this class and derived types.
 * Modification may lead to a data race. The intent for this class is
 * that any calls a ZgyReader instance might make will not modify data,
 * except possibly changes done while the file is being opened.
 * To help enforce this, both const and mutable pointers are used for
 * ZgyInternalMeta and ZgyInternalBulk with the latter being empty
 * when instantiated from the ZgyReader constructor instead of ZgyWriter.
 * FUTURE: The instance holds one or more pointers to the data it
 */
class ZgyMeta: virtual public IZgyMeta
{
protected:
  /** \brief Handle to the internal metadata layer which this class wraps. */
  std::shared_ptr<const InternalZGY::ZgyInternalMeta> _meta;

public:

  virtual std::array<int64_t,3>
  size() const override
  {
    return _meta->ih().size();
  }

  virtual SampleDataType
  datatype() const override
  {
    return EnumMapper::mapRawDataTypeToSampleDataType(_meta->ih().datatype());
  }

  virtual std::array<float32_t,2>
  datarange() const override
  {
    if (_meta->ih().datatype() == InternalZGY::RawDataType::Float32)
      return std::array<float32_t,2>{_meta->ih().smin(), _meta->ih().smax()};
    else
      return _meta->ih().safe_codingrange();
  }

  virtual std::array<float32_t,2>
  raw_datarange() const override
  {
    if (_meta->ih().datatype() == InternalZGY::RawDataType::Float32)
      return std::array<float32_t,2>{_meta->ih().smin(), _meta->ih().smax()};
    else
      return _meta->ih().raw_codingrange();
  }

  virtual UnitDimension
  zunitdim() const override
  {
    return EnumMapper::mapRawVerticalDimensionToUnitDimension(_meta->ih().vdim());
  }

  virtual UnitDimension
  hunitdim() const override
  {
    return EnumMapper::mapRawHorizontalDimensionToUnitDimension(_meta->ih().hdim());
  }

  virtual std::string
  zunitname() const override
  {
    return _meta->ih().vunitname();
  }

  virtual std::string
  hunitname() const override
  {
    return _meta->ih().hunitname();
  }

  virtual float64_t
  zunitfactor() const override
  {
    return _meta->ih().vunitfactor();
  }

  virtual float64_t
  hunitfactor() const override
  {
    return _meta->ih().hunitfactor();
  }

  virtual float32_t
  zstart() const override
  {
    return _meta->ih().orig()[2];
  }

  virtual float32_t
  zinc() const override
  {
    return _meta->ih().inc()[2];
  }

  virtual std::array<float32_t,2>
  annotstart() const override
  {
    return std::array<float32_t,2>{
      _meta->ih().orig()[0],
      _meta->ih().orig()[1]};
  }

  virtual std::array<float32_t,2>
  annotinc() const override
  {
    return std::array<float32_t,2>{
      _meta->ih().inc()[0],
      _meta->ih().inc()[1]};
  }

  virtual const corners_t&
  corners() const
  {
    return _meta->ih().ocp_world();
  }

  virtual const corners_t&
  indexcorners() const
  {
    return _meta->ih().ocp_index();
  }

  virtual const corners_t&
  annotcorners() const
  {
    return _meta->ih().ocp_annot();
  }

  virtual std::array<int64_t,3>
  bricksize() const override
  {
    return _meta->ih().bricksize();
  }

  virtual std::vector<std::array<int64_t,3>>
  brickcount() const override
  {
    return _meta->ih().lodsizes();
  }

  virtual int32_t
  nlods() const override
  {
    return _meta->ih().nlods();
  }

  virtual void
  dump(std::ostream& os) const override
  {
    throw std::runtime_error("Not implemented: ZgyMeta::dump()");
  }

  virtual SampleStatistics
  statistics() const override
  {
    const InternalZGY::IInfoHeaderAccess& ih = _meta->ih();
    return SampleStatistics(ih.scnt(), ih.ssum(), ih.sssq(), ih.smin(), ih.smax());
  }

  virtual SampleHistogram
  histogram() const override
  {
    const InternalZGY::IHistHeaderAccess& hh = _meta->hh();
    return SampleHistogram(hh.samplecount(), hh.minvalue(), hh.maxvalue(),
                         std::vector<std::int64_t>(hh.bins(),
                                                   hh.bins() + hh.bincount()));
  }
};

/**
 * \brief Add coordinate conversion to the concrete ZgyMeta class.
 *
 * \details Thread safety: See the base class. This specialization
 * does not add any additional concerns because all the new members
 * are const. So they are safe to call concurrently with other read
 * operations.
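 *
 * A minimal sketch of a single-point conversion (hypothetical reader
 * variable and coordinates, for illustration only):
 * \code
 *   // Map annotation (inline, crossline) = (5000, 7000) to world X,Y.
 *   std::array<double,2> xy =
 *     reader->annotToWorld(std::array<double,2>{5000, 7000});
 * \endcode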
 */
class ZgyMetaAndTools: public ZgyMeta, virtual public IZgyTools
{
public:
  virtual void
  transform(const corners_t& A, const corners_t& B, std::vector<std::array<float64_t,2>>& data) const override
  {
    // If this method is needed then it is fairly simple to implement.
    // Change InternalZGY::generalTransform() to accept a vector of array.
    // transform1() can then be re-implemented in terms of transform.
    throw std::runtime_error("Not implemented: ZgyMetaAndTools::transform()");
  }

  virtual  std::array<float64_t,2>
  transform1(const corners_t& A, const corners_t& B, const std::array<float64_t,2>& point) const override
  {
    float64_t x{point[0]}, y{point[1]};
    if (!InternalZGY::generalTransform
        (A[0][0], A[0][1], A[1][0], A[1][1], A[2][0], A[2][1],
         B[0][0], B[0][1], B[1][0], B[1][1], B[2][0], B[2][1],
         &x, &y, 1))
      throw Errors::ZgyInternalError("Transform is not well defined due to colinear or coincident control points");
    return std::array<float64_t,2>{x, y};
  }

  virtual std::array<float64_t,2>
  annotToIndex(const std::array<float64_t,2>& point) const override
  {
    return transform1(annotcorners(), indexcorners(), point);
  }

  virtual std::array<float64_t,2>
  annotToWorld(const std::array<float64_t,2>& point) const override
  {
    return transform1(annotcorners(), corners(), point);
  }

  virtual std::array<float64_t,2>
  indexToAnnot(const std::array<float64_t,2>& point) const override
  {
    return transform1(indexcorners(), annotcorners(), point);
  }

  virtual std::array<float64_t,2>
  indexToWorld(const std::array<float64_t,2>& point) const override
  {
    return transform1(indexcorners(), corners(), point);
  }

  virtual std::array<float64_t,2>
  worldToAnnot(const std::array<float64_t,2>& point) const override
  {
    return transform1(corners(), annotcorners(), point);
  }

  virtual std::array<float64_t,2>
  worldToIndex(const std::array<float64_t,2>& point) const override
  {
    return transform1(corners(), indexcorners(), point);
  }
};

/**
 * \brief Concrete implementation of IZgyReader.
 * \details Thread safety: See the base class.
 */
class ZgyReader : public ZgyMetaAndTools, virtual public IZgyReader
{
private:
  std::shared_ptr<InternalZGY::FileADT> _fd;
  std::shared_ptr<const InternalZGY::ZgyInternalBulk> _accessor;

public:
  /**
   * \copydoc IZgyReader::open()
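   *
   * A minimal usage sketch, assuming the IZgyReader::open() factory
   * declared in api.h (hypothetical file name, for illustration only):
   * \code
   *   std::shared_ptr<OpenZGY::IZgyReader> reader =
   *     OpenZGY::IZgyReader::open("data.zgy", nullptr);
   *   // ... read data ...
   *   reader->close();
   * \endcode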
   */
  ZgyReader(const std::string& filename, const IOContext* iocontext)
  {
    // Note: Currently the Python version of this constructor has an
    // additional "update" parameter which, if set, causes the underlying
    // file to be opened in read/write mode. This is a kludge to facilitate
    // a stand-alone lowres generator, which was only ever used for testing.
    _fd = InternalZGY::FileFactory::instance().create(
        filename, InternalZGY::OpenMode::ReadOnly, iocontext);

    // Set the protected metadata information in the ZgyMeta base class.
    // ZgyInternalMeta does not retain the file descriptor. The file is
    // only used inside the constructor to populate the headers.
    _meta.reset(new InternalZGY::ZgyInternalMeta(this->_fd));

    // At the implementation level the bulk- and meta access are separate,
    // and the bulk accessor needs some of the meta information to work.
    // The accessor will have its own metadata pointer which it holds on to.
    // It also holds on to the file descriptor. Unlike the metadata reader
    // it is obviously not possible to read everything up front.
    _accessor.reset(new InternalZGY::ZgyInternalBulk(_fd, _meta, nullptr, false));
  }

  ~ZgyReader()
  {
    try {
      //close(); // See close() for why this may be a bad idea.
      _accessor.reset();
      _fd.reset();
    }
    catch (const std::exception& ex) {
      // We should never get here!
      // Caller should have done an explicit close() so it can handle
      // exceptions itself. Exceptions thrown from a destructor are evil.
      // Trying to catch exceptions from the two lines above might already
      // be too late. The destructor in _fd does a similar operation
      // (blind catch with logging) which makes it even less likely
      // that we get here.
      std::cerr << "ERROR closing a file opened for read: "
                << (ex.what() ? ex.what() : "(null)")
                << std::endl;
    }
  }

  /**
   * \brief Read an arbitrary region.
   *
   * The data is read into a buffer provided by the caller.
   * The method will apply conversion storage -> float if needed.
   *
   * The start position refers to the specified lod level.
   * At lod 0 start + data.size can be up to the survey size.
   * At lod 1 the maximum is just half that, rounded up.
   *
   * It is valid to pass a size that includes the padding area
   * between the survey and the end of the current brick. But not
   * more. In other words, the limit for lod 0 is actually
   * reader()->size() rounded up to a multiple of reader->bricksize().
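   *
   * A minimal sketch, assuming one 64^3 brick starting at the origin
   * (hypothetical reader variable, for illustration only):
   * \code
   *   std::vector<float> data(64*64*64);
   *   reader->read(size3i_t{0,0,0}, size3i_t{64,64,64}, data.data(), 0);
   * \endcode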
   */
  virtual void read(const size3i_t& start, const size3i_t& size, float* data, int lod) const override
  {
    if (!_accessor)
      throw Errors::ZgyUserError("ZGY file not open for read");
    std::shared_ptr<float> fakeshared = fake_shared(data);
    auto databuffer = std::make_shared<InternalZGY::DataBufferNd<float,3>>(fakeshared, size);
    _accessor->readToExistingBuffer(databuffer, start, lod, true);
  }

  /**
   * \brief Read an arbitrary region with no conversion.
   *
   * As the read overload with a float buffer but only works for files with
   * SampleDataType::int16 and does not scale the samples.
   */
  virtual void read(const size3i_t& start, const size3i_t& size, std::int16_t* data, int lod) const override
  {
    if (!_accessor)
      throw Errors::ZgyUserError("ZGY file not open for read");
    std::shared_ptr<std::int16_t> fakeshared = fake_shared(data);
    auto databuffer = std::make_shared<InternalZGY::DataBufferNd<std::int16_t,3>>(fakeshared, size);
    _accessor->readToExistingBuffer(databuffer, start, lod, false);
  }

  /**
   * \brief Read an arbitrary region with no conversion.
   *
   * As the read overload with a float buffer but only works for files with
   * SampleDataType::int8 and does not scale the samples.
   */
  virtual void read(const size3i_t& start, const size3i_t& size, std::int8_t* data, int lod) const override
  {
    if (!_accessor)
      throw Errors::ZgyUserError("ZGY file not open for read");
    std::shared_ptr<std::int8_t> fakeshared = fake_shared(data);
    auto databuffer = std::make_shared<InternalZGY::DataBufferNd<std::int8_t,3>>(fakeshared, size);
    _accessor->readToExistingBuffer(databuffer, start, lod, false);
  }

  /**
   * \brief Get hint about all constant region.
   *
   * Check to see if the specified region is known to have all samples
   * set to the same value. Returns a pair of (is_const, const_value).
   * If it returns is_const=true you know what the region contains;
   * otherwise you need to use the regular read() method.
   *
   * For int8 and int16 files the caller may specify whether to scale
   * the values or not. Even if unscaled the function returns the value
   * as a double.
   *
   * The function only makes inexpensive checks so it might return
   * is_const=false even if the region was in fact constant. It will not
   * make the opposite mistake. This method is only intended as a hint
   * to improve performance. The following figure might clarify how it works.
   * \image html readconst-fig1.png
   * \image latex readconst-fig1.png
   *
   * If the region requested in readconst() is exactly one ZGY brick
   * then this just checks whether that brick is known to be constant.
   * If called with a larger, arbitrary region it fails unless all
   * bricks in that region are known to be constant and all have the
   * same value. It will not tell you which brick(s) were not const.
   * If you need to know then you must loop over each ZGY brick and
   * make multiple calls to readconst().
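   *
   * A minimal sketch of the single-brick case, assuming a 64^3 brick
   * size (hypothetical reader variable, for illustration only):
   * \code
   *   std::pair<bool,double> c =
   *     reader->readconst(size3i_t{0,0,0}, size3i_t{64,64,64}, 0, true);
   *   if (!c.first) {
   *     // Not known to be constant; fall back to a regular read().
   *   }
   * \endcode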
   */
  virtual std::pair<bool,double> readconst(const size3i_t& start, const size3i_t& size, int lod, bool as_float) const override
  {
    if (!_accessor)
      throw Errors::ZgyUserError("ZGY file not open for read");
    return _accessor->readConstantValue(start, size, lod, as_float);
  }

  /**
   * \brief Close the file and release resources.
   *
   * Unlike ZgyWriter::close(), forgetting to close a file that was
   * only open for read is not a major faux pas. It can still lead to
   * problems though.
   *
   *   \li The destructor of _fd will catch and ignore any exceptions
   *       because if a destructor throws then this will normally
   *       cause the application to crash.
   *
   *   \li If a callback is used to refresh the token this will not
   *       happen in our destructor (it won't call xx_close) because
   *       it is too risky to invoke the callback this late. It might
   *       not be valid any longer. This means that if the token has
   *       expired since the last read then the close will fail.
   *       Exactly why SDAPI requires a token just to close a file is
   *       a different question. Possibly this is for removing any
   *       read locks.
   */
  void close()
  {
    if (_accessor) {
      _accessor.reset();
    }
    if (_fd) {
      _fd->xx_close();
      _fd.reset();
    }
    // Metadata remains accessible. Not sure whether this is a good idea.
  }
};

/**
 * \brief Concrete implementation of IZgyWriter.
 *
 * \details Thread safety: This class is single threaded. IZgyWriter::open()
 * may choose to return a wrapper that serializes all access, just in case
 * the application didn't read the memo about no concurrent access.
 * See class ZgySafeWriter for details.
 *
 * Const-correctness: Most methods are non-const because writes will update
 * metadata. If not in ZgyWriter itself then in the classes it refers to.
 * The class refers to a mutable ZgyInternalBulk instance. And a mutable
 * ZgyInternalMeta instance that is actually the same as the one in the
 * base class except this one is not declared const. Now for the minor
 * code smell: Any const method (currently just errorflag) will still be
 * able to access those _rw pointers so the "const" declaration hardly
 * protects against anything. There are ways of handling this better.
 * But with just a single method affected I'll let it slide.
 */
class ZgyWriter : public ZgyMetaAndTools, virtual public IZgyWriter
{
private:
  std::shared_ptr<InternalZGY::FileADT> _fd;
  std::shared_ptr<InternalZGY::ZgyInternalBulk> _accessor_rw;
  std::shared_ptr<InternalZGY::ZgyInternalMeta> _meta_rw;
  mutable bool _dirty; // if true need LOD, stats, histogram.
  compressor_t _compressor;
  compressor_t _lodcompressor;

public:
  /**
   * \copydoc IZgyWriter::open()
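   *
   * A minimal usage sketch, assuming the IZgyWriter::open() factory and
   * the ZgyWriterArgs builder declared in api.h (hypothetical file name
   * and survey size, for illustration only):
   * \code
   *   std::shared_ptr<OpenZGY::IZgyWriter> writer =
   *     OpenZGY::IZgyWriter::open(OpenZGY::ZgyWriterArgs()
   *                               .filename("out.zgy")
   *                               .size(64, 64, 64)
   *                               .datatype(OpenZGY::SampleDataType::float32));
   *   // ... write data, then finalize() and close() ...
   * \endcode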
   */
  ZgyWriter(const ZgyWriterArgs& args)
    : _fd()
    , _accessor_rw()
    , _dirty(false)
    , _compressor(args._compressor)
    , _lodcompressor(args._lodcompressor)
  {
    const bool compress = args._compressor || args._lodcompressor;
    // This is both pointless and expensive to support.
    if (compress && args._datatype != SampleDataType::float32)
      throw Errors::ZgyUserError("Compressed files need to be stored as float.");

    // Set the protected metadata information in the ZgyMeta base class.
    // Also store a mutable copy in the current class.
    // ZgyInternalMeta does not contain any file descriptor. This means
    // we need to hold on to _fd ourselves and provide it when it is time
    // to flush metadata to disk.
    InternalZGY::ZgyInternalWriterArgs iargs = EnumMapper::mapWriterArgs(args);
    _meta_rw.reset(new InternalZGY::ZgyInternalMeta(iargs));
    _meta = _meta_rw; // in base class

    // The file creation was deferred until after the consistency checks.
    _fd = InternalZGY::FileFactory::instance().create(
        args._filename, InternalZGY::OpenMode::Truncate, args._iocontext);
    _meta_rw->flushMeta(_fd);
    // Compress or not at this level only controls alignment etc.
    // The actual compression functor is passed to each write.
    _accessor_rw.reset(new InternalZGY::ZgyInternalBulk(_fd, _meta_rw, _meta_rw, compress));
  }

  /**
   * \brief Automatically close the file when it goes out of scope.
   *
   * Application code is encouraged to close the file explicitly.
   * The destructor is just there as a fallback. Errors caught
   * in the fallback will be logged to stderr and otherwise ignored.
   */
  virtual ~ZgyWriter()
  {
    try {
      close();
    }
    catch (const Errors::ZgyError& ex) {
      std::cerr << "ERROR: Uncaught ZGY exception closing file: "
                << ex.what() << std::endl;
    }
    catch (const std::exception& ex) {
      std::cerr << "ERROR: Uncaught general exception closing file: "
                << ex.what() << std::endl;
    }
  }

  /**
   * \brief Write an arbitrary region.
   *
   * This will apply conversion float -> storage if needed.
   *
   * A read/modify/write will be done if the region's start and size
   * don't align with bricksize. When writing to the cloud this
   * read/modify/write may incur performance and size penalties. So
   * do write brick aligned data if possible. The same applies to
   * writing compressed data where r/m/w can cause a severe
   * loss of quality.
   *
   * The start position refers to the specified lod level.
   * At lod 0 start + data.size can be up to the survey size.
   * At lod 1 the maximum is just half that, rounded up.
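   *
   * A minimal sketch of a brick-aligned write, assuming a 64^3 brick
   * size (hypothetical writer variable, for illustration only):
   * \code
   *   std::vector<float> data(64*64*64, 1.5f);
   *   writer->write(size3i_t{0,0,0}, size3i_t{64,64,64}, data.data());
   * \endcode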
   */
  void write(const size3i_t& start, const size3i_t& size, const float* data) override
  {
    // TODO-Worry: The buffer is supposed to be copied at least once
    // before being sent down to the lowest levels. The longer I wait
    // before I do that, the higher the risk is that it isn't done.
    // Currently there is code in _writeAlignedRegion() that ensures
    // the buffer was copied. But if I do the copy too early then it
    // might be wasted effort since r/m/w or value type conversion
    // would copy it anyway.
    //
    // Why I need a copy:
    //
    //   - I need an ugly const_cast because DataBuffer isn't fully const
    //     aware. The cast is particularly dangerous because the lower
    //     levels might be tempted to do padding and byteswapping in place.
    //     If the compiler doesn't stop this due to a "const" declaration
    //     we get subtle bugs.
    //
    //   - The user's buffer is not reference counted. Which means I need
    //     to use a fake shared_ptr. If the lower levels implement delayed
    //     write and are "smart" about not always copying buffers then
    //     the buffer might be accessed after the function returns.
    std::shared_ptr<float> fakeshared = fake_shared(const_cast<float*>(data));
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<float,3>(fakeshared, size));
    _accessor_rw->writeRegion(buffer, start, 0, false, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Write an arbitrary region with no conversion.
   *
   * As the write overload with a float buffer but only works for files with
   * SampleDataType::int16 and does not scale the samples.
   */
  void write(const size3i_t& start, const size3i_t& size, const std::int16_t *data) override
  {
    std::shared_ptr<std::int16_t> fakeshared = fake_shared(const_cast<std::int16_t*>(data));
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<int16_t,3>(fakeshared, size));
    _accessor_rw->writeRegion(buffer, start, 0, true, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Write an arbitrary region with no conversion.
   *
   * As the write overload with a float buffer but only works for files with
   * SampleDataType::int8 and does not scale the samples.
   */
  void write(const size3i_t& start, const size3i_t& size, const std::int8_t* data) override
  {
    std::shared_ptr<std::int8_t> fakeshared = fake_shared(const_cast<std::int8_t*>(data));
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<std::int8_t,3>(fakeshared, size));
    _accessor_rw->writeRegion(buffer, start, 0, true, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Write all-constant data.
   *
   * Works as the corresponding write but the entire region is set
   * to the same value. So the provided data buffer needs just one
   * value, or alternatively can be passed as &scalar_value.
   *
   * Calling this method is faster than filling a buffer with constant
   * values and calling write. But it produces the exact same
   * result. This is because write will automatically detect whether
   * the input buffer is all constant.
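   *
   * A minimal sketch of filling a region from a scalar (hypothetical
   * writer variable, for illustration only):
   * \code
   *   const float value = 0.0f;
   *   writer->writeconst(size3i_t{0,0,0}, size3i_t{64,64,64}, &value);
   * \endcode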
   */
  void writeconst(const size3i_t& start, const size3i_t& size, const float* data) override
  {
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<float,3>(*data, size));
    _accessor_rw->writeRegion(buffer, start, 0, false, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Write all-constant data with no conversion.
   *
   * As the writeconst overload with a float buffer but only works for files with
   * SampleDataType::int16 and does not scale the samples.
   */
  void writeconst(const size3i_t& start, const size3i_t& size, const std::int16_t* data) override
  {
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<std::int16_t,3>(*data, size));
    _accessor_rw->writeRegion(buffer, start, 0, true, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Write all-constant data with no conversion.
   *
   * As the writeconst overload with a float buffer but only works for files with
   * SampleDataType::int8 and does not scale the samples.
   */
  void writeconst(const size3i_t& start, const size3i_t& size, const std::int8_t* data) override
  {
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<std::int8_t,3>(*data, size));
    _accessor_rw->writeRegion(buffer, start, 0, true, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Generate low resolution data, statistics, and histogram.
   *
   * This method will be called automatically from close(), but in
   * that case it is not possible to request a progress callback.
   *
   * If the processing raises an exception the data is still marked
   * as clean. The caller can force a retry by passing force=true.
   *
   * The C++ code is very different from Python because it needs
   * an entirely different approach to be performant.
   *
   * \param decimation: Optionally override the decimation algorithms by
   *                    passing an array of DecimationType
   *                    with one entry for each level of detail. If the
   *                    array is too short then the last entry is used for
   *                    subsequent levels.
   * \param progress:   Function(done, total) called to report progress.
   *                    If it returns False the computation is aborted.
   *                    The callback implementation should not invoke any
   *                    OpenZGY methods except for any calls expressly
   *                    documented as safe for this purpose and deadlock-free.
   *                    Will be called at least once, even if there is no work.
   * \param force:      If true, generate the low resolution data even if
   *                    it appears to not be needed. Use with caution.
   *                    Especially if writing to the cloud, where data
   *                    should only be written once. Passing force=true
   *                    is currently required when creating an empty file,
   *                    no samples written at all, if the statistics and
   *                    histogram data should still be stored.
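   *
   * A minimal sketch of an explicit finalize with progress reporting
   * (hypothetical writer variable, for illustration only):
   * \code
   *   writer->finalize(
   *     std::vector<DecimationType>{DecimationType::LowPass,
   *                                 DecimationType::WeightedAverage},
   *     [](std::int64_t done, std::int64_t total) {
   *       std::cerr << done << "/" << total << "\n";
   *       return true; // returning false would abort
   *     });
   * \endcode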
   */
  void finalize(const std::vector<DecimationType>& decimation,
                const std::function<bool(std::int64_t,std::int64_t)>& progress,
                bool force = false)
  {
    if (this->_dirty || force) {
      InternalZGY::DataBuffer::cleartimers(true);
      this->_dirty = false;
      std::shared_ptr<InternalZGY::StatisticData> stats;
      std::shared_ptr<InternalZGY::HistogramData> histo;
      // Note: this->_accessor._metadata == this->_meta but the former is
      // private and only used to allow ZgyInternalBulk to use some parts of
      // the metadata. This is why both _accessor and _meta are needed below.
      std::tie(stats, histo) =
        InternalZGY::GenLodC(_accessor_rw, _meta_rw, _lodcompressor,
                             EnumMapper::mapDecimationTypeToLodAlgorithm(decimation),
                             progress, /*verbose=*/false).call();

      // Statistics and histogram are accumulated in storage units
      // and we want to store user units on the file.
      // Technically I could scale in place since stats and histo
      // shouldn't be used after this so it is ok to clobber them.
      // But I will copy just in case.
      InternalZGY::StatisticData scaled_stats(*stats);
      InternalZGY::HistogramData scaled_histo(*histo);
      const std::array<double,2> factors = this->_meta->ih().storagetofloat();
      scaled_stats.scale(0, 1, factors[1], factors[0] + factors[1]);
      scaled_histo.scale(0, 1, factors[1], factors[0] + factors[1]);
      // TODO-Low: if using a 64k temporary histogram, scaled_histo.resize(256);
      // TODO-Low: Some kind of static_assert making the following cast safe.
      // Note reason for cast: ZGY only deals with int8, int16, float
      // which means that converting the value range to float will not
      // lose any precision. If we later decide to support int32 then
      // this and many similar places need to be updated.
      this->_meta_rw->ih().setstats
        (scaled_stats.getcnt(), scaled_stats.getsum(), scaled_stats.getssq(),
         static_cast<float>(scaled_stats.getmin()),
         static_cast<float>(scaled_stats.getmax()));
      this->_meta_rw->hh().sethisto(scaled_histo.getmin(), scaled_histo.getmax(),
                                 scaled_histo.getbins(), scaled_histo.getsize());
      if (false)
        std::cout << "@Histogram Limits "
                  << histo->getmin() << " " << histo->getmax()
                  << " Converted "
                  << scaled_histo.getmin() << " " << scaled_histo.getmax()
                  << "\n";
    }
    else {
      if (progress)
        progress(0, 0);
    }
  }

  /**
   * \brief Flush the file to disk and close it.
   *
   * This version of close() will not calculate statistics and low
   * resolution bricks. Currently this makes the file useless in most
   * cases. The function may be useful for performance measurements.
   *
   * In the future it might be possible to re-open the file at some
   * later date and continue writing data to it, calling the regular
   * close() only when all data has been output.
   */
  void close_incomplete()
  {
    if (!this->_fd)
      return;

    if (!this->errorflag()) {
      this->_meta_rw->flushMeta(this->_fd);
    }
    else {
      // Write the headers anyway, but don't complain if it didn't work.
      try {
        this->set_errorflag(false);
        this->_meta_rw->flushMeta(this->_fd);
        this->set_errorflag(true);
      }
      catch (std::exception&) {
        this->set_errorflag(true);
      }
    }

    // TODO-Low it would be more consistent if the final write was of the
    // updated headers, in case the app crashes before xx_close. This
    // is true for local file access. But for cloud access the last
    // bulk data segment might be written on xx_close() because it is
    // buffered, while the headers can be written at once. This is
    // difficult to change without complicating the internal FileADT api.

    // Closing the local or cloud handle is always needed even if
    // there was an unrecoverable error. There might be resources that
    // need to be cleaned up. TODO-Low if any of the above writes raised
    // an exception I should probably still try to close.

    this->_fd->xx_close();
    this->_fd.reset();

    // If errorflag() is set and the file is new or has been
    // successfully written to at least once then the client code is
    // strongly advised to delete the file.
    // TODO-Low: Later: this is in the bells & whistles category.
    //if not self._precious_set_on_open and was_written_to
    //    self._fd.xx_delete_on_close(); self._fd.xx_close()
    //    self._fd.xx_close_if_needed_and_delete()
    //    ZgyUtils(saved_iocontext).delete(self._filename)
  }

  /**
   * \brief Flush the file to disk and close it.
   *
   * If the file has been written to, the application is encouraged to
   * call finalize() before close(). This gives more control over the
   * process and allows using a progress callback to track generation
   * of low resolution data.
   *
   * The function won't bother with statistics, histogram, lowres if
   * there has been an unrecoverable error. The headers might still be
   * written out in case somebody wants to try some forensics.
   *
   * The ZgyWriter destructor will call close() if not done already,
   * but that will catch and swallow any exception. Relying on the
   * destructor to close the file is strongly discouraged.
   */
  void close()
  {
    if (this->_fd && !this->errorflag()) {
      this->finalize(std::vector<DecimationType>
                     {DecimationType::LowPass, DecimationType::WeightedAverage},
                     nullptr);
    }
    close_incomplete();
  }

  /**
   * Return true if this open file has encountered an unrecoverable error.
   * The error should previously have caused an exception to be thrown.
   * If this flag is set, no further writes will be allowed.
   *
   * Application code might check this flag if it is considering
   * trying to recover from an error. Internally the flag is also checked
   * and if set it will (mostly) prevent other writes from being done.
   *
   * Implementation note: Currently the ZgyInternalMeta and ZgyInternalBulk
   * instances each contains an _is_bad member. The reader or writer is
   * considered bad if either of those are set. This scheme improves
   * isolation somewhat, but TODO-Low it might backfire. If writing metadata
   * to file failed, the bulk accessor should probably behave as if it
   * also has seen an error.
   *
   * Currently only the ZgyWriter uses this mechanism. It might not make
   * that much sense in ZgyReader, because as long as opening the file
   * succeeded no operation should manage to corrupt it.
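   *
   * A minimal sketch of how an application might consult the flag
   * (hypothetical writer variable, for illustration only):
   * \code
   *   if (writer->errorflag()) {
   *     // Unrecoverable error; the file contents should not be trusted.
   *   }
   * \endcode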
   *
   * Implementation note: Unlike most other ZgyWriter members, this one is
   * declared const. But it can still access the mutable _accessor_rw and
   * _meta_rw data members. You'll just have to trust me when I tell you
   * they aren't being modified here. Or refactor somehow to allow the
   * compiler (or at least a runtime check) catch it.
   */
  bool errorflag() const override
  {
    return _accessor_rw->errorflag() || _meta_rw->errorflag();
  }

  /**
   * Force the error flag for this open file to true or false.
   * This should normally be done only for testing.
   */
  void set_errorflag(bool value) override
  {
    _accessor_rw->set_errorflag(value);
    _meta_rw->set_errorflag(value);
  }
};

/**
 * \brief Concrete implementation of IZgyUtils.
 *
 * \details Thread safety: Depends on the function being executed.
 * Currently implemented methods just forward the requests to SDAPI
 * or some other cloud back end. So thread safety depends on the
 * cloud provider.
 *
 * \internal TODO-Low: One utility class per backend plug-in.
 * This will be a pain to maintain though, as the
 * Python wrapper will need to do the same.
 */
class ZgyUtils : public IZgyUtils
{
private:
  std::shared_ptr<InternalZGY::FileADT> _fd;
  std::shared_ptr<InternalZGY::FileUtilsSeismicStore> _fd_sd;

public:
  /**
   * \copydoc IZgyUtils::utils()
   */
  ZgyUtils(const std::string& prefix, const IOContext* iocontext)
  {
    _fd = InternalZGY::FileFactory::instance().create
      (prefix, InternalZGY::OpenMode::Closed, iocontext);
    // TODO-Low: Use some kind of factory here.
#ifdef HAVE_SD
    _fd_sd = std::dynamic_pointer_cast<InternalZGY::FileUtilsSeismicStore>(_fd);
#endif
  }

  void deletefile(const std::string& filename, bool missing_ok)
  {
    if (_fd_sd)
      _fd_sd->deleteFile(filename, missing_ok);
    else
      throw Errors::ZgyUserError("deletefile is not supported for this backend");
  }

  std::string alturl(const std::string& filename)
  {