// Copyright 2017-2021, Schlumberger
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "api.h"
#include "exception.h"
#include "safewriter.h"
#include "impl/enum.h"
#include "impl/file.h"
#include "impl/file_sd.h"
#include "impl/meta.h"
#include "impl/bulk.h"
#include "impl/databuffer.h"
#include "impl/transform.h"
#include "impl/lodalgo.h"
#include "impl/statisticdata.h"
#include "impl/histogramdata.h"
#include "impl/genlod.h"
#include "impl/compression.h"
#include "impl/guid.h"

#ifdef _MSC_VER
#pragma warning(push)
// The warning is due to me using pure virtual classes as "real" interfaces,
// having a similar inheritance scheme for interfaces and implementation.
// I believe this is safe as long as the interfaces are truly pure.
#pragma warning(disable:4250) // inherits via dominance
#endif

/**
 * \file api.cpp
 * \brief Implements the pure interfaces of the API.
 *
 * \internal TODO-Doc consider moving all doxygen comments to the interface
 * and exclude the concrete types. Unfortunately that might make the comments
 * harder to keep up to date.
 */

namespace {
  /**
   * \brief Make a fake shared_ptr from a plain unsafe pointer.
   *
   * Used when a method or a data member expects a smart pointer while
   * the application has an unsafe pointer that is guaranteed to remain
   * valid long enough. The returned smart pointer does nothing on
   * cleanup.
   *
   * CAVEAT: Using this method indicates a code smell.
   * Note: The method is duplicated in bulk.cpp, databuffer.cpp,
   * and api.cpp. There isn't much point in consolidating them.
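   *
   * Example (illustrative sketch; the wrapped object must outlive the
   * returned pointer):
   * \code
   *   float sample = 0;
   *   std::shared_ptr<float> p = fake_shared(&sample);
   *   // p.get() == &sample, but the deleter is a no-op, so nothing
   *   // happens to "sample" when the last copy of p goes away.
   * \endcode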
   */
  template<typename T>
  static inline std::shared_ptr<T>
  fake_shared(T* p)
  {
    return std::shared_ptr<T>(p, [](T* p){});
  }
}

namespace OpenZGY {
  class IOContext;
}

namespace OpenZGY { namespace Impl {
#if 0
}}
#endif

/** \cond IMPL */ // Doxygen needs api.cpp, but not all of it.

/**
 * \brief convert between internal and external data types.
 *
 * \details Thread safety:
 * Thread safe because the class only contains static methods with no state.
 *
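 * Example (illustrative; a round trip between the public enum and the
 * internal one, using the mapping functions declared below):
 * \code
 *   InternalZGY::RawDataType raw =
 *     EnumMapper::mapSampleDataTypeToRawDataType(SampleDataType::int16);
 *   SampleDataType back = EnumMapper::mapRawDataTypeToSampleDataType(raw);
 *   // back == SampleDataType::int16
 * \endcode
 *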
 * \internal
 * TODO-Test: Move to a separate file to allow it to be unit tested.
 */
class EnumMapper
{
  EnumMapper() = delete;
  EnumMapper(const EnumMapper&) = delete;
  EnumMapper& operator=(const EnumMapper&) = delete;
public:
  static SampleDataType mapRawDataTypeToSampleDataType(InternalZGY::RawDataType);
  static UnitDimension  mapRawHorizontalDimensionToUnitDimension(InternalZGY::RawHorizontalDimension);
  static UnitDimension  mapRawVerticalDimensionToUnitDimension(InternalZGY::RawVerticalDimension);
  static InternalZGY::RawDataType            mapSampleDataTypeToRawDataType(SampleDataType);
  static InternalZGY::RawHorizontalDimension mapUnitDimensionToRawHorizontalDimension(UnitDimension);
  static InternalZGY::RawVerticalDimension   mapUnitDimensionToRawVerticalDimension(UnitDimension);
  static InternalZGY::ZgyInternalWriterArgs mapWriterArgs(const ZgyWriterArgs&);
  static InternalZGY::LodAlgorithm mapDecimationTypeToLodAlgorithm(DecimationType value);
  static std::vector<InternalZGY::LodAlgorithm> mapDecimationTypeToLodAlgorithm(const std::vector<DecimationType>& values);
};

/** \endcond */

/**
 * \brief High level API for reading and writing ZGY files.
 *
 * The base class contains properties common to both reader and writer.
 *
 * The constructor should logically have had a ZgyInternalMeta parameter
 * for accessing the implementation layer. But due to the way the code
 * is structured the _meta pointer needs to be set in the constructor
 * for the leaf types. The _meta pointer is guaranteed to not be empty
 * except while constructing the instance.
 *
 * Both ZgyReader and ZgyWriter need to retain a pointer to the file
 * descriptor, primarily in order to explicitly close it. Relying on the
 * shared_ptr to do this when going out of scope is dangerous because
 * of exception handling. ZgyWriter additionally needs it when flushing
 * metadata to disk, since there is no file descriptor in ZgyInternalMeta.
 *
 * Both ZgyReader and ZgyWriter need a ZgyInternalBulk pointer to do
 * bulk I/O. That instance in turn keeps private references to the
 * file descriptor and the metadata but is not supposed to expose them.
 *
 * The FileADT and ZgyInternalBulk pointers can be declared here since
 * both the reader and the writer needs them. Or removed from here and
 * duplicated in ZgyReader and ZgyWriter.
 *
 * Thread safety:
 * The following applies both to this class and derived types.
 * Modification may lead to a data race. The intent for this class is
 * that any calls a ZgyReader instance might make will not modify data.
 * Except possibly changes done while the file is being opened.
 * To help enforce this, both const and mutable pointers are used for
 * ZgyInternalMeta and ZgyInternalBulk with the latter being empty
 * when instantiated from the ZgyReader constructor instead of ZgyWriter.
 * FUTURE: The instance holds one or more pointers to the data it
 */
class ZgyMeta: virtual public IZgyMeta
{
protected:
  /** \brief Handle to the internal metadata layer which this class wraps. */
  std::shared_ptr<const InternalZGY::ZgyInternalMeta> _meta;

public:

  virtual std::array<int64_t,3>
  size() const override
  {
    return _meta->ih().size();
  }

  virtual SampleDataType
  datatype() const override
  {
    return EnumMapper::mapRawDataTypeToSampleDataType(_meta->ih().datatype());
  }

  virtual std::array<float32_t,2>
  datarange() const override
  {
    if (_meta->ih().datatype() == InternalZGY::RawDataType::Float32)
      return std::array<float32_t,2>{_meta->ih().smin(), _meta->ih().smax()};
    else
      return _meta->ih().safe_codingrange();
  }

  virtual std::array<float32_t,2>
  raw_datarange() const override
  {
    if (_meta->ih().datatype() == InternalZGY::RawDataType::Float32)
      return std::array<float32_t,2>{_meta->ih().smin(), _meta->ih().smax()};
    else
      return _meta->ih().raw_codingrange();
  }

  virtual UnitDimension
  zunitdim() const override
  {
    return EnumMapper::mapRawVerticalDimensionToUnitDimension(_meta->ih().vdim());
  }

  virtual UnitDimension
  hunitdim() const override
  {
    return EnumMapper::mapRawHorizontalDimensionToUnitDimension(_meta->ih().hdim());
  }

  virtual std::string
  zunitname() const override
  {
    return _meta->ih().vunitname();
  }

  virtual std::string
  hunitname() const override
  {
    return _meta->ih().hunitname();
  }

  virtual float64_t
  zunitfactor() const override
  {
    return _meta->ih().vunitfactor();
  }

  virtual float64_t
  hunitfactor() const override
  {
    return _meta->ih().hunitfactor();
  }

  virtual float32_t
  zstart() const override
  {
    return _meta->ih().orig()[2];
  }

  virtual float32_t
  zinc() const override
  {
    return _meta->ih().inc()[2];
  }

  virtual std::array<float32_t,2>
  annotstart() const override
  {
    return std::array<float32_t,2>{
      _meta->ih().orig()[0],
      _meta->ih().orig()[1]};
  }

  virtual std::array<float32_t,2>
  annotinc() const override
  {
    return std::array<float32_t,2>{
      _meta->ih().inc()[0],
      _meta->ih().inc()[1]};
  }

  virtual const corners_t&
  corners() const
  {
    return _meta->ih().ocp_world();
  }

  virtual const corners_t&
  indexcorners() const
  {
    return _meta->ih().ocp_index();
  }

  virtual const corners_t&
  annotcorners() const
  {
    return _meta->ih().ocp_annot();
  }

  virtual std::array<int64_t,3>
  bricksize() const override
  {
    return _meta->ih().bricksize();
  }

  virtual std::vector<std::array<int64_t,3>>
  brickcount() const override
  {
    return _meta->ih().lodsizes();
  }

  virtual int32_t
  nlods() const override
  {
    return _meta->ih().nlods();
  }

  // Currently not needed by any client.
  //virtual std::string dataid() const override
  //{
  //  return InternalZGY::GUID(_meta->ih().dataid()).toString();
  //}

  virtual std::string verid()  const override
  {
    return InternalZGY::GUID(_meta->ih().verid()).toString();
  }

  // Currently not needed by any client.
  //virtual std::string previd() const override
  //{
  //  return InternalZGY::GUID(_meta->ih().previd()).toString();
  //}

  virtual void
  dump(std::ostream& os) const override
  {
    throw std::runtime_error("Not implemented: ZgyMeta::dump()");
  }

  virtual SampleStatistics
  statistics() const override
  {
    const InternalZGY::IInfoHeaderAccess& ih = _meta->ih();
    return SampleStatistics(ih.scnt(), ih.ssum(), ih.sssq(), ih.smin(), ih.smax());
  }

  virtual SampleHistogram
  histogram() const override
  {
    const InternalZGY::IHistHeaderAccess& hh = _meta->hh();
    if (hh.bincount() > 0 && hh.bins() != nullptr)
      return SampleHistogram(hh.samplecount(), hh.minvalue(), hh.maxvalue(),
                             std::vector<std::int64_t>(hh.bins(),
                                                       hh.bins() + hh.bincount()));
    else
      return SampleHistogram();
  }

  std::shared_ptr<const FileStatistics> filestats_nocache() const
  {
    using InternalZGY::LookupTable;
    using InternalZGY::BrickStatus;

    const std::int64_t bytesperalpha = _meta->ih().bytesperalpha();
    const std::int64_t bytesperbrick = _meta->ih().bytesperbrick();
    const std::vector<std::uint64_t>& alup = _meta->alup().lup();
    const std::vector<std::uint64_t>& blup = _meta->blup().lup();
    const std::vector<std::uint64_t>& bend = _meta->blup().lupend();

    FileStatistics result;
    result._file_version = _meta->fh().version();
    result._alpha_normal_size_per_entry = bytesperalpha;
    result._brick_normal_size_per_entry = bytesperbrick;
    // result._file_size = _fd->xx_size(); Available in ZgyReader and ZgyWriter only.

    // TODO-Low: Fix this kludge.
    // I happen to know that in V3 and V4 the headers are all stored
    // consecutively and the brick lookup table comes last.
    result._header_size = _meta->oh().bricklupoff() + _meta->oh().bricklupsize();

    const std::vector<std::array<std::int64_t,3>>& lodsizes = _meta->ih().lodsizes();
    const std::vector<std::int64_t>& alphaoffsets = _meta->ih().alphaoffsets();
    const std::vector<std::int64_t>& brickoffsets = _meta->ih().brickoffsets();
    // This would have been much simpler if I had made a
    // getBrickFilePositionByIndex, and possibly moved part of this code
    // inside class LookupTable. I only need to iterate over the contents
    // of alup, blup, bend, but here I need to iterate over brick position
    // and let class LookupTable convert that back to a raw index.
    std::array<std::int64_t,3> size = _meta->ih().size();
    const std::array<std::int64_t,3> bs = _meta->ih().bricksize();
    for (std::int64_t lod= 0; lod < _meta->ih().nlods(); ++lod) {
      for (std::int64_t ii = 0; ii < size[0]; ii += bs[0]) {
        for (std::int64_t jj = 0; jj < size[1]; jj += bs[1]) {
          LookupTable::LutInfo info =
            LookupTable::getAlphaFilePosition
            (ii/bs[0], jj/bs[1], lod,
             lodsizes,
             alphaoffsets, alup, /*aend,*/
             bytesperalpha);
          //std::cout<< zgydump_format(ii/bs[0], jj/bs[1], 0, lod, info) << "\n";
          switch (info.status) {
          case BrickStatus::Missing:  result._alpha_missing_count  += 1; break;
          case BrickStatus::Constant: result._alpha_constant_count += 1; break;
          case BrickStatus::Normal:
            result._alpha_normal_count   += 1;
            break;
          case BrickStatus::Compressed:
            result._alpha_compressed_count += 1;
            result._alpha_compressed_size += info.size_in_file;
            break;
          }
          for (std::int64_t kk = 0; kk < size[2]; kk += bs[2]) {
            LookupTable::LutInfo info =
              LookupTable::getBrickFilePosition
              (ii/bs[0], jj/bs[1], kk/bs[2], lod,
               lodsizes,
               brickoffsets, blup, bend,
               bytesperbrick);
            //std::cout << zgydump_format(ii/bs[0], jj/bs[1], kk/bs[2], lod, info) << "\n";
            switch (info.status) {
            case BrickStatus::Missing:  result._brick_missing_count  += 1; break;
            case BrickStatus::Constant: result._brick_constant_count += 1; break;
            case BrickStatus::Normal:
              result._brick_normal_count   += 1; break;
            case BrickStatus::Compressed:
              result._brick_compressed_count += 1;
              result._brick_compressed_size += info.size_in_file;
              break;
            }
          }
        }
      }
      size[0] = (size[0]+1)/2;
      size[1] = (size[1]+1)/2;
      size[2] = (size[2]+1)/2;
    }

    // TODO-Low: Keep track of wasted_size and padding_size.
    // Padding gets added in ZgyInternalMeta::flushMeta(). I need to
    // replicate the logic here. The alternative is to scan for the
    // lowest brick offset. But even that isn't completely reliable
    // because there might be wasted blocks between end of padding and
    // start of first block. And, do I really care at all?
    //result._padding_size = roundup(result._header_size,
    //                              result._brick_size_per_entry);
    //result._wasted_size = result._file_size - result._usedSize();

    // DERIVED INFORMATION:
    // The following could also have been generated on the fly in some
    // member function. I pre-calculate it here instead, to limit the
    // amount of code visible in the public api.h header file.

    // File size not including padding and holes.
    result._used_size =
      ((result._alpha_normal_count * result._alpha_normal_size_per_entry) + result._alpha_compressed_size +
       (result._brick_normal_count * result._brick_normal_size_per_entry) + result._brick_compressed_size +
       result._header_size);

    // Same as used_size if the file is/was uncompressed.
    result._used_if_uncompressed =
      (((result._alpha_normal_count + result._alpha_compressed_count) * result._alpha_normal_size_per_entry) +
       ((result._brick_normal_count + result._brick_compressed_count) * result._brick_normal_size_per_entry) +
       result._header_size);

    // Is there at least one brick flagged as compressed?
    result._is_compressed =
      (result._alpha_compressed_count + result._brick_compressed_count > 0);

    // Relative size of this possibly compressed file compared to uncompressed.
    result._compression_factor =
      (result._used_if_uncompressed > 0 ?
       result._used_size / (double)result._used_if_uncompressed :
       1);
    // Slightly different definition of compression factor.
    // Doesn't work because file_size not set yet.
    // Besides, I like the other one better.
    //result._compression_factor =
    //  (result._file_size > 0 ?
    //   result._used_size + (result._file_size-(result._used_if_uncompressed)) / (double)result._file_size:
    //   1);

    return std::shared_ptr<const FileStatistics>(new FileStatistics(result));
  }

  /**
   * This method should be overridden in ZgyReader and ZgyWriter.
   * If it is ok to have ZgyMeta be an abstract type then this
   * implementation can simply be removed.
   *
   * Calling this generic version of the filestats() method will not
   * populate the file size and will not do any caching.
   */
  virtual std::shared_ptr<const FileStatistics> filestats() const override
  {
    return filestats_nocache();
  }
};

/**
 * \brief Add coordinate conversion to the concrete ZgyMeta class.
 *
 * \details Thread safety: See the base class. This specialization
 * does not add any additional concerns because all the new members
 * are const. So they are safe to call concurrently with other read
 * operations.
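 *
 * Example (illustrative sketch; "reader" is assumed to be an open
 * IZgyReader, which exposes these methods through the IZgyTools
 * interface):
 * \code
 *   const std::array<float64_t,2> annot{1234.0, 5678.0}; // inline, xline
 *   std::array<float64_t,2> world = reader->annotToWorld(annot);
 *   std::array<float64_t,2> index = reader->annotToIndex(annot);
 * \endcode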
 */
class ZgyMetaAndTools: public ZgyMeta, virtual public IZgyTools
{
public:
  virtual void
  transform(const corners_t& A, const corners_t& B, std::vector<std::array<float64_t,2>>& data) const override
  {
    // If this method is needed then it is fairly simple to implement.
    // Change InternalZGY::generalTransform() to accept a vector of array.
    // transform1() can then be re-implemented in terms of transform.
    throw std::runtime_error("Not implemented: ZgyMetaAndTools::transform()");
  }

  virtual std::array<float64_t,2>
  transform1(const corners_t& A, const corners_t& B, const std::array<float64_t,2>& point) const override
  {
    float64_t x{point[0]}, y{point[1]};
    if (!InternalZGY::generalTransform
        (A[0][0], A[0][1], A[1][0], A[1][1], A[2][0], A[2][1],
         B[0][0], B[0][1], B[1][0], B[1][1], B[2][0], B[2][1],
         &x, &y, 1))
      throw Errors::ZgyInternalError("Transform is not well defined due to colinear or coincident control points");
    return std::array<float64_t,2>{x, y};
  }

  virtual std::array<float64_t,2>
  annotToIndex(const std::array<float64_t,2>& point) const override
  {
    return transform1(annotcorners(), indexcorners(), point);
  }

  virtual std::array<float64_t,2>
  annotToWorld(const std::array<float64_t,2>& point) const override
  {
    return transform1(annotcorners(), corners(), point);
  }

  virtual std::array<float64_t,2>
  indexToAnnot(const std::array<float64_t,2>& point) const override
  {
    return transform1(indexcorners(), annotcorners(), point);
  }

  virtual std::array<float64_t,2>
  indexToWorld(const std::array<float64_t,2>& point) const override
  {
    return transform1(indexcorners(), corners(), point);
  }

  virtual std::array<float64_t,2>
  worldToAnnot(const std::array<float64_t,2>& point) const override
  {
    return transform1(corners(), annotcorners(), point);
  }

  virtual std::array<float64_t,2>
  worldToIndex(const std::array<float64_t,2>& point) const override
  {
    return transform1(corners(), indexcorners(), point);
  }
};

/**
 * \brief Concrete implementation of IZgyReader.
 * \details Thread safety: See the base class.
 */
class ZgyReader : public ZgyMetaAndTools, virtual public IZgyReader
{
private:
  std::shared_ptr<InternalZGY::FileADT> _fd;
  std::shared_ptr<const InternalZGY::ZgyInternalBulk> _accessor;
  std::shared_ptr<const FileStatistics> _filestats;
  mutable std::mutex _filestats_mutex;

public:
  /**
   * \copydoc IZgyReader::open()
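   *
   * Example (illustrative sketch; applications normally go through the
   * IZgyReader::open() factory rather than this constructor):
   * \code
   *   std::shared_ptr<OpenZGY::IZgyReader> reader =
   *       OpenZGY::IZgyReader::open("data.zgy", nullptr);
   *   // ... read from the file ...
   *   reader->close();
   * \endcode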
   */
  ZgyReader(const std::string& filename, const IOContext* iocontext)
  {
    // Note: Currently the Python version of this constructor has an
    // additional "update" parameter which, if set, causes the underlying
    // file to be opened in read/write mode. This is a kludge to facilitate
    // a stand-alone lowres generator, which was only ever used for testing.
    _fd = InternalZGY::FileFactory::instance().create(
        filename, InternalZGY::OpenMode::ReadOnly, iocontext);

    // Set the protected metadata information in the ZgyMeta base class.
    // ZgyInternalMeta does not retain the file descriptor. The file is
    // only used inside the constructor to populate the headers.
    _meta.reset(new InternalZGY::ZgyInternalMeta(this->_fd));

    // At the implementation level the bulk- and meta access are separate,
    // and the bulk accessor needs some of the meta information to work.
    // The accessor will have its own metadata pointer which it holds on to.
    // It also holds on to the file descriptor. Unlike the metadata reader
    // it is obviously not possible to read everything up front.
    _accessor.reset(new InternalZGY::ZgyInternalBulk(_fd, _meta, nullptr, false));
  }

  ~ZgyReader()
  {
    try {
      //close(); // See close() for why this may be a bad idea.
      _accessor.reset();
      _fd.reset();
    }
    catch (const std::exception& ex) {
      // We should never get here!
      // Caller should have done an explicit close() so it can handle
      // exceptions itself. Exceptions thrown from a destructor are evil.
      // Trying to catch exceptions from the two lines above might already
      // be too late. The destructor in _fd does a similar operation
      // (blind catch with logging) which makes it even less likely
      // that we get here.
      std::cerr << "ERROR closing a file opened for read: "
                << (ex.what() ? ex.what() : "(null)")
                << std::endl;
    }
  }

  /**
   * \brief Read an arbitrary region.
   *
   * The data is read into a buffer provided by the caller.
   * The method will apply conversion storage -> float if needed.
   *
   * Data is ordered inline(slowest), crossline, vertical(fastest).
   *
   * The start position refers to the specified lod level.
   * At lod 0 start + data.size can be up to the survey size.
   * At lod 1 the maximum is just half that, rounded up.
   *
   * It is valid to pass a size that includes the padding area
   * between the survey and the end of the current brick. But not
   * more. In other words, the limit for lod 0 is actually
   * reader()->size() rounded up to a multiple of reader->bricksize().
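   *
   * Example (illustrative sketch; "reader" is an open IZgyReader and the
   * requested region is assumed to lie inside the survey):
   * \code
   *   const std::array<int64_t,3> start{0, 0, 0};
   *   const std::array<int64_t,3> size{64, 64, 64};
   *   std::vector<float> data(64 * 64 * 64);
   *   reader->read(start, size, data.data(), 0); // lod 0
   * \endcode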
   */
  virtual void read(const size3i_t& start, const size3i_t& size, float* data, int lod) const override
  {
    if (!_accessor)
      throw Errors::ZgyUserError("ZGY file not open for read");
    std::shared_ptr<float> fakeshared = fake_shared(data);
    auto databuffer = std::make_shared<InternalZGY::DataBufferNd<float,3>>(fakeshared, size);
    _accessor->readToExistingBuffer(databuffer, start, lod, true);
    databuffer.reset();
    if (!fakeshared.unique()) // Actually a fatal error.
      throw Errors::ZgyInternalError("A reference to the user's buffer was retained.");
  }

  /**
   * \brief Read an arbitrary region with no conversion.
   *
   * As the read overload with a float buffer but only works for files with
   * SampleDataType::int16 and does not scale the samples.
   */
  virtual void read(const size3i_t& start, const size3i_t& size, std::int16_t* data, int lod) const override
  {
    if (!_accessor)
      throw Errors::ZgyUserError("ZGY file not open for read");
    std::shared_ptr<std::int16_t> fakeshared = fake_shared(data);
    auto databuffer = std::make_shared<InternalZGY::DataBufferNd<std::int16_t,3>>(fakeshared, size);
    _accessor->readToExistingBuffer(databuffer, start, lod, false);
    databuffer.reset();
    if (!fakeshared.unique()) // Actually a fatal error.
      throw Errors::ZgyInternalError("A reference to the user's buffer was retained.");
  }

  /**
   * \brief Read an arbitrary region with no conversion.
   *
   * As the read overload with a float buffer but only works for files with
   * SampleDataType::int8 and does not scale the samples.
   */
  virtual void read(const size3i_t& start, const size3i_t& size, std::int8_t* data, int lod) const override
  {
    if (!_accessor)
      throw Errors::ZgyUserError("ZGY file not open for read");
    std::shared_ptr<std::int8_t> fakeshared = fake_shared(data);
    auto databuffer = std::make_shared<InternalZGY::DataBufferNd<std::int8_t,3>>(fakeshared, size);
    _accessor->readToExistingBuffer(databuffer, start, lod, false);
    databuffer.reset();
    if (!fakeshared.unique()) // Actually a fatal error.
      throw Errors::ZgyInternalError("A reference to the user's buffer was retained.");
  }

  /**
   * \brief Get hint about all constant region.
   *
   * Check to see if the specified region is known to have all samples
   * set to the same value. Returns a pair of (is_const, const_value).
   * If it returns is_const=true you know what the region contains;
   * otherwise you need to use the regular read() method.
   *
   * For int8 and int16 files the caller may specify whether to scale
   * the values or not. Even if unscaled the function returns the value
   * as a double.
   *
   * The function only makes inexpensive checks so it might return
   * is_const=false even if the region was in fact constant. It will not
   * make the opposite mistake. This method is only intended as a hint
   * to improve performance. The following figure might clarify how it works.
   * \image html readconst-fig1.png
   * \image latex readconst-fig1.png
   *
   * If the region requested in readconst() is exactly one ZGY brick
   * then this just checks whether that brick is known to be constant.
   * If called with a larger, arbitrary region it fails unless all
   * bricks in that region are known to be constant and all have the
   * same value. It will not tell you which brick(s) were not const.
   * If you need to know then you must loop over each ZGY brick and
   * make multiple calls to readconst().
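   *
   * Example (illustrative sketch; fall back to a regular read when the
   * region is not known to be constant):
   * \code
   *   std::vector<float> data(size[0] * size[1] * size[2]);
   *   std::pair<bool,double> c = reader->readconst(start, size, 0, true);
   *   if (c.first)
   *     std::fill(data.begin(), data.end(), static_cast<float>(c.second));
   *   else
   *     reader->read(start, size, data.data(), 0);
   * \endcode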
   */
  virtual std::pair<bool,double> readconst(const size3i_t& start, const size3i_t& size, int lod, bool as_float) const override
  {
    if (!_accessor)
      throw Errors::ZgyUserError("ZGY file not open for read");
    return _accessor->readConstantValue(start, size, lod, as_float);
  }

  /**
   * \brief Close the file and release resources.
   *
   * Unlike ZgyWriter::close(), forgetting to close a file that was
   * only open for read is not a major faux pas. It can still lead to
   * problems though.
   *
   *   \li The destructor of _fd will catch and ignore any exceptions
   *       because if a destructor throws then this will normally
   *       cause the application to crash.
   *
   *   \li If a callback is used to refresh the token this will not
   *       happen in our destructor (it won't call xx_close) because
   *       it is too risky to invoke the callback this late. It might
   *       not be valid any longer. This means that if the token has
   *       expired since the last read then the close will fail.
   *       Exactly why SDAPI requires a token just to close a file is
   *       a different question. Possibly this is for removing any
   *       read locks.
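   *
   * Example (illustrative sketch; close explicitly so any error can be
   * handled instead of being swallowed by the destructor):
   * \code
   *   try {
   *     reader->close();
   *   }
   *   catch (const OpenZGY::Errors::ZgyError& ex) {
   *     std::cerr << "Error closing file: " << ex.what() << std::endl;
   *   }
   * \endcode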
   */
  void close()
  {
    if (_accessor) {
      _accessor.reset();
    }
    if (_fd) {
      _fd->xx_close();
      _fd.reset();
    }
    // Metadata remains accessible. Not sure whether this is a good idea.
  }

  /**
   * Get the file statistics of a file currently opened for read.
   * The result does not change while the file is open, so it can
   * be cached here.
   */
  virtual std::shared_ptr<const FileStatistics> filestats() const override
  {
    if (!_filestats) {
      std::shared_ptr<FileStatistics> result
        (new FileStatistics(*filestats_nocache()));
      // The base class has no _fd member so I need to set the size here.
      result->_file_size = _fd->xx_eof();
      {
        // Too bad there is no proper atomic_shared_ptr yet.
        std::lock_guard<std::mutex> lk(_filestats_mutex);
        // The cache is semantically const.
        const_cast<ZgyReader*>(this)->_filestats = result;
      }
    }
    std::lock_guard<std::mutex> lk(_filestats_mutex);
    return _filestats;
  }
};

/**
 * \brief Concrete implementation of IZgyWriter.
 *
 * \details Thread safety: This class is single threaded. IZgyWriter::open()
 * may choose to return a wrapper that serializes all access, just in case
 * the application didn't read the memo about no concurrent access.
 * See class ZgySafeWriter for details.
 *
 * Const-correctness: Most methods are non-const because writes will update
 * metadata. If not in ZgyWriter itself then in the classes it refers to.
 * The class refers to a mutable ZgyInternalBulk instance. And a mutable
 * ZgyInternalMeta instance that is actually the same as the one in the
 * base class except this one is not declared const. Now for the minor
 * code smell: Any const method (currently just errorflag) will still be
 * able to access those _rw pointers so the "const" declaration hardly
 * protects against anything. There are ways of handling this better.
 * But with just a single method affected I'll let it slide.
 */
class ZgyWriter : public ZgyMetaAndTools, virtual public IZgyWriter
{
private:
  std::shared_ptr<InternalZGY::FileADT> _fd;
  std::shared_ptr<InternalZGY::ZgyInternalBulk> _accessor_rw;
  std::shared_ptr<InternalZGY::ZgyInternalMeta> _meta_rw;
  mutable bool _dirty; // if true need LOD, stats, histogram.
  compressor_t _compressor;
  compressor_t _lodcompressor;

public:
  /**
   * \copydoc IZgyWriter::open()
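   *
   * Example (illustrative sketch; applications normally go through the
   * IZgyWriter::open() factory, and the ZgyWriterArgs setters shown here
   * are assumed from the public header):
   * \code
   *   OpenZGY::ZgyWriterArgs args = OpenZGY::ZgyWriterArgs()
   *       .filename("data.zgy")
   *       .size(512, 640, 1000)
   *       .datatype(OpenZGY::SampleDataType::float32);
   *   std::shared_ptr<OpenZGY::IZgyWriter> writer =
   *       OpenZGY::IZgyWriter::open(args);
   * \endcode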
   */
  ZgyWriter(const ZgyWriterArgs& args)
    : _fd()
    , _accessor_rw()
    , _dirty(false)
    , _compressor(args._compressor)
    , _lodcompressor(args._lodcompressor ? args._lodcompressor : args._compressor)
  {
    const bool compress = args._compressor || args._lodcompressor;
    // This is both pointless and expensive to support.
    if (compress && args._datatype != SampleDataType::float32)
      throw Errors::ZgyUserError("Compressed files need to be stored as float.");

    // Set the protected metadata information in the ZgyMeta base class.
    // Also store a mutable copy in the current class.
    // ZgyInternalMeta does not contain any file descriptor. This means
    // we need to hold on to _fd ourselves and provide it when it is time
    // to flush metadata to disk.
    InternalZGY::ZgyInternalWriterArgs iargs = EnumMapper::mapWriterArgs(args);
    _meta_rw.reset(new InternalZGY::ZgyInternalMeta(iargs, compress));
    _meta = _meta_rw; // in base class

    // The file creation was deferred until after the consistency checks.
    _fd = InternalZGY::FileFactory::instance().create(
        args._filename, InternalZGY::OpenMode::Truncate, args._iocontext);
    _meta_rw->flushMeta(_fd);
    // Compress or not at this level only controls alignment etc.
    // The actual compression functor is passed to each write.
    _accessor_rw.reset(new InternalZGY::ZgyInternalBulk(_fd, _meta_rw, _meta_rw, compress));
  }

  /**
   * \brief Automatically close the file when it goes out of scope.
   *
   * Application code is encouraged to close the file explicitly.
   * The destructor is just there as a fallback. Errors caught
   * in the fallback will be logged to stderr and otherwise ignored.
   */
  virtual ~ZgyWriter()
  {
    try {
      close();
    }
    catch (const Errors::ZgyError& ex) {
      std::cerr << "ERROR: Uncaught ZGY exception closing file: "
                << ex.what() << std::endl;
    }
    catch (const std::exception& ex) {
      std::cerr << "ERROR: Uncaught general exception closing file: "
                << ex.what() << std::endl;
    }
  }

  /**
   * \brief Write an arbitrary region.
   *
   * This will apply conversion float -> storage if needed.
   *
   * Data is ordered inline(slowest), crossline, vertical(fastest).
   *
   * A read/modify/write will be done if the region's start and size
   * doesn't align with bricksize. When writing to the cloud this
   * read/modify/write may incur performance and size penalties. So
   * do write brick aligned data if possible. The same applies to
   * writing compressed data where r/m/w can cause a severe
   * loss of quality.
   *
   * The start position refers to the specified lod level.
   * At lod 0 start + data.size can be up to the survey size.
   * At lod 1 the maximum is just half that, rounded up.
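   *
   * Example (illustrative sketch; "writer" is an open IZgyWriter and the
   * region is deliberately brick aligned, assuming the survey is at least
   * one brick in each direction):
   * \code
   *   const std::array<int64_t,3> bs = writer->bricksize(); // e.g. 64^3
   *   const std::array<int64_t,3> start{0, 0, 0};
   *   std::vector<float> data(bs[0] * bs[1] * bs[2], 0.0f);
   *   writer->write(start, bs, data.data());
   * \endcode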
   */
  void write(const size3i_t& start, const size3i_t& size, const float* data) override
  {
    if (errorflag())
      throw Errors::ZgyCorruptedFile("Cannot continue due to previous errors.");

    // TODO-Worry: The buffer is supposed to be copied at least once
    // before being sent down to the lowest levels. The longer I wait
    // before I do that, the higher the risk is that it isn't done.
    // Currently there is code in _writeAlignedRegion() that ensures
    // the buffer was copied. But if I do the copy too early then it
    // might be wasted effort since r/m/w or value type conversion
    // would copy it anyway.
    //
    // Why I need a copy:
    //
    //   - I need an ugly const_cast because DataBuffer isn't fully const
    //     aware. The cast is particularly dangerous because the lower
    //     levels might be tempted to do padding and byteswapping in place.
    //     If the compiler doesn't stop this due to a "const" declaration
    //     we get subtle bugs.
    //
    //   - The user's buffer is not reference counted. Which means I need
    //     to use a fake shared_ptr. If the lower levels implement delayed
    //     write and are "smart" about not always copying buffers then
    //     the buffer might be accessed after the function returns.
    std::shared_ptr<float> fakeshared = fake_shared(const_cast<float*>(data));
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<float,3>(fakeshared, size));
    _accessor_rw->writeRegion(buffer, start, 0, false, _compressor);
    this->_dirty = true;
    buffer.reset();
    if (!fakeshared.unique()) // Actually a fatal error.
      throw Errors::ZgyInternalError("A reference to the user's buffer was retained.");
  }

  /**
   * \brief Write an arbitrary region with no conversion.
   *
   * As the write overload with a float buffer but only works for files with
   * SampleDataType::int16 and does not scale the samples.
   */
  void write(const size3i_t& start, const size3i_t& size, const std::int16_t *data) override
  {
    std::shared_ptr<std::int16_t> fakeshared = fake_shared(const_cast<std::int16_t*>(data));
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<int16_t,3>(fakeshared, size));
    _accessor_rw->writeRegion(buffer, start, 0, true, _compressor);
    this->_dirty = true;
    buffer.reset();
    if (!fakeshared.unique()) // Actually a fatal error.
      throw Errors::ZgyInternalError("A reference to the user's buffer was retained.");
  }

  /**
   * \brief Write an arbitrary region with no conversion.
   *
   * As the write overload with a float buffer but only works for files with
   * SampleDataType::int8 and does not scale the samples.
   */
  void write(const size3i_t& start, const size3i_t& size, const std::int8_t* data) override
  {
    std::shared_ptr<std::int8_t> fakeshared = fake_shared(const_cast<std::int8_t*>(data));
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<std::int8_t,3>(fakeshared, size));
    _accessor_rw->writeRegion(buffer, start, 0, true, _compressor);
    this->_dirty = true;
    buffer.reset();
    if (!fakeshared.unique()) // Actually a fatal error.
      throw Errors::ZgyInternalError("A reference to the user's buffer was retained.");
  }

  /**
   * \brief Write all-constant data.
   *
   * Works as the corresponding write but the entire region is set
   * to the same value. So the provided data buffer needs just one
   * value, or alternatively can be passed as &scalar_value.
   *
   * Calling this method is faster than filling a buffer with constant
   * values and calling write. But it produces the exact same
   * result. This is because write will automatically detect whether
   * the input buffer is all constant.
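   *
   * Example (illustrative sketch; fills a region with zeros without
   * allocating a full buffer):
   * \code
   *   const float zero = 0.0f;
   *   writer->writeconst(start, size, &zero);
   * \endcode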
   */
  void writeconst(const size3i_t& start, const size3i_t& size, const float* data) override
  {
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<float,3>(*data, size));
    _accessor_rw->writeRegion(buffer, start, 0, false, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Write an arbitrary region with no conversion.
   *
   * As the writeconst overload with a float buffer but only works for files with
   * SampleDataType::int16 and does not scale the samples.
   */
  void writeconst(const size3i_t& start, const size3i_t& size, const std::int16_t* data) override
  {
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<std::int16_t,3>(*data, size));
    _accessor_rw->writeRegion(buffer, start, 0, true, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Write an arbitrary region with no conversion.
   *
   * As the writeconst overload with a float buffer but only works for files with
   * SampleDataType::int8 and does not scale the samples.
   */
  void writeconst(const size3i_t& start, const size3i_t& size, const std::int8_t* data) override
  {
    std::shared_ptr<InternalZGY::DataBuffer> buffer(new InternalZGY::DataBufferNd<std::int8_t,3>(*data, size));
    _accessor_rw->writeRegion(buffer, start, 0, true, _compressor);
    this->_dirty = true;
  }

  /**
   * \brief Generate low resolution data, statistics, and histogram.
   *
   * This method will be called automatically from close(), but in
   * that case it is not possible to request a progress callback.
   *
   * If the processing raises an exception the data is still marked
   * as clean. The caller can force a retry by passing force=true.
   *
   * The C++ code is very different from Python because it needs
   * an entirely different approach to be performant.
   *
   * \param decimation: Optionally override the decimation algorithms by
   *                    passing an array of DecimationType
   *                    with one entry for each level of detail. If the
   *                    array is too short then the last entry is used for
   *                    subsequent levels.
   * \param progress:   Function(done, total) called to report progress.
   *                    If it returns False the computation is aborted.
   *                    The callback implementation should not invoke any
   *                    OpenZGY methods except for any calls expressly
   *                    documented as safe for this purpose and deadlock-free.
   *                    Will be called at least once, even if there is no work.
   * \param force:      If true, generate the low resolution data even if
   *                    it appears to not be needed. Use with caution.
   *                    Especially if writing to the cloud, where data
   *                    should only be written once. Passing force=true
   *                    is currently required when creating an empty file
   *                    with no samples written at all, if the statistics
   *                    and histogram data should still be stored.
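   *
   * Example (illustrative sketch; the decimation algorithms shown are
   * assumptions, and the lambda just reports progress without aborting):
   * \code
   *   writer->finalize(
   *       std::vector<OpenZGY::DecimationType>{
   *           OpenZGY::DecimationType::LowPass,
   *           OpenZGY::DecimationType::WeightedAverage},
   *       [](std::int64_t done, std::int64_t total) {
   *         std::cerr << "\rFinalize: " << done << "/" << total;
   *         return true; // returning false would abort
   *       });
   * \endcode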
   */
  void finalize(const std::vector<DecimationType>& decimation,
                const std::function<bool(std::int64_t,std::int64_t)>& progress,
                bool force = false)
  {
    if (this->_dirty || force) {
      InternalZGY::DataBuffer::cleartimers(true);
      this->_dirty = false;
      std::shared_ptr<InternalZGY::StatisticData> stats;