api.py 51 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env python3

"""
This file contains the public OpenZGY API.

The API is modeled roughly after the API exposed by the Python wrapper
around the existing C++ ZGY-Public API. This is probably just as good
a starting point as anything else. And it makes testing simpler for
those tests that compare the old and new behavior.

    api.ZgyMeta:

        * Has a private reference to a impl.meta.ZgyInternalMeta instance.
        * Contains a large number of properties exposing meta data,
          most of which will present information from the ZgyInternalMeta
          in a way that is simpler to use and doesn't depend on the
          file version.
        * End users will access methods from this class. Most likely
          via the derived ZgyReader and ZgyWriter classes. The end users
          might not care that there is a separate base class.

    api.ZgyMetaAndTools(ZgyMeta):

        * Add coordinate conversion routines to a ZgyMeta instance.

    api.ZgyReader(ZgyMetaAndTools): # + read, readconst, close
    api.ZgyWriter(ZgyMetaAndTools): # + write, writeconst, finalize, close

        * Has a private reference to a impl.meta.ZgyInternalBulk instance.
        * Add bulk read and write functionality, forwarding the requests
          to the internal bulk instance.
        * These classes with their own and inherited methods and properties
          comprise the public OpenZGY API.
        * Currently the ZgyWriter does not expose the bulk read() function
          but it does allow accessing all the metadata. Allowing read()
          might be added later if it appears to be useful.
          In practice this just means to let ZgyWriter inherit ZgyReader.
"""

##@package openzgy
#@brief The top level only has package members.
##@package openzgy.api
#@brief User visible apis are here.

##
# \mainpage
#
# The %OpenZGY Python API allows read/write access to files
# stored in the ZGY format. The main part of the API is here:
#
# \li ZgyReader and its ZgyMeta base class.
# \li ZgyWriter also extending ZgyMeta.
# \li ZgyUtils for anything not read or write.
# \li \ref exception.ZgyError
#  you might want to catch.
# \li ProgressWithDots example of progress reporting.
# \li \ref Example Example application.
#
59
60
61
62
63
# If you are reading this document from doxygen/pure/apidoc.pdf
# in the source tree then please see doxygen/README.md for an
# explanation of why the documentation produced by the build might
# be better.
#
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# \if IMPL
# If you are viewing the full Doxygen documentation then this
# covers both the API and most of the implementation. So if you
# look at the list of classes and methods this might seem a bit
# daunting. All you really need to use the API should be in the
# above list. Excluding trivial structs that will be cross
# referenced as needed. So you don't need to go looking for them.
# Of course, if you want to work on %OpenZGY itself then you
# probably need everything.
#
# See also the following related pages:
#
# \li \ref physicalformat
# \li \ref implementation
# \li \ref lowres
# \li \ref migration
#
# \endif
# \page Example
# \include simplecopy.py

import numpy as np
import json
import sys
from collections import namedtuple
from enum import Enum

from .exception import *
from .impl import enum as impl_enum
from .impl.meta import ZgyInternalMeta
from .impl.bulk import ZgyInternalBulk, ScalarBuffer
from .impl.transform import generalTransform
from .impl.file import FileFactory
from .impl.stats import StatisticData
from .impl.lodalgo import DecimationType
from .impl.compress import CompressFactoryImpl
from .impl.zfp_compress import ZfpCompressPlugin
from .impl.genlod import GenLodC

##@cond IMPL

##@brief Explicit control of imported symbols.
class _internal:
    """
    This class is only used to rename some internal code.

    I want to be explicit about which classes I need from impl.*
    but at the same time I don't want to pollute the user-visible api
    namespace with names not even starting with an underscore that the
    user has no business accessing. I am not sure whether my way of
    achieving this is considered a kludge.

    What I really want is to say something like:

        from .impl.genlod import GenLodC as _internal.GenLodC

    but apparently this is not allowed.
    """
    pass
# Attach each implementation symbol as an attribute of _internal, then
# delete the module-level name so it is hidden from users of this module.
# Note: the "del" statements are why a plain "import ... as ..." would
# not achieve the same effect; the names must not remain at module scope.
_internal.enum             = impl_enum;        del impl_enum
_internal.ZgyInternalMeta  = ZgyInternalMeta;  del ZgyInternalMeta
_internal.ZgyInternalBulk  = ZgyInternalBulk;  del ZgyInternalBulk
_internal.ScalarBuffer     = ScalarBuffer;     del ScalarBuffer
_internal.generalTransform = generalTransform; del generalTransform
_internal.FileFactory      = FileFactory;      del FileFactory
_internal.StatisticData    = StatisticData;    del StatisticData
_internal.DecimationType   = DecimationType;   del DecimationType
_internal.CompressFactoryImpl = CompressFactoryImpl; del CompressFactoryImpl
_internal.ZfpCompressPlugin   = ZfpCompressPlugin;   del ZfpCompressPlugin
_internal.GenLodC          = GenLodC;          del GenLodC

##@endcond

##@brief Sample data type used in the public API.
class SampleDataType(Enum):
    """
    The sample data types that the public API can report.
    Each member mirrors a RawDataType value in the ZGY file format.
    """
    # Values deliberately start at 1000 so they cannot be confused
    # with the on-file RawDataType codes.
    unknown = 1000  # Not set or not recognized.
    int8 = 1001     # 8-bit signed integer samples.
    int16 = 1002    # 16-bit signed integer samples.
    float = 1003    # 32-bit IEEE floating point samples.

##@brief Horizontal or vertical dimension as used in the public API.
class UnitDimension(Enum):
    """
    Dimension of a horizontal or vertical axis in the public API.

    A horizontal axis may measure length or arc angle, though most
    applications handle length only. A vertical axis may measure time
    or length; vertical length is simply depth. One could argue for two
    separate enums (horizontal vs. vertical) since the legal values
    differ between the two directions.
    """
    # Values deliberately start at 2000 to keep them distinct from
    # other enums in this API.
    unknown = 2000   # Not set or not recognized.
    time = 2001      # Vertical only: seismic travel time.
    length = 2002    # Horizontal distance, or vertical depth.
    arcangle = 2003  # Horizontal only: coordinates in degrees.

##@brief Base class shared between ZgyReader and ZgyWriter.
class ZgyMeta:
    """
    Base class shared between ZgyReader and ZgyWriter.
    Exposes all the read-only meta data of an open ZGY file.
    """
    def __init__(self, meta):
        """Create an instance, providing the ZgyInternalMeta to use."""
        assert meta is not None
        assert isinstance(meta, _internal.ZgyInternalMeta)
        self._meta = meta

    @property
    def size(self): # (iii)
        """
        Number of inlines, crosslines, and samples in that order.
        """
        return self._meta._ih._size

    @property
    def datatype(self): # s -> Enum
        """
        Sample data type.
        The ZGY-Public API uses enums: "int8", "int16", "float".
        In some cases these are also passed as strings.
        The old Python wrapper for ZGY-Public is stringly typed.
        Instead of returning a real enum it returns the name.
        """
        return _map_DataTypeToSampleDataType(self._meta._ih._datatype)

    @property
    def datarange(self): # (ff), a.k.a. DataMinMax
        """
        For integral data this is the lowest and highest sample value
        than can be represented in storage. The lowest storage value
        (e.g. -128 for SignedInt8 data) will be returned as DataMinMax[0]
        when read as float. Similarly the highest storage value e.g. +127
        will be returned as DataMinMax[1]. When integer data is read as
        the "native" integral type then no automatic scaling is applied.
        Note that in this case the actual range of the samples on file might
        be smaller (for int8, not all of the range -128..+127 might be used)
        but it cannot be larger.

        For floating point data these numbers are supposed to be the actual
        value range of the samples on file. It is a good idea to enforce
        this here, as the values stored by older writers cannot be trusted.
        Note: Also enforced on write in impl.meta.InfoHeaderV2.calculate_write.
        TODO-Worry: In some float32 datasets the bulk data might have
        ridiculously large spikes which will be included in the statistical
        range but not in the codingrange. So, codingrange is actually the
        one that is correct. Also, can we have a situation where stats
        are not filled in while the codingrange is set? I am not sure
        this is currently handled.
        """
        if self._meta._ih._datatype == _internal.enum.RawDataType.Float32:
            return (self._meta._ih._smin, self._meta._ih._smax)
        else:
            return (self._meta._ih._safe_codingrange[0], self._meta._ih._safe_codingrange[1])

    @property
    def raw_datarange(self): # (ff), a.k.a. DataMinMax
        """
        As datarange, but the actual values read from the file before
        they might have been changed to try to fix a bad file.
        Only use this property if you want to handle such files
        differently than the library does.
        """
        if self._meta._ih._datatype == _internal.enum.RawDataType.Float32:
            return (self._meta._ih._smin, self._meta._ih._smax)
        else:
            return (self._meta._ih._file_codingrange[0], self._meta._ih._file_codingrange[1])

    @property
    def zunitdim(self):
        """
        Dimension in the vertical direction. "time" or "length".
        "time" might on file be "SeismicTWT" or "SeismicOWT".
        The old Python wrapper for ZGY-Public is stringly typed.
        Instead of returning a real enum it returns the name.
        """
        return _map_VerticalDimensionToUnitDimension(self._meta._ih._vdim)

    @property
    def hunitdim(self):
        """
        Dimension in the horizontal direction. Should always be "length".
        The original specification called for supporting "arcangle" as well,
        i.e. coordinates in degrees instead of a projection. But most
        application code that use ZGY will not support this.
        The old Python wrapper for ZGY-Public is stringly typed.
        Instead of returning a real enum it returns the name.
        """
        return _map_HorizontalDimensionToUnitDimension(self._meta._ih._hdim)

    @property
    def zunitname(self):
        """
        Unit in the vertical direction. E.g. "ms", "m", or "ft".
        Note that Petrel might ignore this settings and instead
        prompt the user to state what the unit should be.
        """
        return self._meta._ih._vunitname

    @property
    def hunitname(self):
        """
        Unit in the horizontal direction. E.g. "m" or "ft".
        """
        return self._meta._ih._hunitname

    @property
    def zunitfactor(self):
        """
        Factor to multiply stored vertical values with to get SI units.
        E.g. 0.001 for ms, 1.0 for m or 0.3048 for ft.
        """
        return self._meta._ih._vunitfactor

    @property
    def hunitfactor(self):
        """
        Factor to multiply stored horizontal values with to get SI units.
        """
        return self._meta._ih._hunitfactor

    @property
    def zstart(self):
        """
        Distance from surface/MSL to first sample, given in the vertical unit.
        """
        return self._meta._ih._orig[2]

    @property
    def zinc(self):
        """
        Sample interval, given in the vertical unit.
        """
        return self._meta._ih._inc[2]

    @property
    def annotstart(self):
        """
        First inline and crossline numbers.
        """
        return self._meta._ih._orig[0:2]

    @property
    def annotinc(self):
        """
        Inline and crossline number increments between adjacent
        sections of the cube.
        """
        return self._meta._ih._inc[0:2]

    @property
    def corners(self):
        """
        World XY coordinates of each of the 4 corners.
        The same coordinates in ordinal numbers are
        ((0, 0), (Size[0]-1, 0), (0, Size[1]-1), (Size[0]-1, Size[1]-1))
        """
        return self._meta._ih._ocp_world

    @property
    def indexcorners(self):
        """
        Redundant with Size.
        Ordinal coordinates of each of the 4 corners, ordered as "corners".
        """
        return self._meta._ih._ocp_index

    @property
    def annotcorners(self):
        """
        Redundant with Start, Inc, Size.
        Annotation coordinates of each of the 4 corners, ordered as HCorners.
        """
        return self._meta._ih._ocp_annot

    @property
    def bricksize(self):
        """
        Size of a brick. Should always be (64, 64, 64).
        """
        return self._meta._ih._bricksize

    @property
    def brickcount(self):
        """
        Number of bricks (including empties) ordered by [lod][dimension].
        """
        return self._meta._ih._lodsizes

    @property
    def nlods(self):
        """
        Number of level-of-detail layers, including lod 0 a.k.a. full resolution.
        """
        return self._meta._ih._nlods

    @property
    def meta(self):
        """
        A dictionary of all the meta information, which can
        later be passed as **kwargs to the ZgyWriter constructor.
        Note that "indexcorners", "annotcorners", "brickcount", and "nlods"
        are all derived properties that will never be settable.
        "numthreads" is a property of the implementation, not the file.
        """
        return {
            "size":        self.size,
            "bricksize":   self.bricksize,
            "datatype":    self.datatype,
            "datarange":   self.datarange,
            "zunitdim":    self.zunitdim,
            "hunitdim":    self.hunitdim,
            "zunitname":   self.zunitname,
            "hunitname":   self.hunitname,
            "zunitfactor": self.zunitfactor,
            "hunitfactor": self.hunitfactor,
            "zstart":      self.zstart,
            "zinc":        self.zinc,
            "annotstart":  self.annotstart,
            "annotinc":    self.annotinc,
            "corners":     self.corners,
        }

    @property
    def numthreads(self):
        """
        How many threads to use when reading. Currently ignored.
        """
        return 1
    @numthreads.setter
    def numthreads(self, x):
        print("Warning: numthreads is ignored.")

    def dump(self, file=None):
        """Pretty-print all meta information to the given file (default stdout)."""
        file = file or sys.stdout
        print("{", file=file)
        for e in sorted(self.meta.items()):
            value = '"'+e[1]+'"' if isinstance(e[1], str) else str(e[1])
            print('  "{0}": {1},'.format(e[0], value), file=file)
        print("}", file=file)

    ### New in OpenZGY ###
    _statisticsType = namedtuple("Statistics", "cnt sum ssq min max")
    @property
    def statistics(self):
        """
        Return the statistics stored in the file header as a named tuple.
        NOTE, I might want to change this to another type if there is a
        need to implement the same method in the ZGY-Public wrapper,
        as it might be trickier to define a namedtuple there.
        """
        return self._statisticsType(self._meta._ih._scnt,
                                    self._meta._ih._ssum,
                                    self._meta._ih._sssq,
                                    self._meta._ih._smin,
                                    self._meta._ih._smax)

    _histogramType = namedtuple("Histogram", "cnt min max bin")
    @property
    def histogram(self):
        """
        Return the histogram stored in the file header as a named tuple,
        or None if the file has no histogram header.
        NOTE, I might want to change this to another type if there is a
        need to implement the same method in the ZGY-Public wrapper,
        as it might be trickier to define a namedtuple there.
        """
        if not self._meta._hh: return None
        return self._histogramType(self._meta._hh._cnt,
                                   self._meta._hh._min,
                                   self._meta._hh._max,
                                   self._meta._hh._bin)

##@brief Base class shared between ZgyReader and ZgyWriter.
class ZgyMetaAndTools(ZgyMeta):
    """
    Base class shared between ZgyReader and ZgyWriter.
    Adds coordinate conversion tools.
    """

    @staticmethod
    def transform(A, B, data):
        """
        Linear transformation of an array of double-precision coordinates.
        The coordinate systems to convert between are defined by
        three arbitrary points in the source system and the target.
        Arguments: ((ax0,ay0), (ax1,ay1), (ax2,ay2)),
                   ((bx0,by0), (bx1,by1), (bx2,by2)),
                   data
        where data is a 2d array of size (length, 2).
        The data array is converted in place.
        """
        # performance note: In Python it can be far more efficient to
        # build and cache 6 transformation matrices between index/annot/world
        # and use those for the 6 transforms. But if we only transform
        # a few values at a time anyway, or if we are planning to convert
        # the accessor back to C++ fairly soon, this is a non-issue.
        _internal.generalTransform(
            A[0][0], A[0][1], A[1][0], A[1][1], A[2][0], A[2][1],
            B[0][0], B[0][1], B[1][0], B[1][1], B[2][0], B[2][1],
            data)

    @staticmethod
    def transform1(A, B, point):
        """As transform(), but converting a single (x, y) point.
        Returns the converted point as a tuple."""
        data = [[point[0], point[1]]]
        ZgyMetaAndTools.transform(A, B, data)
        return tuple(data[0])

    def annotToIndex(self, point):
        """Convert inline, crossline to ordinal"""
        return self.transform1(self.annotcorners, self.indexcorners, point)

    def annotToWorld(self, point):
        """Convert inline, crossline to world X,Y"""
        return self.transform1(self.annotcorners, self.corners, point)

    def indexToAnnot(self, point):
        """Convert ordinal to inline, crossline"""
        return self.transform1(self.indexcorners, self.annotcorners, point)

    def indexToWorld(self, point):
        """Convert ordinal to world X,Y"""
        return self.transform1(self.indexcorners, self.corners, point)

    def worldToAnnot(self, point):
        """Convert world X,Y to inline, crossline"""
        return self.transform1(self.corners, self.annotcorners, point)

    def worldToIndex(self, point):
        """Convert world X,Y to ordinal"""
        return self.transform1(self.corners, self.indexcorners, point)

##@brief Main entry point for reading ZGY files.
class ZgyReader(ZgyMetaAndTools):
    """
    Main entry point for reading ZGY files.

    Obtain a concrete instance by calling the constructor.
    You can then use the instance to read both meta data and bulk data.
    It is recommended to explicitly close the file when done with it.
    """
    def __init__(self, filename, *, _update=False, iocontext=None):
        # No "with" statement for the FileFactory, so we must remember
        # to close it ourselves in our own __exit__.
        self._fd = _internal.FileFactory(filename, ("r+b" if _update else "rb"), iocontext)
        self._meta = _internal.ZgyInternalMeta(self._fd)
        # self._meta._ih and friends will all be allocated.
        # Prove that all the tests for "._ih is not None" are redundant.
        self._meta._assert_all_headers_allocated()
        # At the implementation level the bulk- and meta access are separate,
        # and the bulk accessor needs some of the meta information to work.
        self._accessor = _internal.ZgyInternalBulk(self._fd, self._meta)
        # This causes an assignment to the parent's self._meta
        # which in Python is a no-op but in C++ the parent might
        # have its own _meta that we shadow here. Or not.
        super().__init__(self._meta)

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        # Note that if the block was exited due to an exception, and if we
        # also get an exception from close, then it is the second one
        # that gets caught in a try/catch block placed outside the "while".
        # Callers will in this case often want to report just the first
        # exception since the close() probably failed as a cause of it.
        # Caller needs to do e.g. "ex2 = ex.__cause__ or ex.__context__".
        # It is possible to instead suppress any errors from close if
        # another exception is already pending. Simplifying the caller.
        # But then the close() would not be available at all. Bad idea.
        self.close()

    ##@brief Read bulk data into a caller specified buffer.
    def read(self, start, data, *, lod = 0, verbose = None):
        """
        Read an arbitrary region of bulk data into a caller specified buffer.

        The buffer's type must be float, short, or char. Any file may
        be read as float. If the buffer is of type short or char then
        the file must be of precisely that type.

        Arguments: (i0,j0,k0), buffer, lod=0

        Raises ZgyUserError if the buffer has the wrong type or shape,
        or if the file is not open for read.
        """
        file_dtype = _internal.enum._map_DataTypeToNumpyType(self._meta._ih._datatype)
        if (not isinstance(data, np.ndarray) or
            data.dtype not in (np.float32, file_dtype) or
            len(data.shape) != 3 or
            not data.flags.writeable
        ):
            raise ZgyUserError("Expected a writeable 3d numpy array of np.{0} or np.{1}".format(
                np.dtype(np.float32).name, np.dtype(file_dtype).name))
        if not self._accessor: raise ZgyUserError("ZGY file is not open for read.")
        self._accessor.readToExistingBuffer(data, start, lod=lod,
                                            as_float=(data.dtype==np.float32),
                                            verbose=verbose)

    ##@brief Get hint about all constant region.
    ##@image html readconst-fig1.png
    ##@image latex readconst-fig1.png
    def readconst(self, start, size, *, lod = 0, as_float = True, verbose = None):
        """
        Get hint about all constant region.

        Check to see if the specified region is known to have all samples
        set to the same value. Returns that value, or None if it isn't.

        The function only makes inexpensive checks so it might return
        None even if the region was in fact constant. It will not make
        the opposite mistake. This method is only intended as a hint
        to improve performance.

        For int8 and int16 files the caller may specify whether to scale
        the values or not.
        """
        if not self._accessor: raise ZgyUserError("ZGY file not open for read")
        return self._accessor.readConstantValue(start, size, lod=lod,
                                                as_float=as_float,
                                                verbose=verbose)

    def close(self):
        """Close the underlying file descriptor and release the bulk accessor."""
        if self._fd:
            self._fd.xx_close()
            self._fd = None
        if self._accessor:
            self._accessor = None # No other cleanup needed.
        # Metadata remains accessible. Not sure whether this is a good idea.


##@brief Main API for creating ZGY files.
class ZgyWriter(ZgyMetaAndTools):
    """
    Main API for creating ZGY files.

    Obtain a concrete instance by calling the constructor.
    All meta data is specified in the call to open(), so meta data
    will appear to be read only. You can use the instance to write
    bulk data. The file becomes read only once the instance is closed.

    It is recommended to call finalize() and close() after all bulk
    has been written. But if you forget then this will be done when
    the writer goes out of scope, provided of course that you used a
    "with" block.
    """
    def __init__(self, filename, *,
                 iocontext=None, compressor=None, lodcompressor=None, **kwargs):
        """
        Create a new ZGY file.

        Optionally pass templatename = otherfile to create a new file
        similar to otherfile. Explicit keyword arguments override
        information from otherfile.

        Optionally pass templatename = filename to erase all data blocks
        from filename but keep the metadata. New data blocks can then
        be written to the file. Petrel/BASE might need this feature,
        due to the way it writes new files. They tend to get opened
        several times to add meta information. Caveat: Behind the
        scenes the file is simply deleted and re-created. This is
        slightly less efficient than opening the file for read/write.

        templatename: string
            Optionally create a file similar to this one.
            TODO-Low: In the future might also accept a ZgyReader instance.
            This is convenient if a file is being copied, so as to not
            needing to open it twice.

        filename: string
            The local or cloud file to create.

        size: (int, int, int)
            Number of inlines, crosslines, samples.

        bricksize: (int, int, int)
            Size of a single brick. Defaults to (64, 64, 64).
            Please use the default unless you really know what
            you are doing. In any case, each size needs to be
            a power of 2.

        datatype: SampleDataType
            Specify int8, int16, or float.

        datarange = (float, float)
            Used only if datatype is integral, to convert from storage to
            actual sample values. The lowest possible storage value, i.e.
            -128 or -32768, maps to datarange[0] and the highest possible
            storage value maps to datarange[1].

        zunitdim:    UnitDimension. time, length, or unknown.
        zunitname:   string
        zunitfactor: float
            Describe how to convert between storage units and SI units
            in the vertical direction. Petrel ignores these settings and
            prompts the user.

        hunitdim:    UnitDimension. length, arcangle, or unknown.
        hunitname:   string
        hunitfactor: float
            Describe how to convert between storage units and SI units
            in the horizontal direction. Most applications cannot handle
            arcangle. Petrel ignores these settings and prompts the user.

        zstart: float
            The time or depth corresponding to the shallowest sample.

        zinc: float
            The vertical time or depth distance between neighboring samples.

        annotstart: (float, float)
            The inline and crossline numbers corresponding to the ordinal
            position (0, 0) i.e. the first sample on the file.

        annotinc: (float, float)
            The inline / crossline step between neighboring samples.
            The samples at ordinal (1, 1) will have annotation
            annotstart + annotinc.

        corners: (float, float)[4]
            World coordinates of each corner, order as
                First inline / first crossline,
                last inline / first crossline,
                first inline / last crossline,
                last inline / last crossline.

        compressor, lodcompressor: callable
            If set, attempt to compress each block with this callable.
            Typically this should be a lambda or a class, because it
            needs to capture the compression parameters.
            Example:
                compressor = ZgyCompressFactory("ZFP", snr = 30)
            If different compression parameters are desired for
            full- and low resolution bricks then lodcompressor can
            be provided as well. It defaults to compressor. Using
            two different instances, even if the parameters match,
            may also cause statistics to be reported separately
            for fullres and lowres.
            TODO-Low: Future: passing zfp_compressor = snr is equivalent
            to compressor = ZgyCompressFactory("ZFP", snr = snr).
            Unlike the compressor keyword this also works in the wrapper.
        """
        # The following arguments are not passed on to _create:
        #   -  templatename is handled locally, using the template for defaults.
        #   -  iocontext is only made available to the FileADT layer
        #   -  compressor is explicitly passed as an argument to those
        #      functions (write, writeconst, finalize) that need it.

        if "templatename" in kwargs:
            with ZgyReader(kwargs["templatename"]) as t:
                for k, v in t.meta.items():
                    if k not in kwargs:
                        kwargs[k] = v
            del kwargs["templatename"]

        # Compressing a file as integer is not useful, it just adds more noise
        # at least as long as we are talking about seismic. Even if we allowed
        # int8 or int16 here the code in impl_zfp_compress will currently
        # use the ZFP float interface.
        #
        # Flagging as int8 or int16 would save memory at the cost of adding
        # even more noise. Converting the decompressed data to integral before
        # returning it as that type. But if the applicaton wants this then
        # it can easily convert the result itself.
        #
        # There are other subtle issues with int8 / int16. Even with enabled
        # compression, individual bricks are allowed to be stored uncompressed
        # if neither lossy nor lossless compression works as desired. The
        # declared value type then controls how these blocks are stored.
        # Storing those (presumably very few) blocks as integral means
        # more that can go wrong and more unit  tests. And keep in mind
        # that float->int8 and float->int16 are also a form of compression
        # (4x and 2x respectively) but is a lot noisier than ZFP at the same
        # compression factor.
        #
        # We may need to revisit this if switching to another compression
        # algorithm where integral compression works better.
        if compressor and kwargs.get("datatype", SampleDataType.float) != SampleDataType.float:
            raise ZgyUserError("Compressed files need to be stored as float.")

        # After this, self._meta._ih and friends exists but will be None.
        self._meta = _internal.ZgyInternalMeta(None)
        # This causes an assignment to the parent's self._meta
        # which in Python is a no-op but in C++ the parent might
        # have its own _meta that we shadow here. Or not.
        super().__init__(self._meta)
        self._create(filename, compressed = bool(compressor or lodcompressor), **kwargs)
        # Now self._meta._ih and friends will all be allocated.
        # Prove that all the tests for "._ih is not None" are redundant.
        self._meta._assert_all_headers_allocated()
        # The file creation was deferred until after the consistency checks.
        # No "with" statement for the FileFactory, so we must remember
        # to close it ourself in our own __exit__.
        self._fd = _internal.FileFactory(filename, "w+b", iocontext)
        self._meta._flush_meta(self._fd)
        # The accessor needs to know whether we will do compression or not,
        # because this determines whether bricks will be written aligned
        # and possibly whether updates are allowed. The actual
        # compression algorithm is passed on each write etc.
        # TODO-Low consider storing the compressor as context of the accessor
        # instead. Less precise control though. We might want a different
        # snr on the low resolution bricks.
        self._accessor = _internal.ZgyInternalBulk(
            self._fd, self._meta,
            compressed = bool(compressor or lodcompressor))
        self._dirty = False # If True need LOD, stats, histogram.
        self._compressor = compressor or lodcompressor
        self._lodcompressor = lodcompressor or compressor

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        # Note that if the block was exited due to an exception, and if we
        # also get an exception from close, then it is the second one
        # that gets caught in a try/catch block placed outside the "while".
        # Callers will in this case often want to report just the first
        # exception since the close() probably failed as a cause of it.
        # Caller needs to do e.g. "ex2 = ex.__cause__ or ex.__context__".
        # This is mitigated by the close() method skipping work and/or
        # suppressing exceptions if the file has already been flagged bad.
        self.close()

785
    def _create(self, filename, *, size = None, compressed = False,
                bricksize = None,
                datatype = SampleDataType.float, datarange = None,
                zunitdim = UnitDimension.unknown,
                hunitdim = UnitDimension.unknown,
                zunitname = None, hunitname = None,
                zunitfactor = 0.0, hunitfactor = 0.0, zstart = 0.0, zinc = 0.0,
                annotstart = (0, 0), annotinc = (0, 0),
                corners = ((0,0),(0,0),(0,0),(0,0))):
        """
        Called from __init__. Do not call directly.
        The user should use a "using" statement when creating the reader.
        Datatype can be "int8", "int16", "float".
        Dimension can be "time", "length", or "arcangle".
        """
        # Translate the public enums (SampleDataType, UnitDimension) to
        # their internal file-format counterparts and forward everything
        # to the metadata layer.
        self._meta._init_from_scratch(filename = filename,
                                size = size,
                                compressed = compressed, # If True this is V4.
                                bricksize = bricksize,
                                datatype = _map_SampleDataTypeToDataType(datatype),
                                datarange = datarange,
                                zunitdim = _map_UnitDimensionToVerticalDimension(zunitdim),
                                hunitdim = _map_UnitDimensionToHorizontalDimension(hunitdim),
                                zunitname = zunitname,
                                hunitname = hunitname,
                                zunitfactor = zunitfactor,
                                hunitfactor = hunitfactor,
                                zstart = zstart,
                                zinc = zinc,
                                annotstart = annotstart,
                                annotinc = annotinc,
                                corners = corners)

    ##@brief Write an arbitrary region.
    def write(self, start, data, *, verbose = None):
        """
        Write bulk data. Type must be np.float32, np.int16, or np.int8.
        np.float32 may be written to any file and will be converted
        if needed before storing. Writing an integral type implies that
        values are to be written without conversion. In that case the
        type of the buffer must match exactly the file's storage type.
        You cannot write int8 data to an int16 file or vice versa.

        Arguments:
            start: tuple(i0,j0,k0) where to start writing.
            data: np.ndarray of np.int8, np.int16, or np.float32
        """
        file_dtype = _internal.enum._map_DataTypeToNumpyType(self._meta._ih._datatype)
        buffer_ok = (isinstance(data, np.ndarray)
                     and data.dtype in (np.float32, file_dtype)
                     and len(data.shape) == 3)
        if not buffer_ok:
            raise ZgyUserError("Expected a 3d numpy array of np.{0} or np.{1}".format(
                np.dtype(np.float32).name, np.dtype(file_dtype).name))
        if not self._accessor:
            raise ZgyUserError("ZGY file is not open.")
        if self._accessor._is_bad or self._meta._is_bad:
            raise ZgyCorruptedFile("Cannot continue due to previous errors.")
        # A non-float32 buffer means the caller supplied raw storage values.
        self._accessor._writeRegion(data, start, lod=0,
                                    compressor=self._compressor,
                                    is_storage=(data.dtype != np.float32),
                                    verbose=verbose)
        # finalize() now has lowres/stats/histogram work to redo.
        self._dirty = True

    ##@brief Write a single value to a region of the file.
    def writeconst(self, start, value, size, is_storage, *, verbose = None):
        """
        Write a single value to a region of the file.

        This is equivalent to creating a constant-value array with np.full()
        and write that. But this version might be considerably faster.

        If is_storage is false and the input value cannot be converted to
        storage values due to being outside range after conversion then
        the normal rules (use closest valid value) apply. If
        is_storage is True then an error is raised if the supplied value
        cannot be represented.

        Arguments:
            start:      tuple(i0,j0,k0) where to start writing.
            value:      Scalar to be written.
            size:       tuple(ni,nj,nk) size of region to write.
            is_storage: False if the value shall be converted to storage,
                        True if it is already storage and should be written
                        unchanged. Ignored if the storage type is float.
        """
        filetype = self._meta._ih._datatype
        if filetype == _internal.enum.RawDataType.Float32:
            # "Convert" to user now. Actually a no-op.
            dtype = np.float32
            is_storage = False
        elif not is_storage:
            dtype = np.float32 # Force conversion.
        elif filetype == _internal.enum.RawDataType.SignedInt16:
            dtype = np.int16
        elif filetype == _internal.enum.RawDataType.SignedInt8:
            dtype = np.int8
        else:
            raise ZgyFormatError("Unrecognized datatype on file")
        # NaN / Inf cannot be represented in an integral storage type.
        if np.issubdtype(dtype, np.integer) and not np.isfinite(value):
            raise ZgyUserError("Cannot store {0} in a {1}".format(value, np.dtype(dtype)))
        self._accessor._writeRegion(_internal.ScalarBuffer(size, value, dtype),
                                    start, lod=0,
                                    compressor=self._compressor,
                                    is_storage=is_storage,
                                    verbose=verbose)
        # finalize() now has lowres/stats/histogram work to redo.
        self._dirty = True

    def finalize(self, *, decimation=None, progress=None, force=False, verbose=None):
        """
        Generate low resolution data, statistics, and histogram.
        This will be called automatically from close(), but in
        that case it is not possible to request a progress callback.

        If the processing raises an exception the data is still marked
        as clean. The caller can force a retry by passing force=True.

        Arguments:
            decimation: Optionally override the decimation algorithms by
                        passing an array of impl.lodalgo.DecimationType
                        with one entry for each level of detail. If the
                        array is too short then the last entry is used for
                        subsequent levels.
                        TODO-Low: The expected enum type is technically internal
                        and ought to have been mapped to an enum api.XXX.
            progress:   Function(done, total) called to report progress.
                        If it returns False the computation is aborted.
                        Will be called at least once, even if there is no work.
            force:      If true, generate the low resolution data even if
                        it appears to not be needed. Use with caution.
                        Especially if writing to the cloud, where data
                        should only be written once.
            verbose:    optional function to print diagnostic information.
        """
        if self._dirty or force:
            # Clear the flag up front: a failed attempt is not retried
            # automatically (see docstring above).
            self._dirty = False
            stats, histo = _internal.GenLodC(
                accessor   = self._accessor,
                compressor = self._lodcompressor,
                decimation = decimation,
                progress   = progress,
                verbose    = verbose)()
            # TODO-Low: Refactor:
            # violating encapsulation rather more than usual.
            # Note that _accessor._metadata is private; it is a copy
            # or actually a reference letting ZgyInternalBulk use
            # some parts of the metadata.
            # Statistics and histogram were collected in storage values;
            # rescale them to float ("user") values before persisting.
            (a, b) = self._accessor._scaleDataFactorsStorageToFloat()
            stats.scale(a, b)
            histo.scale(a, b)
            histo.resize(256)
            # Copy the results into the info header and histogram header.
            self._meta._ih._scnt = stats._cnt
            self._meta._ih._ssum = stats._sum
            self._meta._ih._sssq = stats._ssq
            self._meta._ih._smin = stats._min
            self._meta._ih._smax = stats._max
            self._meta._hh._min = histo.vv_range[0]
            self._meta._hh._max = histo.vv_range[1]
            self._meta._hh._bin = histo.bins
            self._meta._hh._cnt = np.sum(histo.bins)
        else:
            # Nothing to do; still honor the progress-callback contract.
            if progress:
                progress(0, 0)
        # For debugging and measurements only.
        if False:
            if self._compressor:
                self._compressor.dump(msg="Compress")
            if self._lodcompressor and not self._lodcompressor is self._compressor:
                self._lodcompressor.dump(msg="LOD_data")

        # TODO-Low: Refactor: the accessor should logically have a close(),
        # shouldn't it? And maybe I shouldn't set self._fd here.
        # Allowing the accessor to manage it.

    def close(self):
        """
        Close the currently open file.
        Failure to close the file will corrupt it.
        """
        if self._fd:
            # Both the meta accessor and the bulk accessor has an _is_bad
            # flag that is set if we got an error while writing to the file.
            # If set then don't bother with statistics, histogram, and lowres.
            # The file will probably just be discarded amyway.
            if not (self._meta._is_bad or self._accessor._is_bad):
                self.finalize()

            # Flushing metadata is a bit more ambiguous. Parts of the file
            # might still be salvageable, so especially when testing we might
            # still want to flush a file marked as bad. But, ignore any
            # secondary errors as the user already knows something is wrong.
            if not (self._meta._is_bad or self._accessor._is_bad):
                self._meta._flush_meta(self._fd)
            else:
                try:
                    self._meta._is_bad = False
                    self._accessor._is_bad = False
                    self._meta._flush_meta(self._fd)
                except Exception:
                    pass
                finally:
                    self._meta._is_bad = True
                    self._accessor._is_bad = True

            # TODO-Low it would be more consistent if the final write was
            # of the updated headers, in case the app crashes before
            # xx_close. This is true for local file access. But for
            # cloud access the last bulk data segment might be written
            # on xx_close(). Difficult to change without complicating
            # the internal FileADT api.

            # Closing the local or cloud handle is always needed as there
            # might be resources that need to be cleaned up.
            self._fd.xx_close()
            self._fd = None

            # The client code is strongly advised to delete the file if it was
            # opened for create. OpenZGY might have deleted the file itself but
            # this is probably too harsh. A file opened for update (which is not
For faster browsing, not all history is shown. View entire blame