// Copyright 2017-2020, Schlumberger
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
 * \file iocontext.h
 * \brief Backend specific context.
 *
 * Class IOContext and derivatives are used to hold backend specific
 * information such as authorization tokens.
 */

// This header defines classes, so it must be safe to include it more
// than once in the same translation unit.
#pragma once

// Project headers. OPENZGY_API and OpenZGY::Errors::ZgyUserError are
// used below; presumably they come from these two -- TODO confirm.
#include "declspec.h"
#include "exception.h"

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

// Forward declarations only. These classes are named as friends of
// SeismicStoreIOContext and are defined elsewhere.
namespace Test { class TestIOContext; }

namespace InternalZGY {
  class SeismicStoreFile;
  class SeismicStoreFileDelayedWrite;
}

namespace OpenZGY {
#if 0
}
#endif

/**
 * \brief Base class for backend specific context.
 *
 * Abstract: toString() is pure virtual, so only derived classes
 * can be instantiated.
 *
 * \details Thread safety:
 * Modification may lead to a data race. This should not be an issue,
 * because instances are only meant to be modified when created or
 * copied or assigned prior to being made available to others.
 */
class OPENZGY_API IOContext
{
public:
  /**
   * Virtual so that a derived context can be destroyed through a
   * pointer or reference to this base class. Only declared here;
   * the definition is not visible in this header.
   */
  virtual ~IOContext();
  /** Display the context in a human readable format for debugging. */
  virtual std::string toString() const = 0;
};

/** \cond SSTORE */

/**
 * \brief Credentials and configuration for Seismic Store.
 *
 * Define an iocontext for seismic store, doing consistency checks
 * and applying fallbacks from environment variables and hard coded
 * defaults.
 *
 * TODO-Low: Still undecided whether I should allow SeismicStoreFile
 * to use this class directly or whether I should map the contents to
 * an internal SDConfig class.
 *
 * TODO-Low: Move this class to a separate extensions/seismic_store.h
 * to be included by applications if and only if they need that access.
 *
 * Thread safety:
 * Modification may lead to a data race. This should not be an issue,
 * because instances are only meant to be modified when created or
 * copied or assigned prior to being made available to others.
 */
class OPENZGY_API SeismicStoreIOContext : public IOContext
{
  // Needed because there are no public accessors. Because I am lazy.
  friend class Test::TestIOContext;
  friend class InternalZGY::SeismicStoreFile;
  friend class InternalZGY::SeismicStoreFileDelayedWrite;

public:
  /** Callback taking no arguments and returning a token as a string. */
  using tokencb_t = std::function<std::string()>;
  /**
   * Debug hook receiving a string, three 64-bit integers and a list of
   * 64-bit integers. The meaning of each argument is not stated here.
   */
  using debugtrace_t = std::function<void(const std::string&,
                                          std::int64_t,
                                          std::int64_t,
                                          std::int64_t,
                                          const std::vector<std::int64_t>&)>;
  /** Display the context in a human readable format for debugging. */
  virtual std::string toString() const override;
  /**
   * Default constructor; only declared in this header. The setters
   * document fallbacks to environment variables and hard coded
   * defaults; presumably those are applied here -- TODO confirm
   * against the implementation.
   */
  SeismicStoreIOContext();

private:
  // Settings written by the chainable setters of this class. There are
  // no public getters; the classes declared as friends access these
  // fields directly. Declaration order is kept as-is because it
  // determines the object layout.
  std::string  _sdurl;          ///< Where to contact the service, see sdurl().
  std::string  _sdapikey;       ///< See sdapikey().
  std::string  _sdtoken;        ///< Set by sdtoken(); cleared by sdtokencb().
  std::string  _sdtokentype;    ///< Type passed to sdtoken() or sdtokencb().
  std::int64_t _maxsize;        ///< In bytes; maxsize() converts from MB.
  std::int64_t _maxhole;        ///< In bytes; maxhole() converts from MB.
  std::int64_t _aligned;        ///< In bytes; aligned() converts from MB.
  std::int64_t _segsize;        ///< In bytes; segsize() converts from MB.
  std::int64_t _segsplit;       ///< Stored as passed to segsplit().
  std::int64_t _threads;        ///< Stored as passed to threads().
  std::string  _legaltag;       ///< See legaltag().
  std::string  _writeid;        ///< See writeid().
  std::string  _seismicmeta;    ///< See seismicmeta().
  std::string  _sdtoken_cbtype; ///< Not written by any setter in this header.
  tokencb_t    _sdtokencb;      ///< Set by sdtokencb(); cleared by sdtoken().
  debugtrace_t _debug_trace;    ///< See debug_trace().

public:
  /**
   * Where to contact the seismic store service.
   *
   * Defaults to $OPENZGY_SDURL. There is no hard coded fallback in
   * case that variable isn't found either. This is to mitigate the
   * risk of code that stops working in the PROD environment.
   *
   * \param value Address of the service.
   * \return *this, so calls can be chained.
   */
  SeismicStoreIOContext& sdurl(const std::string& value) {
    _sdurl = value;
    return *this;
  }

  /**
   * Authorization for the application to access the seismic store API.
   *
   * Defaults to $OPENZGY_SDAPIKEY. There is no hard coded fallback in
   * case that variable isn't found either. This is to mitigate the
   * risk of code that stops working in the PROD environment.
   *
   * \param value The API key.
   * \return *this, so calls can be chained.
   */
  SeismicStoreIOContext& sdapikey(const std::string& value) {
    _sdapikey = value;
    return *this;
  }

  /**
   * Provide the SAuth token used to validate requests to seismic store.
   * The token is associated with the open file and cannot be changed.
   * Currently the token will (usually) not be refreshed automatically.
   * If you need that, or need more detailed control, use sdtokencb()
   * instead of sdtoken(). Calling sdtoken() discards any callback
   * registered earlier with sdtokencb().
   *
   * If neither sdtoken() nor sdtokencb() is called then the environment
   * variable "OPENZGY_TOKEN" is tried. Older versions of the accessor
   * had a final fallback that would try to pick up sdutil's saved
   * credentials, but that is now considered too insecure. You can set
   * OPENZGY_TOKEN=FILE:carbon.slbapp.com if you want that behavior.
   *
   * \param value The token.
   * \param type  Token type. Currently ignored, but may in the future
   *              be specified as e.g. "stoken" (normal), "imptoken"
   *              (impersonation), etc.
   * \return *this, so calls can be chained.
   */
  SeismicStoreIOContext& sdtoken(const std::string& value, const std::string& type)
  {
    _sdtoken = value;
    _sdtokentype = type;
    // Forget any callback registered earlier; the fixed token replaces it.
    _sdtokencb = tokencb_t{};
    return *this;
  }

  /**
   * Register a callback that will be invoked each time an access token
   * is needed for seismic store. This is an alternative to sdtoken();
   * you don't need both, and calling this function clears any token
   * set earlier with sdtoken(). The callback might be called very
   * frequently so it should cache the token. The callback should
   * ensure that the token is not about to expire, and refresh it if
   * needed.
   *
   * The callback implementation should not invoke any OpenZGY methods
   * except for any calls expressly documented as safe and deadlock-free.
   * The callback implementation must also be thread safe. Set a lock
   * if needed. Finally, the callback needs to be legal to call at least
   * until close() is called on the reader or writer instance and
   * preferably also until the instance goes out of scope.
   *
   * The library will try to close the reader or writer instance if the
   * application fails to do so before the instance goes out of scope,
   * e.g. so any locks may be reset. The library *tries* to avoid
   * invoking the callback in that situation, on the suspicion that it
   * might have gone out of scope. But the application code should not
   * rely on that.
   *
   * \param value The callback that returns the token.
   * \param type  Token type, with the same meaning as in sdtoken().
   *              Currently unused. The callback itself does not provide
   *              the token type, so passing it here is a promise that
   *              whenever the callback returns a token it will be of
   *              this type.
   * \return *this, so calls can be chained.
   */
  SeismicStoreIOContext& sdtokencb(const tokencb_t& value, const std::string& type)
  {
    _sdtokencb = value;
    _sdtokentype = type;
    // Forget any fixed token set earlier; the callback replaces it.
    _sdtoken.clear();
    return *this;
  }

  /**
   * Maximum size of consolidated requests, in MB. Must be
   * between 0 and 1024. Zero is taken to mean do not consolidate.
   *
   * Tells the reader to try to consolidate neighboring bricks
   * when reading from seismic store. This is usually possible
   * when the application requests full traces, or at least
   * traces longer than 64 samples. Setting maxsize limits this
   * consolidation to the specified size. The assumption is that
   * for really large blocks the per-block overhead becomes
   * insignificant compared to the transfer time.
   *
   * Consolidating requests has higher priority than using
   * multiple threads. So, capping maxsize might allow more
   * data to be read in parallel.
   *
   * Note that currently the splitting isn't really smart. With a
   * 64 MB limit and 65 contiguous 1 MB buffers it might end up
   * reading 64+1 MB instead of e.g. 32+33 MB.
   *
   * Note that the low level reader should not assume that
   * requests are capped at this size. They might be larger
   * e.g. when reading the header information.
   *
   * Defaults to $OPENZGY_MAXSIZE_MB if not specified, or 2 MB.
   *
   * \param value Limit in MB; stored internally in bytes.
   * \return *this, so calls can be chained.
   * \throws OpenZGY::Errors::ZgyUserError if value is outside 0..1024.
   */
  SeismicStoreIOContext& maxsize(int value)
  {
    const bool acceptable = (0 <= value) && (value <= 1024);
    if (!acceptable)
      throw OpenZGY::Errors::ZgyUserError("maxsize must be between 0 and 1024 MB");
    // Convert MB to bytes using 64-bit arithmetic.
    const std::int64_t bytes_per_mb = 1024 * 1024;
    _maxsize = static_cast<std::int64_t>(value) * bytes_per_mb;
    return *this;
  }

  /**
   * Maximum size to waste, in MB. Must be between 0 and 1024.
   *
   * This applies when consolidating neighboring bricks when
   * reading from seismic store. Setting maxhole > 0 tells the
   * reader that it is ok to also consolidate requests that are
   * almost neighbors, with a gap up to and including maxhole.
   * The data read from the gap will be discarded unless picked
   * up by some (not yet implemented) cache.
   *
   * For cloud access with high bandwidth (cloud-to-cloud) this
   * should be at least 2 MB because smaller blocks will take
   * just as long to read. For low bandwidth cloud access
   * (cloud-to-on-prem) it should be less. If a fancy cache
   * is implemented it should be more. For accessing on-prem
   * ZGY files it probably makes no difference.
   *
   * Defaults to $OPENZGY_MAXHOLE_MB if not specified, or 2 MB.
   *
   * \param value Largest acceptable gap in MB; stored internally in bytes.
   * \return *this, so calls can be chained.
   * \throws OpenZGY::Errors::ZgyUserError if value is outside 0..1024.
   */
  SeismicStoreIOContext& maxhole(int value)
  {
    const bool acceptable = (0 <= value) && (value <= 1024);
    if (!acceptable)
      throw OpenZGY::Errors::ZgyUserError("maxhole must be between 0 and 1024 MB");
    // Convert MB to bytes using 64-bit arithmetic.
    const std::int64_t bytes_per_mb = 1024 * 1024;
    _maxhole = static_cast<std::int64_t>(value) * bytes_per_mb;
    return *this;
  }

  /**
   * File alignment, in MB. Must be between 0 and 1024.
   *
   * This is similar to the maxhole parameter. If set, starting
   * and ending offsets are extended so they both align to the
   * specified value. Set this parameter if the lower levels
   * implement a cache with a fixed blocksize and when there is
   * an assumption that most reads will be aligned anyway.
   *
   * TODO-Worry: Handling reads past EOF may become a challenge
   * for the implementation.
   *
   * Defaults to $OPENZGY_ALIGNED_MB if not specified, or zero.
   *
   * \param value Alignment in MB; stored internally in bytes.
   * \return *this, so calls can be chained.
   * \throws OpenZGY::Errors::ZgyUserError if value is outside 0..1024.
   */
  SeismicStoreIOContext& aligned(int value)
  {
    const bool acceptable = (0 <= value) && (value <= 1024);
    if (!acceptable)
      throw OpenZGY::Errors::ZgyUserError("aligned must be between 0 and 1024 MB");
    // Convert MB to bytes using 64-bit arithmetic.
    const std::int64_t bytes_per_mb = 1024 * 1024;
    _aligned = static_cast<std::int64_t>(value) * bytes_per_mb;
    return *this;
  }

  /**
   * Segment size used when writing, in MB. Defaults to
   * $OPENZGY_SEGSIZE_MB if not specified, or 1024 (i.e. 1 GB).
   * The default should work fine in almost all cases.
   *
   * The range check accepts 0 up to and including 16*1024
   * (i.e. 16 GB).
   * NOTE(review): this comment previously gave the minimum as 1,
   * while both the check and the error message use 0. Confirm
   * whether 0 is a deliberate special value or should be rejected.
   *
   * \param value Segment size in MB; stored internally in bytes.
   * \return *this, so calls can be chained.
   * \throws OpenZGY::Errors::ZgyUserError if value is outside 0..16*1024.
   */
  SeismicStoreIOContext& segsize(int value)
  {
    if (value < 0 || value > 16*1024)
      throw OpenZGY::Errors::ZgyUserError("segsize must be between 0 and 16*1024 MB");
    // Widen to 64 bits before multiplying: 16*1024 MB expressed in
    // bytes does not fit in 32 bits.
    _segsize = static_cast<std::int64_t>(value) * (1024*1024);
    return *this;
  }

  /**
   * Maximum number of threads to be used when writing data to the cloud.
   * Must be between 1 and 1024. Default is 8. Set to 1 if you don't
   * want multithreaded uploads.
   *
   * The value of segsplit must divide evenly into segsize. That
   * requirement is not checked by this setter.
   *
   * Multi-threading is achieved by splitting up the segsize write buffer
   * into "segsplit" SDAPI objects. Hence the name. The splitting can
   * be observed directly by the caller because it determines the segment
   * size seen when reading. So the "split" effect is more important
   * than the "maybe-threaded" effect.
   *
   * \param value Number of parts to split each segment into.
   * \return *this, so calls can be chained.
   * \throws OpenZGY::Errors::ZgyUserError if value is outside 1..1024.
   */
  SeismicStoreIOContext& segsplit(int value)
  {
    // The message must state the same lower bound as the check: zero
    // is rejected.
    if (value < 1 || value > 1024)
      throw OpenZGY::Errors::ZgyUserError("segsplit must be between 1 and 1024");
    _segsplit = value;
    return *this;
  }

  /**
   * Use up to this many parallel requests to seismic store in order
   * to speed up processing. Set between 1 and 1024. This applies to
   * individual reads in the main API, so the reads must be for a
   * large area (i.e. covering many bricks) for the setting to be
   * of any use. Defaults to $OPENZGY_NUMTHREADS if not specified, and
   * to 1 (i.e. no threading) if the environment setting is also missing.
   *
   * Whether it is useful to set the variable depends on the
   * application. Apps such as Petrel/BASE generally do their own
   * multi threading, issuing multiple read requests to the high level
   * API in parallel. In that case it might not be useful to also
   * parallelize individual requests.
   *
   * \param value Maximum number of parallel requests.
   * \return *this, so calls can be chained.
   * \throws OpenZGY::Errors::ZgyUserError if value is outside 1..1024.
   */
  SeismicStoreIOContext& threads(int value)
  {
    const bool acceptable = (1 <= value) && (value <= 1024);
    if (!acceptable)
      throw OpenZGY::Errors::ZgyUserError("threads must be between 1 and 1024");
    _threads = value;
    return *this;
  }

  /**
   * The legaltag stored in the file. Used only on create.
   *
   * \param value The legal tag.
   * \return *this, so calls can be chained.
   */
  SeismicStoreIOContext& legaltag(const std::string& value) {
    _legaltag = value;
    return *this;
  }

  /**
   * If set, re-use this lock instead of creating a new one.
   * Works both for read and write locks; the name reflects
   * what SDAPI calls it.
   *
   * \param value Identifier of the lock to re-use.
   * \return *this, so calls can be chained.
   */
  SeismicStoreIOContext& writeid(const std::string& value) {
    _writeid = value;
    return *this;
  }

  /**
   * A dictionary of additional information to be associated
   * with this dataset in the data ecosystem. Currently used
   * only on create, although SDAPI allows this to be set on
   * an existing file by calling {get,set}SeismicMeta().
   * This setting cannot be set from the environment.
   *
   * \param value The dictionary, passed as a string.
   * \return *this, so calls can be chained.
   */
  SeismicStoreIOContext& seismicmeta(const std::string& value) {
    _seismicmeta = value;
    return *this;
  }

  /**
   * For debugging and unit tests only.
   *
   * Callback to be invoked immediately before a read or write
   * is passed on to seismic store. Typically used to verify
   * that consolidating bricks works as expected. Can only be
   * set programmatically, not by an environment variable.
   *
   * The callback implementation should not invoke any OpenZGY
   * methods except for any calls expressly documented as safe
   * for this purpose and deadlock-free.
   *
   * \param value The callback to install.
   * \return *this, so calls can be chained.
   */
  SeismicStoreIOContext& debug_trace(const debugtrace_t& value) {
    _debug_trace = value;
    return *this;
  }
};

/** \endcond */

}

// namespace