// Copyright 2017-2021, Schlumberger
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "file.h"
#include "environment.h"
17
#include "fancy_timers.h"
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#include "../exception.h"

#include <vector>
#include <string>
#include <memory>
#include <functional>
#include <iostream>
#include <sstream>

using OpenZGY::IOContext;
namespace InternalZGY {
#if 0
}
#endif

/////////////////////////////////////////////////////////////////////////////
//    FileADT (base class)  /////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////

/// Out-of-line destructor. It performs no work of its own.
FileADT::~FileADT() = default;

/**
 * \brief Base class implementation of xx_close(); deliberately a no-op.
 *
 * NOTE(review): presumably concrete back-ends that hold an open
 * resource override this -- confirm against the declaration in file.h.
 */
void
FileADT::xx_close()
{
}

/**
 * \brief Human readable number.
 *
 * Formats a byte count using the largest unit that represents it
 * exactly:
 *   - a whole number of MB, e.g. "3 MB";
 *   - a multiple of a quarter MB, e.g. "42.75 MB" or "0.5 MB";
 *   - a whole number of kB, e.g. "17 kB";
 *   - otherwise the plain count, e.g. "1000 bytes".
 * Zero and negative values always fall through to the "bytes" form.
 *
 * The fractional case is built with integer arithmetic. Formatting a
 * double with std::to_string() would always print six decimals
 * ("42.750000 MB") and would depend on the current C locale.
 *
 * \param n Number of bytes.
 * \return  The formatted text including the unit.
 */
std::string
FileADT::_nice(std::int64_t n)
{
  constexpr std::int64_t kB = 1024;
  constexpr std::int64_t MB = 1024 * kB;
  constexpr std::int64_t quarterMB = MB / 4;

  if (n >= MB && (n % MB) == 0)
    return std::to_string(n / MB) + " MB"; // whole number of MB

  if (n >= quarterMB && (n % quarterMB) == 0) {
    // Whole MB were handled above, so the remainder is 1, 2 or 3
    // quarters here. Index 0 is only a placeholder.
    static const char* const fraction[] = { "", ".25", ".5", ".75" };
    return std::to_string(n / MB) + fraction[(n % MB) / quarterMB] + " MB";
  }

  if (n >= kB && (n % kB) == 0)
    return std::to_string(n / kB) + " kB";

  return std::to_string(n) + " bytes";
}

/**
 * \brief Check the arguments of a single read before doing any I/O.
 *
 * \param data   Destination buffer. Must not be null.
 * \param offset Byte offset to read from. Must not be negative.
 * \param size   Number of bytes to read. Must be at least 1.
 * \param eof    Size of the file; the read must end at or before this.
 * \param mode   How the file was opened. Reading is accepted for
 *               ReadOnly, ReadWrite and Truncate.
 *
 * \throws OpenZGY::Errors::ZgyUserError if the file is not open for
 *         reading, the buffer is null, the offset is negative, or
 *         the size is less than one.
 * \throws OpenZGY::Errors::ZgyEndOfFile if the requested range
 *         extends past \p eof.
 */
void
FileADT::_validate_read(void *data, std::int64_t offset, std::int64_t size, std::int64_t eof, OpenMode mode)
{
  switch (mode) {
  case OpenMode::ReadOnly:
  case OpenMode::ReadWrite:
  case OpenMode::Truncate:
    break;
  case OpenMode::Closed:
  default:
    throw OpenZGY::Errors::ZgyUserError("The file is not open for reading.");
  }

  // These tests are more relevant in the Python version because there
  // it can also do a type check on the arguments.
  if (data == nullptr)
    throw OpenZGY::Errors::ZgyUserError("Trying to read into null buffer.");

  if (offset < 0)
    throw OpenZGY::Errors::ZgyUserError("Trying to read at negative offset.");

  if (size < 1)
    throw OpenZGY::Errors::ZgyUserError("Trying to read zero or negative bytes.");

  // The next one might be an internal error or a corrupted file,
  // but let's report only the immediate error and not try to guess.
  //
  // At this point offset >= 0 and size >= 1. Compare the size with
  // the room left after offset instead of computing offset + size,
  // because the sum can overflow (undefined behavior) when the
  // arguments are garbage. The first test also keeps eof - offset
  // from overflowing or going negative.
  if (offset > eof || size > eof - offset)
    throw OpenZGY::Errors::ZgyEndOfFile("Offset " + _nice(offset) +
                                " size " + _nice(size) +
                                " is past EOF at " + _nice(eof));
}

/**
 * \brief Check the arguments of a write before doing any I/O.
 *
 * \param data   Source buffer. Must not be null.
 * \param offset Byte offset to write to. Must not be negative.
 * \param size   Number of bytes to write. Must be at least 1.
 * \param mode   How the file was opened. Writing is accepted for
 *               ReadWrite and Truncate only.
 *
 * \throws OpenZGY::Errors::ZgyUserError if the file is not open for
 *         write, the offset is negative, the size is less than one,
 *         or the buffer is null.
 */
void
FileADT::_validate_write(const void *data, std::int64_t offset, std::int64_t size, OpenMode mode)
{
  switch (mode) {
  case OpenMode::ReadWrite:
  case OpenMode::Truncate:
    break;
  case OpenMode::Closed:
  case OpenMode::ReadOnly:
  default:
    throw OpenZGY::Errors::ZgyUserError("The file is not open for write.");
  }

  if (offset < 0)
    throw OpenZGY::Errors::ZgyUserError("Trying to write to negative offset.");

  if (size < 1)
    throw OpenZGY::Errors::ZgyUserError("Trying to write zero or negative bytes.");

  // A null buffer used to be reported with the "zero or negative bytes"
  // text above, which pointed at the wrong argument. The exception
  // type is unchanged; only the message differs.
  if (data == nullptr)
    throw OpenZGY::Errors::ZgyUserError("Trying to write from null buffer.");
}

void
FileADT::_validate_readv(const ReadList& requests, std::int64_t eof, OpenMode mode)
{
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
  switch (mode) {
  case OpenMode::ReadOnly:
  case OpenMode::ReadWrite:
  case OpenMode::Truncate:
    break;
  case OpenMode::Closed:
  default:
    throw OpenZGY::Errors::ZgyUserError("The file is not open for reading.");
  }
  for (const ReadRequest& rr : requests) {
    if (rr.offset < 0)
      throw OpenZGY::Errors::ZgyUserError("Trying to read at negative offset.");
    if (rr.size < 1)
      throw OpenZGY::Errors::ZgyUserError("Trying to read zero or negative bytes.");
    if (rr.offset + rr.size > eof)
      throw OpenZGY::Errors::ZgyEndOfFile("Offset " + _nice(rr.offset) +
                                " size " + _nice(rr.size) +
                                " is past EOF at " + _nice(eof));
  }
136
137
}

138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
/**
 * Convenience function to invoke a delivery functor with optional
 * pointer arithmetic, delivering just a part of the buffer.
 *
 * If and when the delivery function is changed to use a smart pointer:
 *
 * Optionally check that the called function did not retain a pointer
 * to the data if it promised not to do that.
 *
 * If the contract about not retaining references is broken then raise
 * an exception. Even if the code happens to have allocate a unique
 * buffer so it doesn't really care. Note: In some cases, e.g. if a
 * proper cache is involved, the functor might end up retaining a
 * pointer aliased to the entire cache. Which would be fatal and would
 * warrant an abort(). The code here will still just throw an
 * exception, though.
 *
 * If the functor states that it needs to retain a pointer then make
 * sure it gets a smart pointer that is aliased to the entire buffer.
157
158
159
160
161
 *
 * Transition aid, TO BE REMOVED.
 * In some places the caller of _deliver() only has an unsafe pointer.
 * Typically because it got that pointer from another delivery, and
 * the delivery functor itself still just provides a dumb pointer.
162
163
 */
void
164
FileADT::_deliver_old(
165
     const ReadRequest::delivery_t& fn,
166
     ReadRequest::data_t data,
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
     std::int64_t offset,
     std::int64_t size,
     bool transient)
{
  if (!data)
    throw OpenZGY::Errors::ZgyInternalError("Attempt to deliver null data");
  if (!fn)
    return; // Caller doesn't need the data. This is ok.
  if (offset == 0) {
    fn(data, size);
  }
  else {
    auto dumb_ptr  = static_cast<const char*>(data) + offset;
    fn(dumb_ptr, size);
  }
}

184
185
186
187
188
189
190
191
192
193
194
/**
 * Smart pointer flavor of _deliver_old(). The raw pointer is taken
 * out of \p data and everything is forwarded unchanged, so the
 * functor still receives a plain pointer.
 */
void
FileADT::_deliver(
     const ReadRequest::delivery_t& fn,
     const std::shared_ptr<const void>&  data,
     std::int64_t offset,
     std::int64_t size,
     bool transient)
{
  const ReadRequest::data_t raw = data.get();
  _deliver_old(fn, raw, offset, size, transient);
}

195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
/**
 * Placeholder that always throws std::runtime_error.
 * None of the arguments are looked at.
 */
std::shared_ptr<FileADT>
FileADT::factory(const std::string& filename, OpenMode mode, const IOContext *iocontext)
{
  const std::string what("Not implemented yet: FileADT::factory");
  throw std::runtime_error(what);
}

/////////////////////////////////////////////////////////////////////////////
//    FileFactory   /////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////

/**
 * Open a file by asking each registered factory in turn, in order of
 * registration, and returning the first non-empty result.
 *
 * Caveat: If registration is done using static initializers then
 * the order of registration is undefined. A factory deciding
 * whether to handle a file should therefore not make assumptions
 * about where it is in the list.
 *
 * Thread safety: The registry is copied while holding the factory's
 * lock. The lock is dropped before any factory is invoked, i.e.
 * before the actual file open.
 *
 * \throws OpenZGY::Errors::ZgyUserError if no registered factory
 *         returned a file.
 */
std::shared_ptr<FileADT>
FileFactory::create(const std::string& filename, OpenMode mode, const OpenZGY::IOContext *iocontext)
{
  // The factory's lock must not be held while opening the file, so
  // work on a snapshot of the registry. The copy should not be a
  // performance issue since the open itself is usually expensive
  // and infrequent.
  std::vector<factory_t> candidates;
  {
    std::lock_guard<std::mutex> guard(_mutex);
    candidates = _registry;
  }

  for (const factory_t& make : candidates) {
    std::shared_ptr<FileADT> opened = make(filename, mode, iocontext);
    if (opened)
      return opened;
  }

  throw OpenZGY::Errors::ZgyUserError("Don't know how to open \"" + filename + "\".");
}

237
238
239
240
241
242
243
244
/**
 * Register a new factory.
 *
 * It is allowed to do this from a static constructor. When this
 * method is entered we know that the _registry data member has been
 * initialized because the static _instance is declared inside
 * FileFactory::instance() which means the compiler is responsible for
 * constructing it early enough.
245
246
247
248
249
250
251
 *
 * Similarly, _mutex has already been constructed when any member
 * function is called. So it can be used to synchronize access.
 * The mutex cannot be use to protect the instance itself.
 * Fortunately it doesn't need to, because the compiler does that.
 *
 * Thread safety: The factory is synchronized using a lock.
252
 */
253
254
255
void
FileFactory::add_factory(const factory_t& factory)
{
256
  std::lock_guard<std::mutex> lk(_mutex);
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
  _registry.push_back(factory);
}

/**
 * Access the one and only FileFactory. It is a function-local static,
 * so it gets constructed the first time this function is called.
 */
FileFactory&
FileFactory::instance()
{
  static FileFactory the_factory;
  return the_factory;
}

FileFactory::FileFactory()
  : _registry()
{
}

/////////////////////////////////////////////////////////////////////////////
//    FileADT -> FileCommon   ///////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////

276
/**
 * Remember the file name and open mode, start with eof at zero, and
 * set up the timers for reads and writes. Reads from a file opened
 * for writing (ReadWrite or Truncate) are timed under the name
 * "File.reread"; otherwise the name is "File.read".
 */
FileCommon::FileCommon(const std::string& filename, OpenMode mode)
  : FileADT()
  , _mode(mode)
  , _name(filename)
  , _eof(0)
{
  const bool writable =
    (mode == OpenMode::ReadWrite || mode == OpenMode::Truncate);
  const char* const read_timer_name = writable ? "File.reread" : "File.read";

  _rtimer.reset(new SummaryPrintingTimerEx(read_timer_name));
  _wtimer.reset(new SummaryPrintingTimerEx("File.write"));
}

/**
 * \brief Get the current file size for error reporting.
 *
 * This implementation simply trusts xx_eof(), i.e. it assumes the
 * size that is (probably) tracked internally is correct.
 */
std::int64_t
FileCommon::_real_eof() const
{
  const std::int64_t assumed_size = xx_eof();
  return assumed_size;
}

/**
 * \brief Throw a descriptive error if there was something wrong with the read.
 *
 * Returns normally when \p got equals \p size. Otherwise the first
 * matching explanation below is reported:
 *   - more data than requested was returned (ZgyInternalError);
 *   - the request extends past xx_eof();
 *   - _real_eof() is smaller than xx_eof();
 *   - none of the above, i.e. the cause is unknown.
 * All but the first are reported as ZgyEndOfFile.
 *
 * \details Currently works for local files only. TODO-Low fix?
 * If fixing this then make sure all implementations of xx_eof()
 * and _real_eof() are also thread safe.
 *
 * \param offset Offset that was requested.
 * \param size   Number of bytes that was requested.
 * \param got    Number of bytes actually read.
 */
void
FileCommon::_check_short_read(std::int64_t offset, std::int64_t size, std::int64_t got) const
{
  using OpenZGY::Errors::ZgyEndOfFile;
  using OpenZGY::Errors::ZgyInternalError;

  if (got == size)
    return;

  const std::string prefix =
    "Cannot read offset " + _nice(offset) + " size " + _nice(size) + ": ";

  // Likely some kind of OS error. Beware possible buffer overrun.
  if (got > size)
    throw ZgyInternalError(prefix + "got too much data: " + _nice(got) + ".");

  // This can only happen if I (bug!) forgot to call _validate_read.
  if (offset + size > xx_eof())
    throw ZgyEndOfFile(prefix + "past EOF at " + _nice(xx_eof()) + ".");

  // This can happen if opening /dev/null for read/write,
  // or if a write failed due to a full disk (and was not checked),
  // or I somehow (bug!) failed to keep track of eof while writing.
  // Or maybe some other process truncated the file.
  if (_real_eof() < xx_eof())
    throw ZgyEndOfFile(prefix + "File is shorter than expected: " +
                       _nice(_real_eof()) + " / " +
                       _nice(xx_eof()) + ".");

  // The os returned a short read for no apparent reason.
  // Maybe the file is a special device other than /dev/null.
  throw ZgyEndOfFile(prefix + "short read for unknown reason.");
}

} // namespace