Commit 66337515 authored by Paal Kvamme's avatar Paal Kvamme
Browse files

Numerical accuracy issues.

parent ecb3b856
......@@ -27,6 +27,7 @@
#include <sstream>
#include <cmath>
#include <iostream>
#include <iomanip>
namespace InternalZGY {
#if 0
......@@ -1059,6 +1060,13 @@ DataBufferNd<T,NDim>::s_scaleToFloat(const DataBuffer* in,
typename dst_type::value_type *dst_ptr = dst->data();
const src_type::value_type *src_ptr = src->data();
const src_type::value_type *src_end = src_ptr + src->allocsize();
#if 0
std::cerr << "@@ Converting" << std::setprecision(14)
<< " " << *src_ptr
<< " to " << static_cast<typename dst_type::value_type>(*src_ptr * a + b)
<< " using factors " << a << " " << b
<< std::endl;
#endif
while (src_ptr < src_end)
*dst_ptr++ = static_cast<typename dst_type::value_type>(*src_ptr++ * a + b);
return dst;
......@@ -1106,7 +1114,6 @@ DataBufferNd<T,NDim>::s_scaleFromFloat(const DataBuffer* in,
// for infinite or both is needed.
dst_type::value_type *dst_ptr = dst->data();
const typename src_type::value_type *src_ptr = src->data();
const typename src_type::value_type *src_end = src_ptr + src->allocsize();
const std::int64_t totalsize = src->allocsize();
// TODO-Performance: There is a risk of the OpemMP overhead being larger
// then the speedup gained by multiple threads. I ran tests only in one
......
......@@ -644,6 +644,18 @@ InfoHeaderAccess::bytespersample() const
* The smallest value in storage (e.g. -128) should map to codingrange[0]
* and the largest value (e.g. +127) should map to codingrange[1].
* If file_dtype is a floating point type there is never any conversion.
*
* Note that the codingrange stored in file is two float32 numbers i.e.
* 24 bits mantissa. This is ok for our purpose because it is only used
* for converting int8 and int16 data so the user doesn't expect anything
* more than 16 bits of precision. But to avoid numerical issues with
* the transform the codingrange should immediately be upcast to float64
* befor being used. Case in point: (double)(hi-lo) is less accurate than
* ((double)hi - (double)lo) when hi,lo are float32. Admittedly only by
* 1 ULPS but the error might be amplified later. Enough to raise errors
* when testing even if the signal still has 16 bits of precision intact.
*
* See also ZgyInternalBulk._scaleToFloat in OpenZGY/Python.
*/
std::array<double,2>
InfoHeaderAccess::storagetofloat() const
......@@ -652,8 +664,22 @@ InfoHeaderAccess::storagetofloat() const
const RawDataTypeDetails info(this->datatype());
if (info.is_integer) {
const std::array<float,2> range = this->safe_codingrange();
slope = (range[1] - range[0]) / (info.highest - info.lowest);
slope = ((double)range[1]-(double)range[0]) / (info.highest - info.lowest);
intercept = range[0] - slope * info.lowest;
#if 0 // Debug issues with numerical inaccuracy.
static double debug_old_slope = 0, debug_old_intercept = 0;
if (debug_old_slope != slope || debug_old_intercept != intercept) {
std::cerr << "@@ storagetofloat " << std::setprecision(14)
<< " slope " << slope
<< " intercept " << intercept
<< " range " << range[0] << " " << range[1]
<< " test " << slope*-32768+intercept
<< " " << slope*32767+intercept
<< std::endl;
debug_old_slope = slope;
debug_old_intercept = intercept;
}
#endif
}
return std::array<double,2>{slope,intercept};
}
......
......@@ -289,6 +289,23 @@ class ZgyInternalBulk:
if not np.issubdtype(file_dtype, np.integer): return data
(a, b) = cls._scaleFactorsStorageToFloat(codingrange, file_dtype)
if isinstance(data, np.ndarray):
# Numerical accuracy notes:
#
# To match the (arguably broken) old ZGY implementation
# do all the computations, including the codingrange to
# (slope, intercept) calculation, using single precision
# float. The result isn't seriously wrong but values
# close to zero may see a noticeable shift.
#
# To get the result as accurate as possible, convert the
# entire array to float64, then apply the conversion,
# then convert back to float32. The compromise currently
# used is to accurately compute slope, intercept but do
# the scaling on float32 values. The difference compared
# to the most accurate case is hardly noticeable.
#
# See also InfoHeaderAccess::storagetofloat() in OpenZGY/C++.
#
data = data.astype(np.float32)
# In-place operators to avoid copying arrays again.
data *= a
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment