Commit 78f0da23 authored by Paal Kvamme's avatar Paal Kvamme
Browse files

Add performance related logging enabled via environment variables.

parent d542d430
......@@ -18,6 +18,7 @@
#include "../exception.h"
#include "timer.h"
#include "environment.h"
#include "file_performance.h"
#include <vector>
#include <string>
......@@ -150,8 +151,12 @@ LocalFileLinux::~LocalFileLinux()
std::shared_ptr<FileADT>
LocalFileLinux::xx_make_instance(const std::string& filename, OpenMode mode, const IOContext *iocontext)
{
if (filename.find("://") == std::string::npos)
return std::shared_ptr<FileADT>(new LocalFileLinux(filename, mode, iocontext));
if (filename.find("://") == std::string::npos) {
auto file = std::shared_ptr<FileADT>(new LocalFileLinux(filename, mode, iocontext));
// This is a no-op unless enabled by enviroment variables
file = FileWithPerformanceLogger::inject(file);
return file;
}
else
return std::shared_ptr<FileADT>();
}
......
// Copyright 2017-2020, Schlumberger
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "file_performance.h"
#include "timer.h"
#include "environment.h"
#include "../exception.h"
#include <string.h>
#include <iostream>
#include <sstream>
#include <cmath>
namespace InternalZGY {
#if 0
}
#endif
FileWithPerformanceLogger::FileWithPerformanceLogger(std::shared_ptr<FileADT> relay, std::int64_t chunksize, int hist_bincount, double hist_min, double hist_max)
: _relay(relay)
, _chunksize(chunksize)
, _mutex()
, _nsamples(0)
, _histmin(hist_min)
, _histmax(hist_max)
, _statmin(std::numeric_limits<double>::max())
, _statmax(std::numeric_limits<double>::lowest())
, _statsum(0)
, _statssq(0)
, _histbins(hist_bincount, 0)
{
}
FileWithPerformanceLogger::~FileWithPerformanceLogger()
{
std::string str = dump(false);
if (!str.empty())
std::cout << str << std::flush;
}
void
FileWithPerformanceLogger::xx_read(void *data, std::int64_t offset, std::int64_t size, UsageHint usagehint)
{
if (size == _chunksize || _chunksize < 0) {
Timer timer;
_relay->xx_read(data, offset, size, usagehint);
timer.stop();
add(timer.getTotal()*1000, size);
}
else {
_relay->xx_read(data, offset, size, usagehint);
}
}
void
FileWithPerformanceLogger::xx_readv(const ReadList& requests, bool parallel_ok, bool immutable_ok, bool transient_ok, UsageHint usagehint)
{
// This is only effective in the fully random access case,
// To include the "consolidate" case I might need to implenent it
// inside SeismicStoreFile:. Or make the entire consolidate logic
// into a separate module that can be chained. Either way I may need
// multiple histograms to cover different brick sizes.
if ((requests.size() == 1 && requests.front().size == _chunksize) || _chunksize < 0) {
Timer timer;
_relay->xx_readv(requests, parallel_ok, immutable_ok, transient_ok, usagehint);
timer.stop();
std::int64_t size = 0;
for (const ReadRequest& it : requests)
size += it.size;
add(timer.getTotal()*1000, size);
}
else {
_relay->xx_readv(requests, parallel_ok, immutable_ok, transient_ok, usagehint);
}
}
void
FileWithPerformanceLogger::xx_write(const void* data, std::int64_t offset, std::int64_t size, UsageHint usagehint=UsageHint::Unknown)
{
_relay->xx_write(data, offset, size, usagehint);
}
void
FileWithPerformanceLogger::xx_close()
{
_relay->xx_close();
std::string str = dump(true);
if (!str.empty())
std::cout << str << std::flush;
}
void
FileWithPerformanceLogger::add(double value, std::int64_t blocksize)
{
// Note _hist{min,max} is the center of first and last bin.
// Example: min=0, max=1000, nbuckets = 101
// gives binwidth=10, bin(3) = 0, bin(7)=1
// which is correct because bin 0 covers 0 +/- 5.
const int nbuckets = static_cast<int>(_histbins.size());
const double binwidth = (_histmax - _histmin) / (nbuckets - 1);
const double fbin = (value - _histmin) / binwidth;
const int ibin = (fbin < 0 ? 0 :
fbin > (nbuckets-1) ? nbuckets-1 :
std::floor(fbin+0.5));
std::lock_guard<std::mutex> lk(_mutex);
_histbins[ibin] += 1;
_nsamples += 1;
_statsum += value;
_statssq += value*value;
if (_statmin > value) _statmin = value;
if (_statmax < value) _statmax = value;
}
/**
* Format the results as CSV that is simple to paste into a spreadsheet.
* Use a distinct start/end marker so the csv data can be easily
* extracted from a log file. Even if the previous line did not end in
* a newline.
*
* Quick instructions for the spreadsheet:
* Select the CSV3,CSV4,CSV5 cells except for the first and last columns.
* Make a line graph: data series in rows, first row and first column as label.
* This shows distibution of latency as well as cumulative counts; the latter
* can be used to get the percentile of samples below a certain latency.
*/
std::string
FileWithPerformanceLogger::dump(bool clear)
{
std::lock_guard<std::mutex> lk(_mutex);
std::stringstream ss;
if (_nsamples != 0) {
const int nbins = static_cast<int>(_histbins.size());
const double binwidth = (_histmax - _histmin) / (nbins - 1);
ss << "CSV1,Samplecount,Histogram min,Histogram max"
<< ",Statistic min,Statistic max,Statistic average,END\n"
<< "CSV2," << _nsamples
<< "," << _histmin << "," << _histmax
<< "," << _statmin << "," << _statmax
<< "," << _statsum / _nsamples
<< ",END\n";
// Dump center value of each histogram bin.
// Note that CSV3, CSV4, CSV5 can all be computed in a
// spreadsheet using a simple formula. Bit I'd like to
// have an (almost) single-click way of making the graph.
ss << "CSV3,Latency";
for (int ii=0; ii<nbins; ++ii)
ss << "," << _histmin + binwidth * ii;
ss << ",END\n";
// Dump the bins as percentages
ss << "CSV4,Frequency%";
for (const std::int64_t count : _histbins)
ss << "," << count / (double)_nsamples;
ss << ",END\n";
// Dump the cumulative counts.
ss << "CSV5,Cumulative%";
std::int64_t summed = 0;
for (const std::int64_t count : _histbins) {
summed += count;
ss << "," << summed / (double)_nsamples;
}
ss << ",END\n";
// Dump the bins as raw counts
ss << "CSV6,Count";
for (const std::int64_t count : _histbins)
ss << "," << count;
ss << ",END\n";
if (clear) {
_nsamples = 0;
_statmin = std::numeric_limits<double>::max();
_statmax = std::numeric_limits<double>::lowest();
_statsum= 0;
_statssq = 0;
_histbins = std::vector<std::int64_t>(_histbins.size(), 0);
}
}
return ss.str();
}
/**
* Inject a telemetry module if enabled by environment variables.
* If not enabled the telemetry code has zero impact on the system.
*
* OPENZGY_MEASURE_KB = brick size to monitor, or -1 for all reads.
* OPENZGY_MEASURE_BINS = bins in histogram (default 251)
* OPENZGY_MEASURE_TIME = highest latency in ms (default 500)
*/
std::shared_ptr<FileADT>
FileWithPerformanceLogger::inject(std::shared_ptr<FileADT> file)
{
int target = Environment::getNumericEnv("OPENZGY_MEASURE_KB", 0);
if (target != 0) {
int bincount = Environment::getNumericEnv("OPENZGY_MEASURE_BINS", 251);
int maxtime = Environment::getNumericEnv("OPENZGY_MEASURE_TIME", 500);
file = std::shared_ptr<FileADT>(new FileWithPerformanceLogger(file, target*1024, bincount, 0.0, maxtime));
}
return file;
}
} // namespace
// Copyright 2017-2020, Schlumberger
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
/**
* \file: file_performance.h
* \brief FileADT wrapper logging performance statistics.
*/
#include <cstdint>
#include <vector>
#include <string>
#include <memory>
#include <mutex>
#include "file.h"
namespace InternalZGY {
#if 0
}
#endif
/**
* \brief FileADT wrapper logging performance statistics.
*/
class FileWithPerformanceLogger : public FileADT
{
private:
std::shared_ptr<FileADT> _relay;
std::int64_t _chunksize;
mutable std::mutex _mutex;
std::int64_t _nsamples;
double _histmin, _histmax, _statmin, _statmax, _statsum, _statssq;
std::vector<std::int64_t> _histbins;
FileWithPerformanceLogger(const FileWithPerformanceLogger&) = delete;
FileWithPerformanceLogger& operator=(const FileWithPerformanceLogger&) = delete;
public:
explicit FileWithPerformanceLogger(std::shared_ptr<FileADT> relay, std::int64_t chunksize, int hist_bincount, double hist_min, double hist_max);
virtual ~FileWithPerformanceLogger();
virtual void xx_read(void *data, std::int64_t offset, std::int64_t size, UsageHint usagehint) override;
virtual void xx_readv(const ReadList& requests, bool parallel_ok, bool immutable_ok, bool transient_ok, UsageHint usagehint) override;
virtual void xx_write(const void* data, std::int64_t offset, std::int64_t size, UsageHint usagehint) override;
virtual void xx_close() override;
virtual std::int64_t xx_eof() const override { return _relay->xx_eof(); }
virtual bool xx_iscloud() const override { return _relay->xx_iscloud(); }
public:
void add(double value, std::int64_t blocksize);
std::string dump(bool clear);
static std::shared_ptr<FileADT> inject(std::shared_ptr<FileADT> file);
};
} // namespace
......@@ -18,6 +18,7 @@
#include "logger.h"
#include "file_consolidate.h"
#include "file_performance.h"
#include "../exception.h"
#include "../iocontext.h"
......@@ -964,8 +965,12 @@ std::shared_ptr<FileADT>
SeismicStoreFile::xx_make_instance(const std::string& filename, OpenMode mode, const IOContext *iocontext)
{
if (filename.substr(0, 5) == "sd://" &&
(mode != OpenMode::ReadWrite && mode != OpenMode::Truncate))
return std::shared_ptr<FileADT>(new SeismicStoreFile(filename, mode, iocontext));
(mode != OpenMode::ReadWrite && mode != OpenMode::Truncate)) {
auto file = std::shared_ptr<FileADT>(new SeismicStoreFile(filename, mode, iocontext));
// This is a no-op unless enabled by enviroment variables
file = FileWithPerformanceLogger::inject(file);
return file;
}
else
return std::shared_ptr<FileADT>();
}
......
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
......@@ -97,6 +97,7 @@
<ClCompile Include="..\src\impl\file.cpp" />
<ClCompile Include="..\src\impl\file_consolidate.cpp" />
<ClCompile Include="..\src\impl\file_local.cpp" />
<ClCompile Include="..\src\impl\file_performance.cpp" />
<ClCompile Include="..\src\impl\file_windows.cpp" />
<ClCompile Include="..\src\impl\file_sd.cpp" />
<ClCompile Include="..\src\impl\genlod.cpp" />
......@@ -132,6 +133,7 @@
<ClInclude Include="..\src\impl\environment.h" />
<ClInclude Include="..\src\impl\file.h" />
<ClInclude Include="..\src\impl\file_consolidate.h" />
<ClInclude Include="..\src\impl\file_performance.h" />
<ClInclude Include="..\src\impl\file_sd.h" />
<ClInclude Include="..\src\impl\genlod.h" />
<ClInclude Include="..\src\impl\guid.h" />
......
......@@ -47,6 +47,12 @@
<ClCompile Include="..\src\impl\file_windows.cpp">
<Filter>src\impl</Filter>
</ClCompile>
<ClCompile Include="..\src\impl\file_consolidate.cpp">
<Filter>src\impl</Filter>
</ClCompile>
<ClCompile Include="..\src\impl\file_performance.cpp">
<Filter>src\impl</Filter>
</ClCompile>
<ClCompile Include="..\src\impl\file_sd.cpp">
<Filter>src\impl</Filter>
</ClCompile>
......@@ -136,6 +142,12 @@
<ClInclude Include="..\src\impl\file.h">
<Filter>src\impl</Filter>
</ClInclude>
<ClInclude Include="..\src\impl\file_consolidate.h">
<Filter>src\impl</Filter>
</ClInclude>
<ClInclude Include="..\src\impl\file_performance.h">
<Filter>src\impl</Filter>
</ClInclude>
<ClInclude Include="..\src\impl\file_sd.h">
<Filter>src\impl</Filter>
</ClInclude>
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment