#include "benchmark_runner.h"
#include "benchmark/benchmark.h"
#include "benchmark_api_internal.h"
#include "internal_macros.h"
#ifndef BENCHMARK_OS_WINDOWS
#ifndef BENCHMARK_OS_FUCHSIA
#include <sys/resource.h>
#endif
#include <sys/time.h>
#include <unistd.h>
#endif
#include <algorithm>
#include <atomic>
#include <condition_variable>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <thread>
#include <utility>
#include "check.h"
#include "colorprint.h"
#include "commandlineflags.h"
#include "complexity.h"
#include "counter.h"
#include "internal_macros.h"
#include "log.h"
#include "mutex.h"
#include "perf_counters.h"
#include "re.h"
#include "statistics.h"
#include "string_util.h"
#include "thread_manager.h"
#include "thread_timer.h"
namespace benchmark {
namespace internal {
MemoryManager* memory_manager = nullptr;
namespace {
static constexpr IterationCount kMaxIterations = 1000000000;
BenchmarkReporter::Run CreateRunReport(
const benchmark::internal::BenchmarkInstance& b,
const internal::ThreadManager::Result& results,
IterationCount memory_iterations,
const MemoryManager::Result* memory_result, double seconds,
int64_t repetition_index, int64_t repeats) {
BenchmarkReporter::Run report;
report.run_name = b.name();
report.family_index = b.family_index();
report.per_family_instance_index = b.per_family_instance_index();
report.error_occurred = results.has_error_;
report.error_message = results.error_message_;
report.report_label = results.report_label_;
report.iterations = results.iterations;
report.time_unit = b.time_unit();
report.threads = b.threads();
report.repetition_index = repetition_index;
report.repetitions = repeats;
if (!report.error_occurred) {
if (b.use_manual_time()) {
report.real_accumulated_time = results.manual_time_used;
} else {
report.real_accumulated_time = results.real_time_used;
}
report.cpu_accumulated_time = results.cpu_time_used;
report.complexity_n = results.complexity_n;
report.complexity = b.complexity();
report.complexity_lambda = b.complexity_lambda();
report.statistics = &b.statistics();
report.counters = results.counters;
if (memory_iterations > 0) {
assert(memory_result != nullptr);
report.memory_result = memory_result;
report.allocs_per_iter =
memory_iterations ? static_cast<double>(memory_result->num_allocs) /
memory_iterations
: 0;
}
internal::Finish(&report.counters, results.iterations, seconds,
b.threads());
}
return report;
}
void RunInThread(const BenchmarkInstance* b, IterationCount iters,
int thread_id, ThreadManager* manager,
PerfCountersMeasurement* perf_counters_measurement) {
internal::ThreadTimer timer(
b->measure_process_cpu_time()
? internal::ThreadTimer::CreateProcessCpuTime()
: internal::ThreadTimer::Create());
State st =
b->Run(iters, thread_id, &timer, manager, perf_counters_measurement);
BM_CHECK(st.error_occurred() || st.iterations() >= st.max_iterations)
<< "Benchmark returned before State::KeepRunning() returned false!";
{
MutexLock l(manager->GetBenchmarkMutex());
internal::ThreadManager::Result& results = manager->results;
results.iterations += st.iterations();
results.cpu_time_used += timer.cpu_time_used();
results.real_time_used += timer.real_time_used();
results.manual_time_used += timer.manual_time_used();
results.complexity_n += st.complexity_length_n();
internal::Increment(&results.counters, st.counters);
}
manager->NotifyThreadComplete();
}
}
BenchmarkRunner::BenchmarkRunner(
const benchmark::internal::BenchmarkInstance& b_,
BenchmarkReporter::PerFamilyRunReports* reports_for_family_)
: b(b_),
reports_for_family(reports_for_family_),
min_time(!IsZero(b.min_time()) ? b.min_time() : FLAGS_benchmark_min_time),
repeats(b.repetitions() != 0 ? b.repetitions()
: FLAGS_benchmark_repetitions),
has_explicit_iteration_count(b.iterations() != 0),
pool(b.threads() - 1),
iters(has_explicit_iteration_count ? b.iterations() : 1),
perf_counters_measurement(
PerfCounters::Create(StrSplit(FLAGS_benchmark_perf_counters, ','))),
perf_counters_measurement_ptr(perf_counters_measurement.IsValid()
? &perf_counters_measurement
: nullptr) {
run_results.display_report_aggregates_only =
(FLAGS_benchmark_report_aggregates_only ||
FLAGS_benchmark_display_aggregates_only);
run_results.file_report_aggregates_only =
FLAGS_benchmark_report_aggregates_only;
if (b.aggregation_report_mode() != internal::ARM_Unspecified) {
run_results.display_report_aggregates_only =
(b.aggregation_report_mode() &
internal::ARM_DisplayReportAggregatesOnly);
run_results.file_report_aggregates_only =
(b.aggregation_report_mode() & internal::ARM_FileReportAggregatesOnly);
BM_CHECK(FLAGS_benchmark_perf_counters.empty() ||
perf_counters_measurement.IsValid())
<< "Perf counters were requested but could not be set up.";
}
}
BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {
BM_VLOG(2) << "Running " << b.name().str() << " for " << iters << "\n";
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(b.threads()));
for (std::size_t ti = 0; ti < pool.size(); ++ti) {
pool[ti] = std::thread(&RunInThread, &b, iters, static_cast<int>(ti + 1),
manager.get(), perf_counters_measurement_ptr);
}
RunInThread(&b, iters, 0, manager.get(), perf_counters_measurement_ptr);
manager->WaitForAllThreads();
for (std::thread& thread : pool) thread.join();
IterationResults i;
{
MutexLock l(manager->GetBenchmarkMutex());
i.results = manager->results;
}
manager.reset();
i.results.real_time_used /= b.threads();
i.results.manual_time_used /= b.threads();
if (b.measure_process_cpu_time()) i.results.cpu_time_used /= b.threads();
BM_VLOG(2) << "Ran in " << i.results.cpu_time_used << "/"
<< i.results.real_time_used << "\n";
i.iters = i.results.iterations / b.threads();
i.seconds = i.results.cpu_time_used;
if (b.use_manual_time()) {
i.seconds = i.results.manual_time_used;
} else if (b.use_real_time()) {
i.seconds = i.results.real_time_used;
}
return i;
}
IterationCount BenchmarkRunner::PredictNumItersNeeded(
const IterationResults& i) const {
double multiplier = min_time * 1.4 / std::max(i.seconds, 1e-9);
bool is_significant = (i.seconds / min_time) > 0.1;
multiplier = is_significant ? multiplier : 10.0;
const IterationCount max_next_iters = static_cast<IterationCount>(
std::lround(std::max(multiplier * static_cast<double>(i.iters),
static_cast<double>(i.iters) + 1.0)));
const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
BM_VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n";
return next_iters; }
bool BenchmarkRunner::ShouldReportIterationResults(
const IterationResults& i) const {
return i.results.has_error_ ||
i.iters >= kMaxIterations || i.seconds >= min_time || ((i.results.real_time_used >= 5 * min_time) && !b.use_manual_time());
}
void BenchmarkRunner::DoOneRepetition() {
assert(HasRepeatsRemaining() && "Already done all repetitions?");
const bool is_the_first_repetition = num_repetitions_done == 0;
IterationResults i;
for (;;) {
b.Setup();
i = DoNIterations();
b.Teardown();
const bool results_are_significant = !is_the_first_repetition ||
has_explicit_iteration_count ||
ShouldReportIterationResults(i);
if (results_are_significant) break;
iters = PredictNumItersNeeded(i);
assert(iters > i.iters &&
"if we did more iterations than we want to do the next time, "
"then we should have accepted the current iteration run.");
}
MemoryManager::Result* memory_result = nullptr;
IterationCount memory_iterations = 0;
if (memory_manager != nullptr) {
memory_results.push_back(MemoryManager::Result());
memory_result = &memory_results.back();
memory_iterations = std::min<IterationCount>(16, iters);
memory_manager->Start();
std::unique_ptr<internal::ThreadManager> manager;
manager.reset(new internal::ThreadManager(1));
b.Setup();
RunInThread(&b, memory_iterations, 0, manager.get(),
perf_counters_measurement_ptr);
manager->WaitForAllThreads();
manager.reset();
b.Teardown();
BENCHMARK_DISABLE_DEPRECATED_WARNING
memory_manager->Stop(memory_result);
BENCHMARK_RESTORE_DEPRECATED_WARNING
}
BenchmarkReporter::Run report =
CreateRunReport(b, i.results, memory_iterations, memory_result, i.seconds,
num_repetitions_done, repeats);
if (reports_for_family) {
++reports_for_family->num_runs_done;
if (!report.error_occurred) reports_for_family->Runs.push_back(report);
}
run_results.non_aggregates.push_back(report);
++num_repetitions_done;
}
RunResults&& BenchmarkRunner::GetResults() {
assert(!HasRepeatsRemaining() && "Did not run all repetitions yet?");
run_results.aggregates_only = ComputeStats(run_results.non_aggregates);
return std::move(run_results);
}
}
}