改造benchmark,运行中获取ns耗时
// Accessor added to benchmark::State as part of the modified library:
// forwards to timer_->real_time_used() so a benchmark body can read the
// elapsed real time while the run is still in progress.
// NOTE(review): PerfTest::report divides this value by the iteration count and
// compares it with an expectation given in seconds — confirm the unit returned
// by the timer.
double State::real_time_used() const {
return timer_->real_time_used();
}
定义测试套件
cpp
class PerfTest : public benchmark::Fixture {
public:
void SetUp(benchmark::State& state) override
{
Unit(benchmark::kSecond);
ThreadRange(1, 1);
MinWarmUpTime(1);
Test(); // 预热一次,防止RAII首次波动过大
}
void TearDown(benchmark::State& state) override
{
}
void Test() {
// 初始化资源单例
}
// 添加预期时间设置和检查方法
void SetExpectedTime(double seconds)
{
expected_time_ = seconds;
}
void report(benchmark::State& state, size_t& fCount, size_t& sCount)
{
if (expected_time_ > 0) {
double actual_time = state.real_time_used() / state.iterations();
auto computeScale = refCpuCycles_per_second / benchmark::CPUInfo::Get().
cycles_per_second; // 根据cpu频率动态调整预期耗时,当前cpu频率越低,预估耗时越高
actual_time *= computeScale;
// printf("cost time: %f s\n", actual_time);
actual_time > expected_time_ ? ++fCount : ++sCount;
if (fCount) {
char msg[1024] = { 0 };
snprintf(msg, 1024, "fail actual %fs,%fHZ - expect %fs,%fHZ = %fs", actual_time,
benchmark::CPUInfo::Get().cycles_per_second, expected_time_,
refCpuCycles_per_second, actual_time - expected_time_);
state.SkipWithError(msg);
}
}
}
void copyPaste3WTest(benchmark::State& state)
{
state.PauseTiming();
// prepare resource
state.ResumeTiming(); // 告诉反射框架需要准备的数据是什么
// do some thing
}
private:
double expected_time_ = 0.0; // 预期耗时
double refCpuCycles_per_second = 4503077393; // cpu参考HZ
};
// Benchmark case: sleeps inside the timed loop, then asks PerfTest::report()
// whether the measured per-iteration time met the expectation.
BENCHMARK_F(PerfTest, sleepTest)(benchmark::State& state)
{
// Success / failure tallies; static, so they persist across invocations of this case.
static size_t sCount = 0;
static size_t fCount = 0;
// Expected time per iteration, in seconds.
// NOTE(review): the original comment quoted 0.5 s as an example, but the value set here is 1.1.
SetExpectedTime(1.1);
Complexity();
for (auto _ : state) {
// NOTE(review): _sleep is a non-standard call; presumably the argument is in milliseconds — confirm.
_sleep(1000);
}
// Check whether the result met the expectation.
report(state, fCount, sCount);
// Publish the tallies as user counters "F" (failed) and "S" (succeeded).
state.counters["F"] = fCount;
state.counters["S"] = sCount;
}
输出基准报告
Benchmark Time CPU Iterations UserCounters...
PerfTest/sleepTest 1010025320 ns 0.000 ns 10 F=0 S=2
PerfTest/sleepTest ERROR OCCURRED: 'fail actual 0.000699s,4559561393.000000HZ - expect 0.000500s,4503077393.000000HZ = 0.000199s'
使用
cpp
#include <benchmark/benchmark.h>
#include <array>
// Element count of the std::array used by the bench_array_* benchmarks.
constexpr int len = 6;
// A constexpr function is implicitly inline, so it should live in a header file.
// Returns the square of i.
constexpr auto my_pow(const int i)
{
    const int squared = i * i;
    return squared;
}
// Writes the squares of 1..6 into a std::array through operator[].
static void bench_array_operator(benchmark::State& state)
{
    std::array<int, len> arr;
    constexpr int i = 1;
    // Let the array's storage escape so the compiler has to treat it as
    // observable memory; otherwise the stores below are dead and the whole
    // loop body can be optimised away, leaving nothing to measure.
    int* storage = arr.data();
    benchmark::DoNotOptimize(storage);
    for (auto _ : state) {
        arr[0] = my_pow(i);
        arr[1] = my_pow(i + 1);
        arr[2] = my_pow(i + 2);
        arr[3] = my_pow(i + 3);
        arr[4] = my_pow(i + 4);
        arr[5] = my_pow(i + 5);
        benchmark::ClobberMemory(); // force the stores of this iteration to be performed
    }
}
// Register bench_array_operator with a fixed 10 iterations, 3 repetitions and
// times reported in milliseconds, plus two custom statistics, "max" and "min",
// each computed from the vector of values handed to the lambda.
// NOTE(review): std::max_element / std::min_element and std::vector come from
// <algorithm> and <vector>, which the visible include list does not name directly.
BENCHMARK(bench_array_operator)->Iterations(10)->Repetitions(3)->Unit(benchmark::kMillisecond)
->ComputeStatistics("max", [](const std::vector<double>& v)->double {
return *std::max_element(v.begin(), v.end());
}, benchmark::kTime)
->ComputeStatistics("min", [](const std::vector<double>& v)->double {
return *std::min_element(v.begin(), v.end());
}, benchmark::kTime);
// Writes the squares of 1..6 into a std::array through the bounds-checked at().
static void bench_array_at(benchmark::State& state)
{
    std::array<int, len> arr;
    constexpr int i = 1;
    // Let the array's storage escape so the compiler has to treat it as
    // observable memory; otherwise the stores below are dead and the whole
    // loop body can be optimised away, leaving nothing to measure.
    int* storage = arr.data();
    benchmark::DoNotOptimize(storage);
    for (auto _ : state) {
        arr.at(0) = my_pow(i);
        arr.at(1) = my_pow(i + 1);
        arr.at(2) = my_pow(i + 2);
        arr.at(3) = my_pow(i + 3);
        arr.at(4) = my_pow(i + 4);
        arr.at(5) = my_pow(i + 5);
        benchmark::ClobberMemory(); // force the stores of this iteration to be performed
    }
}
BENCHMARK(bench_array_at);
// std::get<>(array) is a constexpr function: it returns a reference to the
// element and validates the index at compile time.
// Writes the squares of 1..6 into a std::array through std::get<I>.
static void bench_array_get(benchmark::State& state)
{
    std::array<int, len> arr;
    constexpr int i = 1;
    // Let the array's storage escape so the compiler has to treat it as
    // observable memory; otherwise the stores below are dead and the whole
    // loop body can be optimised away, leaving nothing to measure.
    int* storage = arr.data();
    benchmark::DoNotOptimize(storage);
    for (auto _ : state) {
        std::get<0>(arr) = my_pow(i);
        std::get<1>(arr) = my_pow(i + 1);
        std::get<2>(arr) = my_pow(i + 2);
        std::get<3>(arr) = my_pow(i + 3);
        std::get<4>(arr) = my_pow(i + 4);
        std::get<5>(arr) = my_pow(i + 5);
        benchmark::ClobberMemory(); // force the stores of this iteration to be performed
    }
}
BENCHMARK(bench_array_get);
//BENCHMARK(bench_array_ring_insert_int)->Arg(10)->Arg(100)->Arg(1000);
//BENCHMARK(func)->Range(int64_t start, int64_t limit);
// #RangeMultiplier 每次乘倍数,默认从start起每次累乘8,一直达到end
//BENCHMARK(bench_array_ring_insert_int)->RangeMultiplier(10)->Range(10, 1000);
//BENCHMARK(func)->RangeMultiplier(10)->Ranges({ {10, 1000}, {128, 256} });
//BENCHMARK(func)->Args({ 10, 128 })
//->Args({ 100, 128 })
//->Args({ 1000, 128 })
//->Args({ 10, 256 })
//->Args({ 100, 256 })
//->Args({ 1000, 256 })
// 这次我们生成100,200,...,1000的测试用例,用range是无法生成这些参数的
//static void custom_args(benchmark::internal::Benchmark* b)
//{
// for (int i = 100; i <= 1000; i += 100) {
// b->Arg(i);
// }
//}
//
//BENCHMARK(bench_array_ring_insert_int)->RangeMultiplier(10)->Apply(custom_args);
// Benchmarks appending `length` value-initialised T objects to a fresh
// std::vector<T>. When is_reserve is true, capacity for all elements is
// reserved before the first push_back. The element count comes only from the
// template argument; the benchmark arguments in `state` are not read.
template <typename T, std::size_t length, bool is_reserve = true>
void bench_vector_reserve(benchmark::State& state)
{
    for (auto _ : state) {
        std::vector<T> items;
        if constexpr (is_reserve) {
            items.reserve(length);
        }
        for (std::size_t added = 0; added != length; ++added) {
            items.push_back(T{});
        }
    }
}
// Instantiate bench_vector_reserve for std::string at four sizes with
// reserve() enabled (is_reserve defaults to true) ...
BENCHMARK_TEMPLATE(bench_vector_reserve, std::string, 100);
BENCHMARK_TEMPLATE(bench_vector_reserve, std::string, 1000);
BENCHMARK_TEMPLATE(bench_vector_reserve, std::string, 10000);
BENCHMARK_TEMPLATE(bench_vector_reserve, std::string, 100000);
// ... and the same four sizes with reserve() disabled.
BENCHMARK_TEMPLATE(bench_vector_reserve, std::string, 100, false);
BENCHMARK_TEMPLATE(bench_vector_reserve, std::string, 1000, false);
BENCHMARK_TEMPLATE(bench_vector_reserve, std::string, 10000, false);
BENCHMARK_TEMPLATE(bench_vector_reserve, std::string, 100000, false);
//BENCHMARK_TEMPLATE(bench_vector_reserve, std::string)->RangeMultiplier(10)->Range(10, 10000 * 10);
//BENCHMARK_TEMPLATE(bench_vector_reserve, std::string, false)->RangeMultiplier(10)->Range(10, 10000 * 10);
// Dense range: arguments from 1000 up to 100 * 100, adding 1000 each step.
// NOTE(review): bench_vector_reserve takes its element count from the template
// argument and never reads state.range(0), so these arguments do not change the
// measured work — confirm this is intended.
BENCHMARK_TEMPLATE(bench_vector_reserve, std::string, 100)->DenseRange(1000, 100 * 100, 1000);
// Benchmarks appending state.range(0) value-initialised T objects to a fresh
// std::vector<T>. When is_reserve is true, capacity for all elements is
// reserved before the first push_back.
template <typename T, bool is_reserve = true>
void bench_vector_reserve2(benchmark::State& state)
{
    // Read the argument once, outside the timed loop, and convert it to the
    // container's size type: this removes the signed/unsigned comparison in
    // the inner loop and keeps a negative argument from wrapping to a huge size.
    const auto requested = state.range(0);
    const std::size_t count = requested > 0 ? static_cast<std::size_t>(requested) : 0;

    for (auto _ : state) {
        std::vector<T> container;
        if constexpr (is_reserve) {
            container.reserve(count);
        }
        for (std::size_t i = 0; i < count; ++i) {
            container.push_back(T{});
        }
    }
}
//BENCHMARK(BM_test)->ArgsProduct({ {"a", "b", "c", "d"}, {1, 2, 3, 4} });
//
//// 等价于下面的
//BENCHMARK(BM_test)->Args({ "a", 1 })
//->Args({ "a", 2 })
//->Args({ "a", 3 })
//->Args({ "a", 4 })
//->Args({ "b", 1 })
//->Args({ "b", 2 })
//->Args({ "b", 3 })
//...
//->Args({ "d", 3 })
//->Args({ "d", 4 })
//ArgPair
//
//benchmark::CreateRange(8, 128, /*multi=*/2) // 生成:[8, 16, 32, 64, 128]
//benchmark::CreateDenseRange(1, 6, /*step=*/1) // 生成:[1, 2, 3, 4, 5, 6]
// Small polymorphic, copy-only value type used as a non-trivial element type
// in the vector benchmarks. Move assignment is deleted, so assignment from an
// rvalue does not compile; lvalues are copied.
class MyClass {
public:
    long i = 2L;

    MyClass() : i(2L) {}
    virtual ~MyClass() {}

    // Returns the stored value.
    virtual long get() { return i; }

    MyClass& operator=(MyClass&&) = delete;

    MyClass(const MyClass& obj) : i(obj.i) {}

    // Copies the stored value and returns *this. The original fell off the end
    // of this non-void function, which is undefined behaviour.
    MyClass& operator=(const MyClass& obj) {
        i = obj.i;
        return *this;
    }
};
// Register bench_vector_reserve2 for three element types (std::string,
// std::size_t, MyClass), each with reserve() enabled (default) and disabled
// (false). Every registration uses the single argument list
// CreateRange(10, 10000 * 10, 10), i.e. sizes from 10 to 100000 with multiplier 10.
BENCHMARK_TEMPLATE(bench_vector_reserve2, std::string)->ArgsProduct({
benchmark::CreateRange(10, 10000 * 10, 10)
});
BENCHMARK_TEMPLATE(bench_vector_reserve2, std::string, false)->ArgsProduct({
benchmark::CreateRange(10, 10000 * 10, 10)
});
BENCHMARK_TEMPLATE(bench_vector_reserve2, std::size_t)->ArgsProduct({
benchmark::CreateRange(10, 10000 * 10, 10)
});
BENCHMARK_TEMPLATE(bench_vector_reserve2, std::size_t, false)->ArgsProduct({
benchmark::CreateRange(10, 10000 * 10, 10)
});
BENCHMARK_TEMPLATE(bench_vector_reserve2, MyClass)->ArgsProduct({
benchmark::CreateRange(10, 10000 * 10, 10)
});
BENCHMARK_TEMPLATE(bench_vector_reserve2, MyClass, false)->ArgsProduct({
benchmark::CreateRange(10, 10000 * 10, 10)
});
//#define generate_test(type) \
// BENCHMARK_TEMPLATE(bench_vector_reserve, type)->ArgsProduct({benchmark::CreateRange(10, 100000, 10)}); \
// BENCHMARK_TEMPLATE(bench_vector_reserve, type, false)->ArgsProduct({benchmark::CreateRange(10, 100000, 10)});
//
//generate_test(std::string);
//generate_test(std::size_t);
//generate_test(MyClass);
// The code below exists purely for demonstration and has no practical purpose.
// Linear-time demo: performs state.range(0) additions per benchmark iteration.
static void bench_N(benchmark::State& state)
{
    // Unsigned accumulator: over a full run the sum exceeds INT_MAX, and
    // unsigned arithmetic wraps instead of invoking signed-overflow UB.
    unsigned n = 0;
    const auto limit = state.range(0); // loop bound read once, outside the timed loop
    for ([[maybe_unused]] auto _ : state) {
        for (int64_t i = 0; i < limit; ++i) {
            benchmark::DoNotOptimize(n += 2); // keeps the compiler from optimising the expression away, at a slight cost
        }
    }
    state.SetComplexityN(limit);
}
BENCHMARK(bench_N)->RangeMultiplier(10)->Range(10, 1000000)->Complexity();
// Logarithmic-time demo: the inner counter doubles each step, so roughly
// log2(state.range(0)) additions are performed per benchmark iteration.
static void bench_LogN(benchmark::State& state)
{
    // Unsigned accumulator: over a full run the sum can exceed INT_MAX, and
    // unsigned arithmetic wraps instead of invoking signed-overflow UB.
    unsigned n = 0;
    const auto limit = state.range(0); // loop bound read once, outside the timed loop
    for ([[maybe_unused]] auto _ : state) {
        for (int64_t i = 1; i < limit; i *= 2) {
            benchmark::DoNotOptimize(n += 2);
        }
    }
    state.SetComplexityN(limit);
}
BENCHMARK(bench_LogN)->RangeMultiplier(10)->Range(10, 1000000)->Complexity(benchmark::oLogN);
// Quadratic-time demo: the inner loop runs from 1 up to (but excluding)
// range(0) squared on every benchmark iteration.
static void bench_Square(benchmark::State& state)
{
    // Unsigned accumulator: over a full run the sum can exceed INT_MAX, and
    // unsigned arithmetic wraps instead of invoking signed-overflow UB.
    unsigned n = 0;
    // Named `side` rather than `len` so it does not shadow the file-level constant.
    const auto side = state.range(0);
    const auto limit = side * side; // computed once, outside the timed loop
    for ([[maybe_unused]] auto _ : state) {
        for (int64_t i = 1; i < limit; ++i) {
            benchmark::DoNotOptimize(n += 2);
        }
    }
    state.SetComplexityN(side);
}
BENCHMARK(bench_Square)->RangeMultiplier(10)->Range(10, 100000)->Complexity();
//使用Fixture
class BMDemo : public benchmark::Fixture {
public:
void SetUp(const benchmark::State& state) {
Unit(benchmark::kSecond);
id_ = 2;
}
void TearDown(const ::benchmark::State& state) {
id_ = 0;
}
int GetId() const { return id_; };
private:
int id_{ 0 };
};
// Fixture case Test0: prints the fixture's id on every timed iteration.
// NOTE(review): printf runs inside the measured loop, so console output is part of the timing.
BENCHMARK_F(BMDemo, Test0)(benchmark::State& state) {
for (auto _ : state) {
printf("id:%d", GetId());
}
}
// Fixture case Test1: same body as Test0, registered as a second case on the same fixture class.
// NOTE(review): printf runs inside the measured loop, so console output is part of the timing.
BENCHMARK_F(BMDemo, Test1)(benchmark::State& state) {
for (auto _ : state) {
printf("id:%d", GetId());
}
}
// Library-provided macro that supplies the program entry point for the benchmarks registered above.
BENCHMARK_MAIN();
静态库使用
add_definitions(-DBENCHMARK_STATIC_DEFINE)
参考
https://github.com/google/benchmark
创作不易,小小的支持一下吧!

