背景
Go 必须借助 `go test` 才能运行 benchmark,因此想在 main 函数中直接做基准测试时会比较麻烦。为此我实现了一个简单且没有额外开销的 benchmark 函数,方便直接使用!其次也方便大家学习如何实现一个零开销的 benchmark 框架!
benchmark 实现
对于带 timeout 的 benchmark,如果每执行一次都去比较 time.Before(timeout),开销会非常大;而且 benchmark 对外暴露的被测函数不能是无参函数,必须带上 count 参数(由被测函数自己循环 count 次),因为每次函数调用本身大概会带来 ns 级别的劣化!
所以一般的 benchmark 算法都是梯度 benchmark,即按 1、10、100、1000、10000、100000、1000000 ... 的数量级逐步增加执行次数去 benchmark。好处是避免了大量的 time.Since 计算开销:time.Since 单次调用大约 30ns(Linux 环境下),开销非常大!
go
package pprof
import (
"fmt"
"sync"
"sync/atomic"
"time"
)
// ParallelBenchmark runs execute on thread goroutines at once, each driven by
// Benchmark with the same duration budget, waits for all of them, and prints a
// single summary line to standard output.
//
// In the summary, total is the iteration count summed over every goroutine and
// avg is the summed per-goroutine elapsed time divided by that total, formatted
// by Avg.
//
// execute receives the number of iterations it must perform in one timed batch
// and is invoked from several goroutines concurrently.
func ParallelBenchmark(name string, thread int, duration time.Duration, execute func(count int)) {
	var (
		workers    sync.WaitGroup
		spendNanos uint64 // elapsed time summed over all workers, in nanoseconds
		iterations uint64 // iterations summed over all workers
	)

	// worker runs one independent Benchmark and folds its result into the
	// shared totals.
	worker := func() {
		defer workers.Done()
		elapsed, n := Benchmark(duration, execute)
		atomic.AddUint64(&spendNanos, uint64(elapsed))
		atomic.AddUint64(&iterations, uint64(n))
	}

	workers.Add(thread)
	for started := 0; started < thread; started++ {
		go worker()
	}
	workers.Wait()

	avg := Avg(time.Duration(spendNanos), int(iterations))
	fmt.Printf("name=%s thread=%d duration=%s total=%d avg=%s\n", name, thread, duration, iterations, avg)
}
// Avg formats the mean time per iteration, spend divided by count, for display.
//
// A mean above 100ns is truncated to whole nanoseconds and rendered through
// time.Duration's String method (for example "150ns" or "1.5ms"). A smaller
// mean keeps four decimal places of a nanosecond (for example "2.5000ns").
//
// When count is not positive there is no mean to report, so "N/A" is returned
// instead of dividing by zero.
func Avg(spend time.Duration, count int) string {
	if count <= 0 {
		// float64 division by zero yields NaN or ±Inf, and converting ±Inf to
		// time.Duration gives an implementation-dependent value.
		return "N/A"
	}
	avg := float64(spend) / float64(count)
	if avg > 100 {
		return time.Duration(avg).String()
	}
	return fmt.Sprintf("%.4fns", avg)
}
// Benchmark calls bench in timed batches until about duration of measured time
// has been spent or one billion iterations have run, whichever comes first.
//
// bench receives the number of iterations to perform in the current batch and
// is expected to loop that many times itself; this keeps the per-iteration
// cost free of function-call and timer overhead. The clock is read only twice
// per batch.
//
// Batch sizes grow so that the running total becomes 1, 10, 100, ... Each batch
// is capped at the number of iterations that should still fit into the
// remaining budget, estimated from the mean cost observed so far. bench is
// never called with a count below 1.
//
// It returns the measured time spent inside bench and the total number of
// iterations requested. bench is always called at least once, even when
// duration is zero or negative.
func Benchmark(duration time.Duration, bench func(count int)) (time.Duration, int) {
	const maxTotalCount = 1000000000 // stop once this many iterations have run

	count := 1
	totalSpend := time.Duration(0)
	totalCount := 0
	for {
		start := time.Now()
		bench(count)
		totalSpend += time.Since(start)
		totalCount += count
		if totalCount >= maxTotalCount {
			break
		}
		remaining := duration - totalSpend
		if remaining <= 0 {
			break
		}

		// Geometric growth: after the next batch the running total is 10x.
		count = totalCount * 9
		if totalSpend <= 0 {
			// Nothing measurable yet (coarse clock or trivial bench), so there
			// is no per-iteration cost to extrapolate from. Keep growing; the
			// iteration cap above bounds the loop.
			continue
		}
		perIteration := float64(totalSpend) / float64(totalCount)
		affordable := float64(remaining) / perIteration
		if affordable < 1 {
			// Less than one more iteration fits into the budget. Calling
			// bench(0) would only add timer overhead to the measurement.
			break
		}
		if affordable < float64(count) {
			// Safe to convert: affordable is finite and below count here.
			count = int(affordable)
		}
	}
	return totalSpend, totalCount
}
profile 实现
go
package pprof
import (
"net/http"
_ "net/http/pprof"
"os"
"runtime"
"runtime/pprof"
)
// InitPProf serves http.DefaultServeMux on port 12345 and blocks while the
// server is running, so it is meant to be started on its own goroutine:
//
//	go InitPProf()
//
// The blank import of net/http/pprof in this file is what registers the
// /debug/pprof/ handlers on the default mux. InitPProf panics with the error
// returned by http.ListenAndServe.
func InitPProf() {
	const addr = ":12345"
	if serveErr := http.ListenAndServe(addr, http.DefaultServeMux); serveErr != nil {
		panic(serveErr)
	}
}
// StartCPUProfile creates fileName and starts writing a CPU profile to it.
// The returned stop function ends profiling and closes the file.
//
// It panics if the file cannot be created or if profiling cannot be started
// (the file is closed first in that case). stop panics if closing the file
// fails.
func StartCPUProfile(fileName string) (stop func()) {
	out, createErr := os.Create(fileName)
	if createErr != nil {
		panic(createErr)
	}

	// closeOut closes the profile file and panics on failure; it is shared by
	// the error path below and by the returned stop function.
	closeOut := func() {
		if closeErr := out.Close(); closeErr != nil {
			panic(closeErr)
		}
	}

	if startErr := pprof.StartCPUProfile(out); startErr != nil {
		closeOut()
		panic(startErr)
	}
	return func() {
		// Stop first so the profile is completely written before closing.
		pprof.StopCPUProfile()
		closeOut()
	}
}
// StartMemProfile creates fileName right away and returns a stop function
// that, when called, runs a garbage collection, writes the heap profile to the
// file and closes it.
//
// It panics if the file cannot be created. stop panics if writing the profile
// or closing the file fails.
func StartMemProfile(fileName string) (stop func()) {
	out, createErr := os.Create(fileName)
	if createErr != nil {
		panic(createErr)
	}

	// closeOut closes the profile file and panics on failure.
	closeOut := func() {
		if closeErr := out.Close(); closeErr != nil {
			panic(closeErr)
		}
	}

	return func() {
		// Deferred so the file is closed even when writing the profile panics.
		defer closeOut()

		// Collect first so the profile reflects up-to-date statistics.
		runtime.GC()
		if writeErr := pprof.WriteHeapProfile(out); writeErr != nil {
			panic(writeErr)
		}
	}
}
例子
go
package main
import (
"github.com/anthony-dong/golang/pkg/pprof"
"sync"
"time"
)
// main benchmarks concurrent sync.Map writes: 64 goroutines, each given a
// one-second duration, with the summary line printed by ParallelBenchmark.
func main() {
	// Uncomment to record a CPU profile of the run:
	//stop := pprof.StartCPUProfile("cpu.out")
	//defer stop()

	// Measure sync.Map performance under concurrent writes.
	var shared sync.Map

	// storeBatch performs count stores; keys wrap around at 10000, so every
	// goroutine writes to the same set of keys.
	storeBatch := func(count int) {
		for n := 0; n < count; n++ {
			shared.Store(n%10000, 1)
		}
	}
	pprof.ParallelBenchmark("test1", 64, time.Second, storeBatch)

	// Sample output:
	// name=test1 thread=32 duration=1s total=6708009 avg=4.772µs
	// name=test1 thread=64 duration=1s total=6883456 avg=9.3µs
}
=