Go High-Performance Programming: Don't Cache Large Amounts of Data in a Map
1. Background
While writing this code, I needed to notify every online user on a particular server. The user information was cached in a map, roughly as shown below. To do the notification I first acquired the read lock and then walked the map with for range. At the same time, users on this node were going online and offline fairly frequently, a few hundred QPS, and each of those requests takes the write lock. That is where the problem appeared: iterating a map with 300k entries took about 10ms, and every subsequent write request was blocked behind it.
```go
// Simple cache
type User struct {
	ID int64
	// xxxx: other fields omitted
}

type Cache struct {
	users map[int64]*User // keyed by user ID
	mu    sync.RWMutex    // guards users
}
```
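For context, the notification path looked roughly like the sketch below. This is a minimal reconstruction rather than the real production code; NotifyAll, Set, and the notify helper are names made up for illustration. The point is that the read lock is held for the entire scan, so every writer queues up behind it.

```go
// NotifyAll walks the whole cache under the read lock and pushes a message to
// every cached user. While this loop runs (about 10ms for 300k entries), every
// request that needs the write lock is queued behind the RWMutex.
func (c *Cache) NotifyAll(msg []byte) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	for _, u := range c.users {
		notify(u, msg)
	}
}

// Set is the write path (a user coming online) that competes with NotifyAll.
func (c *Cache) Set(u *User) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.users[u.ID] = u
}

// notify stands in for whatever actually pushes the message to one user.
func notify(u *User, msg []byte) {}
```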
So I took a look at the source of Go's map:
go/src/runtime/map.go at master · golang/go (github.com)
and found that iterating a Go map is considerably less efficient than iterating an array. Roughly, iteration works like this:
- Iterate over the new buckets:
  - Check whether the old bucket corresponding to this new bucket still holds data.
    - If it does, iterate only the entries in the old bucket that will be evacuated into this new bucket;
    - if not, iterate the new bucket itself.
  - Walk all cells in the bucket. Each bucket holds 8 cells; for every cell that has a key, read out the key and value.
- Move on to the next new bucket and repeat the steps above.
```go
type hmap struct {
	// Note: the format of the hmap is also encoded in cmd/compile/internal/reflectdata/reflect.go.
	// Make sure this stays in sync with the compiler's definition.
	count     int // # live cells == size of map. Must be first (used by len() builtin)
	flags     uint8
	B         uint8  // log_2 of # of buckets (can hold up to loadFactor * 2^B items)
	noverflow uint16 // approximate number of overflow buckets; see incrnoverflow for details
	hash0     uint32 // hash seed

	buckets    unsafe.Pointer // array of 2^B Buckets. may be nil if count==0.
	oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing
	nevacuate  uintptr        // progress counter for evacuation (buckets less than this have been evacuated)

	extra *mapextra // optional fields
}

type bmap struct {
	tophash [bucketCnt]uint8
}

func mapiterinit(t *maptype, h *hmap, it *hiter) {
	if raceenabled && h != nil {
		callerpc := getcallerpc()
		racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapiterinit))
	}
	it.t = t
	if h == nil || h.count == 0 {
		return
	}
	if unsafe.Sizeof(hiter{})/goarch.PtrSize != 12 {
		throw("hash_iter size incorrect") // see cmd/compile/internal/reflectdata/reflect.go
	}
	it.h = h

	// grab snapshot of bucket state
	it.B = h.B
	it.buckets = h.buckets
	if !t.Bucket.Pointers() {
		// Allocate the current slice and remember pointers to both current and old.
		// This preserves all relevant overflow buckets alive even if
		// the table grows and/or overflow buckets are added to the table
		// while we are iterating.
		h.createOverflow()
		it.overflow = h.extra.overflow
		it.oldoverflow = h.extra.oldoverflow
	}

	// decide where to start
	r := uintptr(rand())
	it.startBucket = r & bucketMask(h.B)
	it.offset = uint8(r >> h.B & (abi.MapBucketCount - 1))

	// iterator state
	it.bucket = it.startBucket

	// Remember we have an iterator.
	// Can run concurrently with another mapiterinit().
	if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator {
		atomic.Or8(&h.flags, iterator|oldIterator)
	}
	mapiternext(it)
}

func mapiternext(it *hiter) {
	h := it.h
	if raceenabled {
		callerpc := getcallerpc()
		racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapiternext))
	}
	if h.flags&hashWriting != 0 {
		fatal("concurrent map iteration and map write")
	}
	t := it.t
	bucket := it.bucket
	b := it.bptr
	i := it.i
	checkBucket := it.checkBucket

next:
	if b == nil {
		if bucket == it.startBucket && it.wrapped {
			// end of iteration
			it.key = nil
			it.elem = nil
			return
		}
		if h.growing() && it.B == h.B {
			// Iterator was started in the middle of a grow, and the grow isn't done yet.
			// If the bucket we're looking at hasn't been filled in yet (i.e. the old
			// bucket hasn't been evacuated) then we need to iterate through the old
			// bucket and only return the ones that will be migrated to this bucket.
			oldbucket := bucket & it.h.oldbucketmask()
			b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.BucketSize)))
			if !evacuated(b) {
				checkBucket = bucket
			} else {
				b = (*bmap)(add(it.buckets, bucket*uintptr(t.BucketSize)))
				checkBucket = noCheck
			}
		} else {
			b = (*bmap)(add(it.buckets, bucket*uintptr(t.BucketSize)))
			checkBucket = noCheck
		}
		bucket++
		if bucket == bucketShift(it.B) {
			bucket = 0
			it.wrapped = true
		}
		i = 0
	}
	for ; i < abi.MapBucketCount; i++ {
		offi := (i + it.offset) & (abi.MapBucketCount - 1)
		if isEmpty(b.tophash[offi]) || b.tophash[offi] == evacuatedEmpty {
			// TODO: emptyRest is hard to use here, as we start iterating
			// in the middle of a bucket. It's feasible, just tricky.
			continue
		}
		k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.KeySize))
		if t.IndirectKey() {
			k = *((*unsafe.Pointer)(k))
		}
		e := add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+uintptr(offi)*uintptr(t.ValueSize))
		if checkBucket != noCheck && !h.sameSizeGrow() {
			// Special case: iterator was started during a grow to a larger size
			// and the grow is not done yet. We're working on a bucket whose
			// oldbucket has not been evacuated yet. Or at least, it wasn't
			// evacuated when we started the bucket. So we're iterating
			// through the oldbucket, skipping any keys that will go
			// to the other new bucket (each oldbucket expands to two
			// buckets during a grow).
			if t.ReflexiveKey() || t.Key.Equal(k, k) {
				// If the item in the oldbucket is not destined for
				// the current new bucket in the iteration, skip it.
				hash := t.Hasher(k, uintptr(h.hash0))
				if hash&bucketMask(it.B) != checkBucket {
					continue
				}
			} else {
				// Hash isn't repeatable if k != k (NaNs). We need a
				// repeatable and randomish choice of which direction
				// to send NaNs during evacuation. We'll use the low
				// bit of tophash to decide which way NaNs go.
				// NOTE: this case is why we need two evacuate tophash
				// values, evacuatedX and evacuatedY, that differ in
				// their low bit.
				if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) {
					continue
				}
			}
		}
		if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) ||
			!(t.ReflexiveKey() || t.Key.Equal(k, k)) {
			// This is the golden data, we can return it.
			// OR
			// key!=key, so the entry can't be deleted or updated, so we can just return it.
			// That's lucky for us because when key!=key we can't look it up successfully.
			it.key = k
			if t.IndirectElem() {
				e = *((*unsafe.Pointer)(e))
			}
			it.elem = e
		} else {
			// The hash table has grown since the iterator was started.
			// The golden data for this key is now somewhere else.
			// Check the current hash table for the data.
			// This code handles the case where the key
			// has been deleted, updated, or deleted and reinserted.
			// NOTE: we need to regrab the key as it has potentially been
			// updated to an equal() but not identical key (e.g. +0.0 vs -0.0).
			rk, re := mapaccessK(t, h, k)
			if rk == nil {
				continue // key has been deleted
			}
			it.key = rk
			it.elem = re
		}
		it.bucket = bucket
		if it.bptr != b { // avoid unnecessary write barrier; see issue 14921
			it.bptr = b
		}
		it.i = i + 1
		it.checkBucket = checkBucket
		return
	}
	b = b.overflow(t)
	i = 0
	goto next
}
```
Array iteration, by contrast, is easy to understand, so I won't paste the source here: it finds the start of the array and scans straight through one contiguous block of memory. With 500k entries stored, iterating the array was roughly 10x faster than iterating the map, and the gap grows as the data set gets larger.
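A rough benchmark along these lines reproduces the gap. This is my own sketch of such a benchmark, not the exact code behind the numbers above; the package name, entry count, and the trimmed-down User type are placeholders.

```go
package cache

import "testing"

type benchUser struct {
	ID   int64
	Name string
}

const benchN = 500_000

func buildData() (map[int64]*benchUser, []*benchUser) {
	m := make(map[int64]*benchUser, benchN)
	s := make([]*benchUser, 0, benchN)
	for i := int64(0); i < benchN; i++ {
		u := &benchUser{ID: i}
		m[i] = u
		s = append(s, u)
	}
	return m, s
}

// BenchmarkMapRange walks every entry of the map once per iteration.
func BenchmarkMapRange(b *testing.B) {
	m, _ := buildData()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		count := 0
		for _, u := range m {
			if u != nil {
				count++
			}
		}
		_ = count
	}
}

// BenchmarkSliceRange does the same work over the contiguous slice.
func BenchmarkSliceRange(b *testing.B) {
	_, s := buildData()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		count := 0
		for _, u := range s {
			if u != nil {
				count++
			}
		}
		_ = count
	}
}
```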
2. Going Further
Since array iteration beats map iteration by this much, are there other performance costs as well?
I compared the GC cost of a []User against a map[int64]User holding the same number of entries, and found that with 500,000 entries the GC pass over the slice was roughly 5x faster than over the map:
- CPU: M1 Pro
- Memory: 32G
- Go map gc x 10 cost: 235.258125ms
- Slice gc x 10 cost: 43.631375ms
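The measurement can be reproduced with something like the sketch below: build the data set, then force the GC a fixed number of times and time it, since the mark phase has to scan whatever is live on the heap. This is my own reconstruction, not necessarily the original harness, and it uses pointer values as the cache above does.

```go
package main

import (
	"fmt"
	"runtime"
	"time"
)

type gcUser struct {
	ID   int64
	Name string
}

// gcCost forces the GC to run `rounds` times and returns the total wall time,
// which is dominated by scanning whatever is currently live on the heap.
func gcCost(rounds int) time.Duration {
	start := time.Now()
	for i := 0; i < rounds; i++ {
		runtime.GC()
	}
	return time.Since(start)
}

func main() {
	const n = 500_000

	// Build a map with n pointer entries and measure 10 forced GCs.
	m := make(map[int64]*gcUser, n)
	for i := int64(0); i < n; i++ {
		m[i] = &gcUser{ID: i}
	}
	fmt.Printf("Go map gc x 10 cost: %v\n", gcCost(10))
	runtime.KeepAlive(m)
	m = nil
	runtime.GC() // let the map be collected before the next measurement

	// Build a slice with the same entries and measure again.
	s := make([]*gcUser, 0, n)
	for i := int64(0); i < n; i++ {
		s = append(s, &gcUser{ID: i})
	}
	fmt.Printf("Slice gc x 10 cost: %v\n", gcCost(10))
	runtime.KeepAlive(s)
}
```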
3. The Rework
Clearly, caching a large amount of data directly in a map is expensive on both fronts: GC and iteration. So my plan was to combine a slice and a map into a structure that iterates as fast as a slice while keeping map-like insert and update performance:
```go
type KV[K comparable] struct {
	Key   K
	Value any
}

func (kv *KV[K]) GetKey() K {
	return kv.Key
}

type SliceMap[K comparable] struct {
	DataSlice []*KV[K]  // the actual entries, stored contiguously for fast iteration
	IndexMap  map[K]int // key -> index of that key's entry in DataSlice

	maxIndex         int
	maxMaxNoUseCount int
	maxNoUsePercent  float64
}
```
The slice holds the actual pointers, and the map stores each key's index into the slice. This gives us the slice's fast iteration and cheaper GC while keeping the map's fast insert, delete, and update. The obvious cost is higher memory usage.
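To make the idea concrete, the core operations can look roughly like the sketch below. This is my own illustration of the approach rather than the code from the repository; the constructor, the method names, and the swap-with-last deletion strategy are assumptions.

```go
// NewSliceMap builds an empty SliceMap (hypothetical constructor).
func NewSliceMap[K comparable]() *SliceMap[K] {
	return &SliceMap[K]{IndexMap: map[K]int{}}
}

// Set inserts or updates a key. Updates reuse the existing slot; inserts append.
func (sm *SliceMap[K]) Set(key K, value any) {
	if idx, ok := sm.IndexMap[key]; ok {
		sm.DataSlice[idx].Value = value
		return
	}
	sm.DataSlice = append(sm.DataSlice, &KV[K]{Key: key, Value: value})
	sm.IndexMap[key] = len(sm.DataSlice) - 1
}

// Get looks the key up through the index map, then reads the slice slot.
func (sm *SliceMap[K]) Get(key K) (any, bool) {
	idx, ok := sm.IndexMap[key]
	if !ok {
		return nil, false
	}
	return sm.DataSlice[idx].Value, true
}

// Delete swaps the victim with the last element so the slice stays dense,
// then fixes up the moved element's index.
func (sm *SliceMap[K]) Delete(key K) {
	idx, ok := sm.IndexMap[key]
	if !ok {
		return
	}
	last := len(sm.DataSlice) - 1
	moved := sm.DataSlice[last]
	sm.DataSlice[idx] = moved
	sm.IndexMap[moved.GetKey()] = idx
	sm.DataSlice[last] = nil // don't keep the deleted entry alive
	sm.DataSlice = sm.DataSlice[:last]
	delete(sm.IndexMap, key)
}

// Range iterates over the dense slice, which is the whole point:
// a linear scan over contiguous memory instead of walking buckets.
func (sm *SliceMap[K]) Range(f func(key K, value any) bool) {
	for _, kv := range sm.DataSlice {
		if !f(kv.Key, kv.Value) {
			return
		}
	}
}
```

The real implementation keeps counters such as maxMaxNoUseCount and maxNoUsePercent, which suggests it marks stale slots and compacts the slice lazily rather than swapping on every delete; see the repository for the actual strategy.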
You can find the code and documentation here: hswell/slicemap (github.com)
A star would be much appreciated.