# Go High-Performance Programming: Don't Use a Map to Cache Large Amounts of Data


1. Background

While writing some code, I (阿森) needed to notify every online user on a particular server. Those users' info was cached in a map, roughly as in the code below. To broadcast, I first take the read lock and then iterate with for range. But on this node users go online and offline fairly often, at a few hundred QPS, and every one of those events takes the write lock. That's where the problem showed up: ranging over a 300,000-entry map took about 10ms, and for that whole window all pending write requests were blocked.

```go
import "sync"

// User is the per-user data kept in the cache.
type User struct {
	ID int64
	// ... other user fields
}

// Cache is the simple map-based cache, guarded by an RWMutex.
type Cache struct {
	mu    sync.RWMutex
	users map[int64]*User
}
```
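
The broadcast path looked roughly like this (a simplified sketch of the pattern, not the production code): it holds the read lock for the full iteration, so every online/offline event that needs the write lock has to wait behind it.

```go
// NotifyAll visits every cached user while holding the read lock.
// With ~300k entries this single range takes about 10ms, and any
// Set call below is blocked for the whole duration.
func (c *Cache) NotifyAll(notify func(*User)) {
	c.mu.RLock()
	defer c.mu.RUnlock()
	for _, u := range c.users {
		notify(u)
	}
}

// Set is the typical write path taken by online/offline events.
func (c *Cache) Set(u *User) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.users[u.ID] = u
}
```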

So I went and read the Go map source code:

go/src/runtime/map.go at master · golang/go (github.com)

and found that iterating a Go map is noticeably less efficient than iterating a slice. Roughly, the iterator works like this:

1. Iterate over the new buckets:
    1. Check whether the old bucket that maps to this new bucket still holds data.
    2. If it does, iterate only the entries in that old bucket that will be migrated to this new bucket;
    3. if it does not, iterate the new bucket itself.
    4. Walk all the cells in the bucket. Each bucket holds 8 cells; for every cell that has a key, read out the key and value.
2. Move on to the next new bucket and repeat the steps above.

```go
type hmap struct {
	// Note: the format of the hmap is also encoded in cmd/compile/internal/reflectdata/reflect.go.
	// Make sure this stays in sync with the compiler's definition.
	count     int // # live cells == size of map.  Must be first (used by len() builtin)
	flags     uint8
	B         uint8  // log_2 of # of buckets (can hold up to loadFactor * 2^B items)
	noverflow uint16 // approximate number of overflow buckets; see incrnoverflow for details
	hash0     uint32 // hash seed

	buckets    unsafe.Pointer // array of 2^B Buckets. may be nil if count==0.
	oldbuckets unsafe.Pointer // previous bucket array of half the size, non-nil only when growing
	nevacuate  uintptr        // progress counter for evacuation (buckets less than this have been evacuated)

	extra *mapextra // optional fields
}

type bmap struct {
	tophash [bucketCnt]uint8
}

func mapiterinit(t *maptype, h *hmap, it *hiter) {
	if raceenabled && h != nil {
		callerpc := getcallerpc()
		racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapiterinit))
	}

	it.t = t
	if h == nil || h.count == 0 {
		return
	}

	if unsafe.Sizeof(hiter{})/goarch.PtrSize != 12 {
		throw("hash_iter size incorrect") // see cmd/compile/internal/reflectdata/reflect.go
	}
	it.h = h

	// grab snapshot of bucket state
	it.B = h.B
	it.buckets = h.buckets
	if !t.Bucket.Pointers() {
		// Allocate the current slice and remember pointers to both current and old.
		// This preserves all relevant overflow buckets alive even if
		// the table grows and/or overflow buckets are added to the table
		// while we are iterating.
		h.createOverflow()
		it.overflow = h.extra.overflow
		it.oldoverflow = h.extra.oldoverflow
	}

	// decide where to start
	r := uintptr(rand())
	it.startBucket = r & bucketMask(h.B)
	it.offset = uint8(r >> h.B & (abi.MapBucketCount - 1))

	// iterator state
	it.bucket = it.startBucket

	// Remember we have an iterator.
	// Can run concurrently with another mapiterinit().
	if old := h.flags; old&(iterator|oldIterator) != iterator|oldIterator {
		atomic.Or8(&h.flags, iterator|oldIterator)
	}

	mapiternext(it)
}

func mapiternext(it *hiter) {
	h := it.h
	if raceenabled {
		callerpc := getcallerpc()
		racereadpc(unsafe.Pointer(h), callerpc, abi.FuncPCABIInternal(mapiternext))
	}
	if h.flags&hashWriting != 0 {
		fatal("concurrent map iteration and map write")
	}
	t := it.t
	bucket := it.bucket
	b := it.bptr
	i := it.i
	checkBucket := it.checkBucket

next:
	if b == nil {
		if bucket == it.startBucket && it.wrapped {
			// end of iteration
			it.key = nil
			it.elem = nil
			return
		}
		if h.growing() && it.B == h.B {
			// Iterator was started in the middle of a grow, and the grow isn't done yet.
			// If the bucket we're looking at hasn't been filled in yet (i.e. the old
			// bucket hasn't been evacuated) then we need to iterate through the old
			// bucket and only return the ones that will be migrated to this bucket.
			oldbucket := bucket & it.h.oldbucketmask()
			b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.BucketSize)))
			if !evacuated(b) {
				checkBucket = bucket
			} else {
				b = (*bmap)(add(it.buckets, bucket*uintptr(t.BucketSize)))
				checkBucket = noCheck
			}
		} else {
			b = (*bmap)(add(it.buckets, bucket*uintptr(t.BucketSize)))
			checkBucket = noCheck
		}
		bucket++
		if bucket == bucketShift(it.B) {
			bucket = 0
			it.wrapped = true
		}
		i = 0
	}
	for ; i < abi.MapBucketCount; i++ {
		offi := (i + it.offset) & (abi.MapBucketCount - 1)
		if isEmpty(b.tophash[offi]) || b.tophash[offi] == evacuatedEmpty {
			// TODO: emptyRest is hard to use here, as we start iterating
			// in the middle of a bucket. It's feasible, just tricky.
			continue
		}
		k := add(unsafe.Pointer(b), dataOffset+uintptr(offi)*uintptr(t.KeySize))
		if t.IndirectKey() {
			k = *((*unsafe.Pointer)(k))
		}
		e := add(unsafe.Pointer(b), dataOffset+abi.MapBucketCount*uintptr(t.KeySize)+uintptr(offi)*uintptr(t.ValueSize))
		if checkBucket != noCheck && !h.sameSizeGrow() {
			// Special case: iterator was started during a grow to a larger size
			// and the grow is not done yet. We're working on a bucket whose
			// oldbucket has not been evacuated yet. Or at least, it wasn't
			// evacuated when we started the bucket. So we're iterating
			// through the oldbucket, skipping any keys that will go
			// to the other new bucket (each oldbucket expands to two
			// buckets during a grow).
			if t.ReflexiveKey() || t.Key.Equal(k, k) {
				// If the item in the oldbucket is not destined for
				// the current new bucket in the iteration, skip it.
				hash := t.Hasher(k, uintptr(h.hash0))
				if hash&bucketMask(it.B) != checkBucket {
					continue
				}
			} else {
				// Hash isn't repeatable if k != k (NaNs).  We need a
				// repeatable and randomish choice of which direction
				// to send NaNs during evacuation. We'll use the low
				// bit of tophash to decide which way NaNs go.
				// NOTE: this case is why we need two evacuate tophash
				// values, evacuatedX and evacuatedY, that differ in
				// their low bit.
				if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) {
					continue
				}
			}
		}
		if (b.tophash[offi] != evacuatedX && b.tophash[offi] != evacuatedY) ||
			!(t.ReflexiveKey() || t.Key.Equal(k, k)) {
			// This is the golden data, we can return it.
			// OR
			// key!=key, so the entry can't be deleted or updated, so we can just return it.
			// That's lucky for us because when key!=key we can't look it up successfully.
			it.key = k
			if t.IndirectElem() {
				e = *((*unsafe.Pointer)(e))
			}
			it.elem = e
		} else {
			// The hash table has grown since the iterator was started.
			// The golden data for this key is now somewhere else.
			// Check the current hash table for the data.
			// This code handles the case where the key
			// has been deleted, updated, or deleted and reinserted.
			// NOTE: we need to regrab the key as it has potentially been
			// updated to an equal() but not identical key (e.g. +0.0 vs -0.0).
			rk, re := mapaccessK(t, h, k)
			if rk == nil {
				continue // key has been deleted
			}
			it.key = rk
			it.elem = re
		}
		it.bucket = bucket
		if it.bptr != b { // avoid unnecessary write barrier; see issue 14921
			it.bptr = b
		}
		it.i = i + 1
		it.checkBucket = checkBucket
		return
	}
	b = b.overflow(t)
	i = 0
	goto next
}
```

Slice iteration, by contrast, is easy to reason about, so I won't paste the source here: the runtime simply finds the start of the backing array and scans the contiguous block of memory.

With 500,000 entries stored, iterating the slice was roughly 10x faster than iterating the map in my tests, and the gap grows as the data set gets bigger.
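
If you want to reproduce the comparison, a minimal benchmark sketch looks something like this (my own setup, not the exact code behind the numbers above; absolute results depend on hardware):

```go
package cache

import "testing"

// User is a minimal stand-in for the cached user type.
type User struct{ ID int64 }

const n = 500_000

func fillMap() map[int64]*User {
	m := make(map[int64]*User, n)
	for i := int64(0); i < n; i++ {
		m[i] = &User{ID: i}
	}
	return m
}

func fillSlice() []*User {
	s := make([]*User, 0, n)
	for i := int64(0); i < n; i++ {
		s = append(s, &User{ID: i})
	}
	return s
}

func BenchmarkMapRange(b *testing.B) {
	m := fillMap()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		var sum int64
		for _, u := range m { // hash-table walk: bucket by bucket, cell by cell
			sum += u.ID
		}
		_ = sum
	}
}

func BenchmarkSliceRange(b *testing.B) {
	s := fillSlice()
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		var sum int64
		for _, u := range s { // linear scan over contiguous memory
			sum += u.ID
		}
		_ = sum
	}
}
```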

2. Digging Deeper

Given that slice iteration beats map iteration by this much, are there other performance costs hiding here?

So I compared the GC cost of a []User and a map[int64]User holding the same number of entries,

and found that with 500,000 entries stored, GC on the slice was about 5x faster than GC on the map:

* cpu: M1 Pro
* memory: 32G
* Go map gc x 10 cost: 235.258125ms.
* Slice map gc x 10 cost: 43.631375ms.
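
The measurement can be reproduced with a small program along these lines (a sketch under my own assumptions, not the exact code that produced the numbers above; the timeGC helper is made up for illustration):

```go
package main

import (
	"fmt"
	"runtime"
	"time"
)

// User is a stand-in for the cached user record; the string field
// gives the GC pointers to chase in both containers.
type User struct {
	ID   int64
	Name string
}

// timeGC forces n collections and returns the total wall time.
func timeGC(n int) time.Duration {
	start := time.Now()
	for i := 0; i < n; i++ {
		runtime.GC()
	}
	return time.Since(start)
}

func main() {
	const size = 500_000

	// GC cost with the map live on the heap.
	m := make(map[int64]User, size)
	for i := int64(0); i < size; i++ {
		m[i] = User{ID: i, Name: "user"}
	}
	fmt.Printf("Go map gc x 10 cost: %v\n", timeGC(10))
	runtime.KeepAlive(m)
	m = nil
	runtime.GC() // drop the map before measuring the slice

	// GC cost with the slice live on the heap.
	s := make([]User, 0, size)
	for i := int64(0); i < size; i++ {
		s = append(s, User{ID: i, Name: "user"})
	}
	fmt.Printf("Slice gc x 10 cost: %v\n", timeGC(10))
	runtime.KeepAlive(s)
}
```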

3. The Rework

Clearly, when we cache a large amount of data directly in a map, both GC and iteration performance suffer badly.

So the plan was to combine a slice and a map: iteration as fast as a slice, with inserts and updates as cheap as a map.

```go
// KV is a single cached entry: the key plus the stored value.
type KV[K comparable] struct {
	Key   K
	Value any
}

func (kv *KV[K]) GetKey() K {
	return kv.Key
}

// SliceMap keeps the entries contiguous in DataSlice and maps each
// key to its slice index in IndexMap.
type SliceMap[K comparable] struct {
	DataSlice        []*KV[K]
	IndexMap         map[K]int
	maxIndex         int
	maxMaxNoUseCount int
	maxNoUsePercent  float64
}
```

The slice stores the actual pointers, and the map stores each key's index into the slice. That way we get the slice's fast iteration and cheaper GC while keeping the map's fast insert/delete/update. The trade-off is just as obvious: memory usage goes up.
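
As an illustration of the idea, the core operations could look roughly like this (my own sketch on top of the struct above, using a swap-with-last delete; the real repo has its own strategy, e.g. the maxNoUsePercent field suggests deferred compaction, so treat the method names and details here as assumptions):

```go
// NewSliceMap returns an empty SliceMap ready for use.
func NewSliceMap[K comparable]() *SliceMap[K] {
	return &SliceMap[K]{IndexMap: make(map[K]int)}
}

// Set inserts or updates a key. O(1) amortized.
func (s *SliceMap[K]) Set(key K, value any) {
	if i, ok := s.IndexMap[key]; ok {
		s.DataSlice[i].Value = value
		return
	}
	s.DataSlice = append(s.DataSlice, &KV[K]{Key: key, Value: value})
	s.IndexMap[key] = len(s.DataSlice) - 1
}

// Get looks up a key through the index map. O(1).
func (s *SliceMap[K]) Get(key K) (any, bool) {
	i, ok := s.IndexMap[key]
	if !ok {
		return nil, false
	}
	return s.DataSlice[i].Value, true
}

// Delete removes a key by swapping the last entry into its slot,
// keeping the slice dense so iteration stays a plain linear scan.
func (s *SliceMap[K]) Delete(key K) {
	i, ok := s.IndexMap[key]
	if !ok {
		return
	}
	last := len(s.DataSlice) - 1
	s.DataSlice[i] = s.DataSlice[last]
	s.IndexMap[s.DataSlice[i].GetKey()] = i
	s.DataSlice = s.DataSlice[:last]
	delete(s.IndexMap, key)
}

// Range iterates the dense slice, which is where the speedup
// over a plain map comes from.
func (s *SliceMap[K]) Range(fn func(key K, value any) bool) {
	for _, kv := range s.DataSlice {
		if !fn(kv.Key, kv.Value) {
			return
		}
	}
}
```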

You can find the code and docs here: hswell/slicemap (github.com)

A like would be much appreciated.
