Go的数据结构与实现【Set】

介绍

Set是值的集合，可以迭代这些值、添加新值、删除值并清除集合、获取集合大小并检查集合是否包含值，集合中的一个值只存储一次，不能重复。

本文代码地址为go-store

简单实现

这是集合的一个简单实现，还不是并发安全的，为了简单和容易理解而没有引入锁资源。

set.go

go 复制代码

package set

type (
   // T is the element type stored in a Set.
   T int

   // Set is a collection of distinct T values. This version uses no
   // locking, so it is not safe for concurrent use.
   Set struct {
      // sets has one key per member; membership is decided by key presence.
      sets map[T]bool
   }
)

// Add inserts t into the Set, allocating the backing map on first use.
// It returns true when t was newly inserted and false when t was already
// a member.
func (s *Set) Add(t T) bool {
   if s.sets == nil {
      s.sets = map[T]bool{}
   }

   if _, exists := s.sets[t]; exists {
      return false
   }

   s.sets[t] = true
   return true
}

// Clear empties the Set by swapping in a fresh, empty backing map.
func (s *Set) Clear() {
   s.sets = map[T]bool{}
}

// Delete removes t from the Set. It returns true when t was a member
// (and has now been removed) and false when t was not in the Set.
func (s *Set) Delete(t T) bool {
   if _, found := s.sets[t]; !found {
      return false
   }

   delete(s.sets, t)
   return true
}

// Contains reports whether t is currently a member of the Set.
func (s *Set) Contains(t T) bool {
   if _, present := s.sets[t]; present {
      return true
   }
   return false
}

// All returns every item stored in the Set as a newly allocated slice.
// The order is unspecified because it follows map iteration order.
// An empty Set yields a nil slice.
func (s *Set) All() []T {
   if len(s.sets) == 0 {
      return nil
   }

   // The final length is known, so allocate once instead of growing
   // the slice through repeated appends.
   ret := make([]T, 0, len(s.sets))
   for t := range s.sets {
      ret = append(ret, t)
   }

   return ret
}

// Size reports how many items the Set currently holds.
func (s *Set) Size() int {
   count := len(s.sets)
   return count
}

单元测试

这是上面代码的单元测试，它详细解释了如何使用它，以及任何操作的预期结果：

set_test.go

go 复制代码

package set

import "testing"

// Fixture values shared by the Set tests.
var t1, t2, t3 T = 1, 2, 3

func InitSet() *Set {
   set := &Set{}
   set.Add(t1)
   set.Add(t2)
   return set
}

// TestSet_Add checks that Add reports false for a value already in the
// set and true for a value that is new.
func TestSet_Add(t *testing.T) {
   set := InitSet()

   if added := set.Add(t1); added {
      t.Errorf("There is already %d in set!", t1)
   }

   if added := set.Add(t3); !added {
      t.Errorf("There should be %d in set!", t3)
   }
}

// TestSet_Clear checks that a populated set reports size 0 after Clear.
func TestSet_Clear(t *testing.T) {
   set := InitSet()
   set.Clear()

   size := set.Size()
   if size == 0 {
      return
   }
   t.Errorf("wrong count, expected 0 and got %d", size)
}

// TestSet_Delete checks that Delete reports true for a member and
// false for a value that was never added.
func TestSet_Delete(t *testing.T) {
   set := InitSet()

   if removed := set.Delete(t1); !removed {
      t.Errorf("There should be %d in set!", t1)
   }

   if removed := set.Delete(t3); removed {
      t.Errorf("There should not be %d in set!", t3)
   }
}

// TestSet_Contains checks that Contains is true for both fixture
// members and false for a value outside the set.
func TestSet_Contains(t *testing.T) {
   set := InitSet()

   // Both fixture members must be reported as present.
   for _, member := range []T{t1, t2} {
      if !set.Contains(member) {
         t.Errorf("There should be %d in set!", member)
      }
   }

   if set.Contains(t3) {
      t.Errorf("There should not be %d in set!", t3)
   }
}

// TestSet_All checks that All returns exactly the two fixture members.
//
// All builds its result by ranging over a map, so the order of the
// returned slice is not deterministic. The check is therefore done by
// membership rather than by position.
func TestSet_All(t *testing.T) {
   set := InitSet()
   items := set.All()
   if len(items) != 2 {
      // Stop here: the membership check below assumes two items.
      t.Fatalf("wrong count, expected 2 and got %d", len(items))
   }

   seen := make(map[T]bool, len(items))
   for _, item := range items {
      seen[item] = true
   }
   if !seen[t1] || !seen[t2] {
      t.Errorf("There should be %d and %d in set!", t1, t2)
   }
}

// TestSet_Size checks that Size tracks the member count as one item is
// added and then every item is deleted in turn.
func TestSet_Size(t *testing.T) {
   set := InitSet()
   if got := set.Size(); got != 2 {
      t.Errorf("wrong count, expected 2 and got %d", got)
   }

   set.Add(t3)
   if got := set.Size(); got != 3 {
      t.Errorf("wrong count, expected 3 and got %d", got)
   }

   set.Delete(t3)
   if got := set.Size(); got != 2 {
      t.Errorf("wrong count, expected 2 and got %d", got)
   }

   set.Delete(t2)
   if got := set.Size(); got != 1 {
      t.Errorf("wrong count, expected 1 and got %d", got)
   }

   set.Delete(t1)
   if got := set.Size(); got != 0 {
      t.Errorf("wrong count, expected 0 and got %d", got)
   }
}

并发安全

第一个版本不是并发安全的，因为一个goroutine可能将一个值添加到集合中，而另一个goroutine正在获取集合列表或大小。

以下代码在数据结构中添加了一个sync.RWMutex，使其并发安全。实现非常简单：在修改集合的方法（Add、Clear、Delete）中添加Lock()和defer Unlock()，在只读方法（Contains、All、Size）中添加RLock()和defer RUnlock()。上面的测试无需任何修改即可在此实现上运行通过。

go 复制代码

package set

import (
   "sync"
)

type (
   // T is the element type stored in a Set.
   T int

   // Set is a collection of distinct T values guarded by an embedded
   // sync.RWMutex.
   Set struct {
      // RWMutex is embedded, so its Lock/Unlock/RLock/RUnlock methods
      // are promoted onto Set.
      sync.RWMutex
      // sets has one key per member; membership is decided by key presence.
      sets map[T]bool
   }
)

// Add inserts t into the Set while holding the write lock, allocating
// the backing map on first use. It returns true when t was newly
// inserted and false when t was already a member.
func (s *Set) Add(t T) bool {
   s.Lock()
   defer s.Unlock()

   if s.sets == nil {
      s.sets = map[T]bool{}
   }

   if _, exists := s.sets[t]; exists {
      return false
   }

   s.sets[t] = true
   return true
}

// Clear empties the Set by swapping in a fresh, empty backing map while
// holding the write lock.
func (s *Set) Clear() {
   s.Lock()
   defer s.Unlock()

   s.sets = map[T]bool{}
}

// Delete removes t from the Set while holding the write lock. It
// returns true when t was a member (and has now been removed) and false
// when t was not in the Set.
func (s *Set) Delete(t T) bool {
   s.Lock()
   defer s.Unlock()

   if _, found := s.sets[t]; !found {
      return false
   }

   delete(s.sets, t)
   return true
}

// Contains reports whether t is currently a member of the Set. The
// lookup runs under the read lock.
func (s *Set) Contains(t T) bool {
   s.RLock()
   defer s.RUnlock()

   if _, present := s.sets[t]; present {
      return true
   }
   return false
}

// All returns every item stored in the Set as a newly allocated slice,
// collected under the read lock. The order is unspecified because it
// follows map iteration order. An empty Set yields a nil slice.
func (s *Set) All() []T {
   s.RLock()
   defer s.RUnlock()

   if len(s.sets) == 0 {
      return nil
   }

   // The final length is known, so allocate once instead of growing
   // the slice through repeated appends.
   ret := make([]T, 0, len(s.sets))
   for t := range s.sets {
      ret = append(ret, t)
   }

   return ret
}

// Size reports how many items the Set currently holds, read under the
// read lock.
func (s *Set) Size() int {
   s.RLock()
   defer s.RUnlock()

   count := len(s.sets)
   return count
}

// Union returns a new Set containing every element that is in s, in t,
// or in both. Neither operand is modified.
//
// Each operand is read under its own read lock, and the two locks are
// taken one after the other rather than together.
func (s *Set) Union(t *Set) *Set {
   // The result map must be allocated up front: writing to the nil map
   // of a zero-value Set would panic.
   ret := &Set{sets: make(map[T]bool)}

   s.RLock()
   for i := range s.sets {
      ret.sets[i] = true
   }
   s.RUnlock()

   t.RLock()
   for i := range t.sets {
      // Re-assigning an existing key is harmless, so no presence check.
      ret.sets[i] = true
   }
   t.RUnlock()

   return ret
}

添加更多集合操作

Set还可以通过实现一些常见的数学集合操作得到更多改进：并集、交集、差集和子集。

并集

go 复制代码

// Union returns a new Set containing every element that is in s, in t,
// or in both. Neither operand is modified.
//
// Each operand is read under its own read lock, and the two locks are
// taken one after the other rather than together.
func (s *Set) Union(t *Set) *Set {
   // The result map must be allocated up front: writing to the nil map
   // of a zero-value Set would panic.
   ret := &Set{sets: make(map[T]bool)}

   s.RLock()
   for i := range s.sets {
      ret.sets[i] = true
   }
   s.RUnlock()

   t.RLock()
   for i := range t.sets {
      // Re-assigning an existing key is harmless, so no presence check.
      ret.sets[i] = true
   }
   t.RUnlock()

   return ret
}

单元测试：

go 复制代码

// TestSet_Union checks that the union of {t1, t2} and {t1, t3} has
// three members and that both operands keep their two members.
func TestSet_Union(t *testing.T) {
   left := InitSet(t1, t2)
   right := InitSet(t1, t3)

   merged := left.Union(right)
   if len(merged.All()) != 3 {
      t.Errorf("wrong count, expected 3 and got %d", merged.Size())
   }

   // The operands must come back unchanged.
   if len(left.All()) != 2 {
      t.Errorf("wrong count, expected 2 and got %d", left.Size())
   }
   if len(right.All()) != 2 {
      t.Errorf("wrong count, expected 2 and got %d", right.Size())
   }
}

交集

go 复制代码

// Intersection returns a new Set holding the elements that are members
// of both s and t. Neither operand is modified.
//
// Both operands stay read-locked for the whole computation.
// NOTE(review): when t is the same Set as s, the same RWMutex is
// read-locked twice by one goroutine; the sync.RWMutex documentation
// warns that recursive read locking can deadlock if a writer is
// waiting. Confirm that callers never pass the receiver itself.
func (s *Set) Intersection(t *Set) *Set {
   s.RLock()
   t.RLock()
   defer s.RUnlock()
   defer t.RUnlock()

   common := make(map[T]bool)
   for item := range t.sets {
      if _, shared := s.sets[item]; shared {
         common[item] = true
      }
   }

   return &Set{sets: common}
}

单元测试：

go 复制代码

// TestSet_Intersection checks that {t1, t2} and {t1, t3} intersect in
// exactly one member and that both operands keep their two members.
func TestSet_Intersection(t *testing.T) {
   left := InitSet(t1, t2)
   right := InitSet(t1, t3)

   shared := left.Intersection(right)
   if len(shared.All()) != 1 {
      t.Errorf("wrong count, expected 1 and got %d", shared.Size())
   }

   // The operands must come back unchanged.
   if len(left.All()) != 2 {
      t.Errorf("wrong count, expected 2 and got %d", left.Size())
   }
   if len(right.All()) != 2 {
      t.Errorf("wrong count, expected 2 and got %d", right.Size())
   }
}

差集

go 复制代码

// Difference returns a new Set holding the elements of s that are not
// members of t (s minus t). Neither operand is modified.
//
// The receiver is the left-hand operand, matching Subset, where
// s.Subset(t) asks whether s is contained in t.
//
// Both operands stay read-locked for the whole computation.
// NOTE(review): when t is the same Set as s, the same RWMutex is
// read-locked twice by one goroutine; the sync.RWMutex documentation
// warns that recursive read locking can deadlock if a writer is
// waiting. Confirm that callers never pass the receiver itself.
func (s *Set) Difference(t *Set) *Set {
   ret := &Set{sets: make(map[T]bool)}

   s.RLock()
   t.RLock()
   defer s.RUnlock()
   defer t.RUnlock()

   // Walk the receiver and keep whatever t lacks.
   for i := range s.sets {
      if _, ok := t.sets[i]; !ok {
         ret.sets[i] = true
      }
   }

   return ret
}

单元测试：

go 复制代码

// TestSet_Difference checks that the difference of {t1, t2} and
// {t1, t3} has exactly one member and that both operands keep their two
// members.
func TestSet_Difference(t *testing.T) {
   left := InitSet(t1, t2)
   right := InitSet(t1, t3)

   rest := left.Difference(right)
   if len(rest.All()) != 1 {
      t.Errorf("wrong count, expected 1 and got %d", rest.Size())
   }

   // The operands must come back unchanged.
   if len(left.All()) != 2 {
      t.Errorf("wrong count, expected 2 and got %d", left.Size())
   }
   if len(right.All()) != 2 {
      t.Errorf("wrong count, expected 2 and got %d", right.Size())
   }
}

子集

go 复制代码

// Subset reports whether every element of s is also a member of t,
// i.e. whether s is a subset of t. An empty s is a subset of any t.
//
// Both operands stay read-locked for the whole check.
// NOTE(review): when t is the same Set as s, the same RWMutex is
// read-locked twice by one goroutine; the sync.RWMutex documentation
// warns that recursive read locking can deadlock if a writer is
// waiting. Confirm that callers never pass the receiver itself.
func (s *Set) Subset(t *Set) bool {
   s.RLock()
   defer s.RUnlock()
   t.RLock()
   defer t.RUnlock()

   for member := range s.sets {
      if _, found := t.sets[member]; found {
         continue
      }
      // One element missing from t is enough to decide.
      return false
   }

   return true
}

单元测试：

go 复制代码

// TestSet_Subset checks Subset in both directions: {t1, t3} is not a
// subset of {t1, t2}, while {t1} is.
func TestSet_Subset(t *testing.T) {
   set1 := InitSet(t1, t2)
   set2 := InitSet(t1, t3)

   // set2.Subset(set1) asks whether set2 is contained in set1.
   ret := set2.Subset(set1)
   if ret {
      t.Errorf("(t1, t3) is not the subset of (t1, t2), but got %t", ret)
   }

   set3 := InitSet(t1)
   ret = set3.Subset(set1)
   if !ret {
      t.Errorf("(t1) is the subset of (t1, t2), but got %t", ret)
   }
}