unordered_set/map封装实现
- [1. 源码及框架分析](#1. 源码及框架分析)
- [2 模拟实现unordered_map和unordered_set](#2 模拟实现unordered_map和unordered_set)
-
- [2.1 实现出复⽤哈希表的框架,并⽀持insert](#2.1 实现出复⽤哈希表的框架,并⽀持insert)
- [3 ⽀持iterator的实现](#3 ⽀持iterator的实现)
- [4 map⽀持[]](#4 map⽀持[])
- [5 unordered_set/map模拟实现](#5 unordered_set/map模拟实现)

1. 源码及框架分析
SGI-STL30版本源代码中没有unordered_map和unordered_set,SGI-STL30版本是C++11之前的STL版本,这两个容器是C++11之后才更新的。但是SGI-STL30实现了哈希表,只容器的名字是hash_map和hash_set,他是作为⾮标准的容器出现的,⾮标准是指⾮C++标准规定必须实现的。
cpp
// stl_hash_set
template <class Value, class HashFcn = hash<Value>,
class EqualKey = equal_to<Value>,
class Alloc = alloc>
class hash_set
{
private:
typedef hashtable<Value, Value, HashFcn, identity<Value>,
EqualKey, Alloc> ht;
ht rep;
public:
typedef typename ht::key_type key_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
typedef typename ht::const_iterator iterator;
typedef typename ht::const_iterator const_iterator;
hasher hash_funct() const { return rep.hash_funct(); }
key_equal key_eq() const { return rep.key_eq(); }
};
// stl_hash_map
template <class Key, class T, class HashFcn = hash<Key>,
class EqualKey = equal_to<Key>,
class Alloc = alloc>
class hash_map
{
private:
typedef hashtable<pair<const Key, T>, Key, HashFcn,
select1st<pair<const Key, T> >, EqualKey, Alloc> ht;
ht rep;
public:
typedef typename ht::key_type key_type;
typedef T data_type;
typedef T mapped_type;
typedef typename ht::value_type value_type;
typedef typename ht::hasher hasher;
typedef typename ht::key_equal key_equal;
typedef typename ht::iterator iterator;
typedef typename ht::const_iterator const_iterator;
};
// stl_hashtable.h
template <class Value, class Key, class HashFcn,
class ExtractKey, class EqualKey,
class Alloc>
class hashtable {
public:
typedef Key key_type;
typedef Value value_type;
typedef HashFcn hasher;
typedef EqualKey key_equal;
private:
hasher hash;
key_equal equals;
ExtractKey get_key;
typedef __hashtable_node<Value> node;
vector<node*,Alloc> buckets;
size_type num_elements;
public:
typedef __hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey,
Alloc> iterator;
pair<iterator, bool> insert_unique(const value_type& obj);
const_iterator find(const key_type& key) const;
};
template <class Value>
struct __hashtable_node
{
__hashtable_node* next;
Value val;
};
•通过源码可以看到,结构上hash_map和hash_set跟map和set的完
全类似,复⽤同⼀个hashtable实现key和key/value结构,hash_set传给hash_table的是两个 key,hash_map传给hash_table的是pair<const key, value>
• 需要注意的是源码⾥⾯跟map/set源码类似,命名⻛格⽐较乱,这⾥⽐map和set还乱,hash_set模板参数居然⽤的Value命名,hash_map⽤的是Key和T命名。
2 模拟实现unordered_map和unordered_set
2.1 实现出复⽤哈希表的框架,并⽀持insert
•参考源码框架,unordered_map和unordered_set复⽤之前我们实现的哈希表。
•我们这⾥相⽐源码调整⼀下,key参数就⽤K,value参数就⽤V,哈希表中的数据类型,我们使⽤ T。
•其次跟map和set相⽐⽽⾔unordered_map和unordered_set的模拟实现类结构更复杂⼀点,但是⼤框架和思路是完全类似的。因为HashTable实现了泛型不知道T参数导致是K,还是pair<K, V>,那么insert内部进⾏插⼊时要⽤K对象转换成整形取模和K⽐较相等,因为pair的value不参与计算取模,且默认⽀持的是key和value⼀起⽐较相等,我们需要时的任何时候只需要⽐较K对象,所以我们在unordered_map和unordered_set层分别实现⼀个MapKeyOfT和SetKeyOfT的仿函数传给HashTable的KeyOfT,然后HashTable中通过KeyOfT仿函数取出T类型对象中的K对象,再转换成整形取模和K⽐较相等,具体细节参考如下代码实现。
cpp
//unordered_map
template<class K, class T, class Hashturn = Hashturn<K>>
class unordered_map
{
struct KeyofT
{
const K& operator()(const pair<K, T>& kv)
{
return kv.first;
}
};
public:
bool insert(const pair<K, T>& kv)
{
return _ht.insert(kv);
}
private:
hash_bucket<K, pair<const K, T>, KeyofT, Hashturn> _ht;
};
cpp
//unordered_set
template<class K,class Hashturn = Hashturn<K>>
class unordered_set
{
struct KeyofT
{
const K& operator()(const K& kv)
{
return kv;
}
};
public:
bool insert(const K& kv)
{
return _ht.insert(kv);
}
private:
hash_bucket<K, const K, KeyofT, Hashturn> _ht;
};
cpp
//hash_bucket
inline unsigned long __stl_next_prime(unsigned long n)
{
// Note: assumes long is at least 32 bits.
static const int __stl_num_primes = 28;
static const unsigned long __stl_prime_list[__stl_num_primes] =
{
53, 97, 193, 389, 769,
1543, 3079, 6151, 12289, 24593,
49157, 98317, 196613, 393241, 786433,
1572869, 3145739, 6291469, 12582917, 25165843,
50331653, 100663319, 201326611, 402653189, 805306457,
1610612741, 3221225473, 4294967291
};
const unsigned long* first = __stl_prime_list;
const unsigned long* last = __stl_prime_list +
__stl_num_primes;
const unsigned long* pos = lower_bound(first, last, n);
return pos == last ? *(last - 1) : *pos;
}
template<class T>
struct Buket_Node
{
T _data;
Buket_Node<T>* _next;
Buket_Node(const T& data)
:_data(data)
,_next(nullptr)
{}
};
//如果要支持Key的比较,那么就需要仿函数来实现,如string 映射,在映射到哈希表中
//默认的哈希仿函数,float,负数,转为无符号整形
template<class K>
struct Hashturn
{
size_t operator()(const K& key)
{
return (size_t)key;
}
};
//如果是sring类型,那么我们就可以单独的给string,建立一个仿函数实现第一层映射
//走特化
template<>
struct Hashturn<string>
{
size_t operator()(const string& key)
{
size_t m = 0;
//把ASCALL码值加起来
for (auto& e : key)
{
//用ASCALL来实现,字符转数字
m += e;
}
return m;
}
};
template<class K, class T, class KeyofT, class Hashturn>
class hash_bucket;
template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
struct Hash_Iterator
{
typedef Buket_Node<T> Node;
typedef hash_bucket<K, T, KeyOfT, Hash> HT;
typedef Hash_Iterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;
Node* _node;
const HT* _ht;
Hash_Iterator(Node* node, const HT* ht)
:_node(node)
, _ht(ht)
{}
Ref operator*()
{
return _node->_data;
}
Ptr operator->()
{
return &_node->_data;
}
bool operator!=(const Self& s)
{
return _node != s._node;
}
// 16:46
Self& operator++()
{
if (_node->_next)
{
// 当前桶还有数据,走到当前桶下一个节点
_node = _node->_next;
}
else
{
// 当前桶走完了,找下一个不为空的桶
KeyOfT kot;
Hash hash;
size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();
++hashi;
while (hashi < _ht->_tables.size())
{
_node = _ht->_tables[hashi];
if (_node)
break;
else
++hashi;
}
// 所有桶都走完了,end()给的空标识的_node
if (hashi == _ht->_tables.size())
{
_node = nullptr;
}
}
return *this;
}
};
template<class K,class T,class KeyofT,class Hashturn>
class hash_bucket
{
template<class K, class T, class ref, class ptr, class KeyofT, class Hashturn>
friend struct Hash_Iterator;
typedef Buket_Node<T> Node;
public:
typedef Hash_Iterator<K, T, T&, T*, KeyofT, Hashturn> Iterator;
typedef Hash_Iterator<K, T, const T&, const T*, KeyofT, Hashturn> Const_Iterator;
//Iterator begin()
//{
// if()
// for (int i = 0;i < _tables.size();i++)
// {
// Node* cur = _tables[i];
// if (cur)
// {
// return Iterator(cur, this);
// }
// }
//}
hash_bucket()
:_n(0)
, _tables(__stl_next_prime(0))
{}
Iterator begin()
{
if (_n == 0)
{
return Iterator(nullptr, this);
}
for (int i = 0;i < _tables.size();i++)
{
Node* cur = _tables[i];
if (cur)
{
return Iterator(cur, this);
}
}
}
Iterator end()
{
return Iterator(nullptr, this);
}
Const_Iterator end() const
{
return Const_Iterator(nullptr, this);
}
Const_Iterator begin()const
{
if (_n == 0)
{
return Const_Iterator(nullptr, this);
}
for (int i = 0;i < _tables.size();i++)
{
Node* cur = _tables[i];
if (cur)
return Const_Iterator(cur, this);
}
}
pair<Iterator, bool> insert(const T& data)
{
KeyofT kot;
Hashturn hash;
//判断负载因子
if (1 == (_n / _tables.size()))
{
Iterator it = Find(kot(data));
if (it != end())
return { it, false };
vector<Node*> newtables(__stl_next_prime(_tables.size() + 1));
for (int i = 0;i < _tables.size();i++)
{
Node* cur = _tables[i];
while (cur)
{
Node* next = cur->_next;
//头插
size_t hashi = hash(kot(cur->_data)) % newtables.size();
cur->_next = newtables[hashi];
newtables[hashi] = cur;
cur = next;
}
_tables[i] = nullptr;
}
_tables.swap(newtables);
}
size_t hashi = hash(kot(data)) % _tables.size();
// 头插
Node* newnode = new Node(data);
newnode->_next = _tables[hashi];
_tables[hashi] = newnode;
++_n;
return { Iterator(newnode, this), false };
}
Iterator Find(const K& key)
{
KeyofT kot;
Hashturn hash;
size_t hashi = hash(key) % _tables.size();
Node* cur = _tables[hashi];
while (cur)
{
if (kot(cur->_data) == key)
{
return Iterator(cur, this);
}
}
return Iterator(nullptr, this);
}
bool erase(const K& key)
{
KeyofT kot;
Hashturn hash;
Node* prev = nullptr;
size_t hashi = hash(key) % _tables.size();
Node* cur = _tables[hashi];
while (cur)
{
if (kot(cur->_data) == key)
{
if (prev)
{
//prev不为空
prev->_next = cur->_next;
}
else
{
//prev为空
_tables[hashi] = prev;
}
delete cur;
_n--;
return true;
}
else
{
prev = cur;
cur = cur->_next;
}
}
return false;
}
private:
size_t _n = 0;
vector<Node*> _tables;
};
3 ⽀持iterator的实现
terator核⼼源代码:
cpp
template <class Value, class Key, class HashFcn,
class ExtractKey, class EqualKey, class Alloc>
struct __hashtable_iterator {
typedef hashtable<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc>
hashtable;
typedef __hashtable_iterator<Value, Key, HashFcn,
ExtractKey, EqualKey, Alloc>
iterator;
typedef __hashtable_const_iterator<Value, Key, HashFcn,
ExtractKey, EqualKey, Alloc>
const_iterator;
typedef __hashtable_node<Value> node;
typedef forward_iterator_tag iterator_category;
typedef Value value_type;
node* cur;
hashtable* ht;
__hashtable_iterator(node* n, hashtable* tab) : cur(n), ht(tab) {}
__hashtable_iterator() {}
reference operator*() const { return cur->val; }
#ifndef __SGI_STL_NO_ARROW_OPERATOR
pointer operator->() const { return &(operator*()); }
#endif /* __SGI_STL_NO_ARROW_OPERATOR */
iterator& operator++();
iterator operator++(int);
bool operator==(const iterator& it) const { return cur == it.cur; }
bool operator!=(const iterator& it) const { return cur != it.cur; }
};
template <class V, class K, class HF, class ExK, class EqK, class A>
__hashtable_iterator<V, K, HF, ExK, EqK, A>&
__hashtable_iterator<V, K, HF, ExK, EqK, A>::operator++()
{
const node* old = cur;
cur = cur->next;
if (!cur) {
size_type bucket = ht->bkt_num(old->val);
while (!cur && ++bucket < ht->buckets.size())
cur = ht->buckets[bucket];
}
return *this;
}
模拟实现iterator实现思路分析:
•iterator实现的⼤框架跟list的iterator思路是⼀致的,⽤⼀个类型封装结点的指针,再通过重载运算符实现,迭代器像指针⼀样访问的⾏为,要注意的是哈希表的迭代器是单向迭代器。
•这⾥的难点是operator++的实现。iterator中有⼀个指向结点的指针,如果当前桶下⾯还有结点,则结点的指针指向下⼀个结点即可。如果当前桶⾛完了,则需要想办法计算找到下⼀个桶。这⾥的难点是反⽽是结构设计的问题,参考上⾯的源码,我们可以看到iterator中除了有结点的指针,还有哈希表对象的指针,这样当前桶⾛完了,要计算下⼀个桶就相对容易多了,⽤key值计算出当前桶位置,依次往后找下⼀个不为空的桶即可。
• begin()返回第⼀个桶中第⼀个节点指针构造的迭代器,这⾥end()返回迭代器可以⽤空表⽰。
•unordered_set的iterator也不⽀持修改,我们把unordered_set的第⼆个模板参数改成const K即可,HashTable<K, const K, SetKeyOfT, Hash> _ht;
•unordered_map的iterator不⽀持修改key但是可以修改value,我们把unordered_map的第⼆个模板参数pair的第⼀个参数改成constK即可, HashTable<K, pair<const K, V>,MapKeyOfT, Hash> _ht;
•⽀持完整的迭代器还有很多细节需要修改,具体参考下⾯题的代码。
【说明】:
在这里我们用链地址法来实现哈希表

模拟代码实现:
要点1:哈希的迭代器与list迭代器相似
要点2:哈希表的迭代器是单向迭代器
要点3:哈希表的迭代器的成员有哈希结点、和哈希表。
cpp
template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
struct Hash_Iterator
{
typedef Buket_Node<T> Node;
typedef hash_bucket<K, T, KeyOfT, Hash> HT;
typedef Hash_Iterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;
Node* _node;
const HT* _ht;
Hash_Iterator(Node* node, const HT* ht)
:_node(node)
, _ht(ht)
{}
Ref operator*()
{
return _node->_data;
}
Ptr operator->()
{
return &_node->_data;
}
bool operator!=(const Self& s)
{
return _node != s._node;
}
// 16:46
Self& operator++()
{
if (_node->_next)
{
// 当前桶还有数据,走到当前桶下一个节点
_node = _node->_next;
}
else
{
// 当前桶走完了,找下一个不为空的桶
KeyOfT kot;
Hash hash;
size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();
++hashi;
while (hashi < _ht->_tables.size())
{
_node = _ht->_tables[hashi];
if (_node)
break;
else
++hashi;
}
// 所有桶都走完了,end()给的空标识的_node
if (hashi == _ht->_tables.size())
{
_node = nullptr;
}
}
return *this;
}
};
4 map⽀持[]
• unordered_map要⽀持[]主要需要修改insert返回值⽀持,修改HashTable中的insert返回值为pair<Iterator, bool> Insert(const T& data)
•有了insert⽀持[]实现就很简单了,具体参考下⾯代码实现
cpp
T& operator[](const K& key)
{
pair<iterator, bool> ret = insert({ key, T() });
return ret.first->second;
}
5 unordered_set/map模拟实现
unordered_set
cpp
template<class K,class Hashturn = Hashturn<K>>
class unordered_set
{
struct KeyofT
{
const K& operator()(const K& kv)
{
return kv;
}
};
public:
typedef typename hash_bucket<K,const K,KeyofT, Hashturn>::Iterator iterator;
typedef typename hash_bucket<K,const K,KeyofT, Hashturn>::Const_Iterator const_iterator;
iterator begin()
{
return _ht.begin();
}
iterator end()
{
return _ht.end();
}
const_iterator begin() const
{
return _ht.begin();
}
const_iterator end() const
{
return _ht.end();
}
pair<iterator, bool> insert(const K& kv)
{
return _ht.insert(kv);
}
iterator Find(const K& key)
{
return _ht.Find(key);
}
bool Erase(const K& key)
{
return _ht.erase(key);
}
private:
hash_bucket<K, const K, KeyofT, Hashturn> _ht;
};
unordered_map
cpp
template<class K, class T, class Hashturn = Hashturn<K>>
class unordered_map
{
struct KeyofT
{
const K& operator()(const pair<K, T>& kv)
{
return kv.first;
}
};
public:
typedef typename hash_bucket<K, pair< const K, T>, KeyofT, Hashturn>::Iterator iterator;
typedef typename hash_bucket<K, pair< const K, T>, KeyofT, Hashturn>::Const_Iterator const_iterator;
iterator begin()
{
return _ht.begin();
}
iterator end()
{
return _ht.end();
}
const_iterator begin() const
{
return _ht.begin();
}
const_iterator end() const
{
return _ht.end();
}
T& operator[](const K& key)
{
pair<iterator, bool> ret = insert({ key, T() });
return ret.first->second;
}
pair<iterator, bool> insert(const pair<K, T>& kv)
{
return _ht.insert(kv);
}
iterator Find(const K& key)
{
return _ht.Find(key);
}
bool Erase(const K& key)
{
return _ht.erase(key);
}
private:
hash_bucket<K, pair<const K, T>, KeyofT, Hashturn> _ht;
};
hash_bucket
cpp
```inline unsigned long __stl_next_prime(unsigned long n)
{
// Note: assumes long is at least 32 bits.
static const int __stl_num_primes = 28;
static const unsigned long __stl_prime_list[__stl_num_primes] =
{
53, 97, 193, 389, 769,
1543, 3079, 6151, 12289, 24593,
49157, 98317, 196613, 393241, 786433,
1572869, 3145739, 6291469, 12582917, 25165843,
50331653, 100663319, 201326611, 402653189, 805306457,
1610612741, 3221225473, 4294967291
};
const unsigned long* first = __stl_prime_list;
const unsigned long* last = __stl_prime_list +
__stl_num_primes;
const unsigned long* pos = lower_bound(first, last, n);
return pos == last ? *(last - 1) : *pos;
}
template<class T>
struct Buket_Node
{
T _data;
Buket_Node<T>* _next;
Buket_Node(const T& data)
:_data(data)
,_next(nullptr)
{}
};
//如果要支持Key的比较,那么就需要仿函数来实现,如string 映射,在映射到哈希表中
//默认的哈希仿函数,float,负数,转为无符号整形
template<class K>
struct Hashturn
{
size_t operator()(const K& key)
{
return (size_t)key;
}
};
//如果是sring类型,那么我们就可以单独的给string,建立一个仿函数实现第一层映射
//走特化
template<>
struct Hashturn<string>
{
size_t operator()(const string& key)
{
size_t m = 0;
//把ASCALL码值加起来
for (auto& e : key)
{
//用ASCALL来实现,字符转数字
m += e;
}
return m;
}
};
template<class K, class T, class KeyofT, class Hashturn>
class hash_bucket;
template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
struct Hash_Iterator
{
typedef Buket_Node<T> Node;
typedef hash_bucket<K, T, KeyOfT, Hash> HT;
typedef Hash_Iterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;
Node* _node;
const HT* _ht;
Hash_Iterator(Node* node, const HT* ht)
:_node(node)
, _ht(ht)
{}
Ref operator*()
{
return _node->_data;
}
Ptr operator->()
{
return &_node->_data;
}
bool operator!=(const Self& s)
{
return _node != s._node;
}
// 16:46
Self& operator++()
{
if (_node->_next)
{
// 当前桶还有数据,走到当前桶下一个节点
_node = _node->_next;
}
else
{
// 当前桶走完了,找下一个不为空的桶
KeyOfT kot;
Hash hash;
size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();
++hashi;
while (hashi < _ht->_tables.size())
{
_node = _ht->_tables[hashi];
if (_node)
break;
else
++hashi;
}
// 所有桶都走完了,end()给的空标识的_node
if (hashi == _ht->_tables.size())
{
_node = nullptr;
}
}
return *this;
}
};
template<class K,class T,class KeyofT,class Hashturn>
class hash_bucket
{
template<class K, class T, class ref, class ptr, class KeyofT, class Hashturn>
friend struct Hash_Iterator;
typedef Buket_Node<T> Node;
public:
typedef Hash_Iterator<K, T, T&, T*, KeyofT, Hashturn> Iterator;
typedef Hash_Iterator<K, T, const T&, const T*, KeyofT, Hashturn> Const_Iterator;
//Iterator begin()
//{
// if()
// for (int i = 0;i < _tables.size();i++)
// {
// Node* cur = _tables[i];
// if (cur)
// {
// return Iterator(cur, this);
// }
// }
//}
hash_bucket()
:_n(0)
, _tables(__stl_next_prime(0))
{}
Iterator begin()
{
if (_n == 0)
{
return Iterator(nullptr, this);
}
for (int i = 0;i < _tables.size();i++)
{
Node* cur = _tables[i];
if (cur)
{
return Iterator(cur, this);
}
}
}
Iterator end()
{
return Iterator(nullptr, this);
}
Const_Iterator end() const
{
return Const_Iterator(nullptr, this);
}
Const_Iterator begin()const
{
if (_n == 0)
{
return Const_Iterator(nullptr, this);
}
for (int i = 0;i < _tables.size();i++)
{
Node* cur = _tables[i];
if (cur)
return Const_Iterator(cur, this);
}
}
pair<Iterator, bool> insert(const T& data)
{
KeyofT kot;
Hashturn hash;
//判断负载因子
if (1 == (_n / _tables.size()))
{
Iterator it = Find(kot(data));
if (it != end())
return { it, false };
vector<Node*> newtables(__stl_next_prime(_tables.size() + 1));
for (int i = 0;i < _tables.size();i++)
{
Node* cur = _tables[i];
while (cur)
{
Node* next = cur->_next;
//头插
size_t hashi = hash(kot(cur->_data)) % newtables.size();
cur->_next = newtables[hashi];
newtables[hashi] = cur;
cur = next;
}
_tables[i] = nullptr;
}
_tables.swap(newtables);
}
size_t hashi = hash(kot(data)) % _tables.size();
// 头插
Node* newnode = new Node(data);
newnode->_next = _tables[hashi];
_tables[hashi] = newnode;
++_n;
return { Iterator(newnode, this), false };
}
Iterator Find(const K& key)
{
KeyofT kot;
Hashturn hash;
size_t hashi = hash(key) % _tables.size();
Node* cur = _tables[hashi];
while (cur)
{
if (kot(cur->_data) == key)
{
return Iterator(cur, this);
}
}
return Iterator(nullptr, this);
}
bool erase(const K& key)
{
KeyofT kot;
Hashturn hash;
Node* prev = nullptr;
size_t hashi = hash(key) % _tables.size();
Node* cur = _tables[hashi];
while (cur)
{
if (kot(cur->_data) == key)
{
if (prev)
{
//prev不为空
prev->_next = cur->_next;
}
else
{
//prev为空
_tables[hashi] = prev;
}
delete cur;
_n--;
return true;
}
else
{
prev = cur;
cur = cur->_next;
}
}
return false;
}
private:
size_t _n = 0;
vector<Node*> _tables;
};
再见,并非终点,而是另一段旅程的起点。愿我们在各自的旅途中,都能遇见更美的风景,书写更加精彩的人生篇章!!!