31.C++进阶：⽤哈希表封装myunordered_map和 myunordered_set

源码及框架分析

SGI-STL30版本源代码中没有unordered_map和unordered_set，SGI-STL30版本是C++11之前的STL版本，这两个容器是C++11之后才更新的。但是SGI-STL30实现了哈希表，只容器的名字是hash_map和hash_set，他是作为⾮标准的容器出现的，⾮标准是指⾮C++标准规定必须实现的，源代码在hash_map/hash_set/stl_hash_map/stl_hash_set/stl_hashtable.h中hash_map和hash_set的实现结构框架核⼼部分截取出来如下：

c++ 复制代码

// stl_hash_set  
template <class Value, class HashFcn = hash<Value>,  
		  class EqualKey = equal_to<Value>,  
		  class Alloc = alloc>  
		  class hash_set  
{ 
private:  
	typedef hashtable<Value, Value, HashFcn, identity<Value>, EqualKey, Alloc> ht;  
	ht rep;  
public:  
	typedef typename ht::key_type key_type;  
	typedef typename ht::value_type value_type;  
	typedef typename ht::hasher hasher;  
	typedef typename ht::key_equal key_equal;  
	typedef typename ht::const_iterator iterator;  
	typedef typename ht::const_iterator const_iterator;  
	hasher hash_funct() const { return rep.hash_funct(); }  
	key_equal key_eq() const { return rep.key_eq(); }  
};  

// stl_hash_map  
template <class Key, class T, class HashFcn = hash<Key>,  
		  class EqualKey = equal_to<Key>,  
		  class Alloc = alloc>  
		  class hash_map
{ 
private:  
	typedef hashtable<pair<const Key, T>, Key, HashFcn,  
					  select1st<pair<const Key, T> >, EqualKey, Alloc> ht;
	ht rep;  
public:  
	typedef typename ht::key_type key_type;  
	typedef T data_type;  
	typedef T mapped_type;  
	typedef typename ht::value_type value_type;  
	typedef typename ht::hasher hasher;  
	typedef typename ht::key_equal key_equal;  
	typedef typename ht::iterator iterator;  
	typedef typename ht::const_iterator const_iterator;  
};  

// stl_hashtable.h  
template <class Value, class Key, class HashFcn,  
	      class ExtractKey, class EqualKey,  
		  class Alloc>  
class hashtable {  
public:  
	typedef Key key_type;  
	typedef Value value_type;  
	typedef HashFcn hasher;  
	typedef EqualKey key_equal;  
private:  
	hasher hash;  
	key_equal equals;  
	ExtractKey get_key;  
	typedef __hashtable_node<Value> node;  
	vector<node*,Alloc> buckets;  
	size_type num_elements;  
public:  
	typedef __hashtable_iterator<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc> iterator;  
	pair<iterator, bool> insert_unique(const value_type& obj);  
	const_iterator find(const key_type& key) const;  
};  

template <class Value>  
struct __hashtable_node  
{  
	__hashtable_node* next;
	Value val;  
};

通过源码可以看到，结构上hash_map和hash_set跟map和set的完全类似，复⽤同⼀个hashtable实现key和key/value结构，hash_set传给hash_table的是两个key，hash_map传给hash_table的是pair<const key, value>
需要注意的是源码⾥⾯跟map/set源码类似，命名⻛格⽐较乱，这⾥⽐map和set还乱，hash_set模板参数居然⽤的Value命名，hash_map⽤的是Key和T命名

模拟实现unordered_map和unordered_set

实现出复⽤哈希表的框架，并⽀持insert

参考源码框架，unordered_map和unordered_set复⽤之前我们实现的哈希表。
这⾥key参数就⽤K，value参数就⽤V，哈希表中的数据类型，我们使⽤T。
其次跟map和set相⽐⽽⾔unordered_map和unordered_set的模拟实现类结构更复杂⼀点，但是⼤框架和思路是完全类似的。因为HashTable实现了泛型不知道T参数导致是K，还是pair<K, V>，那么insert内部进⾏插⼊时要⽤K对象转换成整形取模和K⽐较相等，因为pair的value不参与计算取模，且默认⽀持的是key和value⼀起⽐较相等，需要任何时候只需要⽐较K对象，所以在unordered_map和unordered_set层分别实现⼀个MapKeyOfT和SetKeyOfT的仿函数传给HashTable的KeyOfT，然后HashTable中通过KeyOfT仿函数取出T类型对象中的K对象，再转换成整形取模和K⽐较相等，具体细节参考如下代码实现。

c++ 复制代码

// MyUnorderedSet.h  
namespace bit  
{  
	template<class K, class Hash = HashFunc<K>>  
	class unordered_set  
	{  
		struct SetKeyOfT  
		{  
			const K& operator()(const K& key)  
			{  
				return key;  
			}  
		};  
	public:  
		bool insert(const K& key)  
		{
			return _ht.Insert(key);  
		}  
	private:  
		hash_bucket::HashTable<K, K, SetKeyOfT, Hash> _ht;  
	};  
} 

// MyUnorderedMap.h  
namespace bit  
{  
	template<class K, class V, class Hash = HashFunc<K>>  
	class unordered_map  
	{  
		struct MapKeyOfT  
		{  
			const K& operator()(const pair<K, V>& kv)  
			{  
				return kv.first;  
			}  
		};  
	public:  
		bool insert(const pair<K, V>& kv)  
		{  
			return _ht.Insert(kv);  
		}  
	private:  
		hash_bucket::HashTable<K, pair<K, V>, MapKeyOfT, Hash> _ht;  
	};  
} 

// HashTable.h  
template<class K>  
struct HashFunc  
{  
	size_t operator()(const K& key)  
	{  
		return (size_t)key;  
	}  
};  

namespace hash_bucket  
{  
	template<class T>  
	struct HashNode  
	{  
		T _data;  
		HashNode<T>* _next;
		
		HashNode(const T& data)  
			:_data(data)  
			,_next(nullptr)  
		{}  
	};  
	// 实现步骤：  
	// 1、实现哈希表  
	// 2、封装unordered_map和unordered_set的框架 解决KeyOfT  
	// 3、iterator  
	// 4、const_iterator  
	// 5、key不⽀持修改的问题  
	// 6、operator[]  
	
	template<class K, class T, class KeyOfT, class Hash>  
	class HashTable  
	{  
		typedef HashNode<T> Node;  
		inline unsigned long __stl_next_prime(unsigned long n)  
		{  
			static const int __stl_num_primes = 28;  
			static const unsigned long __stl_prime_list[__stl_num_primes]=
			{  
				53, 97, 193, 389, 769,  
				1543, 3079, 6151, 12289, 24593,  
				49157, 98317, 196613, 393241, 786433,  
				1572869, 3145739, 6291469, 12582917, 25165843,  
				50331653, 100663319, 201326611, 402653189, 805306457,  
				1610612741, 3221225473, 4294967291  
			};  
			const unsigned long* first = __stl_prime_list;  
			const unsigned long* last = __stl_prime_list + __stl_num_primes;  
			const unsigned long* pos = lower_bound(first, last, n);  
			return pos == last ? *(last - 1) : *pos;  
		}  
	public:  
		HashTable()  
		{  
			_tables.resize(__stl_next_prime(_tables.size()), nullptr);  
		} 
		
		~HashTable()  
		{  
			// 依次把每个桶释放
			for (size_t i = 0; i < _tables.size(); i++)  
			{  
				Node* cur = _tables[i];  
				while (cur)  
				{  
					Node* next = cur->_next;  
					delete cur;  
					cur = next;  
				} 
				_tables[i] = nullptr;  
			}  
		} 
		
		bool Insert(const T& data)  
		{  
			KeyOfT kot;  
			
			if (Find(kot(data)))  
				return false;  
				
			Hash hs;  
			size_t hashi = hs(kot(data)) % _tables.size();  
			// 负载因⼦==1扩容  
			if (_n == _tables.size())  
			{  
				vector<Node*> newtables(__stl_next_prime(_tables.size()),  
				nullptr);  
				for (size_t i = 0; i < _tables.size(); i++)  
				{  
					Node* cur = _tables[i];  
					while (cur)  
					{  
						Node* next = cur->_next;  
						// 旧表中结点，挪动新表重新映射的位置  
						size_t hashi = hs(kot(cur->_data)) % newtables.size();  
						// 头插到新表  
						cur->_next = newtables[hashi];  
						newtables[hashi] = cur;  
						cur = next;  
					} 
					_tables[i] = nullptr;  
				} 
				_tables.swap(newtables);
			} 
			// 头插  
			Node* newnode = new Node(data);  
			newnode->_next = _tables[hashi];  
			_tables[hashi] = newnode;  
			++_n;  
			return true;  
		}  
	
	private:  
		vector<Node*> _tables; // 指针数组  
		size_t _n = 0; // 表中存储数据个数  
	};  
}

⽀持iterator的实现

iterator核⼼源代码

c++ 复制代码

template <class Value, class Key, class HashFcn,  
class ExtractKey, class EqualKey, class Alloc>  
struct __hashtable_iterator {  
	typedef hashtable<Value, Key, HashFcn, ExtractKey, EqualKey, Alloc>  
			hashtable;  
	typedef __hashtable_iterator<Value, Key, HashFcn,  
								ExtractKey, EqualKey, Alloc>  
			iterator;  
	typedef __hashtable_const_iterator<Value, Key, HashFcn,  
										ExtractKey, EqualKey, Alloc>  
			const_iterator;  
	typedef __hashtable_node<Value> node;  
	
	typedef forward_iterator_tag iterator_category;  
	typedef Value value_type;  
	
	node* cur;  
	hashtable* ht;  
	
	__hashtable_iterator(node* n, hashtable* tab) : cur(n), ht(tab) {}  
	__hashtable_iterator() {}  
	reference operator*() const { return cur->val; }  
#ifndef __SGI_STL_NO_ARROW_OPERATOR  
	pointer operator->() const { return &(operator*()); }  
#endif /* __SGI_STL_NO_ARROW_OPERATOR */
	iterator& operator++();  
	iterator operator++(int);  
	bool operator==(const iterator& it) const { return cur == it.cur; }  
	bool operator!=(const iterator& it) const { return cur != it.cur; }  
};  

template <class V, class K, class HF, class ExK, class EqK, class A>  
__hashtable_iterator<V, K, HF, ExK, EqK, A>&  
__hashtable_iterator<V, K, HF, ExK, EqK, A>::operator++()  
{  
	const node* old = cur;  
	cur = cur->next;  
	if (!cur) {  
		size_type bucket = ht->bkt_num(old->val);  
		while (!cur && ++bucket < ht->buckets.size())  
			cur = ht->buckets[bucket];  
	} 
	return *this;  
}

iterator实现思路分析

iterator实现的⼤框架跟list的iterator思路是⼀致的，⽤⼀个类型封装结点的指针，再通过重载运算符实现，迭代器像指针⼀样访问的⾏为，要注意的是哈希表的迭代器是单向迭代器。
这⾥的难点是operator++的实现。iterator中有⼀个指向结点的指针，如果当前桶下⾯还有结点，则结点的指针指向下⼀个结点即可。如果当前桶⾛完了，则需要想办法计算找到下⼀个桶。这⾥的难点是反⽽是结构设计的问题，参考上⾯的源码，我们可以看到iterator中除了有结点的指针，还有哈希表对象的指针，这样当前桶⾛完了，要计算下⼀个桶就相对容易多了，⽤key值计算出当前桶位置，依次往后找下⼀个不为空的桶即可。
begin()返回第⼀个桶中第⼀个节点指针构造的迭代器，这⾥end()返回迭代器可以⽤空表⽰。
unordered_set的iterator也不⽀持修改，我们把unordered_set的第⼆个模板参数改成const K即可， HashTable<K, const K, SetKeyOfT, Hash> _ht;
unordered_map的iterator不⽀持修改key但是可以修改value，我们把unordered_map的第⼆个模板参数pair的第⼀个参数改成const K即可， HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;
⽀持完整的迭代器还有很多细节需要修改，具体参考下⾯题的代码。

map⽀持[]

unordered_map要⽀持[]主要需要修改insert返回值⽀持，修改HashTable中的insert返回值为pair<Iterator, bool> Insert(const T& data)
有了insert⽀持[]实现就很简单了，具体参考下⾯代码实现

bit::unordered_map和bit::unordered_set代码实现

c++ 复制代码

// MyUnorderedSet.h  
namespace bit  
{  
	template<class K, class Hash = HashFunc<K>>  
	class unordered_set  
	{  
		struct SetKeyOfT  
		{  
			const K& operator()(const K& key)  
			{  
				return key;  
			}  
		};  
	public:  
		typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT,  
						 Hash>::Iterator iterator;  
		typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT,  
						 Hash>::ConstIterator const_iterator;  
						 
		iterator begin()  
		{  
			return _ht.Begin();  
		} 
		
		iterator end()
		{  
			return _ht.End();  
		} 
		
		const_iterator begin() const  
		{  
			return _ht.Begin();  
		} 
		
		const_iterator end() const  
		{  
			return _ht.End();  
		} 
		
		pair<iterator, bool> insert(const K& key)  
		{  
			return _ht.Insert(key);  
		} 
		
		iterator Find(const K& key)  
		{  
			return _ht.Find(key);  
		} 
		
		bool Erase(const K& key)  
		{  
			return _ht.Erase(key);  
		}  
	private:  
		hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;  
	};  
	
	void test_set()  
	{  
		unordered_set<int> s;  
		int a[] = { 4, 2, 6, 1, 3, 5, 15, 7, 16, 14, 3,3,15 };  
		for (auto e : a)  
		{  
			s.insert(e);  
		} 
		for (auto e : s)  
		{  
			cout << e << " ";  
		} 
		cout << endl;
		unordered_set<int>::iterator it = s.begin();  
		while (it != s.end())  
		{  
			// 不⽀持修改  
			//*it += 1;  
			cout << *it << " ";  
			++it;  
		} 
		cout << endl;  
	}  
} 

// MyUnorderedMap.h  
namespace bit  
{  
	template<class K, class V, class Hash = HashFunc<K>>  
	class unordered_map  
	{  
		struct MapKeyOfT  
		{  
			const K& operator()(const pair<K, V>& kv)  
			{  
				return kv.first;  
			}  
		};  
	public:  
		typedef typename hash_bucket::HashTable<K, pair<const K, V>,  
						 MapKeyOfT, Hash>::Iterator iterator;  
		typedef typename hash_bucket::HashTable<K, pair<const K, V>,  
						 MapKeyOfT, Hash>::ConstIterator const_iterator;
						   
		iterator begin()  
		{  
			return _ht.Begin();  
		} 
		
		iterator end()  
		{  
			return _ht.End();  
		} 
		
		const_iterator begin() const  
		{  
			return _ht.Begin();  
		}
		
		const_iterator end() const  
		{  
			return _ht.End();  
		} 
		
		pair<iterator, bool> insert(const pair<K, V>& kv)  
		{  
			return _ht.Insert(kv);  
		} 
		
		V& operator[](const K& key)  
		{  
			pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));  
			return ret.first->second;  
		} 
		
		iterator Find(const K& key)  
		{  
			return _ht.Find(key);  
		} 
		
		bool Erase(const K& key)  
		{  
			return _ht.Erase(key);  
		}  
		
	private:  
		hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;  
	};  
	
	void test_map()  
	{  
		unordered_map<string, string> dict;  
		dict.insert({ "sort", "排序" });  
		dict.insert({ "left", "左边" });  
		dict.insert({ "right", "右边" });  
		dict["left"] = "左边，剩余";  
		dict["insert"] = "插⼊";  
		dict["string"];  
		unordered_map<string, string>::iterator it = dict.begin();  
		while (it != dict.end())  
		{  
			// 不能修改first，可以修改second  
			//it->first += 'x';
			it->second += 'x';  
			cout << it->first << ":" << it->second << endl;  
			++it;  
		} 
		cout << endl;  
	}  
} 

// HashTable.h  
template<class K>  
struct HashFunc  
{  
	size_t operator()(const K& key)  
	{  
		return (size_t)key;  
	}  
};  

// 特化  
template<>  
struct HashFunc<string>  
{  
	size_t operator()(const string& key)  
	{  
		size_t hash = 0;  
		for (auto e : key)  
		{  
			hash *= 131;  
			hash += e;  
		} 
		return hash;  
	}  
};  

namespace hash_bucket  
{  
	template<class T>  
	struct HashNode  
	{  
		T _data;  
		HashNode<T>* _next; 
		 
		HashNode(const T& data)  
			:_data(data)  
			,_next(nullptr)
		{}  
	};  
	
	// 前置声明  
	template<class K, class T, class KeyOfT, class Hash>  
	class HashTable;  
	
	template<class K, class T, class Ptr, class Ref, class KeyOfT, class Hash>  
	struct HTIterator  
	{  
		typedef HashNode<T> Node;  
		typedef HTIterator<K, T, Ptr, Ref, KeyOfT, Hash> Self;  
		Node* _node;  
		const HashTable<K, T, KeyOfT, Hash>* _pht;  
		HTIterator(Node* node, const HashTable<K, T, KeyOfT, Hash>* pht)  
			:_node(node)  
			,_pht(pht)  
		{}  
		
		Ref operator*()  
		{  
			return _node->_data;  
		} 
		
		Ptr operator->()  
		{  
			return &_node->_data;  
		} 
		
		bool operator!=(const Self& s)  
		{  
			return _node != s._node;  
		} 
		
		Self& operator++()  
		{  
			if (_node->_next)  
			{  
				// 当前桶还有节点  
				_node = _node->_next;  
			} 
			else  
			{  
				// 当前桶⾛完了，找下⼀个不为空的桶  
				KeyOfT kot;
				Hash hs;  
				size_t hashi = hs(kot(_node->_data)) % _pht-> _tables.size();  
				++hashi;  
				while (hashi < _pht->_tables.size())  
				{  
					if (_pht->_tables[hashi])  
					{  
						break;  
					} 
					++hashi;  
				} 
				//后面没有桶了
				if (hashi == _pht->_tables.size())  
				{  
					_node = nullptr; // end()  
				} 
				else  
				{  
					_node = _pht->_tables[hashi];  
				}  
			} 
			return *this;  
		}  
	};  
	
	template<class K, class T, class KeyOfT, class Hash>  
	class HashTable  
	{  
		// 友元声明  
		template<class K, class T, class Ptr, class Ref, class KeyOfT, class Hash>  
		friend struct HTIterator;  
		
		typedef HashNode<T> Node;  
	public:  
		typedef HTIterator<K, T, T*, T&, KeyOfT, Hash> Iterator;  
		typedef HTIterator<K, T, const T*, const T&, KeyOfT, Hash> ConstIterator;  
		
		Iterator Begin()  
		{  
			if (_n == 0)  
			return End();
			for (size_t i = 0; i < _tables.size(); i++)  
			{  
				Node* cur = _tables[i];  
				if (cur)  
				{  
					return Iterator(cur, this);  
				}  
			} 
			return End();  
		} 
		
		Iterator End()  
		{  
			return Iterator(nullptr, this);  
		} 
		
		ConstIterator Begin() const  
		{  
			if (_n == 0)  
			return End();  
			for (size_t i = 0; i < _tables.size(); i++)  
			{  
				Node* cur = _tables[i];  
				if (cur)  
				{  
					return ConstIterator(cur, this);  
				}  
			} 
			return End();  
		} 
		
		ConstIterator End() const  
		{  
			return ConstIterator(nullptr, this);  
		} 
		
		inline unsigned long __stl_next_prime(unsigned long n)  
		{  
			static const int __stl_num_primes = 28;  
			static const unsigned long __stl_prime_list[__stl_num_primes]=
			{  
				53,97,193,389,769,
				1543,3079,6151,12289,24593,
				49157, 98317, 196613, 393241, 786433,  
				1572869, 3145739, 6291469, 12582917, 25165843,  
				50331653, 100663319, 201326611, 402653189, 805306457,  
				1610612741, 3221225473, 4294967291  
			};  
			const unsigned long* first = __stl_prime_list;  
			const unsigned long* last = __stl_prime_list + __stl_num_primes;  
			const unsigned long* pos = lower_bound(first, last, n);  
			return pos == last ? *(last - 1) : *pos;  
		} 
		
		HashTable()  
		{  
			_tables.resize(__stl_next_prime(_tables.size()), nullptr);  
		} 
		
		~HashTable()  
		{  
			// 依次把每个桶释放  
			for (size_t i = 0; i < _tables.size(); i++)  
			{  
				Node* cur = _tables[i];  
				while (cur)  
				{  
					Node* next = cur->_next;  
					delete cur;  
					cur = next;  
				} 
				_tables[i] = nullptr;  
			}  
		} 
		
		pair<Iterator, bool> Insert(const T& data)  
		{  
			KeyOfT kot;  
			Iterator it = Find(kot(data));  
			
			if (it != End())  
				return make_pair(it, false);  
				
			Hash hs;  
			size_t hashi = hs(kot(data)) % _tables.size();  
			// 负载因⼦==1扩容  
			if (_n == _tables.size())  
			{
				vector<Node*>  
				newtables(__stl_next_prime(_tables.size()+1), nullptr);  
				for (size_t i = 0; i < _tables.size(); i++)  
				{  
					Node* cur = _tables[i];  
					while (cur)  
					{  
						Node* next = cur->_next;  
						// 旧表中节点，挪动新表重新映射的位置  
						size_t hashi = hs(kot(cur->_data)) %  
						newtables.size();  
						// 头插到新表  
						cur->_next = newtables[hashi];  
						newtables[hashi] = cur;  
						cur = next;  
					} 
					_tables[i] = nullptr;  
				} 
				_tables.swap(newtables);  
			} 
			// 头插  
			Node* newnode = new Node(data);  
			newnode->_next = _tables[hashi];  
			_tables[hashi] = newnode;  
			++_n;  
			return make_pair(Iterator(newnode, this), true);  
		} 
		
		Iterator Find(const K& key)  
		{  
			KeyOfT kot;  
			Hash hs;  
			size_t hashi = hs(key) % _tables.size();  
			Node* cur = _tables[hashi];  
			while (cur)  
			{  
				if (kot(cur->_data) == key)  
				{  
					return Iterator(cur, this);  
				}
				cur = cur->_next;  
			} 
			return End();  
		} 
		
		bool Erase(const K& key)  
		{  
			KeyOfT kot;  
			Hash hs;  
			size_t hashi = hs(key) % _tables .size();  
			Node* prev = nullptr;  
			Node* cur = _tables[hashi];  
			while (cur)  
			{  
				if (kot(cur->_data) == key)  
				{  
					if (prev == nullptr)  
					{  
						_tables[hashi] = cur->_next;  
					} 
					else  
					{  
						prev->_next = cur->_next;  
					} 
					delete cur;  
					--_n;  
					return true;  
				} 
				prev = cur;  
				cur = cur->_next;  
			} 
			return false;  
		}  
	private:  
		vector<Node*> _tables; // 指针数组  
		size_t _n = 0; // 表中存储数据个数  
	};  
}