[C++进阶] 24. 哈希表封装unordered_map && unordered_set

哈希表封装unordered_map && unordered_set的思路和红黑树封装map && set的思路完全一致，所以改造哈希表同时为unordered_map && unordered_set所用的过程中，为什么这样改，参数为什么这样传递我就不详细写了，可以参考map、set的封装部分：

https://blog.csdn.net/2401_87713146/article/details/159727756?fromshare=blogdetail&sharetype=blogdetail&sharerId=159727756&sharerefer=PC&sharesource=2401_87713146&sharefrom=from_link

一. 修改结构框架

1）节点中存储的数据类型要修改成泛型的，因为同一个哈希表要同时被key搜索结构和key_value搜索结构的两种不同结构使用，所以不能写死成固定的类型。相应的用到节点中数据的部分都要修改成_data。

2）同样是为了实现相同的比较逻辑能同时适合unordered_map和unordered_set，我们引入模板参数KeyOfT，用来取出是数据中的key ，仿函数由上层的unordered_map和unordered_set来传入，因为它们知道自己存储的具体是什么类型的数据，应该如何取出key。相应的原本要取key的位置都用仿函数套一下。

比如Insert的扩容这里套了两层仿函数，效用kot就昂data中的key取出来，再用hs将key转换成支持取模的整型值。

**3）**把上层unordered_set和unordered_map的Insert先套上，测试一下结构修改是否正确。

还有一个要注意的点，我们的key不支持修改，所以传模板参数中的T时应该给key都加上const。

cpp 复制代码

#pragma once
#include "HashTable.h"

namespace laosi
{
	template <class K>
	class unordered_set
	{
		struct setKeyOfT
		{
			K operator()(const K& k)
			{
				return k;
			}
		};

	private:
		hash_bucket::HashTable<K, const K, setKeyOfT> _ht;
	
	public:
		bool insert(const K& key)
		{
			return _ht.Insert(key);
		}

	};

	void test_uset01()
	{
		unordered_set<int> us;
		us.insert(5);
		us.insert(30);
		us.insert(4);
		us.insert(5);
		us.insert(31);
		us.insert(200);
		us.insert(15);
		us.insert(31);
	}
}

cpp 复制代码

#pragma once
#include "HashTable.h"

namespace laosi
{
	template <class K, class V>
	class unordered_map
	{
		struct mapKeyOfT
		{
			K operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};

	private:
		hash_bucket::HashTable<K, pair<const K, V>, mapKeyOfT> _ht;

	public:
		bool insert(const pair<const K, V>& kv)
		{
			return _ht.Insert(kv);
		}

	};

	void test_umap01()
	{
		unordered_map<int,int> um;
		um.insert({ 5, 5 });
		um.insert({ 30, 30 });
		um.insert({ 4, 4 });
		um.insert({ 5, 5 });
		um.insert({ 31, 31 });
		um.insert({ 200, 200 });
		um.insert({ 15, 15 });
		um.insert({ 31, 31 });
	}
}

二. 普通迭代器

1）iterator实现的大框架跟list的iterator思路是一致的，用一个类型封装结点的指针，再通过重载运算符实现迭代器像指针一样访问的行为。哈希表的迭代器是单向迭代器。

2）难点是operator++的实现 。iterator中有一个指向结点的指针：

① 如果当前桶下面还有结点，则结点的指针指向下一个结点即可。

② 如果当前桶走完了，则需要想办法计算找到下一个桶。这里的难点是结构设计 的问题，参考库中源码的做法，Iterator中除了有结点的指针，还有哈希表对象的指针，这样当前桶走完了，要计算下一个桶就容易多了：用key值计算出当前桶位置，依次往后找下一个不为空的桶即可。

3）Begin()返回第一个桶中第一个节点指针构造的迭代器，这里End()返回迭代器可以用空表示。

cpp 复制代码

// 前置声明，HTIterator和HashTable相互依赖，谁放到前面都不认识另一个
// 声明模板参数的缺省值不用写
template <class K, class T, class KeyOfT, class Hash>
class HashTable;

template<class K, class T, class KeyOfT, class Hash>
struct HTIterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef HTIterator<K, T, KeyOfT, Hash> self;

	Node* _node; // 节点的指针，竖着找(链表)
	HT* _pht; // 哈希表的指针，横着找(数组)

	HTIterator(Node* node, HT* pht)
		:_node(node)
		,_pht(pht)
	{}

	T& operator*() const
	{
		return _node->_data;
	}

	T* operator->() const
	{
		return &_node->_data;
	}

	bool operator==(const self& s) const
	{
		return _node == s._node;
	}

	bool operator!=(const self& s) const
	{
		return _node != s._node;
	}

	self& operator++()
	{
		if (_node->_next)
		{
			// 当前hashi位置的链表还没遍历完
			_node = _node->_next;
		}
		else
		{
			Hash hs;
			KeyOfT kot;
			// 找下一个不为空的hashi
			// 不能用哈希表的私有成员_table
			// ① 声明成哈希表的友元类 ② 哈希表提供一个共有成员函数返回_table
			size_t hashi = hs(kot(_node->_data)) % _pht->_table.size();
			hashi++; // 当前位置遍历完了且一定不为空，先往后走一步

			while (hashi < _pht->_table.size())
			{
				if (_pht->_table[hashi])
				{
					_node = _pht->_table[hashi];
					break;
				}
				else
					hashi++;
			}

			// 跳出循环可能是找到了，也可能是走到头也没有不为空的了
			if (hashi == _pht->_table.size())
			{
				// 所有桶都走完了，置为end()
				_node = nullptr;
			}
		}
		return *this;
	}
};


// 哈希桶结构  这里只展示和迭代器有关的部分！！
template <class K, class T, class KeyOfT, class Hash = HashFunc<K>>
class HashTable
{
	// 友元声明
	template<class K, class T, class KeyOfT, class Hash>
	friend struct HTIterator;

public:
	typedef HTIterator<K, T, KeyOfT, Hash> Iterator;

    Iterator Begin()
    {
	    // 第一个不为空的节点
	    if (_n == 0)
	    	return End();

	    size_t i = 0;
	    while (i < _table.size() && _table[i] == nullptr)
	    	i++;
	    if (i < _table.size())
		    return Iterator(_table[i], this);
    }

    Iterator End()
    {
	    return Iterator(nullptr, this);
    }    
};

4）实现迭代器过程中值得注意的问题

迭代器中有哈希表对象的指针，哈希表中用到迭代器，这两个结构相互依赖，不管把谁放在前面，这个前面的都不认识后面的，所以必须加一个前置声明，告诉编译器你会在后面找到这个结构，先别报错。
在封装迭代器的过程中会用到哈希表的表结构，但是_table是HashTable的私有成员，类外不可访问。解决办法有两个：

① 声明成哈希表的友元类

② 哈希表提供一个公有成员函数返回_table

我们这里的实现采用的是声明成友元类。

5）上层unordered_map和unordered_set对迭代器的封装及测试

cpp 复制代码

#pragma once
#include "HashTable.h"

namespace laosi
{
	template <class K>
	class unordered_set
	{
		struct setKeyOfT
		{
			K operator()(const K& k)
			{
				return k;
			}
		};

	private:
		hash_bucket::HashTable<K, const K, setKeyOfT> _ht;
	public:
		typedef typename hash_bucket::HashTable<K, const K, setKeyOfT>::Iterator iterator;

	public:
		iterator begin()
		{
			return _ht.Begin();
		}

		iterator end()
		{
			return _ht.End();
		}
	
		bool insert(const K& key)
		{
			return _ht.Insert(key);
		}

	};

	void test_uset01()
	{
		unordered_set<int> us;
		us.insert(5);
		us.insert(30);
		us.insert(4);
		us.insert(5);
		us.insert(31);
		us.insert(200);
		us.insert(15);
		us.insert(31);

		unordered_set<int>::iterator it = us.begin();
		// key不可修改
		//*it += 100;
		while (it != us.end())
		{
			cout << *it << " ";
			++it;
		}
		cout << endl;
	}
}

cpp 复制代码

#pragma once
#include "HashTable.h"

namespace laosi
{
	template <class K, class V>
	class unordered_map
	{
		struct mapKeyOfT
		{
			K operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};

	private:
		hash_bucket::HashTable<K, pair<const K, V>, mapKeyOfT> _ht;

	public:
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, mapKeyOfT>::Iterator iterator;

	public:
		iterator begin()
		{
			return _ht.Begin();
		}

		iterator end()
		{
			return _ht.End();
		}

		bool insert(const pair<const K, V>& kv)
		{
			return _ht.Insert(kv);
		}

	};

	void test_umap01()
	{
		unordered_map<int,int> um;
		um.insert({ 5, 5 });
		um.insert({ 30, 30 });
		um.insert({ 4, 4 });
		um.insert({ 5, 5 });
		um.insert({ 31, 31 });
		um.insert({ 200, 200 });
		um.insert({ 15, 15 });
		um.insert({ 31, 31 });

		for (auto& e : um)
		{
			// key不能修改
			//e.first += 1;
			e.second += 1;

			cout << e.first << ":" << e.second << endl;
		}
		cout << endl;


		unordered_map<string, string> um1;
		um1.insert({ "sort", "排序" });
		um1.insert({ "left", "左边" });
		um1.insert({ "right", "右边" });
		um1.insert({ "string", "字符串" });

		for (auto& e : um1)
		{
			// key不能修改
			//e.first += 'x';
			e.second += 'x';

			cout << e.first << ":" << e.second << endl;
		}
		cout << endl;
	}
}

三. const迭代器

1）根据之前的经验，我们不写两份冗余代码，直接为普通迭代器增加两个模板参数，通过上层哈希表传参控制是普通迭代器还是const迭代器。

cpp 复制代码

	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
	struct HTIterator
	{
		typedef HashNode<T> Node;
		typedef HashTable<K, T, KeyOfT, Hash> HT;
		typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> self;

		Node* _node; // 节点的指针，竖着找(链表)
		const HT* _pht; // 哈希表的指针，横着找(数组)

		HTIterator(Node* node, const HT* pht)
			:_node(node)
			,_pht(pht)
		{}

		Ref operator*() const // T&  const T&
		{
			return _node->_data;
		}

		Ptr operator->() const // T* const T*
		{
			return &_node->_data;
		}

		bool operator==(const self& s) const
		{
			return _node == s._node;
		}

		bool operator!=(const self& s) const
		{
			return _node != s._node;
		}

		self& operator++()
		{
			if (_node->_next)
			{
				// 当前hashi位置的链表还没遍历完
				_node = _node->_next;
			}
			else
			{
				Hash hs;
				KeyOfT kot;
				// 找下一个不为空的hashi
				// 不能用哈希表的私有成员_table
				// ① 声明成哈希表的友元类 ② 哈希表提供一个共有成员函数返回_table
				size_t hashi = hs(kot(_node->_data)) % _pht->_table.size();
				hashi++; // 当前位置遍历完了且一定不为空，先往后走一步

				while (hashi < _pht->_table.size())
				{
					if (_pht->_table[hashi])
					{
						_node = _pht->_table[hashi];
						break;
					}
					else
						hashi++;
				}

				// 跳出循环可能是找到了，也可能是走到头也没有不为空的了
				if (hashi == _pht->_table.size())
				{
					// 所有桶都走完了，置为end()
					_node = nullptr;
				}
			}
			return *this;
		}
	};

// 哈希桶结构   只展示和迭代器有关的部分
template <class K, class T, class KeyOfT, class Hash = HashFunc<K>>
class HashTable
{
	// 友元声明
	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
	friend struct HTIterator;

public:
	typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
	typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;

	Iterator Begin()
	{
		// 第一个不为空的节点
		if (_n == 0)
			return End();

		size_t i = 0;
		while (i < _table.size() && _table[i] == nullptr)
			i++;
		//if (i < _table.size())
		// 一定能找到一个不为空的_table[i]
		return Iterator(_table[i], this);
	}

	Iterator End()
	{
		return Iterator(nullptr, this);
	}

	ConstIterator Begin() const
	{
		// 第一个不为空的节点
		if (_n == 0)
			return End();

		size_t i = 0;
		while (i < _table.size() && _table[i] == nullptr)
			i++;
		//if (i < _table.size())
		// 一定能找到一个不为空的_table[i]
		return ConstIterator(_table[i], this);
	}

	ConstIterator End() const
	{
		return ConstIterator(nullptr, this);
	}
};

上层的unordered_set和unordered_map对应的修改：

2）易出现的问题：

四. find，insert，erase

find，insert的返回值改为符合库中逻辑的，并在上层封装find，insert，erase。注意返回值的逻辑。

哈希表中：

cpp 复制代码

		pair<Iterator, bool> Insert(const T& data)
		{
			Hash hs;
			KeyOfT kot;

			// 不允许冗余
			Iterator it = Find(kot(data));
			if (it != End())
				return { it, false };

			// 负载因子超过1则扩容
			if ((double)_n / (double)_table.size() >= 1)
			{
				//// 方法一 + 析构
				//HashTable<K, T, KeyOfT> newtable(__stl_next_prime(_table.size() + 1));
				//for (size_t j = 0; j < _table.size(); j++)
				//{
				//	Node* cur = _table[j];
				//	while (cur)
				//	{
				//		newtable.Insert(cur->_data);
				//		cur = cur->_next;
				//	}
				//}
				//_table.swap(newtable._table);

				// 方法二
				vector<Node*> newtable(__stl_next_prime(_table.size() + 1));
				for (int i = 0; i < _table.size(); i++)
				{
					Node* cur = _table[i];
					while (cur)
					{
						Node* next = cur->_next;

						size_t hashi = hs(kot(cur->_data)) % newtable.size();
						cur->_next = newtable[hashi];
						newtable[hashi] = cur;

						cur = next;
					}
					_table[i] = nullptr;
					// _n不用变
				}
				_table.swap(newtable);
			}

			// 除留余数法先算位置
			size_t hashi = hs(kot(data)) % _table.size();

			// 创建节点并插入
			Node* newnode = new Node(data);
			newnode->_next = _table[hashi];
			_table[hashi] = newnode;

			_n++;
			return { Iterator(newnode,this), true };
		}

		Iterator Find(const K& key)
		{
			Hash hs;
			KeyOfT kot;
			// 算位置，拿到链表头
			size_t hashi = hs(key) % _table.size();

			// 遍历链表找，找到返回节点指针
			Node* cur = _table[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
					return Iterator(cur, this);
				cur = cur->_next;
			}

			// 找不到返回空
			return End();
		}

		bool Erase(const K& key)
		{
			Hash hs;
			KeyOfT kot;
			// 先找到要删除的位置，以及它的前一个位置
			// 找到了改变相关指针的指向后，释放当前节点
			size_t hashi = hs(key) % _table.size();
			Node* cur = _table[hashi];
			Node* prve = nullptr;

			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prve == nullptr)
						_table[hashi] = cur->_next;
					else
						prve->_next = cur->_next;
					delete cur;
					cur = nullptr;
					_n--;
					return true;
				}
				else
				{
					prve = cur;
					cur = cur->_next;
				}
			}

			// 没找到返回false
			return false;
		}

unordered_map和unordered_set中（代码相同）：

cpp 复制代码

pair<iterator, bool> insert(const pair<const K, V>& kv)
{
	return _ht.Insert(kv);
}

iterator find(const K& key)
{
	return _ht.Find(key);
}

bool erase(const K& key)
{
	return _ht.Erase(key);
}

五. operator\[\]

cpp 复制代码

V& operator[](const K& key)
{
	pair<iterator, bool> ret = insert({ key, V() });
	return ret.first->second;
}

测试代码

cpp 复制代码

void test_umap02() // 统计次数 -- operator[]
{
	string arr[] = { "苹果", "西瓜", "苹果", "西瓜", "苹果",
"苹果", "西瓜","苹果", "香蕉", "苹果", "香蕉" };
	unordered_map<string, int> countmap;

	for (const auto& str : arr)
	{
		countmap[str]++; // 这种写法更简洁
	}

	for (const auto& e : countmap)
		cout << e.first << " : " << e.second << endl;
	cout << endl;
}

void test_umap03() // operator[]插入+修改的功能
{
	unordered_map<string, string> dict;
	dict.insert({ "sort", "排序" });

	// 插入+修改
	dict["left"] = "左边";

	// 插入
	// 调string的默认构造初始化为空字符串
	dict["right"];

	// 修改
	dict["begin"];
	dict["begin"] = "开始";

	// 查找
	cout << dict["sort"] << endl;

	// 想修改用[]或者迭代器，insert没有修改功能
	// insert一个已经存在的值直接就返回了，不会改value的值
	dict.insert({ "left", "右边" });
}