哈希表——unordered_set和unordered_map的封装

struct HashNode//哈希节点
{
	T _data;
	HashNode<T>* _next;
	//......
};
struct HashFun//哈希函数
struct HTIterator//迭代器
{
	//......
	Node* _it;
	HT* _ht;
};
class HashTable//哈希表
{
public:
	//......
private:
	vector<Node*> _tables; 
	size_t _n = 0;			// 数据个数
};

说明2：

1.模板参数的设计

哈希函数用的是除法散列法，也就是对一个数据的key与数组的长度取余，这个余数就是这个数据需要储存的对应下标位置。

要点：++要能取余这个数必须是整数，又因为要把余数作为下标，那么这个余数必须是非负整数。++

因为我们做的是模板要兼容各种内置类型和外置类型，所以有必要设计一个哈希函数，把这数据转换成size_t类型。

又考虑这个数据可能是pair<key,val>类型或其他结构体类型等，所以需要设计一个函数来确定key是谁。

所以在哈希表的设计中作一个这样的模板声明。

K：传入的数据中key的类型，因为在查找和删除时是依据key来完成的，所以必须单独知道key的类型。

T：数据的类型

KeyOfT：通过仿函数来取到key值

Hash：通过仿函数把key值转化为size_t类型

为了兼容多种类型，在哈希桶的内部实现中，映射时都需要用KeyOfT和Hash，具体实现请参照下面的源码，就不再做详细讲解。

二、迭代器封装

哈希表的迭代器实现和list的迭代器类似，还是比较复杂的所以我们把它单独封装成一个类模板，那么迭代器的成员变量必然就是哈希节点，但是考虑到当一个哈希节点走到当前哈希桶的空时，无法找到下一个哈希桶的起始位置，所以迭代器的成员变量还需要添加一个哈希表，这个问题就可以解决。

++为了兼容const迭代器，有一个很实用的小技巧，在迭代器模板参数里添加Ref和Ptr，给Ref和Ptr传入T&和T*就能实现普通迭代器，传入const T&和const T*就能实现const迭代器++

所以模板参数如下：

因为迭代器中成员变量存了哈希表所以模板参数必须有K，T，KeyOfT，Hash。考虑需要兼容const迭代器所以增加Ref和Ptr模板参数。

准备工作如下：

1.迭代器简单功能

cpp 复制代码

Ref operator*()
{
	return _it->_data;
}
Ptr operator->()
{
	return &_it->_data;
}
bool operator!=(Self it)
{
	return it._it != _it;
}

2.迭代器++

对于哈希桶结构它实际就是一个链表数组，上文已经提过，为了使迭代器走到一个链表结尾时方便找到另一个链表的开头，所以迭代器模板中引入了一个哈希表的成员。

对于迭代器++我们分两种情况考虑：

该迭代器不是链表的尾节点：直接让迭代器移动到该链表的下一个节点即可，

该迭代器是链表的尾节点：可以通过哈希映射找到当前迭代器所在的哈希桶，然后从这个哈希桶开始往后找，直到找到头结点不为空的节点为止，该节点即为迭代器++后的结果。

解释：链表的尾节点------next为空的那个节点

代码演示：
cpp 复制代码
Self& operator++()
{
	if (_it->_next)
	{
		_it = _it->_next;
		return *this;
	}
	KeyOfT kot;
	Hash hash;
	size_t index = hash(kot(_it->_data)) % _ht->_tables.size();
	index++;
	while (index < _ht->_tables.size() && !_ht->_tables[index]) index++;
	if (index == _ht->_tables.size()) _it = nullptr;
	else _it = _ht->_tables[index];
    return *this;
}
注意：以上操作访问了哈希表的成员，如果该成员是私有需要在哈希表内把HashIterator做一个友元声明。

三、HashTable内部设计

关于迭代器我们通常都会有普通迭代器和const迭代器，所以有一下设计：

迭代器begin：第一个非空的哈希桶的头
迭代器end：使用nullptr充当

如下：

cpp 复制代码

//普通迭代器
Iterator Begin()
{
	for (int i = 0; i < _tables.size(); i++)
		if (_tables[i] != nullptr) return Iterator(_tables[i], this);
	return Iterator(nullptr, this);
}
Iterator End()
{
	return Iterator(nullptr, this);
}
//const迭代器
ConstIterator Begin() const
{
    for (int i = 0; i < _tables.size(); i++)
	    if (_tables[i] != nullptr) return Iterator(_tables[i], this);
	return Iterator(nullptr, this);
}
ConstIterator End() const
{
	return Iterator(nullptr, this);
}

四、外层的封装

1.My_unordered_map.h的封装

如上为了后续方便操作，我们再次对迭代器重命名，但要注意加typename表明是成员变量，而不是成员函数。 My_unordered_map的成员变量只要一个哈希表就行。

有了上面的铺垫后My_unordered_map的基本功能和迭代器实现就显得十分简单在这里就不再细讲，我会在文尾分享源码。

注：以上Hash.h中的各种类模板均已放置到 hash_bucket 命名空间中。

2.My_unordered_set.h的封装

关于My_unordered_set的封装逻辑和My_unordered_map是一模一样的就不细讲。

非常感谢您能耐心读完这篇文章。倘若您从中有所收获，还望多多支持呀！

五、源码

1.Hash.h

cpp 复制代码

#pragma once
#include<iostream>
#include<vector>
#include<string>
using namespace std;
namespace hash_bucket
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;
		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};
	template<class K>
	struct HashFun
	{
		size_t operator()(K key)
		{
			return (size_t)key;
		}
	};
	template<>
	struct HashFun<string>
	{
		size_t operator()(string key)
		{
			size_t hash = 0;
			for (auto val : key) hash += val * 131;
			return hash;
		}
	};
	//前置声明
	template<class K, class T, class KeyOfT, class Hash = HashFun<K>>
	class HashTable;
	template<class K, class T, class KeyOfT, class Hash, class Ref, class Ptr>
	struct HTIterator
	{
		typedef HashNode<T> Node;
		typedef HashTable<K, T, KeyOfT, Hash> HT;
		typedef HTIterator<K, T, KeyOfT, Hash, Ref, Ptr> Self;
		HTIterator(Node* it,HT* ht)
			:_it(it)
			,_ht(ht)
		{}
		Ref operator*()
		{
			return _it->_data;
		}
		Ptr operator->()
		{
			return &_it->_data;
		}
		Self& operator++()
		{
			if (_it->_next)
			{
				_it = _it->_next;
				return *this;
			}
			KeyOfT kot;
			Hash hash;
			size_t index = hash(kot(_it->_data)) % _ht->_tables.size();
			index++;
			while (index < _ht->_tables.size() && !_ht->_tables[index]) index++;
			if (index == _ht->_tables.size()) _it = nullptr;
			else _it = _ht->_tables[index];
			return *this;
		}
		bool operator!=(Self it)
		{
			return it._it != _it;
		}

		Node* _it;
		HT* _ht;
	};

	template<class K, class T, class KeyOfT, class Hash>
	class HashTable
	{
	public:
		typedef HashNode<T> Node;
		typedef HTIterator<K, T, KeyOfT, Hash, T&, T*> Iterator;
		typedef HTIterator<K, T, KeyOfT, Hash, const T&, const T*> ConstIterator;
	 
		template<class K, class T, class KeyOfT, class Hash, class Ref, class Ptr>
		friend struct HTIterator;
		//friend Iterator;错误的友元声明
		//friend ConstIterator; 
		Iterator Begin()
		{
			for (int i = 0; i < _tables.size(); i++)
			{
				if (_tables[i] != nullptr) return Iterator(_tables[i], this);
			}
			return Iterator(nullptr, this);
		}
		Iterator End()
		{
			return Iterator(nullptr, this);
		}
		ConstIterator Begin() const
		{
			for (int i = 0; i < _tables.size(); i++)
			{
				if (_tables[i] != nullptr) return Iterator(_tables[i], this);
			}
			return Iterator(nullptr, this);
		}
		ConstIterator End() const
		{
			return Iterator(nullptr, this);
		}

		HashTable()
		{
			_tables.resize(11, nullptr);
		}
		// 哈希桶的销毁
		~HashTable()
		{
			for (int i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
		}

		// 插入值为data的元素，如果data存在则不插入
		pair<Iterator,bool> Insert(const T& data)
		{
			Hash hash;
			KeyOfT kot;
			pair<Iterator,bool> ret = Find(kot(data));
			if (ret.second == true)
			{
				ret.second = false;
				return ret;
			}
			size_t index = hash(kot(data)) % _tables.size();
			if (_n == _tables.size())
			{
				//扩容
				vector<Node*> newtable(_tables.size() * 2, nullptr);
				for (int i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						size_t inx = hash(kot(cur->_data)) % newtable.size();
						Node* next = cur->_next;
						cur->_next = newtable[inx];
						newtable[inx] = cur;
						cur = next;
					}
					_tables[i] = nullptr;
				}
				_tables.swap(newtable);
			}
			Node* NewNode = new Node(data);
			NewNode->_next = _tables[index];
			_tables[index] = NewNode;
			return { Iterator(NewNode, this),true };
		}

		// 在哈希桶中查找值为key的元素，存在返回true否则返回false
		pair<Iterator,bool> Find(const K& key)
		{
			Hash hash;
			KeyOfT kot;
			size_t index = hash(key) % _tables.size();
			Node* cur = _tables[index];
			while (cur && kot(cur->_data) != key)
			{
				cur = cur->_next;
			}
			if (!cur) return { Iterator(nullptr, this),false };
			return { Iterator(cur, this),true };
		}

		// 哈希桶中删除key的元素，删除成功返回true，否则返回false
		bool Erase(const K& key)
		{
			if (!Find(key)) return false;
			Hash hash;
			size_t index = hash(key) % _tables.size();
			Node* cur = _tables[index], prev = nullptr;
			while (cur && kot(cur->_data) != key)
			{
				prev = cur;
				cur = cur->_next;
			}
			if (prev == nullptr) _tables[index] = prev; 
			else prev->_next = cur->_next;
			delete cur;
			return true;
		}

	private:
		vector<Node*> _tables;  // 指针数组
		size_t _n = 0;			// 表中存储数据个数
	};
}

2.My_unordered_map.h

cpp 复制代码

#pragma once
#include"Hash.h"
template<class K,class V>
struct MapKeyOfT
{
	K operator()(pair<K,V> data)
	{
		return data.first;
	}
};
template<class K,class V>
class My_unordered_map
{
public:
	typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT<K,V>, hash_bucket::HashFun<K>>::Iterator iterator;
	typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT<K,V>, hash_bucket::HashFun<K>>::ConstIterator const_iterator;
	pair<iterator, bool> insert(pair<K, V> kv)
	{
		return _ht.Insert(kv);
	}
	pair<iterator, bool> find(K key)
	{
		return _ht.Find(key);
	}
	bool erase(K key)
	{
		return _ht.Erase(key);
	}
	iterator begin()
	{
		return _ht.Begin();
	}
	iterator end()
	{
		return _ht.End();
	}
	const_iterator begin() const
	{
		return _ht.Begin();
	}
	const_iterator end() const
	{
		return _ht.End();
	}
	V& operator[](K key)
	{
		pair<iterator, bool> ret = _ht.Insert({key, V()});
		return ret.first->second;
	}
private:
	hash_bucket::HashTable<K, pair<const K,V>, MapKeyOfT<K,V>, hash_bucket::HashFun<K>> _ht;
};

3. My_unordered_set.h

cpp 复制代码

#pragma once
#include"Hash.h"
template<class K>//?????
struct SetKeyOfT
{
	K operator()(K key)
	{
		return key;
	}
};
template<class K>
class My_unordered_set
{
public:
	typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT<K>, hash_bucket::HashFun<K>>::Iterator iterator;
	typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT<K>, hash_bucket::HashFun<K>>::ConstIterator const_iterator;
	pair<iterator, bool> insert(K kv)
	{
		return _ht.Insert(kv);
	}
	pair<iterator, bool> find(K key)
	{
		return _ht.Find(key);
	}
	bool erase(K key)
	{
		return _ht.Erase(key);
	}
	iterator begin()
	{
		return _ht.Begin();
	}
	iterator end()
	{
		return _ht.End();
	}
	const_iterator begin() const
	{
		return _ht.Begin();
	}
	const_iterator end() const
	{
		return _ht.End();
	}
private:
	hash_bucket::HashTable<K, const K,SetKeyOfT<K>, hash_bucket::HashFun<K>> _ht;
};