unordered_map和unordered_set模拟实现

unordered系列的关联式容器之所以效率比较高，是因为其底层使用了哈希结构。

一、哈希

1.1哈希概念

构造一种存储结构，通过某种函数(hashFunc)使元素的存储位置与它的关键码之间能够建立一一映射的关系，那么在查找时通过该函数可以很快找到该元素。

1.2哈希冲突

不同关键字通过相同哈希哈数计算出相同的哈希地址，该种现象称为哈希冲突或哈希碰撞。

1.3哈希函数

引起哈希冲突的一个原因可能是：哈希函数设计不够合理。

常见的哈希函数：

1.直接定址法：

取关键字的某个线性函数为散列地址：Hash（Key）= A*Key + B

优点：简单、均匀

缺点：需要事先知道关键字的分布情况

使用场景：适合查找比较小且连续的情况

2.除留余数法：

设散列表中允许的地址数为m，取一个不大于m，但最接近或者等于m的质数p作为除数，按照哈希函数：Hash(key) = key% p(p<=m),将关键码转换成哈希地址。

1.4哈希冲突解决

解决哈希冲突两种常见的方法是：闭散列和开散列

1.4.1闭散列

闭散列：也叫开放定址法，当发生哈希冲突时，如果哈希表未被装满，说明在哈希表中必然还有空位置，那么可以把key存放到冲突位置中的"下一个" 空位置中去。

1.线性探测

插入：从发生冲突的位置开始，依次向后探测，直到寻找到下一个空位置为止。

删除：删除时不可以物理删除哈希表中已有的元素，若直接删除元素会影响其他元素的搜索。因为在查找时，遇到空就查找结束。

线性探测的实现：

cpp 复制代码

template<class K>
struct DefaultHashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

template<>
struct DefaultHashFunc<string>
{
	size_t operator()(const string& str)
	{
		size_t n = 0;
		for (auto ch : str)
		{
			n *= 131;
			n += ch;
		}
		return n;
	}
};

namespace open_address//开放地址法
{
	//状态
	enum STATE
	{
		EMPTY, EXIST, DELETE//空，存在，删除
	};

	//数据类型
	template<class K, class V>
	struct HashData
	{
		pair<K, V> _kv;
		STATE _state = EMPTY;
	};

	template<class K, class V, class HashFunc = DefaultHashFunc<K>>
	class HashTable
	{
	public:
		HashTable()
		{
			_table.resize(10);
		}

		bool insert(const pair<K, V>& kv)
		{
			//检查是否已经存在
			if (Find(kv.first))
				return false;
			//判断扩容
			HashFunc ht;
			int fac = (double)_n / _table.size();//负载因子
			if (fac >= 0.7)
			{
				//扩容之后可能会改变数据的映射关系，所以不能直接扩容
				size_t newcapacity = _table.capacity() * 2;
				HashTable<K, V> newHash;
				newHash._table.resize(newcapacity);
				//遍历旧表，插入到新表
				for (int i = 0; i < _table.capacity(); i++)
				{
					//只有exist状态的才插入新表
					if (_table[i]._state == EXIST)
						newHash.insert(_table[i]._kv);
				}
				_table.swap(newHash._table);
			}

			//插入
			size_t hashi = ht(kv.first) % _table.size();
			while (_table[hashi]._state == EXIST)
			{
				hashi++;
				if (hashi == _table.size())//回到0位置
					hashi = 0;
			}
			_table[hashi]._kv = kv;
			_table[hashi]._state = EXIST;
			_n++;
			return true;
		}

		HashData<const K, V>* Find(const K& key)
		{
			HashFunc ht;
			size_t hashi = ht(key) % _table.size();
			while (_table[hashi]._state != EMPTY)
			{
				if (_table[hashi]._state != DELETE && _table[hashi]._kv.first == key)
					return (HashData<const K, V>*) & _table[hashi];
				hashi++;
				//转回去继续找
				if (hashi == _table.size())//回到0位置
					hashi = 0;
			}
			return nullptr;
		}

		bool Erase(const K& key)
		{
			HashData<const K, V>* data = Find(key);
			if (data)
			{
				data->_state = DELETE;
				_n--;
				return true;
			}
			return false;
		}
	private:
		vector<HashData<K, V>> _table;
		size_t _n = 0;//存储数据的有效个数
	};
}

哈希表什么情况下扩容？如何扩容？

2.二次探测

线性探测的缺陷是产生冲突的数据堆积在一块，这与其找下一个空位置有关系，因为找空位置的方式就是挨着往后逐个去找，因此二次探测为了避免该问题，找下一个空位置的方法为：

若当前key与原来key产生相同的哈希地址，则当前key存在该地址后偏移量为（1,-1,2,-2,3,-3...）的二次方地址处

key1：hash(key)+0

key2：hash(key)+1^2

key3：hash(key)+(-1)^2

key4：hash(key)+2^2

key5：hash(key)+(-2)^2

1.4.2开散列

开散列法又叫链地址法(开链法)，首先对关键码集合用散列函数计算散列地址，具有相同地址的关键码归于同一子集合，每一个子集合称为一个桶，各个桶中的元素通过一个单链表链接起来，各链表的头结点存储在哈希表中。

开散列的实现：

cpp 复制代码

template<class K>
struct DefaultHashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

template<>
struct DefaultHashFunc<string>
{
	size_t operator()(const string& str)
	{
		size_t n = 0;
		for (auto ch : str)
		{
			n *= 131;
			n += ch;
		}
		return n;
	}
};


namespace hash_bucket//拉链法------哈希桶
{

	template<class K, class V>
	struct HashNode
	{
		HashNode(const pair<K, V>& kv)
			:_kv(kv)
			,_next(nullptr)
		{}

		pair<K, V> _kv;
		HashNode<K, V>* _next;
	};

	template<class K, class V, class HashFunc = DefaultHashFunc<K>>
	class HashTable
	{
		typedef HashNode<K, V> Node;
	public:
		HashTable()
		{
			_table.resize(10, nullptr);
		}

		~HashTable()
		{
			for (int i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
			_n = 0;
		}

		bool insert(const pair<K, V>& kv)
		{
			//检查是否已经存在
			if (Find(kv.first))
				return false;
			HashFunc ht;
			size_t fac = (double)_n / _table.size();//负载因子
			if (fac >= 1)//扩容
			{
				size_t newcapacity = _table.size() * 2;
				HashTable<K, V> newht;
				newht._table.resize(newcapacity, nullptr);
				//遍历旧的，链入新的
				for (int i = 0; i < _table.size(); i++)
				{
					Node* cur = _table[i];
					while (cur)
					{
						Node* next = cur->_next;
						cur->_next = nullptr;//如果用屏蔽的方式，这里要置空
						size_t hashi = ht(cur->_kv.first) % newcapacity;
						//头插

						cur->_next = newht._table[hashi];
						newht._table[hashi] = cur;

						/*if (newht._table[hashi] == nullptr)
						{
							newht._table[hashi] = cur;
						}
						else
						{
							cur->_next = newht._table[hashi];
							newht._table[hashi] = cur;
						}*/
						cur = next;
					}
					_table[i] = nullptr;//这里必须要置空，否则不能释放
				}
				_table.swap(newht._table);
			}
			//头插
			Node* newnode = new Node(kv);
			size_t hashi = ht(kv.first) % _table.size();//找第几个桶

			newnode->_next = _table[hashi];
			_table[hashi] = newnode;

			/*if (_table[hashi] == nullptr)
			{
				_table[hashi] = newnode;
			}
			else
			{
				newnode->_next = _table[hashi];
				_table[hashi] = newnode;

			}*/
			_n++;
			return true;
		}

		Node* Find(const K& key)
		{
			HashFunc ht;
			size_t hashi = ht(key) % _table.size();
			Node* cur = _table[hashi];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					return cur;
				}
				cur = cur->_next;
			}
			return nullptr;
		}

		bool Erase(const K& key)
		{
			HashFunc ht;
			size_t hashi = ht(key) % _table.size();
			Node* prev = nullptr;
			Node* cur = _table[hashi];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					if (prev == nullptr)
					{
						_table[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}
					delete cur;
					return true;

				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}
			return false;
		}

		void Print()
		{
			for (int i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				printf("[%d] : ", i);
				while (cur)
				{
					cout << "(" << cur->_kv.first << ":" << cur->_kv.second << ")" << "->";
					cur = cur->_next;
				}
				cout << "NULL" << endl;
			}
			cout << endl;
		}

	private:
		vector<Node*> _table;
		size_t _n = 0;//存放有效数据的个数
	};
}

注意：如果我们要存放的数据是整型，可以使用上面的哈希函数来映射，但是如果我们存的是字符串或自定义类型的数据时，问题就来了，我们如何用除留余数法来计算呢？字符串和自定义类型不可以取模。

我们可以使用仿函数，如果是整型就返回整型，如果是字符串或自定义类型，就将其转换成整型再返回：

cpp 复制代码

template<class K>
struct DefaultHashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

template<>
struct DefaultHashFunc<string>
{
	size_t operator()(const string& str)
	{
		size_t n = 0;
		for (auto ch : str)
		{
			n *= 131;
			n += ch;
		}
		return n;
	}
};

二、unordered_map和unordered_set的模拟实现

底层哈希桶：

cpp 复制代码

template<class K>
struct DefaultHashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

template<>
struct DefaultHashFunc<string>
{
	size_t operator()(const string& str)
	{
		size_t n = 0;
		for (auto ch : str)
		{
			n *= 131;
			n += ch;
		}
		return n;
	}
};

namespace hash_bucket//拉链法------哈希桶
{

	template<class T>
	struct HashNode
	{
		HashNode(const T& data)
			:_data(data)
			,_next(nullptr)
		{}

		T _data;
		HashNode<T>* _next;
	};

	template<class K, class T, class KofT, class HashFunc>
	class HashTable;//声明一下

	template<class K, class T, class Ref, class Ptr, class KofT, class HashFunc = DefaultHashFunc<K>>
	struct __HT_Iterator
	{
		typedef HashNode<T> Node;
		typedef __HT_Iterator<K, T, Ref, Ptr, KofT, HashFunc> Self;
		typedef __HT_Iterator<K, T, T&, T*, KofT, HashFunc> Iterator;


		__HT_Iterator(Node* node, const HashTable<K, T, KofT, HashFunc>* pht)
			:_node(node)
			,_pht(pht)//_pht为const修饰的指针
		{}

		//当此时这个类是普通迭代器类时，这个函数为拷贝构造
		//当此时这个类是const迭代器类时，这个函数为构造
		__HT_Iterator(const Iterator& it)
			:_node(it._node)
			,_pht(it._pht)
		{}


		Ref operator*()
		{
			return _node->_data;
		}

		Ptr operator->()
		{
			return &_node->_data;
		}

		Self operator++()
		{
			
			//走这个桶里的下一个节点
			if (_node->_next)
			{
				_node = _node->_next;
			}
			//找其他桶
			else
			{
				KofT kot;
				HashFunc ht;
				size_t hashi = ht(kot(_node->_data)) % _pht->_table.size();
				++hashi;//找下一个桶
				while (hashi < _pht->_table.size())
				{
					if (_pht->_table[hashi])
					{
						_node = _pht->_table[hashi];
						return *this;
					}
					else
					{
						++hashi;
					}
				}
				_node = nullptr;
			}
			return *this;
		}

		bool operator!=(const Self& it) const 
		{
			return _node != it._node;
		}
		bool operator==(const Self& it) const
		{
			return _node == it._node;
		}


		Node* _node;
		const HashTable<K, T, KofT, HashFunc>* _pht;

	};

	template<class K, class T, class KofT, class HashFunc = DefaultHashFunc<K>>
	class HashTable
	{
		//友元声明
		template<class K, class T, class Ref, class Ptr, class KofT, class HashFunc>
		friend struct __HT_Iterator; 

		typedef HashNode<T> Node;
	public:
		typedef __HT_Iterator<K, T, T&, T*, KofT, HashFunc> iterator;
		typedef __HT_Iterator<K, T, const T&, const T*, KofT, HashFunc> const_iterator;


		iterator begin()
		{
			for (int i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				if (cur)
					return iterator(cur, this);
			}
			return iterator(nullptr, this);
		}

		iterator end()
		{
			return iterator(nullptr, this);
		}

		const_iterator begin() const
		{
			for (int i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				if (cur)
					return const_iterator(cur, this);
			}
			return const_iterator(nullptr, this);
		}

		const_iterator end() const
		{
			return const_iterator(nullptr, this);//这里的this是const修饰的，所以上面的迭代器里的构造的哈希表参数要设置成const
		}

		HashTable()
		{
			_table.resize(10, nullptr);
		}

		~HashTable()
		{
			for (int i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_table[i] = nullptr;
			}
			_n = 0;
		}

		pair<iterator, bool> insert(const T& data)
		{
			KofT kot;
			//检查是否已经存在
			iterator it = Find(kot(data));
			if (it != end())//找到了就返回此刻的pair
				return make_pair(it ,false);
			HashFunc ht;
			size_t fac = (double)_n / _table.size();//负载因子
			if (fac >= 1)//扩容
			{
				size_t newcapacity = _table.size() * 2;
				HashTable<K, T, KofT> newht;
				newht._table.resize(newcapacity, nullptr);
				//遍历旧的，链入新的
				for (int i = 0; i < _table.size(); i++)
				{
					Node* cur = _table[i];
					while (cur)
					{
						Node* next = cur->_next;
						size_t hashi = ht(kot(cur->_data)) % newcapacity;
						
						//头插
						cur->_next = newht._table[hashi];
						newht._table[hashi] = cur;

						cur = next;
					}
					_table[i] = nullptr;//这里必须要置空，否则不能释放
				}
				_table.swap(newht._table);
			}
			//头插
			Node* newnode = new Node(data);
			size_t hashi = ht(kot(data)) % _table.size();//找第几个桶

			newnode->_next = _table[hashi];
			_table[hashi] = newnode;

			_n++;

			return make_pair(iterator(newnode, this), true);
		}

		iterator Find(const K& key)
		{
			KofT kot;
			HashFunc ht;
			size_t hashi = ht(key) % _table.size();
			Node* cur = _table[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return iterator(cur, this);
				}
				cur = cur->_next;
			}
			return end();
		}

		bool Erase(const K& key)
		{
			KofT kot;
			HashFunc ht;
			size_t hashi = ht(key) % _table.size();
			Node* prev = nullptr;
			Node* cur = _table[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					if (prev == nullptr)
					{
						_table[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}
					delete cur;
					_n--;
					return true;

				}
				else
				{
					prev = cur;
					cur = cur->_next;
				}
			}
			return false;
		}

		void Print()
		{
			for (int i = 0; i < _table.size(); i++)
			{
				Node* cur = _table[i];
				printf("[%d] : ", i);
				while (cur)
				{
					cout << "(" << cur->_kv.first << ":" << cur->_kv.second << ")" << "->";
					cur = cur->_next;
				}
				cout << "NULL" << endl;
			}
			cout << endl;
		}

		size_t count(const K& key)
		{
			if (Find(key) != end())
				return 1;
			else
				return 0;
		}
		size_t size()
		{
			return _n;
		}

		bool empty()
		{
			return _n == 0;
		}

	private:
		vector<Node*> _table;
		size_t _n = 0;//存放有效数据的个数
	};
}

封装unordered_map和unordered_set

cpp 复制代码

template<class K, class V>
struct mapKofT
{
	K operator()(const pair<K, V>& kv)
	{
		return kv.first;
	}
};

template<class K, class V>
class Unordered_map
{
public:
	typedef typename hash_bucket::HashTable<K, pair<const K, V>, mapKofT<K, V>>::iterator iterator;
	typedef typename hash_bucket::HashTable<K, pair<const K, V>, mapKofT<K, V>>::const_iterator const_iterator;

	pair<iterator, bool> insert(const pair<K, V>& kv)
	{
		return _ht.insert(kv);
	}

	V& operator[](const K& key)
	{
		return _ht.insert(make_pair(key, V())).first->second;
	}
	bool Erase(const K& key)
	{
		return _ht.Erase(key);
	}

	iterator find(const K& key)
	{
		return _ht.Find(key);
	}

	size_t count(const K& key)
	{
		return _ht.count(key);
	}
	size_t size()
	{
		return _ht.size();
	}
	bool empty()
	{
		return _ht.empty();
	}

	iterator begin()
	{
		return _ht.begin();
	}

	iterator end()
	{
		return _ht.end();
	}
	const_iterator begin() const
	{
		return _ht.begin();
	}

	const_iterator end() const
	{
		return _ht.end();
	}

private:
	hash_bucket::HashTable<K, pair<const K, V>, mapKofT<K, V>> _ht;
};

cpp 复制代码

template<class K>
struct setKofT
{
	K operator()(const K& key)
	{
		return key;
	}
};

template<class K>
class Unordered_set
{
public:
	typedef typename hash_bucket::HashTable<K, K, setKofT<K>>::const_iterator iterator;
	typedef typename hash_bucket::HashTable<K, K, setKofT<K>>::const_iterator const_iterator;

	pair<iterator, bool> insert(const K& key)
	{
		return _ht.insert(key);
	}
	
	bool Erase(const K& key)
	{
		return _ht.Erase(key);
	}
	size_t size()
	{
		return _ht.size();
	}
	bool empty()
	{
		return _ht.empty();
	}
	size_t count(const K& key)
	{
		return _ht.count(key);
	}
	iterator find(const K& key)
	{
		return _ht.Find(key);
	}

	iterator begin() const
	{
		return _ht.begin();
	}
 	iterator end() const
	{
		return _ht.end();
	}

private:
	hash_bucket::HashTable<K, K, setKofT<K>> _ht;
};