c++(哈希以及封装)

1.哈希概念

哈希(hash)⼜称散列，是⼀种组织数据的方式。从译名来看，有散乱排列的意思。本质就是通过哈希

函数把关键字Key跟存储位置建立⼀个映射关系，查找时通过这个哈希函数计算出Key存储的位置，进

行快速查找。

1.1 直接定址法

当关键字的范围比较集中时，直接定址法就是非常简单高效的方法，比如⼀组关键字都在[0,99]之间，

那么我们开⼀个100个数的数组，每个关键字的值直接就是存储位置的下标。再比如⼀组关键字值都在

unordered_map要⽀持[]主要需要修改insert返回值⽀持，修改HashTable中的insert返回值为

pair<Iterator, bool> Insert(const T& data)

有了insert⽀持[]实现就很简单了，具体参考下⾯代码实现

3.4 bit::unordered_map和bit::unordered_set代码实现

map

c++ 复制代码

#pragma once
#include"Hash.h"
template<class K, class V, class Hash = HashFunc<K>>
class unordered_map
{
	struct MapKeyOfT
	{
		const K& operator()(const pair<K, V>& kv)
		{
			return kv.first;
		}
	};
public:
	typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::Iterator iterator;
	typedef typename hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash>::ConstIterator const_iterator;
	iterator begin()
	{
		return _ht.Begin();
	}
	iterator end()
	{
		return _ht.End();
	}
	const_iterator begin() const
	{
		return _ht.Begin();
	}
	const_iterator end() const
	{
		return _ht.End();
	}
	pair<iterator, bool> insert(const pair<K, V>& kv)
	{
		return _ht.Insert(kv);
	}
	V& operator[](const K& key)
	{
		pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
		return ret.first->second;
	}
	iterator Find(const K& key)
	{
		return _ht.Find(key);
	}
	bool Erase(const K& key)
	{
		return _ht.Erase(key);
	}
private:
	hash_bucket::HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;
};

set

c++ 复制代码

#pragma once
#include"Hash.h"
template<class K, class Hash = HashFunc<K>>
class unordered_set
{
	struct SetKeyOfT
	{
		const K& operator()(const K& key)
		{
			return key;
		}
	};
public:
	typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::Iterator iterator;
	typedef typename hash_bucket::HashTable<K, const K, SetKeyOfT, Hash>::ConstIterator const_iterator;
	iterator begin()
	{
		return _ht.Begin();
	}
	iterator end()
	{
		return _ht.End();
	}
	const_iterator begin() const
	{
		return _ht.Begin();
	}
	const_iterator end() const
	{
		return _ht.End();
	}
	pair<iterator, bool> insert(const K& key)
	{
		return _ht.Insert(key);
	}
	iterator Find(const K& key)
	{
		return _ht.Find(key);
	}
	bool Erase(const K& key)
	{
		return _ht.Erase(key);
	}
private:
	hash_bucket::HashTable<K, const K, SetKeyOfT, Hash> _ht;
};

hash

c++ 复制代码

#pragma once
#include<iostream>
#include<vector>
using namespace std;
enum State
{
	EXIST,
	EMPTY,
	DELETE
};
template<class K, class V>
struct HashData
{
	pair<K, V> _kv;
	State _state = EMPTY;
};


inline unsigned long __stl_next_prime(unsigned long n)
{
	// Note: assumes long is at least 32 bits.
	static const int __stl_num_primes = 28;
	static const unsigned long __stl_prime_list[__stl_num_primes] = {
		53, 97, 193, 389, 769,
		1543, 3079, 6151, 12289, 24593,
		49157, 98317, 196613, 393241, 786433,
		1572869, 3145739, 6291469, 12582917, 25165843,
		50331653, 100663319, 201326611, 402653189, 805306457,
		1610612741, 3221225473, 4294967291
	};
	const unsigned long* first = __stl_prime_list;
	const unsigned long* last = __stl_prime_list + __stl_num_primes;
	const unsigned long* pos = lower_bound(first, last, n);
	return pos == last ? *(last - 1) : *pos;
}


template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

template<>
struct HashFunc<string>
{
	size_t operator()(const string& s)
	{
		size_t hash = 0;
		for (auto ch : s)
		{
			hash += ch;
			hash *= 131;
		}

		return hash;
	}
};

namespace openaddress
{
	template < class K, class V ,class Hash = HashFunc<K>>
	class HashTable
	{
	public:
		HashTable()
			:_tables(__stl_next_prime(0)),
			_n(0)
		{}
		bool Insert(const pair<K, V>& kv)
		{
			Hash hash;
			if (Find(kv.first))
			{
				return false;
			}
			if (_n * 10 / _tables.size() >= 7)
			{
				
				HashTable<K, V, Hash> newht;
				newht._tables.resize(__stl_next_prime(_tables.size() + 1));

				for (auto& data : _tables)
				{
					// 旧表的数据映射到新表
					if (data._state == EXIST)
					{
						newht.Insert(data._kv);
					}
				}

				_tables.swap(newht._tables);
			}


			size_t hash0 = hash(kv.first) % _tables.size();
			size_t hashi = hash0;
			int i = 0;
			while (_tables[hashi]._state == EXIST)
			{
				hashi = (hashi + i) % _tables.size();
				i++;
			}
			_tables[hashi]._kv = kv;
			_tables[hashi]._state = EXIST;
			return true;
		}


		HashData<K, V>* Find(const K& key)
		{
			Hash hash;
			size_t hash0 = hash(key) % _tables.size();
			size_t hashi = hash0;
			size_t i = 1;
			while (_tables[hashi]._state != EMPTY)
			{
				if (_tables[hashi]._state == EXIST
					&& _tables[hashi]._kv.first == key)
				{
					return &_tables[hashi];
				}

				// 线性探测
				hashi = (hash0 + i) % _tables.size();
				++i;
			}

			return nullptr;
		}

		bool Erase(const K& key)
		{
			HashData<K, V>* ret = Find(key);
			if (ret)
			{
				ret->_state = DELETE;
				return true;
			}
			else
			{
				return false;
			}
		}
	private:
		vector<HashData<K, V>> _tables;
		size_t _n = 0; // 表中存储数据个数
	};
}

template<class K, class V>
struct HashNode
{
	HashNode(const pair<K, V>& kv)
		:_kv(kv)
		, _next(nullptr)
	{}
	HashData<K, V> node;
	HashNode<K, V>* next;
};

namespace hash_bucket
{
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;
		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{
		}
	};

	template<class K, class T, class Ptr, class Ref, class KeyOfT, class Hash>
	struct HTIterator
	{
		typedef HashNode<T> Node;
		typedef HTIterator<K, T, Ptr, Ref, KeyOfT, Hash> Self;
		Node* _node;
		const HashTable<K, T, KeyOfT, Hash>* _pht;
		HTIterator(Node* node, const HashTable<K, T, KeyOfT, Hash>* pht)
			:_node(node)
			, _pht(pht)
		{
		}
		Ref operator*()
		{
			return _node->_data;
		}
		Ptr operator->()
		{
			return &_node->_data;
		}
		bool operator!=(const Self& s)
		{
			return _node != s._node;
		}
		Self& operator++()
		{
			if (_node->_next)
			{
				// 当前桶还有节点
				_node = _node->_next;
			}
			else
			{
				// 当前桶⾛完了，找下⼀个不为空的桶
				KeyOfT kot;

				Hash hs;
				size_t hashi = hs(kot(_node->_data)) % _pht -> _tables.size();
				++hashi;
				while (hashi < _pht->_tables.size())
				{
					if (_pht->_tables[hashi])
					{
						break;
					}
					++hashi;
				}
				if (hashi == _pht->_tables.size())
				{
					_node = nullptr; // end()
				}
				else
				{
					_node = _pht->_tables[hashi];
				}
			}
			return *this;
		}
	};


	template<class K, class T, class KeyOfT, class Hash>
	class HashTable
	{
		// 友元声明
		template<class K, class T, class Ptr, class Ref, class KeyOfT, class Hash>  friend struct HTIterator;
		typedef HashNode<T> Node;
	public:
		typedef HTIterator<K, T, T*, T&, KeyOfT, Hash> Iterator;
		typedef HTIterator<K, T, const T*, const T&, KeyOfT, Hash> ConstIterator;


		Iterator Begin()
		{
			if (_n == 0)
				return End();
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				if (cur)
				{
					return Iterator(cur, this);
				}
			}
			return End();
		}
		Iterator End()
		{
			return Iterator(nullptr, this);
		}
		ConstIterator Begin() const
		{
			if (_n == 0)
				return End();
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				if (cur)
				{
					return ConstIterator(cur, this);
				}
			}
			return End();
		}
		ConstIterator End() const
		{
			return ConstIterator(nullptr, this);
		}
		typedef HashNode<T> Node;
		inline unsigned long __stl_next_prime(unsigned long n)
		{
			static const int __stl_num_primes = 28;
			static const unsigned long __stl_prime_list[__stl_num_primes] =
			{
			53, 97, 193, 389, 769,
			1543, 3079, 6151, 12289, 24593,
			49157, 98317, 196613, 393241, 786433,
			1572869, 3145739, 6291469, 12582917, 25165843,
			50331653, 100663319, 201326611, 402653189, 805306457,
			1610612741, 3221225473, 4294967291
			};
			const unsigned long* first = __stl_prime_list;
			const unsigned long* last = __stl_prime_list + __stl_num_primes;
			const unsigned long* pos = lower_bound(first, last, n);
			return pos == last ? *(last - 1) : *pos;
		}
	public:
		HashTable()
		{
			_tables.resize(__stl_next_prime(_tables.size()), nullptr);
		}
		~HashTable()
		{
			// 依次把每个桶释放
			for (size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
		}
		bool Insert(const T& data)
		{
			KeyOfT kot;
			if (Find(kot(data)))
				return false;
			Hash hs;
			size_t hashi = hs(kot(data)) % _tables.size();
			// 负载因⼦==1扩容
			if (_n == _tables.size())
			{
				vector<Node*> newtables(__stl_next_prime(_tables.size()),
					nullptr);
				for (size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;
						// 旧表中结点，挪动新表重新映射的位置
						size_t hashi = hs(kot(cur->_data)) % newtables.size();
						// 头插到新表
						cur->_next = newtables[hashi];
						newtables[hashi] = cur;
						cur = next;
					}
					_tables[i] = nullptr;
				}
				_tables.swap(newtables);
			}
			// 头插
			Node* newnode = new Node(data);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;
			return true;
		}
		Node* Find(const K& key)
		{
			Hash hs;
			size_t hashi = hs(key) % _tables.size();

			Node* cur = _tables[hashi];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					return cur;
				}
				cur = cur->_next;
			}
			return nullptr;
		}
		bool Erase(const K& key)
		{
			Hash hs;
			size_t hashi = hs(key) % _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (cur->_kv.first == key)
				{
					if (prev == nullptr)
					{
						_tables[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}
					delete cur;
					--_n;
					return true;
				}
				prev = cur;
				cur = cur->_next;
			}
			return false;
		}
	private:
		vector<Node*> _tables; // 指针数组
		size_t _n = 0; // 表中存储数据个数
	};
}