C++—vector：vector使用及模拟实现

|----------------------------------------------------------------|-----------------|
| 函数名称 | 接口说明 |
| vector() | 无参构造 |
| vector(size_type n, const value_type& val = value_type()) | 构造并初始化n个val |
| vector(InputIterator first, InputIterator last) | 用迭代器进行初始化构造 |
| vector(initializer_list<value_type> il) | 用初始化列表初始化构造 |
| vector (const vector& x) | 拷贝构造 |
| ~vector() | 销毁容器对象 |

1.1.2 函数的使用

cpp 复制代码

#include<cassert>
#include<ctime>
#include<iostream>
#include<string>
#include<vector>
using namespace std;

// Demonstrates the std::vector constructors: default, fill, iterator range,
// copy and initializer_list. Only the contents of the final initializer_list
// are printed; the vectors are constructed purely for illustration.
void test_vector1()
{
	vector<int> empty_vec;                           // default constructor
	vector<int> ones(10, 1);                         // ten elements, each equal to 1
	vector<int> tail(ones.begin() + 1, ones.end());  // iterator range of another vector

	// The range constructor accepts iterators of a different container as long
	// as the element type converts to the target type (char -> int here).
	string text("xxxxxxxxx");
	vector<int> codes(text.begin(), text.end());
	vector<int> duplicate(codes); // copy constructor

	// C++11 list-initialization through the initializer_list constructor.
	// The direct form is vector<int> v({ 1,2,3,4,5,6 }); the `=` form below
	// is copy-list-initialization and selects the same constructor.
	vector<int> six = { 1,2,3,4,5,6 };
	vector<int> evens = { 2,4,6,8,10 };

	// vector has no stream insertion/extraction operators, so printing has to
	// be written by hand.
	// cout << ones << endl; // error

	// initializer_list provides begin()/end(), so it can be iterated directly
	// (and therefore also works with range-for).
	const initializer_list<int> values = { 1,2,3,4,5,6,7,8 };
	for (auto pos = values.begin(); pos != values.end(); ++pos)
	{
		cout << *pos << " ";
	}
}

vector的3种遍历方式

cpp 复制代码

// Prints every element of v followed by a space, three times in a row, once
// per traversal technique. Each pass ends with a newline.
void Print(const vector<int>& v)
{
	// 1. Range-based for
	for (const int value : v)
	{
		cout << value << " ";
	}
	cout << endl;

	// 2. Iterators (const iterators, because v is const)
	for (auto pos = v.cbegin(); pos != v.cend(); ++pos)
	{
		cout << *pos << " ";
	}
	cout << endl;

	// 3. Index + operator[]
	size_t idx = 0;
	while (idx < v.size())
	{
		cout << v[idx] << " ";
		++idx;
	}
	cout << endl;
}

1.2 操作容量的接口

1.2.1 函数名称及功能介绍

|-------------------------------------------------------|-----------------------|
| 函数名称 | 接口说明 |
| size() | 获取数据个数 |
| capacity() | 获取容量大小 |
| empty() | 判断是否为空 |
| resize(size_type n, const value_type& val = value_type()) | 改变vector的size |
| reserve(size_type n) | 改变vector的capacity |

1.2.2 函数的使用

cpp 复制代码

// Demonstrates reserve() and resize(): prints the capacity after reserve(),
// prints every capacity change while pushing n elements, prints the elapsed
// clock ticks, then shows the size/capacity/contents produced by resize().
void test_vector2()
{
	vector<int> v1;
	// const size_t n = 1000000;
	const size_t n = 100; // size_t so the loop index and the bound have the same type

	// Growth policy is implementation-specific (the article observes 1.5x
	// under MSVC). Author's note on string under MSVC: it starts with a
	// 15-character internal buffer, the first growth moves to the heap and
	// doubles, and later growths are 1.5x.
	// When the required size is known up front, reserve it once; reserve()
	// never shrinks the capacity.
	v1.reserve(n);
	size_t old_capacity = v1.capacity();
	cout << v1.capacity() << endl;

	const clock_t begin = clock();
	for (size_t i = 0; i < n; i++)
	{
		v1.push_back(static_cast<int>(i)); // explicit narrowing: i < n always fits in int here
		if (old_capacity != v1.capacity())
		{
			// Report each reallocation (none is expected after the reserve above).
			cout << v1.capacity() << endl;
			old_capacity = v1.capacity();
		}
	}
	const clock_t end = clock();
	// Elapsed ticks. The author found that reserve() does not improve speed
	// dramatically, but recommends calling it whenever the size is known.
	cout << end - begin << endl;

	// resize() allocates AND initializes, so it changes size() as well.
	vector<int> v2;
	v2.resize(100, 1);
	cout << v2.size() << endl;
	cout << v2.capacity() << endl;
	Print(v2);
}

注意：

capacity的代码在vs和g++下分别运行会发现，vs下capacity是按1.5倍增长的，g++是按2 倍增长的。这个问题经常会考察，不要固化的认为，vector增容都是2倍，具体增长多少是根据具体的需求定义的。vs是PJ版本STL，g++是SGI版本STL。
reserve只负责开辟空间，如果确定知道需要用多少空间，reserve可以缓解vector增容的代价缺陷问题。
resize在开空间的同时还会进行初始化，影响size。

1.3 操作数据的接口

1.3.1 函数名称及功能介绍

|---------------------------------------------------------|-------------------------------|
| 函数名称 | 接口说明 |
| push_back(const value_type& val) | 尾插 |
| pop_back() | 尾删 |
| find | 查找（注意这个是算法模块实现，不是vector的成员接口） |
| insert(const_iterator position, const value_type& val) | 在position之前插入val |
| erase(const_iterator position) | 删除position位置的数据 |
| swap(vector& x) | 交换两个vector的数据空间 |
| operator[ ] | 像数组一样访问数据 |

1.3.2 函数的使用

cpp 复制代码

// Demonstrates push_back, insert and erase at the front and at an arbitrary
// position, printing the vector after each insert/erase.
void test_vector3()
{
	vector<int> nums{ 1,2,3,4,5 };
	nums.push_back(6);

	// Insert at the front.
	nums.insert(nums.begin(), 0);
	Print(nums);

	// Insert before the element at index 4.
	nums.insert(nums.begin() + 4, 0);
	Print(nums);

	// Erase the first element.
	nums.erase(nums.begin());
	Print(nums);

	// Erase the element at index 3.
	nums.erase(nums.begin() + 3);
	Print(nums);
}

push_back和emplace_back的区别

cpp 复制代码

// Small two-field aggregate-like type used to contrast push_back with
// emplace_back. It has no default constructor: both values must be supplied.
struct AA
{
	// Stores a1 and a2 in the corresponding members.
	AA(int a1, int a2)
		: _a1(a1), _a2(a2)
	{
	}

	// Default member initializers; the constructor above always overrides them.
	int _a1 = 1;
	int _a2 = 1;
};

void test_vector4()
{
	// 两个地方能用花括号{}：1、initializer_list ；2、多参数隐式类型转换
	AA aa1 = { 0,0 };
	vector<AA> v = { aa1,{1,1},{2,2},{ 3,3 } };

	auto it1 = v.begin();
	while (it1 != v.end())
	{
		cout << it1->_a1 << ':' << it1->_a2 << endl;
		++it1;
	}
	cout << endl;

	v.push_back(aa1); // push_back只能传AA对象
	v.emplace_back(aa1); //emplace_back可以传AA对象，也可以传构造AA对象的参数

	// 用法上的差异（单个值是一样的）推荐
	v.push_back({ 6,6 }); // push_back只能传AA对象
	v.emplace_back(8, 8); // emplace_back可以传AA对象，也可以传构造AA对象的参数(更高效)

	it1 = v.begin();
	while (it1 != v.end())
	{
		cout << it1->_a1 << ':' << it1->_a2 << endl;
		++it1;
	}
	cout << endl;
}

二. vector的模拟实现

注：类模板不能声明和定义分离在两个文件，如果声明和定义分离，也必须在同一个文件

2.1 构造/析构函数的实现

1）无参构造

cpp 复制代码

// Excerpt of the simulated vector: the default constructor.
// NOTE(review): T and iterator are declared outside this excerpt.
class vector
{
public:
    // Default constructor: creates an empty vector. The body is empty because
    // the three members below are set by their default member initializers.
    vector()
    {}

private:
    // Default member initializers: every constructor's initializer list uses
    // these values, so the members need not be initialized explicitly.
	iterator _start = nullptr;
	iterator _finish = nullptr;
	iterator _end_of_storage = nullptr;
};

2）用初始化列表构造

cpp 复制代码

// Excerpt of the simulated vector: construction from an initializer_list.
// NOTE(review): T, iterator, reserve() and push_back() are declared outside this excerpt.
class vector
{
public:
	// Every constructor runs a member-initializer list. Members that are not
	// listed explicitly take their default member initializer (nullptr
	// below); without those defaults the pointers would hold indeterminate
	// values and reserve()/push_back() would misbehave.
	//
	// Copies every element of il, in order, into the new vector.
	vector(initializer_list<T> il)
	{
		reserve(il.size()); // single allocation up front
		// initializer_list has begin()/end(), so range-for works on it.
		for (const auto& e : il)
		{
			push_back(e);
		}
	}
private:
	// Default member initializers used by every constructor's initializer list.
	iterator _start = nullptr;
	iterator _finish = nullptr;
	iterator _end_of_storage = nullptr;
}; // a class definition must end with ';'

3）用n个val构造

cpp 复制代码

// Excerpt of the simulated vector: construction from n copies of a value.
// NOTE(review): T, iterator, reserve() and push_back() are declared outside this excerpt.
class vector
{
public:
	// Constructs a vector holding n copies of val (a value-initialized T by default).
	vector(size_t n, const T& val = T())
	{
		reserve(n);
		for (size_t i = 0; i < n; i++)
		{
			push_back(val);
		}
	}

	// Overload for an int count. Without it, a call such as vector<int>(10, 1)
	// has two arguments of the same type and is matched by the iterator-range
	// constructor template instead. This is a stop-gap; constraining the
	// template is the better long-term solution.
	// A non-positive n yields an empty vector.
	vector(int n, const T& val = T())
	{
		if (n > 0)
		{
			// Guarded: a negative n converted to size_t would request a huge allocation.
			reserve(static_cast<size_t>(n));
		}
		for (int i = 0; i < n; i++)
		{
			push_back(val);
		}
	}
private:
	// Default member initializers used by every constructor's initializer list.
	iterator _start = nullptr;
	iterator _finish = nullptr;
	iterator _end_of_storage = nullptr;
}; // a class definition must end with ';'

4）迭代器区间构造

cpp 复制代码

// Excerpt of the simulated vector: construction from an iterator range.
// NOTE(review): T, iterator and push_back() are declared outside this excerpt.
class vector
{
public:
	// Constructs the vector from the elements in [first, last).
	// This is a member function template, so the iterators may come from any
	// container, not only from this vector type.
	template <class InputIterator>
	vector(InputIterator first, InputIterator last)
	{
		// Every iterator supports ++ and !=, but not necessarily stepping
		// backwards or jumping.
		// reserve(last - first); // error: subtraction requires random-access
		//                        // iterators; list or tree iterators cannot do it
		while (first != last)
		{
			push_back(*first);
			++first;
		}
	}
private:
	// Default member initializers used by every constructor's initializer list.
	iterator _start = nullptr;
	iterator _finish = nullptr;
	iterator _end_of_storage = nullptr;
}; // a class definition must end with ';'

5）拷贝构造

cpp 复制代码

// 传统写法
// Excerpt of the simulated vector: copy constructor, traditional style.
// NOTE(review): T, iterator, reserve(), capacity(), begin()/end() and
// push_back() are declared outside this excerpt.
class vector
{
public:
	// The compiler-generated copy is a shallow (member-wise) copy. Both
	// objects would then point at the same storage, which causes (1) a double
	// free on destruction and (2) writes through one object showing up in the
	// other. A class that owns heap storage through a pointer needs a deep copy.
	//
	// Deep copy constructor (traditional style): v2(v1)
	vector(const vector<T>& v)
	{
		// Reuse reserve() instead of allocating by hand. This is safe only
		// because the members start as nullptr through their default member
		// initializers; a copy constructor also runs an initializer list.
		reserve(v.capacity());
		// T may be expensive to copy (string, for example), so bind each
		// element by const reference instead of copying it into e first.
		for (const auto& e : v)
		{
			push_back(e);
		}
	}
private:
	// Default member initializers used by every constructor's initializer list.
	iterator _start = nullptr;
	iterator _finish = nullptr;
	iterator _end_of_storage = nullptr;
}; // a class definition must end with ';'

// 现代写法
// Excerpt of the simulated vector: copy constructor, modern (copy-and-swap) style.
// NOTE(review): T, the range constructor and swap() are declared outside this excerpt.
class vector
{
public:
	// Copy constructor (modern style): let another object allocate and copy
	// the data, then take over its storage.
	vector(const vector<T>& v)
	{
		vector<T> tmp(v.begin(), v.end()); // the range constructor does the deep copy
		swap(tmp);                         // *this takes tmp's storage; tmp gets the empty state
	}
}; // a class definition must end with ';'

6）赋值运算符重载

cpp 复制代码

// 传统写法
// Excerpt of the simulated vector: copy assignment, traditional style.
// NOTE(review): T, iterator, clear(), reserve(), capacity() and push_back()
// are declared outside this excerpt.
class vector
{
public:
	// Copy assignment (traditional style): v1 = v3.
	// Helpers are reused, but the allocate-then-copy work is still done here.
	// Returns *this by reference so that chained assignment (v0 = v1 = v3) works.
	vector<T>& operator=(const vector<T>& v)
	{
		// Compare addresses, not contents: `*this != v` would need an
		// operator!= and, even if one existed, would test the wrong thing.
		if (this != &v)
		{
			// Small drawback: if this vector's storage is larger than v's,
			// the extra space stays allocated. Freeing and reallocating
			// instead would waste work when both are about the same size.
			clear();
			reserve(v.capacity()); // reallocates only when more room is needed
			for (const auto& e : v)
			{
				push_back(e);
			}
		}

		return *this;
	}
private:
	// Default member initializers used by every constructor's initializer list.
	iterator _start = nullptr;
	iterator _finish = nullptr;
	iterator _end_of_storage = nullptr;
}; // a class definition must end with ';'

// 现代写法
// 赋值运算符重载（现代写法）
// v1 = v3
class vactor
{
	vector<T>& operator=(vector<T> tmp) // tmp是v3的拷贝，不一定是一样大的空间，但一定有一块空间，和v3有一样的值
	{
		swap(tmp); // 定义在类域里面，类是一个整体，找swap函数时，是在类整体里面找，而不是只向上找；只要定义在类里面就可以(上下都行)
		return *this;
		// tmp出作用域就析构了，也就把v1原来的值销毁了
	}
}

7）析构函数

cpp 复制代码

// Excerpt of the simulated vector: destructor.
// NOTE(review): _start, _finish and _end_of_storage are declared outside this excerpt.
class vector
{
public:
	// Releases the storage (if any) and resets the three pointers to nullptr.
	~vector()
	{
		if (_start)
		{
			delete[] _start; // runs each element's destructor, then frees the block
			_start = _finish = _end_of_storage = nullptr;
		}
	}
}; // a class definition must end with ';'

2.2 操作容量的函数的实现

1） size,capacity,clear,empty

cpp 复制代码

// Excerpt of the simulated vector: size and capacity queries.
// NOTE(review): _start, _finish and _end_of_storage are declared outside this excerpt.
class vector
{
public:
	// Number of elements the current storage can hold.
	size_t capacity() const
	{
		return _end_of_storage - _start;
	}

	// Number of elements currently stored.
	size_t size() const
	{
		return _finish - _start;
	}

	// Drops all elements but keeps the allocated storage.
	void clear()
	{
		_finish = _start;
	}

	// True when the vector holds no elements.
	bool empty() const
	{
		return _start == _finish;
	}
}; // a class definition must end with ';'

2）resize,reserve

cpp 复制代码

// Excerpt of the simulated vector: reserve() and resize().
// NOTE(review): T, size(), capacity() and the three pointer members are
// declared outside this excerpt.
class vector
{
public:
	// Ensures room for at least n elements; never shrinks. size() is
	// unchanged. Storage is reallocated only when n exceeds the capacity.
	void reserve(size_t n)
	{
		if (n > capacity())
		{
			size_t sz = size(); // remember the size: _finish is rebuilt from it below
			T* tmp = new T[n];
			if (_start)
			{
				// The standard library uses type traits to pick memcpy for
				// trivially copyable types; here a plain loop is used.
				for (size_t i = 0; i < sz; i++)
				{
					// tmp[i] = _start[i];
					// Assignment would deep-copy class types only to destroy
					// the source right afterwards, which is wasted work.

					// Swapping is cheaper: built-in types are exchanged by
					// value, while class types such as string exchange their
					// resources. The old slot receives the default-constructed
					// value from tmp and is destroyed by delete[] below.
					std::swap(tmp[i], _start[i]);
				}

				// memcpy(tmp, _start, sizeof(T) * sz);
				// Fine for built-in types, wrong for types that need a deep
				// copy: memcpy is a shallow copy, so each element in tmp would
				// share its buffer with the element in _start. delete[] _start
				// runs the old elements' destructors and frees those buffers,
				// leaving the elements in tmp with dangling pointers.
				delete[] _start;
			}
			_start = tmp;
			_finish = _start + sz;
			_end_of_storage = _start + n;
		}
	}

	// Things commonly used as default arguments:
	//   - literals of built-in types (10, a character, a string, nullptr)
	//   - global variables
	//   - static data members (npos, for example)
	//   - temporary objects
	//
	// Changes the size to n. Shrinking drops the trailing elements; growing
	// appends copies of val. The default T() is a value-initialized temporary.
	// For a class type this requires a default constructor; for built-in
	// types it yields 0, 0.0 or a null pointer.
	// val is taken by value, so it stays valid even if the caller passes an
	// element of this vector and reserve() reallocates.
	void resize(size_t n, T val = T())
	{
		if (n < size())
		{
			// Shrink: drop the trailing elements.
			_finish = _start + n;
		}
		else
		{
			reserve(n); // reallocates only when n exceeds the capacity
			while (_finish < _start + n)
			{
				*_finish = val;
				++_finish;
			}
		}
	}
}; // a class definition must end with ';'

2.3 操作数据的函数的实现

1）push_back,pop_back

cpp 复制代码

class vector
{
    void push_back(const T& x)
	{
		if (_finish == _end_of_storage)
		{
			reserve(capacity() == 0 ? 4 : capacity() * 2);
		}
		*_finish = x;
		++_finish;
	}

	void pop_back()
	{
		assert(!empty());
		--_finish;
	}
}

2）insert,erase

cpp 复制代码

// Excerpt of the simulated vector: insert() and erase().
// NOTE(review): T, iterator, reserve(), capacity() and the pointer members
// are declared outside this excerpt.
class vector
{
public:
	// Inserts a copy of x before pos and returns an iterator to the new element.
	//
	// The caller's iterator cannot be kept valid from inside this function:
	//   1. pos cannot be taken by non-const reference, because arguments such
	//      as v.begin() or it + 3 are temporaries and cannot bind to one;
	//   2. a const reference would bind, but then pos could not be updated.
	// So the caller must treat its iterator as invalidated and use the
	// returned iterator instead.
	iterator insert(iterator pos, const T& x)
	{
		assert(pos >= _start);
		assert(pos <= _finish);

		// x may refer to an element of this vector. Reallocation would free
		// it and the shift below would overwrite it, so copy it first.
		T value = x;

		if (_finish == _end_of_storage)
		{
			size_t len = pos - _start;
			reserve(capacity() == 0 ? 4 : capacity() * 2);
			// Growing frees the old storage, so pos no longer refers to a
			// valid position; rebuild it from the saved offset.
			// (Do not think of this as "a dangling pointer": an iterator is
			// not necessarily implemented as a pointer.)
			pos = _start + len;
		}

		// Shift [pos, _finish) one slot to the right, back to front. The loop
		// stops at pos and never forms a pointer before _start.
		for (iterator cur = _finish; cur != pos; --cur)
		{
			*cur = *(cur - 1);
		}
		*pos = value;
		++_finish;

		return pos;
	}

	// Removes the element at pos and returns an iterator to the element that
	// followed it (equal to end() when the last element was removed).
	// Precondition: pos refers to an existing element.
	iterator erase(iterator pos)
	{
		assert(pos >= _start);
		assert(pos < _finish);

		// Shift the elements after pos one slot to the left.
		iterator it = pos + 1;
		while (it < _finish)
		{
			*(it - 1) = *it;
			++it;
		}
		--_finish;

		return pos;
	}
}; // a class definition must end with ';'

如何解决insert、erase等操作后迭代器失效的问题？

重置迭代器。

cpp 复制代码

// Shows why an iterator must be reset from the return value after insert/erase.
int main()
{
	vector<int> v = { 1, 2, 3 };

	auto it1 = v.begin() + 2;
	it1 = v.insert(it1, 30);
	// Is the old it1 still valid after insert? It must be treated as invalid.
	// The iterator is passed by value, so changes to the parameter do not
	// reach the argument: if insert reallocated, the old it1 would still
	// refer to the released storage. Whether a reallocation happened is not
	// known to the caller, so always assume invalidation and continue with
	// the iterator that insert returns.

	// erase requires an iterator to an existing element; end() does not refer
	// to one, so erase the last element instead.
	auto it2 = v.end() - 1;
	it2 = v.erase(it2);
	// Is the old it2 still valid after erase? No; using it is undefined.
	// If it referred to the last element, it now refers past the end. The
	// caller generally cannot tell, so always assume invalidation and
	// continue with the iterator that erase returns.

	return 0;
}

我们用一个题目来解析：删除所有偶数

cpp 复制代码

// Removes every even number from a vector and prints what is left.
int main()
{
	vector<int> v = { 1, 2, 2, 3, 4, 4, 5, 6 };

	// Incorrect version, kept for reference. g++ does not check iterator
	// validity strictly, so it may appear to run. It is wrong because erase
	// already moves the following element into the current position, so the
	// unconditional ++it skips an element (and can step past end()).
	// auto it = v.begin();
	// while (it != v.end())
	// {
	//	if (*it % 2 == 0)
	//	{
	//		v.erase(it);
	//	}
	//
	//	++it;
	// }

	auto it = v.begin();
	while (it != v.end())
	{
		if (*it % 2 == 0)
		{
			it = v.erase(it); // reset the iterator from erase's return value
		}
		else
		{
			++it;
		}
	}

	for (auto e : v)
	{
		cout << e << " ";
	}
	cout << endl;

	return 0;
}

3）operator[ ],swap,迭代器

cpp 复制代码

// Excerpt of the simulated vector: element access, swap and iterators.
// NOTE(review): T, iterator, const_iterator, size() and the pointer members
// are declared outside this excerpt.
class vector
{
public:
	// Unchecked-in-release element access; asserts i < size() in debug builds.
	T& operator[](size_t i)
	{
		assert(i < size());
		return _start[i];
	}

	// Read-only overload for const vectors.
	const T& operator[](size_t i) const
	{
		assert(i < size());
		return _start[i];
	}

	// Exchanges the storage of the two vectors. Only the three pointers are
	// swapped; no element is copied, so this cannot throw.
	void swap(vector<T>& v) noexcept
	{
		std::swap(_start, v._start);
		std::swap(_finish, v._finish);
		std::swap(_end_of_storage, v._end_of_storage);
	}

	// Iterator to the first element.
	iterator begin()
	{
		return _start;
	}

	// Iterator one past the last element.
	iterator end()
	{
		return _finish;
	}

	// Read-only iterator to the first element.
	const_iterator begin() const
	{
		return _start;
	}

	// Read-only iterator one past the last element.
	const_iterator end() const
	{
		return _finish;
	}
}; // a class definition must end with ';'

三. 相关知识补充

3.1 内置类型的构造

cpp 复制代码

// Shows the initialization syntaxes available for built-in types.
int main()
{
    // Available since C++98
    int i = 0;
	int j = int(); // value-initialization: an int becomes 0
	int k = int(1);

	// C++11: braces can initialize anything
	int x = {}; // value-initialization
	int y = { 1 };
	int z{ 2 }; // the '=' may be omitted

    return 0;
}

3.2 auto使用时的提高效率的方法

cpp 复制代码

// Excerpt of the simulated vector: copy constructor, used here to discuss
// the cost of copying in range-for.
// NOTE(review): reserve(), capacity(), begin()/end() and push_back() are
// declared outside this excerpt.
template<class T>
class vector
{
public:
	// Deep copy constructor.
	vector(const vector<T>& v)
	{
		reserve(v.capacity());
		// T is unknown here: it may be int, but it may also be string or
		// another type that is expensive to copy. Binding e by reference
		// avoids a deep copy per element; add const when e is not modified.
		for (const auto& e : v)
		{
			push_back(e);
		}
	}
}; // a class definition must end with ';'

// Shows when range-for may take elements by value.
int main()
{
	// Source vector for the copy below.
	// NOTE(review): assumes the simulated vector lives in namespace bit and
	// provides the initializer_list constructor - confirm.
	bit::vector<int> v1 = { 1, 2, 3, 4, 5 };
	bit::vector<int> v2(v1);

	// Taking elements by value is fine when the element type is known to be
	// cheap to copy (built-in types such as int). When the type is unknown or
	// expensive to copy (string, for example), bind by reference instead.
	for (auto e : v2) // the elements are int, so copying each into e is cheap
	{
		cout << e << " ";
	}
	cout << endl;

	return 0;
}

总结

如有不足或改进之处，欢迎大家在评论区积极讨论，后续我也会持续更新C++相关的知识。文章制作不易，如果文章对你有帮助，就点赞收藏关注支持一下作者吧，让我们一起努力，共同进步！