cpp容器------string模拟实现
前言
- 前一篇文章我对string的常用接口以及数据类型进行了介绍,下面我将参考cppreference实现一个自定义的string
- 看完这篇文章,你可以对string有一个新的认识,并了解string的小对象优化(Small Object Optimization)技术
准备工作
- 标准库里并没有string这个类,string只是std::basic_string<char>的别名,所以模拟实现时不需要模板
- 其实string和vector的原理相似,接口也差不多,只是string多了一些方便处理字符串的操作。既然原理相似,那么底层实现自然也相似,但是string比vector多了一个小对象优化:如果存储的字符个数少于某个值,就直接使用对象内部的数组存储,当大于这个值时才在堆上申请空间。这个技术可以使用联合体来实现,因为联合体可以在一段内存中存储不同类型的数据,所有成员共享同一段内存空间,任一时刻只能保存一个成员的值,其长度等于最长成员的长度,用于节省内存。我使用联合体写了一部分后,发现很多函数涉及到扩容的问题,扩容又得区分当前是否处于小对象优化状态(也就是联合体的活跃成员是哪一个),很容易写出一连串if-else。我尝试过优化,但是因为之前埋的坑太多了,最终还是放弃了使用联合体,改为用普通成员变量来存储。感兴趣的读者可以采用联合体实现
- 实现的过程还是需要多参考标准库的行为,这样才能更好地理解string
- 这里实现的string不会自动以'\0'结尾,需要注意每次修改后手动给存储字符串的缓冲区末尾加上'\0'
cpp
// Layout sketch for a union-based small-object optimisation: both members
// occupy the same memory, so only one of them is meaningful at a time.
union SOO
{
// Inline representation: a 23-byte character array stored inside the union.
struct
{
char start[23];
} s;
// Alternative representation: a pointer plus a capacity value.
// NOTE(review): presumably `start` refers to heap storage — the allocation
// itself is not shown in this sketch; confirm before relying on it.
struct
{
pointer start;
size_type capacity;
} l;
};
数据类型定义
- 我把一些经常用到的数据类型进行一下封装, 包括数据类型和迭代器等类型
cpp
// Member type aliases: the element type is char and iterators are raw
// character pointers.
using value_type = char;
using pointer = char *;
using const_pointer = const char *;
using reference = char &;
using const_reference = const char &;
using size_type = std::size_t;
using iterator = char *;
using const_iterator = const char *;
// Reverse iterators are built by adapting the pointer iterators.
using reverse_iterator = std::reverse_iterator<iterator>;
using const_reverse_iterator = std::reverse_iterator<const_iterator>;
constexpr static size_type npos = size_type(-1);// largest size_type value; returned by some functions to signal failure
- 成员变量定义:当字符串长度不超过22时,直接使用字符数组small_进行存储,超过22时再使用动态申请的空间,这样可以高效地利用空间
cpp
// Points at the active character buffer: either small_ or an array
// allocated with new[].
char *data_;
// Number of characters currently stored, not counting the terminating '\0'.
size_type size_;
// Number of characters the active buffer can hold, not counting the '\0'.
size_type capacity_;
// Inline buffer used for short strings; the extra byte holds the '\0'.
char small_[small_capacity + 1];
构造函数
- 和vector一样,不过由于string实现得比较早,构造函数比较混乱,这里只实现一些常用的构造函数。构造函数涉及一些拷贝和分配空间的操作,还需要判断当前是否为小对象,这里我们先定义一些工具函数,方便扩容时移动元素
cpp
// Maximum number of characters (excluding the '\0') kept in the inline buffer.
static constexpr size_type small_capacity = 22;
// Reports whether the characters currently live in the inline small_ buffer
// (as opposed to a separately allocated array).
bool is_small() const noexcept
{
    const char *const inline_buffer = small_;
    return inline_buffer == data_;
}
// Returns true when ptr lies inside [data_, data_ + size_], i.e. it addresses
// one of the stored characters or the terminating '\0'.
bool points_into_storage(const char *ptr) const noexcept
{
    const char *const first = data_;
    const char *const last = data_ + size_;
    if (ptr < first)
        return false;
    return !(ptr > last);
}
// Frees the active buffer when it is not the inline one. data_ itself is left
// untouched, so the caller must repoint it before it is used again.
void release_heap() noexcept
{
    if (is_small())
        return;
    delete[] data_;
}
// Replaces the contents with `count` copies of `ch`.
// Storage is (re)selected through ensure_storage_for before writing.
void assign_fill(size_type count, char ch)
{
    ensure_storage_for(count);
    // fill_n returns the position just past the last written character,
    // which is exactly where the terminator belongs.
    char *const terminator = std::fill_n(data_, count, ch);
    *terminator = '\0';
    size_ = count;
}
// Replaces the contents with the first `len` characters of `buffer`.
// NOTE(review): ensure_storage_for may release the current buffer before the
// copy happens, so `buffer` presumably must not point into this string's own
// storage — confirm against callers.
void assign_buffer(const char *buffer, size_type len)
{
    ensure_storage_for(len);
    size_ = len;
    if (size_ > 0)
        std::memmove(data_, buffer, size_);
    data_[len] = '\0';
}
// Takes over the contents of `other` and leaves `other` empty.
// The current data_ is overwritten without being freed, so *this must not own
// a heap buffer when this is called.
void move_from(string &&other) noexcept
{
    const bool steal_heap = !other.is_small();
    size_ = other.size_;
    if (steal_heap)
    {
        // Adopt the heap block and send `other` back to its inline buffer.
        data_ = other.data_;
        capacity_ = other.capacity_;
        other.data_ = other.small_;
        other.capacity_ = small_capacity;
    }
    else
    {
        // Inline contents cannot be stolen; copy them together with the '\0'.
        std::memcpy(small_, other.small_, other.size_ + 1);
        data_ = small_;
        capacity_ = small_capacity;
    }
    other.size_ = 0;
    other.small_[0] = '\0';
}
// Selects a buffer able to hold `required` characters plus the '\0' for a
// subsequent whole-content assignment. Existing characters are NOT preserved
// when the buffer changes, and size_ is left for the caller to set.
//
// - required <= small_capacity: switch to (or stay on) the inline buffer.
// - otherwise: reuse the current heap buffer if it is large enough, else
//   allocate a new one of exactly `required` characters.
//
// Throws whatever `new char[]` throws. The new block is allocated BEFORE the
// old one is released, so a failed allocation leaves data_/capacity_ valid
// instead of leaving data_ dangling (which the destructor would delete again).
void ensure_storage_for(size_type required)
{
    if (required <= small_capacity)
    {
        release_heap(); // no-op when already inline
        data_ = small_;
        capacity_ = small_capacity;
        return;
    }
    if (!is_small() && capacity_ >= required)
        return; // current heap block is already big enough

    char *const fresh = new char[required + 1]; // may throw; state untouched so far
    release_heap();
    data_ = fresh;
    capacity_ = required;
}
// Grows the buffer (keeping the contents) so that at least `required`
// characters fit. Capacity is doubled until it suffices; if doubling would
// exceed the representable range, `required` is used directly.
void ensure_capacity(size_type required)
{
    if (capacity_ >= required)
        return;
    size_type grown = capacity_;
    // capacity_ < required here, so the first iteration always runs.
    do
    {
        if (grown > npos / 2)
        {
            grown = required;
            break;
        }
        grown *= 2;
    } while (grown < required);
    reallocate(grown);
}
// Moves the contents into a newly allocated array able to hold `new_capacity`
// characters plus the '\0', then frees the previous heap buffer (if any).
// The new array is allocated first, so the old contents stay intact if the
// allocation throws.
void reallocate(size_type new_capacity)
{
    char *const fresh = new char[new_capacity + 1];
    if (size_ > 0)
        std::memcpy(fresh, data_, size_);
    fresh[size_] = '\0';
    release_heap();
    data_ = fresh;
    capacity_ = new_capacity;
}
// Inserts `len` characters from `buffer` before position `index` and returns
// *this. No bounds check is done here; `buffer` is copied with memcpy after a
// possible reallocation, so it must not alias this string's storage.
string &insert_buffer(size_type index, const char *buffer, size_type len)
{
    // Characters from `index` to the end, plus the terminating '\0'.
    const size_type tail_bytes = size_ - index + 1;
    ensure_capacity(size_ + len);
    char *const gap = data_ + index;
    std::memmove(gap + len, gap, tail_bytes);
    std::memcpy(gap, buffer, len);
    size_ += len;
    return *this;
}
- 有了这些工具函数,可以减少很多重复代码的编写, 接下来我们开始实现string的构造函数, 可以借用我们前面实现的工具函数
cpp
string() noexcept : data_(small_), size_(0), capacity_(small_capacity), small_{}
{
small_[0] = '\0';
}
// Fill constructor: builds a string made of `count` copies of `ch`.
// Starts from the empty state, then delegates the work to assign_fill.
explicit string(size_type count, char ch) : string()
{
    this->assign_fill(count, ch);
}
string(const string &other) : string()
{
assign_buffer(other.data_, other.size_);
}
string(const char *ch) : string()
{
if (ch == nullptr)
throw std::invalid_argument("null c string");
assign_buffer(ch, std::strlen(ch));
}
// Move constructor: starts empty (inline buffer), then takes over the
// contents of `other`, which is left empty.
string(string &&other) noexcept : string()
{
    move_from(static_cast<string &&>(other));
}
// Range constructor: appends every element of [first, last), converted to
// char, one at a time. Disabled for integral argument types so that it does
// not compete with the (count, ch) constructor.
template <class InputIterator,
          typename = std::enable_if_t<!std::is_integral_v<InputIterator>>>
string(InputIterator first, InputIterator last) : string()
{
    while (first != last)
    {
        push_back(static_cast<char>(*first));
        ++first;
    }
}
// Destructor: frees the buffer unless the inline one is in use.
~string()
{
    if (data_ != small_)
        delete[] data_;
}
一些重载函数
- 字符串需要支持+、+=等操作,还需要重载<<运算符,方便输出。注意要把operator<<声明为友元函数,因为成员运算符重载要求左操作数是当前类对象,而这里左操作数是std::ostream。另外,赋值运算符要注意检查自赋值
cpp
// Copy assignment via copy-and-swap; self-assignment is a no-op.
string &operator=(const string &other)
{
    if (this == &other)
        return *this;
    string replacement(other);
    swap(replacement);
    return *this;
}
// Assigns from a NUL-terminated C string by building a temporary and swapping
// with it. A null pointer is rejected by the temporary's constructor.
string &operator=(const char *ch)
{
    string replacement(ch);
    this->swap(replacement);
    return *this;
}
// Move assignment: releases the current contents, then takes over those of
// `other`, which is left empty. Self-assignment is a no-op.
string &operator=(string &&other) noexcept
{
    if (this == &other)
        return *this;
    release_heap();
    // Return to the empty inline state that move_from expects.
    data_ = small_;
    capacity_ = small_capacity;
    size_ = 0;
    data_[0] = '\0';
    move_from(std::move(other));
    return *this;
}
// Appends the characters of `other` and returns *this.
string &operator+=(const string &other)
{
    if (this == &other)
    {
        // Appending to itself: work from a copy, because growing the buffer
        // would invalidate the source pointer.
        string snapshot(other);
        return *this += snapshot;
    }
    const size_type extra = other.size_;
    ensure_capacity(size_ + extra);
    char *const tail = data_ + size_;
    std::memcpy(tail, other.data_, extra);
    tail[extra] = '\0';
    size_ += extra;
    return *this;
}
// Appends a NUL-terminated C string by first wrapping it in a temporary
// string (whose constructor rejects a null pointer).
string &operator+=(const char *ch)
{
    const string suffix(ch);
    return *this += suffix;
}
// Appends a single character; equivalent to push_back, but returns *this.
string &operator+=(char ch)
{
    this->push_back(ch);
    return *this;
}
// Appends an rvalue string. The argument is only read: the call is forwarded
// to the const-reference overload.
string &operator+=(string &&other)
{
    const string &as_lvalue = other;
    return *this += as_lvalue;
}
// Two strings are equal when they have the same length and the same bytes.
bool operator==(const string &other) const noexcept
{
    if (size_ != other.size_)
        return false;
    return std::memcmp(data_, other.data_, size_) == 0;
}
// Negation of operator==.
bool operator!=(const string &other) const noexcept
{
    const bool same = (*this == other);
    return !same;
}
// Stream insertion: writes exactly size_ characters with an unformatted
// write, then returns the stream.
friend std::ostream &operator<<(std::ostream &os, const string &str)
{
    const auto count = static_cast<std::streamsize>(str.size_);
    return os.write(str.data_, count);
}
迭代器、容量等相关函数
- 缩容并不是强制要求的,具体看编译器实现
cpp
// True when no characters are stored.
bool empty() const noexcept { return size_ == 0; }
// Number of stored characters (the terminating '\0' is not counted).
size_type size() const noexcept { return size_; }
// Same value as size().
size_type length() const noexcept { return size_; }
// Number of characters the current buffer can hold.
size_type capacity() const noexcept { return capacity_; }
// Pointer to the character buffer, for use as a C string.
const char *c_str() const noexcept { return data_; }
// Read-only pointer to the character buffer.
const char *data() const noexcept { return data_; }
// Writable pointer to the character buffer.
char *data() noexcept { return data_; }
// Forward iterators are raw pointers into the buffer: begin is data_,
// end is one past the last stored character (data_ + size_).
iterator begin() noexcept { return data_; }
iterator end() noexcept { return data_ + size_; }
const_iterator begin() const noexcept { return data_; }
const_iterator end() const noexcept { return data_ + size_; }
const_iterator cbegin() const noexcept { return data_; }
const_iterator cend() const noexcept { return data_ + size_; }
// Reverse iterators wrap the forward ones: rbegin adapts end(), rend adapts begin().
reverse_iterator rbegin() noexcept { return reverse_iterator(end()); }
reverse_iterator rend() noexcept { return reverse_iterator(begin()); }
const_reverse_iterator rbegin() const noexcept { return const_reverse_iterator(end()); }
const_reverse_iterator rend() const noexcept { return const_reverse_iterator(begin()); }
const_reverse_iterator crbegin() const noexcept { return const_reverse_iterator(end()); }
const_reverse_iterator crend() const noexcept { return const_reverse_iterator(begin()); }
// First character (data_[0]); no emptiness check is performed.
char &front() noexcept { return data_[0]; }
const char &front() const noexcept { return data_[0]; }
// Last character (data_[size_ - 1]); no emptiness check is performed, so the
// string must not be empty.
char &back() noexcept { return data_[size_ - 1]; }
const char &back() const noexcept { return data_[size_ - 1]; }
// Unchecked element access; use at() for a bounds-checked variant.
char &operator[](size_type index) noexcept { return data_[index]; }
const char &operator[](size_type index) const noexcept { return data_[index]; }
// Exchanges the contents of *this and `other`.
void swap(string &other) noexcept
{
    if (this == &other)
        return;
    const bool both_on_heap = !(is_small() || other.is_small());
    if (both_on_heap)
    {
        // Neither side uses its inline buffer: trading the bookkeeping
        // fields is sufficient.
        std::swap(capacity_, other.capacity_);
        std::swap(size_, other.size_);
        std::swap(data_, other.data_);
        return;
    }
    // At least one side is inline, so the characters themselves have to
    // travel; rotate the contents through a temporary using moves.
    string parked(std::move(other));
    other = std::move(*this);
    *this = std::move(parked);
}
// Reduces the capacity to fit the contents: long strings are reallocated to
// exactly size_, short ones are moved back into the inline buffer.
void shrink_to_fit()
{
    if (size_ > small_capacity)
    {
        if (size_ < capacity_)
            reallocate(size_);
        return;
    }
    if (is_small())
        return; // already as compact as possible
    char *const heap_block = data_;
    std::memcpy(small_, heap_block, size_ + 1); // characters plus the '\0'
    data_ = small_;
    capacity_ = small_capacity;
    delete[] heap_block;
}
增删相关的函数
- 大部分的逻辑还是和容量有关, 什么时候扩容, 是否缩容等问题, 可以直接使用前面定义的工具函数, 注意'\0'的位置要移动, 否则可能得到的不是我们想要的结果
cpp
// Appends one character, growing the buffer when needed, and rewrites the
// terminator after it.
void push_back(char ch)
{
    ensure_capacity(size_ + 1);
    data_[size_] = ch;
    ++size_;
    data_[size_] = '\0';
}
// Empties the string. The buffer and its capacity are kept.
void clear() noexcept
{
    data_[0] = '\0';
    size_ = 0;
}
// Bounds-checked element access.
// Throws std::out_of_range when index >= size().
char &at(size_type index)
{
    if (index < size_)
        return data_[index];
    throw std::out_of_range("string::at");
}
// Bounds-checked read-only element access.
// Throws std::out_of_range when index >= size().
const char &at(size_type index) const
{
    if (index < size_)
        return data_[index];
    throw std::out_of_range("string::at");
}
// Ensures room for at least `new_capa` characters. Requests that do not
// exceed the current capacity are ignored (this function never shrinks).
void reserve(size_type new_capa = 0)
{
    if (new_capa > capacity_)
        reallocate(new_capa);
}
// Resizes to `count` characters; any newly added characters are '\0'.
void resize(size_type count)
{
    const char filler = char();
    resize(count, filler);
}
// Resizes to `count` characters. Shrinking truncates; growing pads the new
// positions with `ch`. The terminator is rewritten in both cases.
void resize(size_type count, char ch)
{
    if (count > size_)
    {
        ensure_capacity(count);
        std::fill_n(data_ + size_, count - size_, ch);
    }
    size_ = count;
    data_[count] = '\0';
}
// Inserts `count` copies of `ch` before position `index` and returns *this.
// Throws std::out_of_range when index > size().
string &insert(size_type index, size_type count, char ch)
{
    if (index > size_)
        throw std::out_of_range("string::insert");
    if (count != 0)
    {
        ensure_capacity(size_ + count);
        char *const gap = data_ + index;
        // Shift the tail, including the terminating '\0', to open the gap.
        std::memmove(gap + count, gap, size_ - index + 1);
        std::fill_n(gap, count, ch);
        size_ += count;
    }
    return *this;
}
// Inserts the NUL-terminated C string `ch` before position `index` and
// returns *this.
// Throws std::out_of_range when index > size(), and std::invalid_argument
// when `ch` is a null pointer.
string &insert(size_type index, const char *ch)
{
    if (index > size_)
        throw std::out_of_range("string::insert");
    if (ch == nullptr)
        throw std::invalid_argument("null c string");
    const size_type len = std::strlen(ch);
    if (len == 0)
        return *this;
    if (!points_into_storage(ch))
        return insert_buffer(index, ch, len);
    // The source lives inside this string: copy it out first, because the
    // insertion may reallocate or shift the bytes being read.
    string detached(ch);
    return insert_buffer(index, detached.data_, detached.size_);
}
// Removes the character at `position` and returns an iterator to the
// character that followed it. A position outside [begin(), end()) is ignored
// and end() is returned.
iterator erase(iterator position)
{
    const bool in_range = !(position < begin()) && position < end();
    if (!in_range)
        return end();
    const size_type index = static_cast<size_type>(position - begin());
    char *const target = data_ + index;
    // size_ - index bytes = the remaining characters plus the '\0'.
    std::memmove(target, target + 1, size_ - index);
    --size_;
    return target;
}
// Removes the last character.
// Throws std::logic_error when the string is empty.
void pop_back()
{
    if (size_ == 0)
        throw std::logic_error("container is empty");
    data_[--size_] = '\0';
}
// Appends `count` copies of `ch`; a count of zero does nothing.
void append(size_type count, char ch)
{
    if (count == 0)
        return;
    ensure_capacity(size_ + count);
    // fill_n returns the position just past the last written character.
    char *const terminator = std::fill_n(data_ + size_, count, ch);
    *terminator = '\0';
    size_ += count;
}
查找,分割子串
- 这里只实现了简单的查找, 其余重载函数读者可以自行实现
cpp
// Returns a copy of at most `len` characters starting at `index`; the length
// is clamped to what is available.
// Throws std::out_of_range when index > size().
string substr(size_type index, size_type len = npos) const
{
    if (index > size_)
        throw std::out_of_range("string::substr");
    const size_type available = size_ - index;
    const size_type count = len < available ? len : available;
    string result;
    result.assign_buffer(data_ + index, count);
    return result;
}
int compare(const string &other) const noexcept
{
const size_type count = std::min(size_, other.size_);
const int cmp = std::memcmp(data_, other.data_, count);
if (cmp < 0)
return -1;
if (cmp > 0)
return 1;
if (size_ < other.size_)
return -1;
if (size_ > other.size_)
return 1;
return 0;
}
// Returns the index of the first occurrence of `ch` at or after `pos`,
// or npos when there is none (including when pos is past the end).
size_type find(char ch, size_type pos = 0) const noexcept
{
    size_type i = pos;
    while (i < size_)
    {
        if (data_[i] == ch)
            return i;
        ++i;
    }
    return npos;
}
// Returns the index of the first occurrence of the C string `ch` at or after
// `pos`, or npos when it does not occur. A null pointer yields npos; an empty
// needle matches at `pos` as long as pos <= size().
size_type find(const char *ch, size_type pos = 0) const noexcept
{
    if (!ch)
        return npos;
    const size_type len = std::strlen(ch);
    if (len == 0)
        return pos > size_ ? npos : pos;
    if (pos > size_)
        return npos;
    if (len > size_ - pos)
        return npos; // not enough characters left for a match
    size_type i = pos;
    while (i + len <= size_)
    {
        if (std::memcmp(data_ + i, ch, len) == 0)
            return i;
        ++i;
    }
    return npos;
}
测试函数
cpp
// Exercises every constructor plus the copy, C-string and move assignment
// operators, then prints a pass marker.
void test_constructors_and_assignments() {
// Default construction
mystring::string s1;
assert(s1.empty());
assert(s1.size() == 0);
// capacity() returns an unsigned size_type, so this check cannot fail.
assert(s1.capacity() >= 0);
assert(s1.c_str() != nullptr);
assert(s1.c_str()[0] == '\0');
// Fill construction (count, ch)
mystring::string s2(5, 'a');
assert(s2.size() == 5);
assert(s2 == "aaaaa");
// Construction from a C string
mystring::string s3("hello");
assert(s3.size() == 5);
assert(s3 == "hello");
// Copy construction
mystring::string s4(s3);
assert(s4 == s3);
// Move construction
mystring::string s5(std::move(s4));
assert(s5 == "hello");
assert(s4.empty()); // the moved-from string is left empty
// Iterator-range construction
const char* arr = "world";
mystring::string s6(arr, arr + 5);
assert(s6 == "world");
// Assignment operators
mystring::string s7;
s7 = s5;
assert(s7 == "hello");
s7 = "test";
assert(s7 == "test");
// Move assignment
mystring::string s8;
s8 = std::move(s7);
assert(s8 == "test");
assert(s7.empty());
std::cout << "[PASS] constructors & assignments\n";
}
// Exercises operator[], at(), front() and back() on mutable and const strings.
void test_element_access() {
mystring::string s("abcdef");
// operator[]
assert(s[0] == 'a');
assert(s[5] == 'f');
s[0] = 'z';
assert(s[0] == 'z');
// at(): in-range access
assert(s.at(1) == 'b');
s.at(2) = 'x';
assert(s.at(2) == 'x');
// at(): out-of-range access must throw
bool caught = false;
try {
s.at(100);
} catch (const std::out_of_range&) {
caught = true;
}
assert(caught);
// front / back
assert(s.front() == 'z');
assert(s.back() == 'f');
s.front() = 'A';
s.back() = 'Z';
assert(s == "AbcxeZ");
// const overloads
const mystring::string cs("const");
assert(cs[0] == 'c');
assert(cs.at(1) == 'o');
assert(cs.front() == 'c');
assert(cs.back() == 't');
std::cout << "[PASS] element access\n";
}
// Exercises forward, const, reverse and const-reverse iterators, including
// writes through the mutable ones.
void test_iterators() {
mystring::string s("12345");
// Forward iterator
mystring::string::iterator it = s.begin();
assert(*it == '1');
*it = '9';
assert(s[0] == '9');
// Range-based for support; each character is incremented ('9' + 1 == ':')
for (auto& ch : s) ch += 1;
assert(s == ":23456");
// const iterator
const mystring::string cs("hello");
mystring::string::const_iterator cit = cs.cbegin();
assert(*cit == 'h');
// *cit = 'x'; // compile error, as expected
// Reverse iterator
mystring::string rs("abc");
mystring::string::reverse_iterator rit = rs.rbegin();
assert(*rit == 'c');
*rit = 'z';
assert(rs == "abz");
// const reverse iterator
const mystring::string crs("xyz");
auto crit = crs.crbegin();
assert(*crit == 'z');
std::cout << "[PASS] iterators\n";
}
// Exercises empty/size/length/capacity, reserve, shrink_to_fit, resize and clear.
void test_capacity() {
mystring::string s;
assert(s.empty());
assert(s.size() == 0);
assert(s.length() == 0);
s = "small";
assert(!s.empty());
assert(s.size() == 5);
assert(s.capacity() >= 5);
// reserve
s.reserve(100);
assert(s.capacity() >= 100);
assert(s.size() == 5);
s.reserve(4); // smaller than the current capacity: no effect
assert(s.capacity() >= 100);
// shrink_to_fit
s.shrink_to_fit();
assert(s.capacity() == s.size() || s.capacity() == mystring::string::small_capacity);
// When size <= small_capacity the string switches back to the small buffer
if (s.size() <= mystring::string::small_capacity) {
// data() should now refer to small_ (only non-null-ness is checked here)
assert(s.c_str() != nullptr);
}
// resize: grow
s.resize(10, 'x');
assert(s.size() == 10);
assert(s[5] == 'x');
assert(s[9] == 'x');
// resize: shrink
s.resize(3);
assert(s.size() == 3);
assert(s == "sma");
// clear
s.clear();
assert(s.empty());
assert(s.size() == 0);
assert(s.c_str()[0] == '\0');
std::cout << "[PASS] capacity\n";
}
// Exercises push_back/pop_back, append, both insert overloads (including an
// insertion whose source points into the string itself), erase and the
// operator+= overloads.
void test_modifiers() {
    // push_back / pop_back
    mystring::string s;
    s.push_back('a');
    s.push_back('b');
    assert(s == "ab");
    s.pop_back();
    assert(s == "a");
    s.pop_back();
    assert(s.empty());
    // append (count, ch)
    s.append(3, 'z');
    assert(s == "zzz");
    // insert (index, count, ch)
    s.insert(1, 2, 'x');
    assert(s == "zxxzz");
    s.insert(0, 1, 'h');
    assert(s == "hzxxzz");
    // insert (index, const char*)
    s.insert(2, "YYY");
    assert(s == "hzYYYxxzz");
    // Self-insertion: the source pointer refers to the string's own buffer.
    // The class only provides insert(index, const char*), so the source is the
    // NUL-terminated tail "de" (the previous three-argument call matched no
    // overload and did not compile).
    mystring::string t("abcde");
    t.insert(2, t.c_str() + 3);
    // Expected: "ab" + "de" + "cde" = "abdecde"
    assert(t == "abdecde");
    // erase (iterator)
    mystring::string u("12345");
    auto it = u.erase(u.begin() + 2); // removes '3'
    assert(u == "1245");
    assert(*it == '4'); // refers to the element after the erased one
    // erase the last element
    u.erase(u.end() - 1);
    assert(u == "124");
    // operator+=
    mystring::string v("abc");
    v += "def";
    assert(v == "abcdef");
    v += 'g';
    assert(v == "abcdefg");
    mystring::string w("XYZ");
    v += w;
    assert(v == "abcdefgXYZ");
    v += mystring::string("!!");
    assert(v == "abcdefgXYZ!!");
    std::cout << "[PASS] modifiers\n";
}
// Exercises c_str/data, substr, compare, both find overloads and the
// null-pointer check of the C-string constructor.
void test_string_operations() {
// c_str / data
mystring::string s("hello");
assert(std::strcmp(s.c_str(), "hello") == 0);
assert(s.data()[0] == 'h');
char* mutable_data = s.data();
mutable_data[0] = 'H';
assert(s == "Hello");
// substr
mystring::string sub = s.substr(1, 3);
assert(sub == "ell");
sub = s.substr(2);
assert(sub == "llo");
// Out-of-range start index must throw
bool caught = false;
try {
s.substr(10);
} catch (const std::out_of_range&) {
caught = true;
}
assert(caught);
// Explicit npos length
sub = s.substr(1, mystring::string::npos);
assert(sub == "ello");
// compare (string literals convert implicitly through the C-string constructor)
mystring::string a("abc");
mystring::string b("abd");
assert(a.compare(a) == 0);
assert(a.compare(b) < 0);
assert(b.compare(a) > 0);
assert(a.compare("abc") == 0);
assert(a.compare("ab") > 0);
assert(a.compare("abcd") < 0);
// find (char)
mystring::string f("hello world");
assert(f.find('o') == 4);
assert(f.find('o', 5) == 7);
assert(f.find('z') == mystring::string::npos);
// find (const char*)
assert(f.find("world") == 6);
assert(f.find("orl") == 7);
assert(f.find("xyz") == mystring::string::npos);
assert(f.find("", 100) == mystring::string::npos); // empty needle, pos beyond size
assert(f.find("", 5) == 5); // empty needle matches at pos
// Exception test: constructing from nullptr
caught = false;
try {
mystring::string(nullptr);
} catch (const std::invalid_argument&) {
caught = true;
}
assert(caught);
std::cout << "[PASS] string operations\n";
}
// Checks the transitions between the inline small buffer and heap storage.
void test_small_string_optimization() {
// Short strings (<= 22 characters) should use the small_ buffer
mystring::string small("1234567890123456789012"); // 22 characters
assert(small.size() == 22);
assert(small.capacity() == mystring::string::small_capacity);
const void* small_ptr = small.c_str();
// A copy should also stay in its own small buffer
mystring::string copy(small);
assert(copy.c_str() != small_ptr); // deep copy: both are small, but the addresses differ
assert(copy.capacity() == mystring::string::small_capacity);
// Grow beyond small_capacity
small.push_back('X'); // size becomes 23, so the contents move to the heap
assert(small.size() == 23);
assert(small.capacity() >= 23);
// shrink_to_fit with 23 characters: still too long for the small buffer,
// so the capacity is only trimmed down to the size
small.shrink_to_fit();
assert(small.size() == 23);
assert(small.capacity() == 23); // larger than small_capacity, so still on the heap
// After shrinking to <= 22 characters, shrink_to_fit should switch back to small
small.resize(20);
small.shrink_to_fit();
assert(small.size() == 20);
assert(small.capacity() == mystring::string::small_capacity);
assert(small.c_str() != nullptr); // refers to small_
std::cout << "[PASS] small string optimization\n";
}
// Exercises swap for small/small and large/small pairs and for self-swap.
void test_swap() {
mystring::string a("hello");
mystring::string b("world");
a.swap(b);
assert(a == "world");
assert(b == "hello");
// Small string swapped with small string
mystring::string small1("hi");
mystring::string small2("bye");
small1.swap(small2);
assert(small1 == "bye");
assert(small2 == "hi");
// Large string swapped with small string
mystring::string large(100, 'x');
mystring::string small("tiny");
large.swap(small);
assert(large == "tiny");
assert(small.size() == 100);
assert(small[0] == 'x');
// Self-swap
a.swap(a);
assert(a == "world");
std::cout << "[PASS] swap\n";
}
// Exercises operator==, operator!= and stream output.
void test_comparison_and_output() {
mystring::string a("abc");
mystring::string b("abc");
mystring::string c("abd");
assert(a == b);
assert(!(a != b));
assert(a != c);
// Stream output
std::ostringstream oss;
oss << a;
assert(oss.str() == "abc");
std::cout << "[PASS] comparison & output\n";
}
// Runs every test suite in order and prints a summary line at the end.
int main() {
    using suite_fn = void (*)();
    const suite_fn suites[] = {
        test_constructors_and_assignments,
        test_element_access,
        test_iterators,
        test_capacity,
        test_modifiers,
        test_string_operations,
        test_small_string_optimization,
        test_swap,
        test_comparison_and_output,
    };
    for (const suite_fn run_suite : suites)
        run_suite();
    std::cout << "\nAll tests passed!\n";
    return 0;
}
这篇文章就到这里了,如果有写的不对的地方还请批评指正,觉得写的还不错的话欢迎点赞关注