string的模拟实现

文章展示了如何在C++中模拟实现`string`类,包括基本的构造函数、拷贝构造、赋值运算符重载、迭代器、内存管理(如`reserve`和`resize`)以及字符串操作如插入、删除、查找等功能。同时,文中还对比了传统和现代风格的实现差异,并提供了测试代码示例。
摘要由CSDN通过智能技术生成
string模拟在string.h中,test.cpp中是对模拟的测试
string.h中
#pragma once
#include<assert.h>
#include<cstring>
#include<iostream>
namespace tradition //string的传统写法模拟,特点为老实,该开空间的地方开空间,该拷贝数据就自己拷贝数据
{
        class string
        {
        public:
               typedef char* iterator;
               typedef const char* const_iterator; //迭代器还有反向迭代器暂时不实现
               iterator begin()
               {
                       return _str;
                        //在string中迭代器的底层实现就是原生指针,其他数据结构类型的迭代器的底层实现就不一定是原生指针了
               }
               iterator end()
               {
                       return _str + _size;
               }
               const_iterator begin() const //要保证可以通过迭代器实现修改的作用,所以需要两种迭代器
               {
                       return _str;
               }
               const_iterator end() const
               {
                       return _str + _size;
               }
                /*string() //给构造一个缺省值,解决不初始化只有一个空字符串的情况
                       :_str(new char[1])
               {
                       _str[0] = 0;
               }*/
               string(const char* c = "") //缺省值不能是'\0','\0'是char类型的,相当于把ascll值赋给c,即nullptr赋值给c
                       :_size(strlen(c))
                       ,_capacity(_size) //capacity是实际存储有效字符的空间,所以没有算上\0
               {
                       _str = new char[strlen(c) + 1]; //+1是包含\0的空间
                        //开空间最好放在下面,因为初始化列表实际初始化顺序是按照类的成员顺序,在这种情况下,
                       //如果初始化列表中的初始化写在_capacity下,并写成_str(new char[_size + 1])就会出问题
                       strcpy(_str, c);
               }
               ~string()
               {
                       if (_str) //存在_str指向nullstr的情况
                       {
                              delete[] _str;
                              _str = nullptr;
                              _size = _capacity = 0;
                       }
               }
               string(const string& s)
                        //拷贝构造函数,默认为浅拷贝,浅拷贝存在两个问题:1.析构两次 2.对一个string修改会影响另外一个string
                       //所以要自己写成深拷贝
                       :_size(strlen(s._str))
                       , _capacity(_size)
               {
                       _str = new char[_size + 1];
                       strcpy(_str, s._str);
               }
                //string& operator=(string& s)
               //      //赋值运算符重载函数,需要考虑新的string的容量要和拷贝的string容量相等。
               //{
               //      if (this != &s)//如果不考虑if中的情况,在自己给自己赋值的情况下,就会丢失数据。
               //      {
               //             delete[] _str;
               //             _size = strlen(s._str);
               //             _capacity = _size;
               //             _str = new char[strlen(s._str) + 1];
               //             strcpy(_str, s._str);
               //      }
               //      return *this;
               //}
               string& operator=(string& s)
                        //上面的赋值运算符重载函数还有瑕疵,C语言malloc失败后返回NULL,需要检查;
                       // new不需要检查,new失败了会抛异常,上个写法中,就算new开辟失败,也还是会delete _str
               {
                       if (this != &s)
                       {
                              char* tmp = new char[s._capacity + 1];
                              strcpy(tmp, s._str);
                              delete[] _str;
                              _str = tmp;
                              _size = s._size;
                              _capacity = s._capacity;
                       }
                       return *this;
               }
               char& operator[](size_t pos) //引用返回,允许修改
               {
                       assert(pos <= strlen(_str));
                       return _str[pos];
               }
               const char& operator[](size_t pos) const
                        //[]的函数重载不只是应用于string类,还有const string类,对于const string类,就需要新的[]重载函数。
                       //因为要保证通过[]重载函数实现string类修改的功能
               {
                       assert(pos <= strlen(_str));
                       return _str[pos];
               }
               const char* c_str() const
               {
                       return _str;
               }
               size_t size() const
               {
                       return strlen(_str);
               }
               size_t capacity() const
               {
                       return _capacity;
               }
               void reserve(size_t n) //reserve不具备缩容的功能
               {
                       if (n > _capacity)
                       {
                              char* tmp = new char[n + 1];
                              strcpy(tmp, _str);
                              delete[] _str;
                              _str = tmp;
                              _capacity = n;
                       }
               }
               void resize(size_t n ,char ch = '\0')
               {
                       if (n > _capacity)
                       {
                              reserve(n);
                              if (ch != 0)
                              {
                                      memset(_str + strlen(_str), ch, n - strlen(_str));
                                      _size = n;
                                      _str[n] = '\0';
                              }
                       }
                       else
                       {
                              if (n > _size && n < _capacity)
                              {
                                      memset(_str + strlen(_str), ch, n - strlen(_str));
                              }
                              _str[n] = '\0';
                              _size = n;
                       }
               }
               string& operator+=(const char ch)
                        //尾插\0,字符串长度不变,_size变化,如果使用范围for,可以打印字符串中的每一个字符,包括\0
                       //因为范围for是迭代器控制的,
                       //不过\0是不可见字符,一些编译器不会显现,vs2013中\0如果出现是以空格的形式,vs2019则是完全不显现
               {
                       push_back(ch);
                        //在成员函数中调用成员函数,如果不指定对象,则默认被调用的成员函数this指针与调用的成员函数相同
                       return *this;
               }
               string& operator+=(const char* str)
               {
                       append(str);
                       return *this;
               }
               string& push_back(const char ch)
               {
                        //if (_size == _capacity)
                       //{
                       //      //有了reserve就可以直接复用了
                       //      reserve(_capacity == 0 ? 4 : _capacity * 2);
                       //      /*char* tmp = new char[_capacity * 2 + 1];
                       //      strcpy(tmp, _str);
                       //      delete[] _str;
                       //      _str = tmp;
                       //      _capacity = 2 * _capacity;*/
                       //}
                       //_str[_size] = ch;
                       //_str[_size + 1] = '\0';
                       //_size++;
                       //return *this;
                       insert(_size, ch); //直接复用insert
                       return *this;
               }
               string& append(const char* str)
               {
                        //if (_size + strlen(str) > _capacity)
                       //{
                       //      reserve(_size + strlen(str));
                       //      /*char* tmp = new char[_size + strlen(str) + 1];
                       //      strcpy(tmp, _str);
                       //      delete[] _str;
                       //      _str = tmp;
                       //      _capacity = _capacity + strlen(str);*/
                       //}
                       //strcpy(_str + _size, str);
                       //_size = _size + strlen(str);//_size + strlen(str)多次使用,可以用一个变量储存
                       //return *this;
                       insert(_size, str);
                       return *this;
               }
               string& insert(size_t pos, const char ch)
               {
                       assert(pos <= _size);
                       if (_size == _capacity)
                       {
                              reserve(_capacity == 0 ? 4 : _capacity * 2);
                       }
                       for (size_t i = _size + 1; i > pos; i--)
                       {
                              _str[i] = _str[i - 1]; //解决size_t边界问题的最佳方式,让首插到0结束,而不是越过0结束
                       }
                       _str[pos] = ch;
                       _size++;
                       return *this;
               }
               string& insert(size_t pos, const char* str)
               {
                       assert(pos <= _size);
                       size_t len = strlen(str);
                       if (len == 0)
                              return *this;
                       if (len + _size > _capacity)
                       {
                              reserve(len + _size);
                       }
                       for (size_t i = _size + len; i >= pos + len; i--)
                               //如果前面不考虑len为0的情况,这里会因为整形提升在pos也为0时陷入死循环
                       {
                              _str[i] = _str[i - len];
                       }
                       for (size_t i = pos; *str != '\0'; i++, str++)
                       {
                              _str[i] = *str;
                       }
                       _size += len;
                       return *this;
               }
               string& erase(size_t pos = 0, size_t len = npos)
               {
                       assert(pos <= _size);
                       if (pos + len >= _size || len == npos)
                       {
                              _str[pos] = '\0';
                              _size = pos;
                       }
                       else
                       {
                              size_t begin = pos + len;
                              while (_str[pos++] = _str[begin++])
                              {}
                              _size = _size - len;
                       }
                       return *this;
               }
               size_t find(const char ch, size_t pos = 0)
               {
                       size_t i = pos;
                       while (_str[i])
                       {
                              if (_str[i] == ch)
                              {
                                      return i;
                              }
                              i++;
                       }
                       return -1;
               }
               size_t find(const char* str, size_t pos = 0)
               {
                       size_t i = pos;
                       const char* p = strstr(_str + pos, str);
                        //strstr是暴力查找,有一种优化算法叫kmp算法,但不在C语言的库中,感兴趣的可以了解一下。
                       if(p != nullptr)
                              return p - _str;
                       else
                              return -1;
               }
               void clear()
               {
                       _str[0] = '\0';
                       _size = 0;
               }
        private:
               char* _str;
               size_t _size; //_size是存储字符的个数
               size_t _capacity; //_capacity是实际存储有效字符的空间
               static size_t npos;
                //static修饰的变量理论上是不允许给缺省值的,因为只是声明,但是存在const static size_t npos = -1;
               //这种情况可以理解为语法上的bug,但是这么写是合法的。
        };
        size_t string::npos = -1;
        ostream& operator<<(ostream& out, const string& s) //必须设为全局函数,out才能为第一个参数
        {
                //out << s.c_str();存在一点问题,比如在字符串中间插入\0,那么一部分内容不会打印,所以一般使用下面内容。
               for (auto ch : s)
               {
                       cout << ch;
               }
               return out;
        }
         /*istream& operator>>(istream& in, string& s)
        {
               char ch = 0;
               in >> ch;
               while (ch != ' ' && ch != '\n')//这么写是有问题的,in也就是cin无法识别空格符和换行符,循环永远不会停止
               {
                       s += ch;
                       in >> ch;
               }
               return in;
        }*/
         //istream& operator>>(istream& in, string& s)
        //{
        //      char ch = 0;
        //      ch = in.get();//get函数是istream类中的函数,作用是从缓冲区里一个一个的拿出字符
        //      while (ch != ' ' && ch != '\n')
        //      {
        //             s += ch;
        //             ch = in.get();
        //             //有了get函数,就可以从缓冲区里识别空格符和换行符,比如输入hello world,hello被存入了string s中。
        //             //world则还在缓冲区里。当需要输入字符时,会直接从缓冲区中拿出来(不包括空格)。
        //             //一次输入数据结束跳过一个空格,如果缓冲区数据中有两个连续空格,
        //             //就会出现要输入字符的字符串自动为空的情况,如果换一种空格不停止的停止条件,空格就是可以显示的。
        //      }
        //      return in;
        //}
        istream& operator>>(istream& in, string& s) //对上一种写法的优化
        {
               s.clear(); //标准库中cin >> string是会清除string中原有数据的
               char ch = 0;
               ch = in.get();
               char buff[128] = { '\0' };
               size_t i = 0;
               while (ch != ' ' && ch != '\n')
               {
                        /*s += ch;*/ //频繁的+=会不停的扩容
                       buff[i++] = ch;
                       if (i == 127)
                             //127是buff数组的最后一个char空间,当完成下标126空间的赋值后,buff空间就只剩下最后一个char空间。
                            //i因为后置++的原因已经变成127,此时buff空间满了。所以在i == 127时拷贝数据。
                       {
                              s += buff;
                              memset(buff, '\0', 128);
                              i = 0;
                       }
                       ch = in.get();
               }
               s += buff;
               return in;
        }
        bool operator< (const string& s1, const string& s2)
        {
               return strcmp(s1.c_str(), s2.c_str()) < 0;
        }
        bool operator== (const string& s1, const string& s2)
        {
               return strcmp(s1.c_str(), s2.c_str()) == 0;
        }
        bool operator!= (const string& s1, const string& s2)
        {
               return !(s1 == s2);
        }
        bool operator<= (const string& s1, const string& s2)
        {
               return s1 < s2 || s1 == s2;
        }
        bool operator>= (const string& s1, const string& s2)
        {
               return !(s1 < s2);
        }
        bool operator> (const string& s1, const string& s2)
        {
               return !(s1 <= s2);
        }
}
namespace modern //string现代写法模拟——资本家版本
{
        class string
        {
        public:
               string(const char* c = "")
                       :_size(strlen(c))
                       , _capacity(_size)
               {
                       _str = new char[strlen(c) + 1];
                       strcpy(_str, c);
               }
               ~string()
               {
                       if (_str)
                       {
                              delete[] _str;
                              _str = nullptr;
                              _size = _capacity = 0;
                       }
               }
               void swap(string& s) //string& s不能用const修饰,因为std::string没有匹配参数
               {
                       std::swap(_str, s._str); //不指定std::swap,编译器会认为要调用modern::string::swap(类中优先使用成员函数)
                       std::swap(_size, s._size);
                       std::swap(_capacity, s._capacity);
               }
               string(const string& s) //这是拷贝构造的现代写法——几乎所以需要深拷贝的数据结构都可以这么写
                       :_str(nullptr) //拷贝构造:string s2(s1),其中s1是已经开好的,s2也开好了,但没有初始化,_str指向未知空间
                       ,_size(0)       //也就是说,在刚进入函数时,_str指向的空间是非法的。所以要进行初始化,因为后面要与tmp交换
                       ,_capacity(0) //tmp是临时变量,拷贝构造结束后要析构,此时tmp._str指向的是原_str指向的空间
               {
                       string tmp(s._str); //调用构造函数
                       swap(tmp);
                        //这里的交换不能用swap(*this,tmp)来解决,库中的swap是创建一个模板,如果交换的是自定义类,
                       //创造临时变量时就又要调用拷贝构造函数,形成逻辑死循环。
               }
                /*string& operator=(const string& s)
               {
                       if (this != &s)
                       {
                              string tmp(s._str);
                              swap(tmp);
                       }
                       return *this;
               }*/
               string& operator=(string s) //operator=函数精简版
               {
                       swap(s); //参数不用const修饰的原因:swap函数的底层是用std::swap实现的,std::swap是用模板实现的, 必须要保证两个参数类型相同,如果加上const,就意味着this指针指向的内容也加上const, 那么this指向的内容将不可修改。
                       return *this;
               }
        private:
               char* _str;
               size_t _size;
               size_t _capacity;
        };
}
test.cpp中
#define _CRT_SECURE_NO_WARNINGS 1
#include<iostream>
#include<string>
using namespace std;
#include"string.h"
void teststring1()
{
        tradition::string s1;
        tradition::string s2("hello world");
        tradition::string s3(s2);
        s2[0] = 'x';
        s1 = s2;
        cout << s1.c_str() << endl;
        cout << s2.c_str() << endl;
        cout << s3.c_str() << endl;
        cout << s3.size() << endl;
}
void teststring2()
{
        tradition::string s1;
        tradition::string s2("hello world");
        tradition::string s3(s2);
        s1 = s2;
        s1.append(" hehe");
        s3.push_back('a');
        s3 += '1';
        s3 += '2';
        s2 += " see you world";
        cout << s1.c_str() << endl;
        cout << s2.c_str() << endl;
        cout << s3.c_str() << endl;
}
void teststring3()
{
        tradition::string s1;
        tradition::string s2("hello world");
        tradition::string s3(s2);
        s1 = s2;
        s1.resize(15);
        s1.resize(14, 'x');
        s1.resize(5);
        cout << s1.c_str() << endl;
        cout << s2.c_str() << endl;
        cout << s3.c_str() << endl;
}
void teststring4()
{
        tradition::string s1;
        tradition::string s2("hello world");
        tradition::string s3(s2);
        s1 = s2;
        s1.resize(5);
        cout << s1.c_str() << endl;
        cout << s2.c_str() << endl;
        cout << s3.c_str() << endl;
        cout << (s1 < s2) << endl; //类的运算符重载会到对应的命名空间中去找重载函数
}
void teststring5()
{
        tradition::string s("hello world");
        s.insert(5, 'a');
        cout << s.c_str() << endl;
        s.insert(0, '\0');
        cout << s.c_str() << endl; //什么都不打印,s中第一个字符就是\0
        s.insert(0, "hehe");
        cout << s.c_str() << endl;
        for (auto ch : s)
        {
               cout << ch; //打印出string s中的全部内容。
        }
}
void teststring6()
{
        tradition::string s("hello world");
        tradition::string::iterator it = s.begin();
                //string中定义或者typedef出来的是string的内嵌类型,使用需要声明在string类中
        while (it != s.end())
        {
               cout << *it << " ";
               it++;
        }
        cout << endl;
        for (auto ch : s) //自己写的string类中有begin()和end()函数才能在这里实现范围for
        {
               ch += 1; //范围for的底层就是迭代器,将*it赋值给ch
               cout << ch << " "; //打印出i f m m p ! x p s m e
        }
        cout << endl;
        for (auto ch : s)
        {
               cout << ch << " "; //打印出h e l l o   w o r l d ,因为是*it赋值给ch,改变的是ch,而不是string s
        }
        cout << endl;
        for (auto& ch : s) //加上引用就可以改变string s了,it是指针,对指针解引用就可以改变对应空间的值
        {
               ch -= 1;
        }
        cout << endl;
        for (auto& ch : s)
                //范围for的替换具有规范性,只会替换成begin(),如果在迭代器的地方将begin()换成Begin(),
               //范围for会找不到begin(),从而报错
        {
               cout << ch << " ";
        }
        cout << endl;
        
}
// Prints the characters of a const string twice: once through the const
// operator[] overload and once through the const iterators.
void func(const tradition::string& s1)
{
    size_t idx = 0;
    while (idx < s1.size())
    {
        cout << s1[idx] << " "; // const object, so the const operator[] is selected
        ++idx;
    }
    cout << endl;

    // A const string hands out const_iterator from begin()/end().
    for (tradition::string::const_iterator it = s1.begin(); it != s1.end(); ++it)
    {
        cout << *it << " ";
    }
    cout << endl;
}
void teststring7()
{
        tradition::string s("hello world");
        func(s);
}
void teststring8()
{
        tradition::string s("hello world");
        s.erase(5, 3);
        cout << s.c_str() << endl;
        s.erase(5,8);
        cout << s << endl;
        cin >> s;
        cout << s  << endl;
}
void teststring9()
{
        modern::string s("hello world");
        modern::string s1(s);
        modern::string s2;
        s2 = s1;
}
int main()
{
        try //捕获异常
        {
               teststring9();
        }
        catch(const exception& e)
        {
               cout << e.what() << endl;
        }
        return 0;
}
最后,关于string的大小,从自己实现的来看:
private:
    char* _str;
    size_t _size;
    size_t _capacity;
在32位平台下(指针和size_t均为4字节),大小应该是12字节。但是:
结果:
从调试结果来看,vs2019中的string大框架为(不同编译器下实现的方式不一样,大体上是一样的):
private:
    char _buf[16]; //_Myres中默认容量为15,算上\0,大小为16,就是因为这里的数组
    char* _ptr;
    size_t _size;
    size_t _capacity;
这样写的好处是,如果要存储短字符串,就可以直接放到_buf[16]中去,不用去堆上申请空间;如果字符串长度超过15,就放到_ptr指向的堆空间中去。这种做法是用空间换时间,代价是字符串长度超过15时,_buf[16]的空间就被浪费了。
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值