c++数据结构:string的模拟实现

海的宇宙

已于 2024-08-07 09:24:16 修改

阅读量497

点赞数 11

分类专栏： c++ 文章标签： c++ 开发语言数据结构算法

于 2024-04-13 17:15:40 首次发布

本文链接：https://blog.csdn.net/2301_76886465/article/details/137716705

版权

c++ 专栏收录该内容

13 篇文章 0 订阅

订阅专栏

定义命名空间和类

// Dedicated namespace so this hand-written string does not clash with std::string.
namespace shh
{
	// A string modelled as a character array plus its bookkeeping fields.
	class string
	{
	private:
		char* _str{nullptr};   // character buffer
		size_t _size{0};       // size counter, starts at 0
		size_t _capacity{0};   // capacity counter, starts at 0
	};
}

为了避免和std里面的string起冲突,我们自己搞一个命名空间
string本质上来说就是一个字符数组

构造函数

		// Default / C-string constructor: stores a copy of `str`
		// (the empty literal "" when no argument is supplied).
		string(const char* str = "")
			: _size(strlen(str))
		{
			// Allocate _size + 1 chars: the extra slot holds the terminating '\0'.
			char* buffer = new char[_size + 1];
			strcpy(buffer, str);
			_str = buffer;
			_capacity = _size;
		}

记得要多开辟一个位置来存储'\0'.
缺省参数""里面本身就有一个'\0'.

拷贝构造

传统写法

		//拷贝构造
		string(const string& s1)
		{
			_str = new char[s1.capacity() + 1];
			strcpy(_str, s1._str);
			_capacity = s1.capacity();
			_size = s1.size();
		}

现代写法

		//
		string(const string& s1)
		{
			string tmp(s1._str);
			swap(tmp);
		}

用s1._str对tmp进行初始化,然后交换this和tmp

不能用编译器自动生成的拷贝构造,因为它是浅拷贝(只拷贝指针的值),会使两个对象指向同一块空间,析构的时候同一块空间被释放两次,程序会运行崩溃.

析构函数

		// Destructor: frees the character buffer and resets every field.
		~string()
		{
			delete[] _str;
			_str = nullptr;
			_size = 0;
			_capacity = 0;
		}

返回string的大小和容量

		// Accessors for the size and capacity fields. Both are const so they
		// can be called on const and non-const objects alike.
		size_t size() const { return _size; }
		size_t capacity() const { return _capacity; }

在函数后面加const,是为了让普通对象和const对象都能调用.

operator[]

		// Mutable element access: returns a reference to the character at `pos`.
		// Asserts that pos is a valid index (pos < _size).
		char& operator[](size_t pos)
		{
			assert(pos < _size);
			return *(_str + pos);
		}

		// Read-only element access for const objects: returns a const reference
		// to the character at `pos`. Asserts that pos < _size.
		const char& operator[](size_t pos) const
		{
			assert(pos < _size);
			return *(_str + pos);
		}

operator=

传统写法

		// Copy assignment (traditional form).
		// Takes the source by const reference so no extra copy is made just to
		// pass the argument, and returns *this so assignments can be chained.
		string& operator=(const string& s)
		{
			// Assigning an object to itself requires no work.
			if (this != &s)
			{
				// Build the new buffer first; the old one is released only after
				// the allocation and copy have succeeded.
				char* tmp = new char[s._capacity + 1];
				strcpy(tmp, s._str);
				delete[] _str;
				_str = tmp;
				_size = s._size;
				_capacity = s._capacity;
			}
			return *this;
		}

现代写法

		// Copy assignment (copy-and-swap): copy-construct a temporary from `s`,
		// exchange contents with it, and let the temporary's destructor release
		// the old buffer.
		string& operator=(const string& s)
		{
			string copy(s);
			copy.swap(*this);
			return *this;
		}

再简化

		// Copy assignment (simplified copy-and-swap): the by-value parameter is
		// already a fresh copy of the argument, so just exchange contents with it.
		string& operator=(string s)
		{
			s.swap(*this);
			return *this;
		}

在传参的时候直接构造s对象,然后交换.

迭代器

string的迭代器可以理解为指针,但是并不是所有容器的迭代器都是指针.
我们要在string类里面进行定义

	public:
		// Iterators for this string are plain character pointers.
		using iterator = char*;
		using const_iterator = const char*;

begin

		// Iterator to the first character.
		iterator begin()
		{
			return &_str[0];
		}

		// Const iterator to the first character (usable on const objects).
		const_iterator begin() const
		{
			return &_str[0];
		}

end

		// Iterator one past the last character (the position of the terminator).
		iterator end()
		{
			return &_str[_size];
		}

		// Const iterator one past the last character (usable on const objects).
		const_iterator end() const
		{
			return &_str[_size];
		}

reserve

reserve是一个可以对string进行扩容的函数.下面给出的resize会调用它:如果n大于_size,先扩容,再把后面新增的区间初始化为ch;反之则只缩短字符串的长度.

		// Changes the length to `n`.
		// Growing: ensures room via reserve(n) and fills the new positions
		// [_size, n) with `ch`. Shrinking: leaves the characters in place.
		// In both cases a terminator is written at index n.
		void resize(size_t n, char ch = '\0')
		{
			if (n > _size)
			{
				reserve(n);
				char* fill = _str + _size;
				char* const stop = _str + n;
				while (fill != stop)
				{
					*fill++ = ch;
				}
			}
			// Shared by both cases: terminate at the new length and record it.
			_str[n] = '\0';
			_size = n;
		}

push_back

push_back是往对象后面添加一个字符

		// Appends a single character, growing the buffer first when it is full.
		void push_back(char ch)
		{
			if (_size == _capacity)
			{
				// A capacity of 0 cannot be doubled, so start from 4 in that case.
				size_t grown = 4;
				if (_capacity != 0)
				{
					grown = _capacity * 2;
				}
				reserve(grown);
			}
			_str[_size] = ch;
			++_size;
			_str[_size] = '\0'; // keep the buffer terminated
		}

append

append是往对象后面添加一串字符串.要注意扩容的时候不能简简单单扩个二倍,因为我们不知道要添加的字符串的长度.

		// Appends the C string `str`.
		// Capacity is grown to exactly the combined length when needed: doubling
		// is not enough in general because the appended length is arbitrary.
		void append(const char* str)
		{
			const size_t extra = strlen(str);
			const size_t needed = _size + extra;
			if (needed > _capacity)
			{
				reserve(needed);
			}
			// Copy over the current terminator; strcpy writes the new one.
			strcpy(_str + _size, str);
			_size = needed;
		}

operator+=

我认为这个函数应该是最常用的函数,因为它的功能涵盖了前面的push_back和append.因为功能类似,我们对上面的函数进行复用.

		// Appends one character by delegating to push_back; returns *this.
		string& operator+=(char ch)
		{
			this->push_back(ch);
			return *this;
		}

		// Appends a C string by delegating to append; returns *this.
		string& operator+=(const char* str)
		{
			this->append(str);
			return *this;
		}

insert

insert的功能是在动态数组里面找pos位置插入字符/字符串

		// Inserts character `ch` before index `pos` (pos == _size appends).
		// Asserts that pos <= _size.
		void insert(size_t pos, char ch)
		{
			assert(pos <= _size);
			// Grow when full; a capacity of 0 cannot be doubled, so start from 4.
			if (_size == _capacity)
			{
				reserve(_capacity == 0 ? 4 : _capacity * 2);
			}
			// Shift [pos, _size] (terminator included) one slot to the right,
			// working from the back. `end` is the destination index, so it stops
			// at pos + 1 and the unsigned counter never has to go below zero,
			// which avoids the signed/unsigned comparison trap entirely.
			size_t end = _size + 1;
			while (end > pos)
			{
				_str[end] = _str[end - 1];
				--end;
			}
			_str[pos] = ch;
			++_size;
		}

		//插入一段字符串
		void insert(size_t pos, const char* str)
		{
			assert(pos <= _size);
			int len = strlen(str);
			size_t new_capacity = _size + len;
			//扩容并调整_size大小
			if (new_capacity > _capacity)
			{
				reserve(new_capacity);
				_size = new_capacity;
			}
			//从后往前复制数据,要把pos位置和后面所有都移动pos个位
			int end = _size;
			int tmp = _size-len-pos+1;
			while (tmp--)
			{
				_str[end] = _str[end - len];
				end--;
			}
			
			strncpy(_str + pos, str, len);
		}

erase

erase函数的功能是从pos位置开始删除len个字符,把字符串缩短.

		// Removes up to `len` characters starting at index `pos`.
		// With the default len (npos), or any len reaching past the end,
		// everything from pos onwards is removed.
		// Asserts that pos <= _size; pos == _size is accepted and removes nothing.
		void erase(size_t pos = 0, size_t len = npos)
		{
			assert(pos <= _size);
			if (len >= _size - pos)
			{
				// Truncate: terminate the string at pos.
				_str[pos] = '\0';
				_size = pos;
			}
			else
			{
				// Slide the remaining tail (terminator included) down over the
				// erased range. The ranges overlap, so memmove is required.
				memmove(_str + pos, _str + pos + len, _size - pos - len + 1);
				_size -= len;
			}
		}

npos是无符号整数的最大值,我们需要在string类里面声明,类外面定义

	private:
		char* _str = nullptr;
		size_t _size = 0;
		size_t _capacity = 0;
	public:
		// "No position / until the end" sentinel: the largest size_t value.
		// Declared with the same unsigned type as the positions it is compared to.
		static const size_t npos;
	};
	// npos is shared by all strings rather than stored per object, so it is a
	// static member: declared inside the class and defined once out here, with
	// the class scope spelled out. Converting -1 to size_t yields the largest value.
	const size_t string::npos = static_cast<size_t>(-1);

clear

clear的功能是清空数据,没有必要真的去删除空间

		// Empties the string while keeping the allocated buffer: the size is
		// reset and the terminator is moved to the first slot.
		void clear()
		{
			_size = 0;
			_str[0] = '\0';
		}

empty

		// True when the string holds no characters.
		bool empty() const { return _size == 0; }

resize

开空间并完成初始化

		// Changes the length to `n`.
		// Growing: calls reserve(n) and fills the new positions [_size, n) with
		// `ch`. Shrinking: leaves the characters in place.
		// In both cases a terminator is written at index n.
		void resize(size_t n, char ch = '\0')
		{
			if (n > _size)
			{
				reserve(n);
				char* fill = _str + _size;
				char* const stop = _str + n;
				while (fill != stop)
				{
					*fill++ = ch;
				}
			}
			// Shared by both cases: terminate at the new length and record it.
			_str[n] = '\0';
			_size = n;
		}

流插入<< 和流提取>>

	// Stream insertion: writes the characters of `s` to `out` one by one.
	// The string is taken by const reference so printing does not deep-copy it.
	ostream& operator<<(ostream& out, const string& s)
	{
		for (auto ch : s)
		{
			out << ch;
		}
		return out;
	}

get是istream里面的成员函数,它能读所有字符,包括空格和换行
这里有一个小优化,提前开一个buff的数组来存储字符,省去了push_back反复开空间的问题.还有效减少堆区内存碎片的产生.

	// Stream extraction: replaces `s` with the next token from `in`.
	// A token ends at a space or newline, and that delimiter is consumed.
	// Leading spaces/newlines are skipped so consecutive extractions work when
	// tokens are separated by more than one delimiter.
	// Characters are staged in a 128-byte stack buffer and appended in chunks,
	// which avoids growing the string one character at a time.
	istream& operator>>(istream& in, string& s)
	{
		s.clear();

		// get() is used because it also returns spaces and newlines.
		char ch = '\0';
		while (in.get(ch) && (ch == ' ' || ch == '\n'))
		{
			// skip leading delimiters
		}

		char buff[128];
		size_t i = 0;
		bool extracted = false;
		// `in` turns false as soon as get() fails (e.g. at end of input), which
		// ends the loop instead of spinning forever on an exhausted stream.
		while (in && ch != ' ' && ch != '\n')
		{
			buff[i++] = ch;
			extracted = true;
			if (i == 127)
			{
				// Buffer full: terminate it, flush it into s and start over.
				buff[i] = '\0';
				s += buff;
				i = 0;
			}
			in.get(ch);
		}
		if (i != 0)
		{
			buff[i] = '\0';
			s += buff;
		}
		// Hitting end of input after reading a token is not a failure: keep
		// eofbit but drop the failbit that get() raised.
		if (extracted && in.eof())
		{
			in.clear(in.rdstate() & ~std::ios_base::failbit);
		}
		return in;
	}

getline

getline能读取一行字符,遇到换行符停止.这里有跟流提取>>一样的优化

	// Replaces `s` with the characters of `in` up to the next newline; the
	// newline is consumed but not stored. Reading also stops at end of input.
	// Characters are staged in a 128-byte stack buffer and appended in chunks.
	void getline(istream& in, string& s)
	{
		s.clear();

		char buff[128];
		size_t i = 0;
		bool extracted = false;
		char ch = '\0';
		// get() also returns spaces; the loop ends on '\n' or when get() fails,
		// so an exhausted stream can no longer cause an endless loop.
		while (in.get(ch) && ch != '\n')
		{
			buff[i++] = ch;
			extracted = true;
			if (i == 127)
			{
				// Buffer full: terminate it, flush it into s and start over.
				buff[i] = '\0';
				s += buff;
				i = 0;
			}
		}
		if (i != 0)
		{
			buff[i] = '\0';
			s += buff;
		}
		// A final line without a trailing newline is still a successful read:
		// keep eofbit but drop the failbit that get() raised.
		if (extracted && in.eof())
		{
			in.clear(in.rdstate() & ~std::ios_base::failbit);
		}
	}

substr

substr能复制string一段区间的值

		// Returns a new string holding up to `len` characters starting at `pos`.
		// With the default len (npos), or any len reaching past the end, the
		// result runs to the end of the string. Asserts that pos < _size.
		// Declared const because it only reads from *this.
		string substr(size_t pos = 0, size_t len = npos) const
		{
			assert(pos < _size);
			// Clamp the requested length to what is actually available.
			const size_t avail = _size - pos;
			const size_t count = len > avail ? avail : len;

			string tmp;
			// Reserve once up front instead of growing repeatedly while appending.
			tmp.reserve(count);
			for (size_t i = pos; i < pos + count; i++)
			{
				tmp += _str[i];
			}
			return tmp;
		}

find

寻找字符/字符串

		// Returns the index of the first occurrence of `ch` at or after `pos`,
		// or npos when there is none. Asserts that pos < _size.
		size_t find(char ch, size_t pos = 0) const
		{
			assert(pos < _size);
			const char* const last = _str + _size;
			for (const char* cur = _str + pos; cur != last; ++cur)
			{
				if (*cur == ch)
				{
					return static_cast<size_t>(cur - _str);
				}
			}
			return npos;
		}


		// Returns the index of the first occurrence of the C string `ch` at or
		// after `pos`, or npos when there is none. Asserts that pos < _size.
		size_t find(const char* ch, size_t pos = 0) const
		{
			assert(pos < _size);
			// strstr performs the substring search.
			const char* hit = strstr(_str + pos, ch);
			if (hit == nullptr)
			{
				return npos;
			}
			return hit - _str;
		}

swap

算法库里面有一个模板的swap,所有类型都能用.
但是调用算法库里面的swap会造成三次拷贝和析构,影响效率

类里面

		
		// Exchanges contents with `s1` by swapping the three fields.
		void swap(string& s1)
		{
			// std:: must be spelled out: an unqualified swap here would resolve to
			// this member function, whose parameters do not match.
			std::swap(_capacity, s1._capacity);
			std::swap(_size, s1._size);
			std::swap(_str, s1._str);
		}

类外面

	// Non-member swap: forwards to the member swap so two strings can be
	// exchanged with a plain swap(a, b) call.
	void swap(string& s1, string& s2)
	{
		s2.swap(s1);
	}

这样就可以让我们在外面对两个string进行交换.
在重载决议中,参数同样匹配时非模板函数优先于函数模板(有现成的就用现成的),所以编译器会优先调用我们自己写的swap.