vector<bool>中的代理机制与程序运行效率

最新推荐文章于 2022-05-20 20:51:43 发布

Kylin_he

最新推荐文章于 2022-05-20 20:51:43 发布

阅读量796

点赞数

分类专栏： C++ 文章标签： stl vector 代理

本文链接：https://blog.csdn.net/hekailing/article/details/40481559

版权

C++ 专栏收录该内容

12 篇文章 0 订阅

订阅专栏

事情起源于一道LeetCode题。

LeetCode上有一道题，叫N-Queens。这是一道很经典的题，很快我就给出了我的代码。如下：

// Backtracking N-Queens solver that keeps an n x n grid of attack counters.
//
// Encoding of vvi[row][col]:
//    1  -> a queen stands on the cell
//    0  -> the cell is free and not attacked
//   <0  -> the cell is attacked; the magnitude is the number of attacking queens
class Solution {
public:
    // Returns every solution for an n x n board. Each solution is n strings of
    // length n, with 'Q' marking a queen and '.' an empty cell. Rows are filled
    // top to bottom and columns are tried left to right, which fixes the order
    // of the returned solutions.
    std::vector<std::vector<std::string> > solveNQueens(int n) {
        vvs.clear();
        vvn = n;
        // Only build rows for a positive size; otherwise the grid stays empty.
        if (n > 0) vvi.assign(n, std::vector<int>(n, 0));
        else vvi.clear();
        for (int col = 0; col < n; ++col) dfs(0, col);
        return vvs;
    }

    // Tries to place a queen on (x, y); on success continues with row x + 1,
    // or records the finished board when x is the last row.
    void dfs(int x, int y)
    {
        // Any non-zero value means the cell is occupied or attacked.
        if (vvi[x][y] != 0) return;

        if (x != vvn - 1) {
            mark(x, y);
            for (int col = 0; col < vvn; ++col) dfs(x + 1, col);
            unmark(x, y);
            return;
        }

        // Last row: drop the queen in temporarily, render the grid, take it out.
        vvi[x][y] = 1;
        std::vector<std::string> board;
        for (int row = 0; row < vvn; ++row) {
            std::string line(vvn, '.');
            for (int col = 0; col < vvn; ++col) {
                if (vvi[row][col] == 1) line[col] = 'Q';
            }
            board.push_back(line);
        }
        vvs.push_back(board);
        vvi[x][y] = 0;
    }

    // Places a queen on (x, y) and adds one attack to every cell it threatens.
    void mark(int x, int y) {
        vvi[x][y] = 1;
        sweep(x, y, true);
    }

    // Removes the queen from (x, y) and takes back the attacks added by mark().
    void unmark(int x, int y) {
        vvi[x][y] = 0;
        sweep(x, y, false);
    }

private:
    // Visits every cell on the four diagonals, the column and the row through
    // (x, y), excluding (x, y) itself, and adjusts its attack counter.
    // placing == true : cells that hold no queen (value <= 0) are decremented.
    // placing == false: cells that are attacked (value < 0) are incremented.
    void sweep(int x, int y, bool placing) {
        static const int kStep[8][2] = {
            {-1, -1}, { 1, -1}, { 1,  1}, {-1,  1},   // diagonals
            {-1,  0}, { 1,  0},                       // column y
            { 0, -1}, { 0,  1}                        // row x
        };
        for (int d = 0; d < 8; ++d) {
            int row = x + kStep[d][0];
            int col = y + kStep[d][1];
            while (row >= 0 && row < vvn && col >= 0 && col < vvn) {
                int& cell = vvi[row][col];
                if (placing) {
                    if (cell <= 0) --cell;
                } else if (cell < 0) {
                    ++cell;
                }
                row += kStep[d][0];
                col += kStep[d][1];
            }
        }
    }

    std::vector<std::vector<int> > vvi;          // attack-count grid, see class comment
    int vvn;                                     // board size of the current run
    std::vector<std::vector<std::string> > vvs;  // solutions collected so far
};

考虑到上述代码中的mark和unmark函数对vvi矩阵的修改操作较复杂，于是不再用二维数组作为flag，改用四个分别代表行、列和两条对角线的数组作为flag。代码如下：

// Backtracking N-Queens solver that tracks occupied rows, columns and both
// diagonal directions with one flag per line instead of a full n x n grid.
//
// The flags are std::vector<bool>, which stores them as packed bits behind a
// proxy reference. That choice is kept as-is because this listing is the one
// whose mark()/unmark() cost the surrounding text measures.
class Solution {
public:
    // Returns every solution for an n x n board. Each solution is n strings of
    // length n, with 'Q' marking a queen and '.' an empty cell.
    // For n <= 0 the result is empty. Without that guard the diagonal markers
    // would be sized with 2*n-1, a negative value that turns into a huge
    // unsigned size and makes the vector allocation throw.
    std::vector<std::vector<std::string> > solveNQueens(int n) {
        stkQ.clear();
        vvs.clear();
        vvn = n;
        if (n <= 0) {
            row_marker.clear();
            col_marker.clear();
            diag_marker.clear();
            diag_marker2.clear();
            return vvs;
        }
        row_marker.assign(n, false);
        col_marker.assign(n, false);
        diag_marker.assign(2 * n - 1, false);
        diag_marker2.assign(2 * n - 1, false);
        dfs(0);
        return vvs;
    }

    // Places a queen in row n, trying the columns left to right, and recurses
    // into the next row; a complete placement is rendered and stored in vvs.
    void dfs(int n) {
        for (int col = 0; col < vvn; ++col) {
            if (!permition(n, col)) continue;

            mark(n, col);
            if (n < vvn - 1) {
                dfs(n + 1);
                unmark(n, col);
                continue;
            }

            // Last row: stkQ[row] holds the queen's column for every row.
            std::vector<std::string> board;
            for (int row = 0; row < vvn; ++row) {
                std::string line(vvn, '.');
                line[stkQ[row]] = 'Q';
                board.push_back(line);
            }
            vvs.push_back(board);
            unmark(n, col);
            // The earlier rows occupy vvn-1 distinct columns, so no other
            // column of the last row can be free; stop scanning.
            break;
        }
    }

    // True when no queen occupies row x, column y, or either diagonal through
    // (x, y). (The spelling of the name is kept for existing callers.)
    bool permition(int x, int y) {
        if (row_marker[x]) return false;
        if (col_marker[y]) return false;
        if (diag_marker[x - y + vvn - 1]) return false;
        return !diag_marker2[x + y];
    }

    // Records a queen on (x, y): sets the four line flags and pushes the
    // column onto stkQ.
    void mark(int x, int y) {
        row_marker[x] = true;
        col_marker[y] = true;
        diag_marker[x - y + vvn - 1] = true;   // x - y ranges over [-(vvn-1), vvn-1]
        diag_marker2[x + y] = true;            // x + y ranges over [0, 2*vvn-2]
        stkQ.push_back(y);
    }

    // Undoes mark(x, y): clears the four line flags and pops the column.
    void unmark(int x, int y) {
        row_marker[x] = false;
        col_marker[y] = false;
        diag_marker[x - y + vvn - 1] = false;
        diag_marker2[x + y] = false;
        stkQ.pop_back();
    }

private:
    std::vector<bool> row_marker;    // row_marker[r]: row r holds a queen
    std::vector<bool> col_marker;    // col_marker[c]: column c holds a queen
    std::vector<bool> diag_marker;   // indexed by x - y + vvn - 1
    std::vector<bool> diag_marker2;  // indexed by x + y
    std::vector<int> stkQ;           // column of the queen in each placed row
    int vvn;                         // board size of the current run
    std::vector<std::vector<std::string> > vvs;  // solutions collected so far
};

本来以为这样修改既降低了空间复杂度，也降低了时间复杂度，应该是极好的。可惜在服务器上后者的测试时间居然略高于前者。

以后为说明方便，将修改前的代码称为代码一，修改后的代码称为代码二。

起初以为是测试使用的n较小（在我台式机上n=10时，程序就要跑几秒），所以代码二体现不出优势。但后来仔细一想，觉得就算n小一点，代码二也不至于比代码一慢呀。于是仔细分析上面两份代码。

代码一与代码二的核心思想都是采用递归形式的深度优先遍历算法。两者的遍历过程几乎完全一样，不同的仅仅是mark、unmark和判断操作。于是猜想两者的差距应该与mark和unmark的运行时间有很大关系，因此对mark和unmark的性能进行测试，测试代码如下：

// Micro-benchmark: times 10000 mark/unmark round trips on each solver and
// prints the elapsed tick difference, first for solution1, then for solution2.
// NOTE(review): GetTickCount() is documented to advance in coarse steps
// (typically 10-16 ms), so a difference of a few milliseconds is below what
// this measurement can resolve.
int start = GetTickCount();
for (int i = 0; i < 10000; ++i) {
	solution1.mark(0, 0);
	solution1.unmark(0, 0);
}
int end = GetTickCount();
cout << end - start << endl;

// Same loop against the second solver, reusing the two tick variables.
start = GetTickCount();
for (int i = 0; i < 10000; ++i) {
	solution2.mark(0, 0);
	solution2.unmark(0, 0);
}
end = GetTickCount();
cout << end - start << endl;

方格边长n	代码一执行10000次时间(ms)
3	46
5	78
7	140
9	172
11	187
13	234
15	265
17	296

从上表中可以看出n与执行时间T之间近似呈线性关系，这是正常的：代码一的mark和unmark都要遍历皇后所在的行、列和两条对角线，工作量与n成正比。

row_marker

然后，将代码二中的col_marker等vector的元素类型设置为bool，再将n分别设置为上述值，得到的T基本上都是234左右。col_marker的元素类型设置为char或int的时候，得到的T近似为5。我很好奇bool和char之间的区别到底是什么？

这是bool类型的row_marker[x]=1;的汇编代码：

row_marker[x] = 1;
003D36B2 push 1
003D36B4 mov eax,dword ptr [ebp+8]
003D36B7 push eax
003D36B8 lea ecx,[ebp-134h]
003D36BE push ecx
003D36BF mov ecx,dword ptr [ebp-14h]
003D36C2 call std::vector<bool,std::allocator<bool> >::operator[] (3D1181h)
003D36C7 mov dword ptr [ebp-13Ch],eax
003D36CD mov edx,dword ptr [ebp-13Ch]
003D36D3 mov dword ptr [ebp-140h],edx
003D36D9 mov dword ptr [ebp-4],0
003D36E0 mov ecx,dword ptr [ebp-140h]
003D36E6 call std::_Vb_reference<std::allocator<bool> >::operator= (3D142Eh)
003D36EB mov dword ptr [ebp-4],0FFFFFFFFh
003D36F2 lea ecx,[ebp-134h]
003D36F8 call std::_Vb_reference<std::allocator<bool> >::~_Vb_reference<std::allocator<bool> > (3D18D9h)
这是char类型的row_marker[x]=1;的汇编代码：

row_marker[x] = 1;
00D832A3 mov eax,dword ptr [x]
00D832A6 push eax
00D832A7 mov ecx,dword ptr [this]
00D832AA call std::vector<char,std::allocator<char> >::operator[] (0D81122h)
00D832AF mov byte ptr [eax],1

很明显，vector<bool>的汇编代码比vector<char>的汇编代码长太多了，可以明显看出来的区别就是vector<bool>的代码还需要调用赋值运算符，相比之下vector<char>则只是简单的使用了一句mov命令就完成了赋值操作。为什么会有这个区别？不过为了完整地看明白vector<bool>的内部机制，我们还是从[]运算符开始看起：

在执行row_marker[x] = true;语句时，程序首先调用[]操作符：

// Mutable element access: forms the iterator begin() + _Off and dereferences
// it. The result is handed back by value as the container's `reference` type.
// No bounds check on _Off appears in this excerpt.
reference operator[](size_type _Off)
		{	// subscript mutable sequence
		return (*(begin() + _Off));
		}

支持随机访问容器的[]运算符都这样，并不奇怪，于是接着进入到+运算符函数中：

// Iterator + integer: copies this iterator into _Tmp, advances the copy with
// operator+= and returns the advanced copy by value. *this is not modified
// (the member is const).
_Mytype operator+(difference_type _Off) const
		{	// return this + integer
		_Mytype _Tmp = *this;
		return (_Tmp += _Off);
		}

再然后是+=运算符：

// In-place advance: casts `this` to `_Mybase *` and forwards the work to the
// base class's operator+=, then returns *this as the derived type.
_Mytype& operator+=(difference_type _Off)
		{	// increment by integer
		*(_Mybase *)this += _Off;
		return (*this);
		}

奇怪的地方出现了，对于vector的迭代器，+=运算符只需要将指向该元素的指针加_Off即可，而这里转而去调用_Mybase的+=运算符操作。就让我们一窥_Mybase中的玄机吧！

// Advances the position by _Off, which may be negative. The position is the
// pair (_Myptr, _Myoff): on exit _Myoff has been reduced modulo _VBITS and
// _Myptr has been moved by the corresponding number of whole steps.
// NOTE(review): presumably _Myptr addresses a storage word, _Myoff is the
// unsigned bit index inside it and _VBITS is the number of bits per word;
// none of those declarations are part of this excerpt -- confirm in the header.
_Mytype& operator+=(difference_type _Off)
		{	// increment by integer
		// Taken only when _Off is negative and larger in magnitude than the
		// current _Myoff, i.e. when the sum would drop below zero.
		if (_Off < 0 && this->_Myoff < 0 - (size_type)_Off)
			{	/* add negative increment */
			// assumes _Myoff is unsigned, so this sum wraps around -- TODO confirm
			this->_Myoff += _Off;
			// After the wrap, (size_type)(-1) - _Myoff equals (shortfall - 1),
			// so this steps back 1 + (shortfall - 1) / _VBITS units.
			this->_Myptr -= 1 + ((size_type)(-1) - this->_Myoff) / _VBITS;
			this->_Myoff %= _VBITS;
			}
		else
			{	/* add non-negative increment */
			// Also reached for a negative _Off whose magnitude does not exceed
			// _Myoff, because the condition above is false in that case.
			this->_Myoff += _Off;
			this->_Myptr += this->_Myoff / _VBITS;
			this->_Myoff %= _VBITS;
			}
		return (*this);
		}

至此，聪明的程序员应该能看出端倪了。这个函数的大意就是：若_Off不小于0（或者_Off为负、但其绝对值不超过_Myoff），则将_Myoff加上_Off，然后_Myoff除以_VBITS，商累加给_Myptr，余数赋给_Myoff；若_Off小于0且_Myoff小于_Off的绝对值，说明需要退回到前面的存储单元，此时_Myptr要减去相应的商，_Myoff同样取余数……（其实就是比特映射，不再赘言了）
现在，终于完成了迭代器的运算，然后就是调用迭代器的*运算符访问了，这个方法和普通迭代器的*运算符一样。
最后，便是调用赋值运算符：

// Assigns _Val to the bit this object refers to and returns *this.
// The value at _Getptr() is read, combined with _Mask() and written back:
// OR with the mask sets the bit, AND with the complemented mask clears it.
_Mytype& operator=(bool _Val)
		{	// assign _Val to bit
		if (_Val)
			*(_Vbase *)_Getptr() |= _Mask();
		else
			*(_Vbase *)_Getptr() &= ~_Mask();
		return (*this);
		}

这里面关键是一个_Mask()函数：

// Builds the single-bit mask for this position: 1 shifted left by _Myoff,
// converted to _Vbase.
// NOTE(review): the literal 1 is an int, so the shift is evaluated in int
// before the cast; if _Myoff can reach the sign-bit position this relies on
// the compiler's handling of that shift -- confirm against _VBITS.
_Vbase _Mask() const
		{	// convert offset to mask
		return ((_Vbase)(1 << this->_Myoff));
		}

根据_Myoff找到需要修改的bit位。
经过如此复杂的工序，终于完成了vector<bool>中元素的赋值。同时也找到了代码二效率低的原因。

PS：

vector<bool>这种机制叫做代理。通过代理，原本由一个字节存储的逻辑变量只需要一个比特来存储，内存占用降低到原来的八分之一，代价就是访问和修改的复杂度大大增加。所以除非是数据量特别大的情况，平时还是慎用vector<bool>。可以使用deque<bool>代替。

由于使用了代理，所以vector<bool> vb; bool* p = &vb[0];这样的语句是会编译报错的！

Kylin_he

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
vector<bool>中的代理机制与程序运行效率

事情起源于一道
复制链接

扫一扫

专栏目录

vector<bool>中的代理机制与程序运行效率

“相关推荐”对你有帮助么？