关于在C/C++中如何写一个swap函数,最直接的方法就是用临时变量
void swap_tempVariable(int &a, int &b){
int temp;
temp = a;
a = b;
b = temp;
}
然而在很多C++的面试中,常有这样的要求,让大家不用临时变量,然后写一个swap函数。这个问题简单,我们有两种方式:
//用算术的方法调换a,b的值
void swap_arithmetic(int &a, int &b){
a = a+b;
b = a-b;
a = a-b;
}
//用位运算的方法调换a,b的值
void swap_bit(int &a, int &b){
a = a^b;
b = a^b;
a = a^b;
}
后两种方法相对于第一种,少定义了一个临时变量,而且语句也只有3条,看起来在时间上和空间上都进行了优化。但结果是这样的吗?我们用gtest来测试一下,每个函数运行10000次,看看消耗了多少时间
#include"gtest/gtest.h"
#include <sys/time.h>
void swap_tempVariable(int &a, int &b){
int temp;
temp = a;
a = b;
b = temp;
}
void swap_arithmetic(int &a, int &b){
a = a+b;
b = a-b;
a = a-b;
}
void swap_bit(int &a, int &b){
a = a^b;
b = a^b;
a = a^b;
}
long getCurrentUTime()
{
struct timeval tv;
gettimeofday(&tv,NULL);
return tv.tv_usec;
}
class lexQuickTest : public testing::Test{
protected:
virtual void SetUp() {
start_time_ = getCurrentUTime();
}
virtual void TearDown() {
// Gets the time when the test finishes
const long end_time = getCurrentUTime();
std::cout << "The test take"<<end_time -start_time_<<"us.";
}
long start_time_;
};
TEST_F(lexQuickTest, swap_tempVariable){
int time = TEST_TIME;
int a;
int b;
while(time-->1){
a = time;
b = time-1;
swap_tempVariable(a,b);
}
}
TEST_F(lexQuickTest, swap_arithmetic){
int time = TEST_TIME;
int a;
int b;
while(time-->1){
a = time;
b = time-1;
swap_arithmetic(a,b);
}
}
TEST_F(lexQuickTest, swap_bit){
int time = TEST_TIME;
int a;
int b;
while(time-->1){
a = time;
b = time-1;
swap_bit(a,b);
}
}
我们来看看运行的效果,太(cai)棒(ni)了(mei),和我们想的不一样,使用临时变量的方法,运行花费了105us(请自动忽视gtest的1ms,因为us的单位太小,这个ms是随机的), 而其他两种方法分别花费了161us和167us. 多花了将近60%的时间 (经过多次测试,用临时变量的方法,运行的时间是变化,具体原因不详,但效率比后面两种方法高是肯定的)
[==========] Running 3 tests from 1 test case.
[----------] Global test environment set-up.
[----------] 3 tests from lexQuickTest
[ RUN ] lexQuickTest.swap_tempVariable
The test take105us.[ OK ] lexQuickTest.swap_tempVariable (1 ms)
[ RUN ] lexQuickTest.swap_arithmetic
The test take161us.[ OK ] lexQuickTest.swap_arithmetic (0 ms)
[ RUN ] lexQuickTest.swap_bit
The test take167us.[ OK ] lexQuickTest.swap_bit (0 ms)
[----------] 3 tests from lexQuickTest (1 ms total)
[----------] Global test environment tear-down
[==========] 3 tests from 1 test case ran. (1 ms total)
[ PASSED ] 3 tests
为什么会这样,只有把汇编代码打出来看看了(用以下方法,需要定义一个main函数)
gcc swap.cc -g
objdump -dS a.out
00000000004004ec <_Z17swap_tempVariableRiS_>:
void swap_tempVariable(int &a, int &b){
4004ec: 55 push %rbp
4004ed: 48 89 e5 mov %rsp,%rbp
4004f0: 48 89 7d e8 mov %rdi,-0x18(%rbp)
4004f4: 48 89 75 e0 mov %rsi,-0x20(%rbp)
int temp;
temp = a;
4004f8: 48 8b 45 e8 mov -0x18(%rbp),%rax
4004fc: 8b 00 mov (%rax),%eax
4004fe: 89 45 fc mov %eax,-0x4(%rbp)
a = b;
400501: 48 8b 45 e0 mov -0x20(%rbp),%rax
400505: 8b 10 mov (%rax),%edx
400507: 48 8b 45 e8 mov -0x18(%rbp),%rax
40050b: 89 10 mov %edx,(%rax)
b = temp;
40050d: 48 8b 45 e0 mov -0x20(%rbp),%rax
400511: 8b 55 fc mov -0x4(%rbp),%edx
400514: 89 10 mov %edx,(%rax)
}
400516: 5d pop %rbp
400517: c3 retq
0000000000400518 <_Z15swap_arithmeticRiS_>:
void swap_arithmetic(int &a, int &b){
400518: 55 push %rbp
400519: 48 89 e5 mov %rsp,%rbp
40051c: 48 89 7d f8 mov %rdi,-0x8(%rbp)
400520: 48 89 75 f0 mov %rsi,-0x10(%rbp)
a = a+b;
400524: 48 8b 45 f8 mov -0x8(%rbp),%rax
400528: 8b 10 mov (%rax),%edx
40052a: 48 8b 45 f0 mov -0x10(%rbp),%rax
40052e: 8b 00 mov (%rax),%eax
400530: 01 c2 add %eax,%edx
400532: 48 8b 45 f8 mov -0x8(%rbp),%rax
400536: 89 10 mov %edx,(%rax)
b = a-b;
400538: 48 8b 45 f8 mov -0x8(%rbp),%rax
40053c: 8b 10 mov (%rax),%edx
40053e: 48 8b 45 f0 mov -0x10(%rbp),%rax
400542: 8b 00 mov (%rax),%eax
400544: 29 c2 sub %eax,%edx
400546: 48 8b 45 f0 mov -0x10(%rbp),%rax
40054a: 89 10 mov %edx,(%rax)
a = a-b;
40054c: 48 8b 45 f8 mov -0x8(%rbp),%rax
400550: 8b 10 mov (%rax),%edx
400552: 48 8b 45 f0 mov -0x10(%rbp),%rax
400556: 8b 00 mov (%rax),%eax
400558: 29 c2 sub %eax,%edx
40055a: 48 8b 45 f8 mov -0x8(%rbp),%rax
40055e: 89 10 mov %edx,(%rax)
}
400560: 5d pop %rbp
400561: c3 retq
0000000000400562 <_Z8swap_bitRiS_>:
void swap_bit(int &a, int &b){
400562: 55 push %rbp
400563: 48 89 e5 mov %rsp,%rbp
400566: 48 89 7d f8 mov %rdi,-0x8(%rbp)
40056a: 48 89 75 f0 mov %rsi,-0x10(%rbp)
a = a^b;
40056e: 48 8b 45 f8 mov -0x8(%rbp),%rax
400572: 8b 10 mov (%rax),%edx
400574: 48 8b 45 f0 mov -0x10(%rbp),%rax
400578: 8b 00 mov (%rax),%eax
40057a: 31 c2 xor %eax,%edx
40057c: 48 8b 45 f8 mov -0x8(%rbp),%rax
400580: 89 10 mov %edx,(%rax)
b = a^b;
400582: 48 8b 45 f8 mov -0x8(%rbp),%rax
400586: 8b 10 mov (%rax),%edx
400588: 48 8b 45 f0 mov -0x10(%rbp),%rax
40058c: 8b 00 mov (%rax),%eax
40058e: 31 c2 xor %eax,%edx
400590: 48 8b 45 f0 mov -0x10(%rbp),%rax
400594: 89 10 mov %edx,(%rax)
a = a^b;
400596: 48 8b 45 f8 mov -0x8(%rbp),%rax
40059a: 8b 10 mov (%rax),%edx
40059c: 48 8b 45 f0 mov -0x10(%rbp),%rax
4005a0: 8b 00 mov (%rax),%eax
4005a2: 31 c2 xor %eax,%edx
4005a4: 48 8b 45 f8 mov -0x8(%rbp),%rax
4005a8: 89 10 mov %edx,(%rax)
}
4005aa: 5d pop %rbp
4005ab: c3 retq
很明显,后两种方法生成了更多的代码,用了更多的寄存器,而且调用了更费时汇编操作。而收益仅仅是在栈上少用了点点空间。。。有意思么