前言
vector是C++ STL 中的一种数据结构,可以理解为“向量”或者是“可变长数组”。vector比普通的数组更加节省空间,但是它的时间效率显然不如普通的数组。我曾经听学长说vector 的push_back 操作的总复杂度为O(NlogN),但我不这么认为(或许是我得了幻听,没有学长说过这话)。为此我按照类似vector的原理仿写了一个简易的“vector类”。
代码
#include<stdlib.h>
#include<string.h>
namespace std
{
template<class VarType>class vector //study-only imitation of a growable array
{
    //made by NEYC GGN -2017.9.2, for study only
    //offers only a few basic operations of the STL "vector"
    //NOTE: storage comes from calloc and elements are moved with memcpy, so no
    //constructor or destructor of VarType is ever run; intended for plain-data types
    int len,siz;   //len: number of allocated slots, siz: number of stored elements
    VarType* bgn;  //start of the calloc'ed buffer (NULL only after a failed allocation)

    //Move the stored elements into a fresh zeroed buffer of newLen slots.
    //Returns false and leaves the vector untouched when calloc fails.
    bool resize_storage(int newLen){
        VarType* fresh=(VarType*)calloc(newLen,sizeof(VarType));
        if(fresh==NULL)return false;
        if(siz>0)memcpy(fresh,bgn,sizeof(VarType)*siz);
        free(bgn);
        bgn=fresh;len=newLen;
        return true;
    }
    //Replace this vector's contents with a private copy of other's elements.
    //Returns false and leaves the vector untouched when calloc fails.
    bool assign_from(const vector& other){
        int want=other.len<1?1:other.len;
        VarType* fresh=(VarType*)calloc(want,sizeof(VarType));
        if(fresh==NULL)return false;
        if(other.siz>0)memcpy(fresh,other.bgn,sizeof(VarType)*other.siz);
        free(bgn);
        bgn=fresh;len=want;siz=other.siz;
        return true;
    }
public:
    //Create an empty vector with room for Len elements.
    //A Len below 1 is treated as 1: a capacity of 0 could never grow by doubling.
    vector(int Len=1){
        siz=0;len=Len<1?1:Len;
        bgn=(VarType*)calloc(len,sizeof(VarType));
        if(bgn==NULL)len=0; //no storage yet; push_back will try to allocate again
    }
    //Copy constructor: the copy gets its own buffer, so both destructors can run safely.
    vector(const vector& other){
        siz=0;len=0;bgn=NULL;
        assign_from(other); //on allocation failure the copy is simply left empty
    }
    //Copy assignment: deep copy; on allocation failure the old contents are kept.
    vector& operator=(const vector& other){
        if(this!=&other)assign_from(other);
        return *this;
    }
    //Release the buffer.
    ~vector(){
        free(bgn);
    }
    //Drop every element and shrink the storage back to a single slot.
    void clear(){
        free(bgn);
        siz=0;len=1;
        bgn=(VarType*)calloc(len,sizeof(VarType));
        if(bgn==NULL)len=0;
    }
    //Append itm at the back, doubling the storage first when it is full.
    //Returns the element count after the call; the count is unchanged (and itm
    //is not stored) when the required allocation fails.
    int push_back(VarType itm){
        if(!(siz<len)){
            int grown=len>0?len*2:1;
            if(!resize_storage(grown))return siz;
        }
        bgn[siz]=itm;
        return ++siz;
    }
    //Remove the last element and return the remaining element count
    //(0 when the vector was already empty).
    //The storage is halved first whenever the elements fit into the lower half.
    int pop_back(){
        if(siz==0)return 0;
        if(siz<=(len>>1))resize_storage(len>>1); //shrinking is optional: keep the bigger buffer if calloc fails
        return --siz;
    }
    //Pointer to the first element; the valid range is [ begin(), end() ).
    VarType* begin(){
        return bgn;
    }
    const VarType* begin()const{
        return bgn;
    }
    //Pointer one past the last element.
    VarType* end(){
        return bgn+siz;
    }
    const VarType* end()const{
        return bgn+siz;
    }
    //Unchecked element access; the caller must ensure 0 <= index < size().
    VarType& operator[](int index){
        return bgn[index];
    }
    const VarType& operator[](int index)const{
        return bgn[index];
    }
    //Number of stored elements.
    int size()const{
        return siz;
    }
//  int length(){ //for debug only: number of allocated slots
//      return len;
//  }
};
}
//test part of the algorithm:
#include<cstdio>
#include<algorithm>
using namespace std;
int main() //interactive smoke test for the hand-written vector
{
    vector<int>vec;                      //same usage as the STL container
    printf("input n=? ");                //ask for the number of elements
    int n=0;
    if(scanf("%d",&n)!=1){               //without this check a failed read would leave n unusable
        fprintf(stderr,"Error: n must be an integer\n");
        return 1;
    }
    printf("input array[0..%d]=? ",n-1);
    for(int i=1;i<=n;i++){               //read the elements one by one
        int a;
        if(scanf("%d",&a)!=1)break;      //stop at EOF or malformed input instead of pushing garbage
        vec.push_back(a);
    }
    sort(vec.begin(),vec.end());         //begin()/end() are raw pointers, which sort accepts
    for(int i=0;i<vec.size();i++)        //print the sorted elements
        printf("%d ",vec[i]);
    printf("\n");
    while(vec.size()!=0){                //empty the vector one element at a time
        vec.pop_back();                  //only to exercise pop_back; use vec.clear() to really clear
        printf("siz=%d \n",vec.size());
    }
    return 0;
}
这里再给出一个具有错误输出的debug_vector类和vector类的完整代码,便于调试。
#include<stdlib.h>
#include<stdio.h>
#include<string.h>
namespace std
{
template<class VarType>class vector //study-only imitation of a growable array
{
    //made by NEYC GGN -2017.9.2, for study only
    //offers only a few basic operations of the STL "vector"
    //NOTE: storage comes from calloc and elements are moved with memcpy, so no
    //constructor or destructor of VarType is ever run; intended for plain-data types
    int len,siz;   //len: number of allocated slots, siz: number of stored elements
    VarType* bgn;  //start of the calloc'ed buffer (NULL only after a failed allocation)

    //Move the stored elements into a fresh zeroed buffer of newLen slots.
    //Returns false and leaves the vector untouched when calloc fails.
    bool resize_storage(int newLen){
        VarType* fresh=(VarType*)calloc(newLen,sizeof(VarType));
        if(fresh==NULL)return false;
        if(siz>0)memcpy(fresh,bgn,sizeof(VarType)*siz);
        free(bgn);
        bgn=fresh;len=newLen;
        return true;
    }
    //Replace this vector's contents with a private copy of other's elements.
    //Returns false and leaves the vector untouched when calloc fails.
    bool assign_from(const vector& other){
        int want=other.len<1?1:other.len;
        VarType* fresh=(VarType*)calloc(want,sizeof(VarType));
        if(fresh==NULL)return false;
        if(other.siz>0)memcpy(fresh,other.bgn,sizeof(VarType)*other.siz);
        free(bgn);
        bgn=fresh;len=want;siz=other.siz;
        return true;
    }
public:
    //Create an empty vector with room for Len elements.
    //A Len below 1 is treated as 1: a capacity of 0 could never grow by doubling.
    vector(int Len=1){
        siz=0;len=Len<1?1:Len;
        bgn=(VarType*)calloc(len,sizeof(VarType));
        if(bgn==NULL)len=0; //no storage yet; push_back will try to allocate again
    }
    //Copy constructor: the copy gets its own buffer, so both destructors can run safely.
    vector(const vector& other){
        siz=0;len=0;bgn=NULL;
        assign_from(other); //on allocation failure the copy is simply left empty
    }
    //Copy assignment: deep copy; on allocation failure the old contents are kept.
    vector& operator=(const vector& other){
        if(this!=&other)assign_from(other);
        return *this;
    }
    //Release the buffer.
    ~vector(){
        free(bgn);
    }
    //Drop every element and shrink the storage back to a single slot.
    void clear(){
        free(bgn);
        siz=0;len=1;
        bgn=(VarType*)calloc(len,sizeof(VarType));
        if(bgn==NULL)len=0;
    }
    //Append itm at the back, doubling the storage first when it is full.
    //Returns the element count after the call; the count is unchanged (and itm
    //is not stored) when the required allocation fails.
    int push_back(VarType itm){
        if(!(siz<len)){
            int grown=len>0?len*2:1;
            if(!resize_storage(grown))return siz;
        }
        bgn[siz]=itm;
        return ++siz;
    }
    //Remove the last element and return the remaining element count
    //(0 when the vector was already empty).
    //The storage is halved first whenever the elements fit into the lower half.
    int pop_back(){
        if(siz==0)return 0;
        if(siz<=(len>>1))resize_storage(len>>1); //shrinking is optional: keep the bigger buffer if calloc fails
        return --siz;
    }
    //Pointer to the first element; the valid range is [ begin(), end() ).
    VarType* begin(){
        return bgn;
    }
    const VarType* begin()const{
        return bgn;
    }
    //Pointer one past the last element.
    VarType* end(){
        return bgn+siz;
    }
    const VarType* end()const{
        return bgn+siz;
    }
    //Unchecked element access; the caller must ensure 0 <= index < size().
    VarType& operator[](int index){
        return bgn[index];
    }
    const VarType& operator[](int index)const{
        return bgn[index];
    }
    //Number of stored elements.
    int size()const{
        return siz;
    }
//  int length(){ //for debug only: number of allocated slots
//      return len;
//  }
};
template<class VarType>class debug_vector //study-only growable array that reports misuse on stderr
{
    //made by NEYC GGN -2017.9.3, for study only
    //offers only a few basic operations of the STL "vector", plus diagnostics
    //NOTE: storage comes from calloc and elements are moved with memcpy, so no
    //constructor or destructor of VarType is ever run; intended for plain-data types
    //Every diagnostic is followed by system("pause>nul"), a Windows shell command.
    int len,siz;   //len: number of allocated slots, siz: number of stored elements
    VarType* bgn;  //start of the calloc'ed buffer (NULL only after a failed allocation)

    //Move the stored elements into a fresh zeroed buffer of newLen slots.
    //Returns false and leaves the vector untouched when calloc fails.
    bool resize_storage(int newLen){
        VarType* fresh=(VarType*)calloc(newLen,sizeof(VarType));
        if(fresh==NULL)return false;
        if(siz>0)memcpy(fresh,bgn,sizeof(VarType)*siz);
        free(bgn);
        bgn=fresh;len=newLen;
        return true;
    }
    //Replace this vector's contents with a private copy of other's elements.
    //Reports an error and leaves the vector untouched when calloc fails.
    void assign_from(const debug_vector& other){
        int want=other.len<1?1:other.len;
        VarType* fresh=(VarType*)calloc(want,sizeof(VarType));
        if(fresh==NULL){
            fprintf(stderr,"Error:(copy)can not calloc new memory!\n");
            system("pause>nul");return;
        }
        if(other.siz>0)memcpy(fresh,other.bgn,sizeof(VarType)*other.siz);
        free(bgn);
        bgn=fresh;len=want;siz=other.siz;
    }
public:
    //Create an empty vector with room for Len elements.
    //Len <= 0 is reported and then replaced by 1, so the object is always fully
    //initialised and its destructor never frees an indeterminate pointer.
    debug_vector(int Len=1){
        siz=0;len=0;bgn=NULL;
        if(Len<=0){
            fprintf(stderr,"Error:(constructor)the length of vector must >= 1!\n");
            system("pause>nul");
            Len=1;
        }
        bgn=(VarType*)calloc(Len,sizeof(VarType));
        if(bgn==NULL){
            fprintf(stderr,"Error:(constructor)can not calloc new memory!\n");
            system("pause>nul");return;
        }
        len=Len;
    }
    //Copy constructor: the copy gets its own buffer, so both destructors can run safely.
    debug_vector(const debug_vector& other){
        siz=0;len=0;bgn=NULL;
        assign_from(other);
    }
    //Copy assignment: deep copy; on allocation failure the old contents are kept.
    debug_vector& operator=(const debug_vector& other){
        if(this!=&other)assign_from(other);
        return *this;
    }
    //Release the buffer.
    ~debug_vector(){
        free(bgn);
    }
    //Drop every element and shrink the storage back to a single slot.
    void clear(){
        free(bgn);
        siz=0;len=1;
        bgn=(VarType*)calloc(len,sizeof(VarType));
        if(bgn==NULL){
            len=0; //keep len consistent with the missing buffer
            fprintf(stderr,"Error:(clear)can not calloc new memory!\n");
            system("pause>nul");
        }
    }
    //Append itm at the back, doubling the storage first when it is full.
    //Returns the element count after the call. When the allocation fails the
    //error is reported, nothing is stored and the unchanged count is returned;
    //len is only updated after a successful allocation, so it never exceeds
    //the real buffer size.
    int push_back(VarType itm){
        if(!(siz<len)){
            int grown=len>0?len*2:1;
            if(!resize_storage(grown)){
                fprintf(stderr,"Error:(push_back)can not calloc new memory!\n");
                system("pause>nul");return siz;
            }
        }
        bgn[siz]=itm;
        return ++siz;
    }
    //Remove the last element and return the remaining element count.
    //Popping from an empty vector is reported and returns the unchanged count.
    //The storage is halved first whenever the elements fit into the lower half.
    int pop_back(){
        if(siz<=0){
            fprintf(stderr,"Error:(pop_back)size of vector <=0!\n");
            system("pause>nul");return siz;
        }
        if(siz<=(len>>1))resize_storage(len>>1); //shrinking is optional: keep the bigger buffer if calloc fails
        return --siz;
    }
    //Pointer to the first element; the valid range is [ begin(), end() ).
    VarType* begin(){
        return bgn;
    }
    //Pointer one past the last element.
    VarType* end(){
        return bgn+siz;
    }
    //Checked element access. An index outside [0, size()-1] is reported and a
    //reference to slot 0 is returned instead.
    //NOTE(review): the fallback dereferences bgn, so it relies on a buffer
    //having been allocated successfully.
    VarType& operator[](int index){
        if(index<0 || index>=siz){
            fprintf(stderr,"Error:(operator[])index(=%d) must in [0,siz(=%d)-1]!\n",index,siz);
            system("pause>nul");return bgn[0];
        }
        return bgn[index];
    }
    //Number of stored elements.
    int size()const{
        return siz;
    }
    //Number of allocated slots (debug aid).
    int length()const{
        return len;
    }
};
}
分析
上文代码的实现原理:
push_back:如果当前元素个数小于数组长度,直接插入这个元素;否则,重新申请一个长度为原数组长度二倍的数组,并把前面的元素复制到新数组中,再插入这个元素。
pop_back:如果当前元素个数小于等于数组长度的二分之一,则先申请一个长度为当前数组长度二分之一的数组,并把前面的元素复制过来,再删除最后一个元素;否则,直接删除最后一个元素。
假设calloc函数申请内存的时间复杂度为O(1)(实际上也是非常快的)。那么我们可以去估计一下插入N个数的时间复杂度:
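(1)如果 $N=2^b$($b$ 为整数,$b>1$),那么需要花费的时间代价为 $T=$ 插入次数 $+\sum$(每次复制的长度),即:
$$T = 2^b + \left(2^{b-1}+2^{b-2}+\dots+2^1+2^0\right) = 2^b + (2^b-1) = 2N-1.$$
因为在插入最后一个数(第 $2^b$ 个)的时候并没有超出长度 $2^b$ 的限制,所以复制的长度只需要从 $2^{b-1}$ 加到 $2^0$。
(2)如果 $N=2^b+1$($b$ 为整数,$b>1$),那么
$$T = (2^b+1) + \left(2^b+2^{b-1}+\dots+2^1+2^0\right) = (2^b+1) + (2^{b+1}-1) = 3\cdot 2^b = 3N-3.$$
(3)除上面两种情况的 $N$ 外,不难证明:$T$ 一定在 $2N-1$ 到 $3N-3$ 之间。事实上,当 $2^b < N \le 2^{b+1}$ 时,$T = N + 2^{b+1} - 1$,由 $2^b+1 \le N \le 2^{b+1}$ 即得 $2N-1 \le T \le 3N-3$。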
从以上三条来看,push_back N个数的总复杂度是O(N),均摊复杂度为插入一个数O(1)(注意是均摊),均摊复杂度常数大概是普通数组直接操作的三倍(或以上),理论占用空间不超过数组的二倍,所以空间复杂度也是O(N)。
pop_back也是同理,在此不进行证明。
这个时间复杂度针对的是你一直push-back或pop-back。如果你反复push一次pop一次,最后导致数组来回被复制,那复杂度就没有保证了。
后记
STL(Standard Template Library)为我们提供了很多很优秀的数据结构和算法模板,虽然可以直接使用,但我还是觉得应该了解一下各种STL的实现原理(意思就是不一定要会写,但是要“了解”)。
(比如说什么“set”“map”这种用红黑树实现的容器,自己手写的确是丧心病狂!!)
上文中仿写的vector的算法并不一定就是真正STL vector中的写法,但我觉得原理应该是类似的。