小肥柴慢慢手写数据结构(C篇)(1-2 线性表 ArrayList 升级版本)
目录
1-5 要改进哪些点?
我们将在前一篇代码的基础上做修改,老样子先列个单子预测下工作内容及可能出现的麻烦。
- 存储数据的data数组大小应该是动态生成的。
- 如果当前存储单元使用达到上限(capacity),那么自动扩容。
- 在create和add操作中会有一些改动,可能需要解bug。
1-6 开始施工
对照单子,一步步来:
- 使用指针保存data数组:
原始代码(ArrayList.h)
/* Previous version: the storage is a fixed-size array embedded in the struct. */
#define MAX_SIZE (50)
struct ArrayList{
ElementType data[MAX_SIZE]; /* inline storage, always MAX_SIZE elements */
int len; /* presumably the number of elements in use -- confirm against the previous article */
int capacity; /* presumably the maximum number of elements -- confirm against the previous article */
};
改为:
/* Number of elements a freshly created list can hold. */
#define DEFAULT_CAPACITY (20)
/* Dynamic array: the element storage lives in a separately allocated buffer. */
struct ArrayList {
ElementType *data; /* pointer to the element buffer (allocated by createList) */
int len; /* number of elements currently stored */
int capacity; /* number of elements the buffer can hold */
};
默认初始化大小为20个单位(DEFAULT_CAPACITY )。
- 对应修改create函数,仅多了一个data初始化
List createList(){
List L = (PtrArrayList)malloc(sizeof(struct ArrayList));
if(L == NULL){
printf("Out of memery, create List fail\n");
return NULL;
}
L->data = malloc(sizeof(ElementType) * DEFAULT_CAPACITY);
if(L->data == NULL){
printf( "Out of memery, create list array fail\n" );
free(L);
return NULL;
}
memset(L->data, 0, DEFAULT_CAPACITY);
L->capacity = DEFAULT_CAPACITY;
L->len = 0;
return L;
}
- 对照着修改add函数
/* Insert item into list at index pos; returns OK or ERROR. */
int addItem(List list, ElementType item, int pos)
先考虑下正确的逻辑:检测参数合法性–>查看当前容量是否已满–>满了就扩容–>添加元素,因为动态数组,就是要在满仓的情况下扩容后才添加新元素的(其实也是插入,Insert)
(1)检测参数合法性
之前的实现:
/* Earlier fixed-capacity version of the argument checks, kept for comparison. */
if(list == NULL){
printf("\nlist is null\n");
return ERROR;
} else if(list->len == list->capacity){ /* a full list was rejected outright */
printf("\nlist is full!\n");
return ERROR;
} else if(pos < 0 || pos >= list->capacity){ /* pos == capacity was rejected as well */
printf("\npos out of range!\n");
return ERROR;
}
考虑到pos=list->len=capacity的边界情况不能省去(省了怎么达到扩容条件?),小改下:
/*
 * Argument checks for insertion. Valid positions are 0..len inclusive:
 * pos == len appends, and when len == capacity that append is what triggers
 * growth. Bounding by capacity instead would let pos skip past len, leaving a
 * gap (and writing data[capacity] out of bounds when the list is not full).
 */
if(list == NULL){
    printf("\nlist is null\n");
    return ERROR;
} else if(pos < 0 || pos > list->len){
    printf("\npos out of range!\n");
    return ERROR;
}
(2)设置扩容条件和扩容函数growArray()
/* Grow only when the array is full; && short-circuits, so growArray is not called otherwise. */
if(list->len == list->capacity && growArray(list) == ERROR){ //P1
printf("\ngrow list err\n");
return ERROR;
}
/*
 * Double the capacity of list, keeping the first list->len elements.
 *
 * Returns OK on success. Returns ERROR if list is NULL or the new buffer
 * cannot be allocated; in that case the list is left unchanged.
 */
int growArray(List list){
    int i;
    int resize;
    ElementType *newData;
    ElementType *tmp;

    if(list == NULL){
        printf("\nlist is null\n");
        return ERROR;
    }
    resize = list->capacity << 1; //P2
    newData = malloc(sizeof(ElementType) * resize);
    if(newData == NULL){
        printf("\ngrow array fail!\n");
        return ERROR;
    }
    /* memset counts bytes, not elements: clear the whole new buffer. */
    memset(newData, 0, sizeof(ElementType) * resize);
    tmp = list->data; //P3
    for(i = 0; i < list->len; i++)
        newData[i] = list->data[i];
    list->data = newData;
    free(tmp);
    list->capacity = resize;
    return OK;
}
P1:进入add流程后,如果当前元素个数已满,则触发扩容流程;如果扩容流程出错,则立即停止添加元素操作。
P2:设定扩容后的容量为原始容量的两倍(选择 *2 是因为用位运算很方便,左移1位就能解决问题,即 << 1)
P3:因为我们要用扩容后的新数组替代老数组(本质是替换数组指向,即a0头指针),所以不仅仅要做数据的交换,还要做一个有良心的编码者----清理不用的指针和空间,用tmp暂存原始data地址后,才能安心的覆盖(list->data = newData)和释放(free(tmp))
在尾部追加元素的操作int addItemTail(List list, ElementType item)不变,重写main.c测试初始化和添加两个操作:
/*
 * Test driver for list creation and insertion: fills the list up to its
 * initial capacity, forces it to grow, then inserts at the head, in the
 * middle, at the tail and at an invalid position.
 */
int main(int argc, char *argv[]) {
    int i = 0;
    printf("\n==============test create list && add item===================\n");
    List list = createList();
    if(list == NULL)
        return 1; /* createList already reported the failure */
    printf("\nlist is empty? %d\n", isEmpty(list));
    for(i = 0; i < 20; i++)
        addItemTail(list, i);
    printf("\ninit data: len=%d\n", list->len);
    printList(list);
    for(i = 20; i < 40; i++)
        addItemTail(list, i);
    printf("\ngrow data: len=%d\n", list->len);
    printList(list);
    addItemTail(list, 100);
    printList(list);
    addItem(list, 200, 3);
    printList(list);
    addItem(list, 77, 0);
    printList(list);
    addItem(list, 99, list->len);
    printList(list);
    addItem(list, 99, -1);
    printList(list);
    /* Release the data buffer first, then the list header. */
    free(list->data);
    free(list);
    return 0;
}
如果这段没有问题,则接下来其他操作就不必改动了,因为动态数组的操作没有特殊的地方。
1-7 完整代码和测试
- ArrayList.h
/* ArrayList.h -- public interface of a growable array list of ElementType. */
#ifndef ARRAY_LIST_H
#define ARRAY_LIST_H

/* Kept inside the include guard so a second inclusion does not redeclare it. */
typedef int ElementType;

#define DEFAULT_CAPACITY (20)   /* initial number of element slots */
#define OK (0)                  /* success status */
#define ERROR (-1)              /* failure status, also the "not found" result */

struct ArrayList {
    ElementType *data;  /* heap buffer holding the elements */
    int len;            /* number of elements currently stored */
    int capacity;       /* number of elements data can hold */
};
typedef struct ArrayList *PtrArrayList;
typedef PtrArrayList List;

/* Create an empty list with DEFAULT_CAPACITY slots; NULL on allocation failure. */
List createList(void);
/* Double the capacity of list; OK or ERROR. */
int growArray(List list);
/* Insert item at index pos, growing the list when it is full; OK or ERROR. */
int addItem(List list, ElementType item, int pos);
/* Append item at the end of list; OK or ERROR. */
int addItemTail(List list, ElementType item);
/* Index of the first element equal to item, or ERROR if absent. */
int findItem(const List list, ElementType item);
/* Remove the first element equal to item; returns its former index or ERROR. */
int removeItem(List list, ElementType item);
/* Remove the element at index pos; OK or ERROR. */
int removeByIndex(List list, int pos);
/* Overwrite the element at index pos with item; OK or ERROR. */
int setItem(List list, ElementType item, int pos);
/* Element at index pos, or ERROR when list is NULL or pos is out of range. */
ElementType getItem(List list, int pos);
/* 1 when list holds no elements, 0 otherwise, ERROR when list is NULL. */
int isEmpty(List list);
/* Print the elements of list as "[ a b c ]". */
void printList(const List list);

#endif /* ARRAY_LIST_H */
- ArrayList.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "ArrayList.h"
List createList(){
List L = (PtrArrayList)malloc(sizeof(struct ArrayList));
if(L == NULL){
printf("Out of memery, create List fail\n");
return NULL;
}
L->data = malloc(sizeof(ElementType) * DEFAULT_CAPACITY);
if(L->data == NULL){
printf( "Out of memery, create list array fail\n" );
free(L);
return NULL;
}
memset(L->data, 0, DEFAULT_CAPACITY);
L->capacity = DEFAULT_CAPACITY;
L->len = 0;
return L;
}
/*
 * Double the capacity of list, keeping the first list->len elements.
 *
 * Returns OK on success. Returns ERROR if list is NULL or the new buffer
 * cannot be allocated; in that case the list is left unchanged.
 */
int growArray(List list){
    int i;
    int resize;
    ElementType *newData;
    ElementType *tmp;

    if(list == NULL){
        printf("\nlist is null\n");
        return ERROR;
    }
    resize = list->capacity << 1;
    newData = malloc(sizeof(ElementType) * resize);
    if(newData == NULL){
        printf("\ngrow array fail!\n");
        return ERROR;
    }
    /* memset counts bytes, not elements: clear the whole new buffer. */
    memset(newData, 0, sizeof(ElementType) * resize);
    /* Remember the old buffer so it can be freed after the pointer is swapped. */
    tmp = list->data;
    for(i = 0; i < list->len; i++)
        newData[i] = list->data[i];
    list->data = newData;
    free(tmp);
    list->capacity = resize;
    return OK;
}
/*
 * Insert item at index pos, shifting the elements at pos..len-1 one slot up.
 *
 * Valid positions are 0..len inclusive (pos == len appends). When the list
 * is full it is grown before inserting.
 * Returns OK on success; ERROR if list is NULL, pos is out of range, or the
 * list is full and cannot grow.
 */
int addItem(List list, ElementType item, int pos){
    int i;
    if(list == NULL){
        printf("\nlist is null\n");
        return ERROR;
    } else if(pos < 0 || pos > list->len){
        /* Bound by len, not capacity: a larger pos would leave a gap and,
         * when the list is not full, could write data[capacity]. */
        printf("\npos out of range!\n");
        return ERROR;
    }
    if(list->len == list->capacity && growArray(list) == ERROR){
        printf("\ngrow list err\n");
        return ERROR;
    }
    /* Walk from the tail down so no element is overwritten before it is moved. */
    for(i = list->len-1; i >= pos; i--)
        list->data[i+1] = list->data[i];
    list->data[pos] = item;
    list->len++;
    return OK;
}
/*
 * Append item at the end of list.
 *
 * Returns the result of addItem, or ERROR when list is NULL.
 */
int addItemTail(List list, ElementType item){
    /* list->len must not be read through a NULL list. */
    if(list == NULL){
        printf("\nlist is null\n");
        return ERROR;
    }
    return addItem(list, item, list->len);
}
/*
 * Linear search for item.
 *
 * Returns the index of the first element equal to item, or ERROR when list
 * is NULL or no element matches.
 */
int findItem(const List list, ElementType item){
    int idx;

    if(list == NULL)
        return ERROR;

    for(idx = 0; idx < list->len; idx++){
        if(list->data[idx] == item)
            return idx;
    }
    return ERROR;
}
/*
 * Remove the first element equal to item.
 *
 * Returns the index the element occupied, or ERROR when list is NULL or the
 * item is not present.
 */
int removeItem(List list, ElementType item){
    int idx;
    int k;

    if(list == NULL){
        printf("\nlist is null\n");
        return ERROR;
    }

    idx = findItem(list, item);
    if(idx == ERROR)
        return ERROR;

    /* Close the gap by moving every later element one slot down. */
    k = idx;
    while(k < list->len - 1){
        list->data[k] = list->data[k+1];
        k++;
    }
    /* Zero the slot that just became unused. */
    list->len--;
    list->data[list->len] = 0;
    return idx;
}
/*
 * Remove the element at index pos.
 *
 * Returns OK on success, ERROR when list is NULL or pos is outside 0..len-1.
 */
int removeByIndex(List list, int pos){
    int k;

    if(list == NULL){
        printf("\nlist is null\n");
        return ERROR;
    }
    if(pos < 0 || pos >= list->len){
        printf("\npos out of range!\n");
        return ERROR;
    }

    /* Close the gap by moving every later element one slot down. */
    k = pos;
    while(k < list->len - 1){
        list->data[k] = list->data[k+1];
        k++;
    }
    /* Zero the slot that just became unused. */
    list->len--;
    list->data[list->len] = 0;
    return OK;
}
/*
 * Overwrite the element at index pos with item.
 *
 * Returns OK on success, ERROR when list is NULL or pos is outside 0..len-1.
 */
int setItem(List list, ElementType item, int pos){
    if(list == NULL){
        printf("\nlist is null\n");
        return ERROR;
    }
    if(pos < 0 || pos >= list->len){
        printf("\npos out of range!\n");
        return ERROR;
    }

    list->data[pos] = item;
    return OK;
}
/*
 * Read the element at index pos.
 *
 * Returns the stored value, or ERROR when list is NULL or pos is outside
 * 0..len-1. ERROR is returned in-band, so a failure cannot be told apart
 * from a stored element that happens to equal ERROR.
 */
ElementType getItem(List list, int pos){
    if(list == NULL){
        printf("\nlist is null\n");
        return ERROR;
    }
    if(pos < 0 || pos >= list->len){
        printf("\npos out of range!\n");
        return ERROR;
    }

    return list->data[pos];
}
/*
 * Report whether list holds no elements.
 *
 * Returns 1 when len is 0, 0 otherwise, and ERROR when list is NULL.
 */
int isEmpty(List list){
    if(list == NULL)
        return ERROR;

    if(list->len == 0)
        return 1;
    return 0;
}
/*
 * Print the elements of list on one line as "[ a b c ]".
 * Prints nothing when list is NULL.
 */
void printList(const List list){
    int idx = 0;

    if(list == NULL)
        return;

    printf("\n[ ");
    while(idx < list->len){
        printf("%d ", list->data[idx]);
        idx++;
    }
    printf("]\n");
}
- main.c (不再有MAX_SIZE,改为pos=list->len测试边界条件)
#include <stdio.h>
#include <stdlib.h>
#include "ArrayList.h"
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
/*
 * Test driver: exercises create/add (including growth), find, remove, set
 * and get, covering valid positions and out-of-range positions, and prints
 * the list after each step.
 */
int main(int argc, char *argv[]) {
    int i = 0;
    int pos;
    printf("\n==============test create list && add item===================\n");
    List list = createList();
    if(list == NULL)
        return 1; /* createList already reported the failure */
    printf("\nlist is empty? %d\n", isEmpty(list));
    for(i = 0; i < 20; i++)
        addItemTail(list, i);
    printf("\ninit data: len=%d\n", list->len);
    printList(list);
    for(i = 20; i < 40; i++)
        addItemTail(list, i);
    printf("\ngrow data: len=%d\n", list->len);
    printList(list);
    addItemTail(list, 100);
    printList(list);
    addItem(list, 200, 3);
    printList(list);
    addItem(list, 77, 0);
    printList(list);
    addItem(list, 99, list->len);
    printList(list);
    addItem(list, 99, -1);
    printList(list);

    printf("\n==============test find item===================\n");
    printf("\nfind 77'pos=%d\n", findItem(list, 77));
    printf("\nfind 100'pos=%d\n", findItem(list, 100));
    printf("\nfind 20'pos=%d\n", findItem(list, 20));
    printf("\nfind 99'pos=%d\n", findItem(list, 99));
    printf("\nfind 500'pos=%d\n", findItem(list, 500));

    printf("\n==============test remove item===================\n");
    printf("\nremove 10 =>pos=%d\n", removeItem(list, 10));
    printList(list);
    printf("\nremove 10 again =>pos=%d\n", removeItem(list, 10));
    printList(list);
    printf("\nremove 77 =>pos=%d\n", removeItem(list, 77));
    printList(list);
    printf("\nremove 100 =>pos=%d\n", removeItem(list, 100));
    printList(list);
    printf("\nremove pos=0, ret=%d\n", removeByIndex(list, 0));
    printList(list);
    printf("\nremove pos=10, ret=%d\n", removeByIndex(list, 10));
    printList(list);
    /* Capture the index first: removeByIndex changes list->len, and the
     * order in which printf's arguments are evaluated is unspecified. */
    pos = list->len - 1;
    printf("\nremove pos=%d, ret=%d\n", pos, removeByIndex(list, pos));
    printList(list);
    pos = list->len;
    printf("\nremove pos=%d, ret=%d\n", pos, removeByIndex(list, pos));
    printList(list);

    printf("\n==============test set item===================\n");
    /* setItem takes (list, item, pos): store -2 at index 0. */
    printf("\nset -2 pos=%d ~~~ %d \n", 0, setItem(list, -2, 0));
    printList(list);
    printf("\nset -10 pos=%d ~~~ %d \n", list->len-1, setItem(list, -10, list->len-1));
    printList(list);
    printf("\nset -100 pos=%d ~~~ %d \n", 7, setItem(list, -100, 7));
    printList(list);
    printf("\nset -50 pos=%d ~~~ %d \n", -1, setItem(list, -50, -1));
    printList(list);
    printf("\nset -60 pos=%d ~~~ %d \n", list->len, setItem(list, -60, list->len));
    printList(list);

    printf("\n==============test get item===================\n");
    printf("get pos=>%d = %d\n", 0, getItem(list, 0));
    printf("get pos=>%d = %d\n", -1, getItem(list, -1));
    printf("get pos=>%d = %d\n", list->len-1, getItem(list, list->len-1));
    printf("get pos=>%d = %d\n", list->len, getItem(list, list->len));
    printf("\nlist is empty? %d\n", isEmpty(list));

    /* Release the data buffer first, then the list header. */
    free(list->data);
    free(list);
    return 0;
}
完工!
1-8 接下来做什么
- 看看还有哪些实用功能可以添加的?
(1)可不可以做一些其他的初始化,譬如
createList(指定大小)?
createList(放另一个list)?
…
这个很实用,实现起来也不难,朋友们可以试着做一做。
(2)按照一定的规则将两个list合并成一个新的list?
List combine(list1,list2)
这个也不难,但凡学过merge-sort的都能完成
(3)其实从已经完成的案例中,可以体会到很多语法细节,例如:
<1> List list本质就是指针,理解为地址也行,给了我们操控数据对象的把柄。
<2> 按位左移/按位右移操作的实用性。
<3> 指针操作的套路,往往需要暂存一下(本系列链表部分还会进一步讨论)
- 总感觉编码缺点什么,去看看严版教材,找找异同和灵感吧。
- 其实动态扩容的反向操作也是应该存在的,即:
<1> 如果原本负载较多元素的ArrayList被删除了很多元素,那么就会出现大量空置的空间,十分浪费;
<2> 沿着动态扩容的思路,可以进行缩减空间的操作,这个操作可以与growArray统一为一个函数去实现(reSize(List list, int newCapacity),操控newCapacity即可);
<3> 例如:很多STL中为ArrayList和类似数据结构提供了这个动态reSize的功能,且为了防止元素数量在len==capacity这个点反复横跳,设置了一个触发reSize的阈值,一般可设定为1/4,即当前ArrayList在执行remove类操作后,需要检查当前元素个数(len)是否已经低于现有capacity的1/4,若达到阈值则执行缩减空间的操作。
【注】这是一个非常有意思的话题,对应论文后续如果我能找到的话会与大家分享。
就这样吧,因为本节是代码优化,所以有点短,但总的来说,目前我感觉同学们有疑问的点,大部分都能覆盖了,后续根据大家学习的情况,我们再修改这一帖。