C语言学习第015课——文件操作（二）

最新推荐文章于 2023-11-08 15:58:19 发布

下山打老虎◆

最新推荐文章于 2023-11-08 15:58:19 发布

阅读量239

点赞数

分类专栏： C语言基础文章标签： c语言

本文链接：https://blog.csdn.net/sinat_29174099/article/details/109722815

版权

C语言基础专栏收录该内容

16 篇文章 1 订阅

订阅专栏

本文讨论了使用stat函数获取文件大小，根据文件大小动态调整缓冲区大小，提高大文件拷贝效率，并介绍了文件操作如fseek、ftell和重命名的实践。还涉及内存管理优化，如使用索引加速单词查找，提升程序性能。

摘要由CSDN通过智能技术生成

上一篇博文最后的内容，使用1024字节也就是1KB的缓冲区，
去拷贝了20M的文件，循环次数有点多，可以通过修改SIZE的值，改成1024*1024或者8*1024*1024也就是8M，循环次数就小多了
遇到大文件，就用8M的缓冲区，遇到小文件就用1KB的缓冲区就行
那么如何获取到文件的大小呢？

获取文件状态

#include <sys/types.h>
#include <sys/stat.h>
int stat(const char *path, struct stat *buf);
功能：获取文件状态信息
参数：
path：文件名
buf：保存文件信息的结构体
返回值：
成功：0
失败-1

struct stat {
	dev_t         st_dev;         文件的设备编号
	ino_t         st_ino;         节点
	mode_t        st_mode;   	  文件的类型和存取的权限
	nlink_t       st_nlink;       连到该文件的硬连接数目，刚建立的文件值为1
	uid_t         st_uid;         用户ID
	gid_t         st_gid;         组ID
	dev_t         st_rdev;        (设备类型)若此文件为设备文件，则为其设备编号
	off_t         st_size;        文件字节数(文件大小)
	unsigned long st_blksize;     块大小(文件系统的I/O 缓冲区大小)
	unsigned long st_blocks;      块数
	time_t        st_atime;       最后一次访问时间
	time_t        st_mtime;      最后一次修改时间
	time_t        st_ctime;      最后一次改变时间(指属性)
};

我们需要使用到的是st_size，简单的使用一下stat函数

#include<stdio.h>
#include<sys/types.h>
#include<sys/stat.h>

/*
 * Print the size in bytes of "hello.txt" as reported by stat().
 *
 * Returns 0 on success, -1 when the file status cannot be obtained.
 */
int main(void){
    struct stat st;

    /* stat() returns -1 on failure and leaves st unspecified, so the
     * size must only be printed after a successful call. */
    if(stat("hello.txt",&st) != 0){
        perror("stat");
        return -1;
    }

    /* st_size is an off_t; widen it instead of truncating to int so that
     * files larger than INT_MAX bytes are reported correctly. */
    printf("%lld\n",(long long)st.st_size);
    return 0;
}

运行结果：
在这里插入图片描述
将stat函数使用到文件拷贝中，如果文件大于8M，就开辟8M的缓冲区进行拷贝，如果文件小于8M，就开辟文件大小的缓冲区进行拷贝

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<sys/types.h>
#include<sys/stat.h>

/* Largest copy buffer to allocate: 8 MiB. Parenthesised so the expansion
 * stays a single operand wherever the macro is used. */
#define SIZE (8*1024*1024)


/*
 * Copy the file named by argv[1] to the file named by argv[2].
 *
 * The buffer size is chosen from the source file size reported by stat():
 * files larger than SIZE are copied through a SIZE-byte buffer, smaller
 * files through a buffer of (file size + 10) bytes.
 *
 * Returns 0 on success, -2 when fewer than two file names were given and
 * -1 on any open, stat, allocation, read or write failure.
 */
int main(int argc,char* argv[]){
    /* The argument count must be validated before argv[1]/argv[2] are used. */
    if(argc<3){
        printf("缺少参数\n");
        return -2;
    }

    /* Open the source first: if it is missing, the destination is never
     * created or truncated. */
    FILE* fp1 = fopen(argv[1],"rb");
    if(!fp1){
        printf("操作失败\n");
        return -1;
    }

    /* Fetch the file status into st; only st_size is used below. */
    struct stat st;
    if(stat(argv[1],&st) != 0){
        fclose(fp1);
        printf("操作失败\n");
        return -1;
    }

    FILE* fp2 = fopen(argv[2],"wb");
    if(!fp2){
        fclose(fp1);
        printf("操作失败\n");
        return -1;
    }

    /* Pick the buffer size from the file size. The extra 10 bytes for
     * small files also guarantee a non-zero buffer for an empty file. */
    size_t size;
    if(st.st_size>SIZE){
        size = SIZE;
    }else{
        size = (size_t)st.st_size+10;
    }
    char* ptemp = (char*)malloc(size);
    if(!ptemp){
        fclose(fp1);
        fclose(fp2);
        printf("操作失败\n");
        return -1;
    }
    printf("开辟的缓冲区大小为%zu字节\n",size);

    /* Drive the loop with fread's return value rather than feof(): feof()
     * only becomes true after a read has already hit end-of-file, and it
     * never becomes true on a read error. */
    int rc = 0;
    size_t count;
    while((count = fread(ptemp,1,size,fp1)) > 0){
        if(fwrite(ptemp,1,count,fp2) != count){
            rc = -1;
            break;
        }
    }
    if(ferror(fp1)){
        rc = -1;
    }

    free(ptemp);
    fclose(fp1);
    /* fclose flushes buffered output, so a failure here means lost data. */
    if(fclose(fp2) != 0){
        rc = -1;
    }

    if(rc == 0){
        printf("拷贝成功\n");
    }else{
        printf("操作失败\n");
    }
    return rc;
}

执行命令
在这里插入图片描述

运行结果
在这里插入图片描述

文件的随机读写

文件光标流的偏移 fseek

#include <stdio.h>
int fseek(FILE *stream, long offset, int whence);
功能：移动文件流（文件光标）的读写位置。
参数：
	stream：已经打开的文件指针
	offset：根据whence来移动的位移数（偏移量），可以是正数，也可以负数，如果正数，则相对于whence往右移动，如果是负数，则相对于whence往左移动。如果向前移动的字节数超过了文件开头则出错返回，如果向后移动的字节数超过了文件末尾，再次写入时将增大文件尺寸。
	whence：其取值如下：
		SEEK_SET：从文件开头移动offset个字节
		SEEK_CUR：从当前位置移动offset个字节
		SEEK_END：从文件末尾移动offset个字节
返回值：
	成功：0
	失败：-1

以下代码，是使用fgets读取文本文件两行数据

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

/*
 * Read the first two lines of "hello.txt" with fgets() and print them.
 *
 * Returns 0 on success, -1 when the file cannot be opened.
 */
int main(void){
    FILE* fp = fopen("hello.txt","r");
    if(!fp){
        perror("hello.txt");
        return -1;
    }

    char str[30];
    for(int line = 0;line<2;line++){
        /* fgets returns NULL at end-of-file or on error; the buffer
         * contents are only meaningful when it succeeds. */
        if(!fgets(str,sizeof str,fp)){
            break;
        }
        printf("%s",str);
    }

    fclose(fp);
    return 0;
}

现在想读完第二行之后，再读一次第二行，使用fseek函数实现，
我们知道，fseek函数是给光标进行偏移的，第二行内容“打不过我吧”是5个汉字，加上一个\r和一个\n（Windows平台下文本文件的换行是\r\n，Linux下只有\n）
按每个汉字占2个字节（例如GBK编码）计算，总共就是5×2+2=12个字节，所以使用fseek函数，先从当前位置往前偏移12个字节试试

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

/*
 * Read the first two lines of "hello.txt", move the file position back by
 * 12 bytes and read again, so that the second line is printed twice.
 *
 * Returns 0 on success, -1 when the file cannot be opened or the seek fails.
 */
int main(void){
    FILE* fp = fopen("hello.txt","r");
    if(!fp){
        perror("hello.txt");
        return -1;
    }

    char str[30];
    for(int line = 0;line<2;line++){
        if(fgets(str,sizeof str,fp)){
            printf("%s",str);
        }
    }

    /* Move 12 bytes to the left of the current position (a positive offset
     * would move to the right). The value 12 is specific to this sample
     * file's second line. NOTE(review): ISO C only guarantees fseek on a
     * text stream for offset 0 or a value returned by ftell; this relative
     * seek relies on the platform's behaviour. */
    if(fseek(fp,-12,SEEK_CUR) != 0){
        perror("fseek");
        fclose(fp);
        return -1;
    }

    if(fgets(str,sizeof str,fp)){
        printf("%s",str);
    }

    fclose(fp);
    return 0;
}

运行结果：
在这里插入图片描述
探讨小问题
如果在读写过程中，想要移动光标位置使用fseek，记得打开文件的时候使用

FILE* fp = fopen("hello.txt","r+");

"r+"意思是以读写方式打开文件，但是不新建文件
参数为“w+”的也是以读写方式打开文件，但是会新建文件，
打开文件之后，光标位置可以使用fseek随便修改，但是有一种方式
是以a+方式打开文件，意为追加内容
以这种方式打开文件时，读取的初始位置由具体实现决定（常见实现中是文件开头，即0，而不是文件末尾）；但无论之前用fseek把光标移动到了哪里，每一次写入都会被强制追加到文件当前的末尾，不会覆盖文件中原有的内容。
所以使用“a+”方式打开文件时，fseek函数只对读取位置有效，对写入位置是没有效果的

获取文件光标的位置 ftell

#include <stdio.h>
long ftell(FILE *stream);
功能：获取文件流（文件光标）的读写位置。
参数：
	stream：已经打开的文件指针
返回值：
	成功：当前文件流（文件光标）的读写位置
	失败：-1

简单的使用一下：

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

/*
 * Print the file position of "hello.txt" right after opening it, read one
 * line, then print the position again.
 *
 * Returns 0 on success, -1 when the file cannot be opened.
 */
int main(void){
    FILE* fp = fopen("hello.txt","r");
    if(!fp){
        perror("hello.txt");
        return -1;
    }

    char str[30];

    /* First query: position immediately after opening. */
    long pos = ftell(fp);
    printf("pos = %ld\n",pos);

    if(fgets(str,sizeof str,fp)){
        printf("%s",str);
    }

    /* Second query: position after one line has been consumed. */
    pos = ftell(fp);
    printf("pos = %ld\n",pos);

    fclose(fp);
    return 0;
}

运行结果：
在这里插入图片描述

将光标位置移动到文件开头 rewind

#include <stdio.h>
void rewind(FILE *stream);
功能：把文件流（文件光标）的读写位置移动到文件开头。
参数：
	stream：已经打开的文件指针
返回值：
	无返回值

简单的使用一下

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

/*
 * Demonstrate rewind(): print the file position of "hello.txt" after
 * opening, after reading one line and after rewinding, then read the
 * first line a second time.
 *
 * Returns 0 on success, -1 when the file cannot be opened.
 */
int main(void){
    FILE* fp = fopen("hello.txt","r");
    if(!fp){
        perror("hello.txt");
        return -1;
    }

    char str[30];

    /* Position right after opening. */
    long pos = ftell(fp);
    printf("pos = %ld\n",pos);

    /* Read and print one line. */
    if(fgets(str,sizeof str,fp)){
        printf("%s",str);
    }

    /* Position after the first line. */
    pos = ftell(fp);
    printf("pos = %ld\n",pos);

    /* Move back to the beginning of the file and show the position. */
    rewind(fp);
    pos = ftell(fp);
    printf("pos = %ld\n",pos);

    /* Reading from the current position yields the first line again. */
    if(fgets(str,sizeof str,fp)){
        printf("%s",str);
    }

    fclose(fp);
    return 0;
}

运行结果：
在这里插入图片描述

删除重命名文件

删除文件

#include <stdio.h>
int remove(const char *pathname);
功能：删除文件
参数：
	pathname：文件名
返回值：
	成功：0
	失败：-1

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

/*
 * Delete "hello.txt" and report whether remove() returned 0.
 * Always returns 0.
 */
int main(void){
    /* remove() yields 0 on success; any other value counts as failure. */
    const char* outcome = (remove("hello.txt") == 0)
                              ? "删除成功\n"
                              : "删除失败\n";

    printf("%s",outcome);
    return 0;
}

重命名文件剪切文件

#include <stdio.h>
int rename(const char *oldpath, const char *newpath);
功能：把oldpath的文件名改为newpath
参数：
oldpath：旧文件名
newpath：新文件名
返回值：
成功：0
失败： - 1

/*
 * Rename "hello.txt" to "world.txt".
 *
 * Returns 0 on success, -1 when rename() reports a failure.
 */
int main(void){
    /* rename() returns 0 on success; report the reason otherwise instead
     * of silently ignoring the failure. */
    if(rename("hello.txt","world.txt") != 0){
        perror("rename");
        return -1;
    }
    return 0;
}

在文件名称前面加上路径，即可改变文件的位置，是为剪切

快易典内核代码

项目需求：输入一个英文单词，能立马出现汉语解释
现有一个英语词典的TXT文件
在这里插入图片描述
格式都是单词三个空格解释
于是思路很明显，
1：创建一个结构体，里面一个单词，一个翻译，通过文件行读取，将单词和翻译存储进堆内存中:
2：获取键盘输入单词，从堆内存中遍历查找，返回翻译
3：释放资源
首先，先定义一个结构体字典，

#ifndef __DICT_H__
#define __DICT_H__

/* One dictionary entry: a word and its translation, both held as
 * pointers to C strings. */
typedef struct DICT {
    char *word;   /* the word that is looked up */
    char *trans;  /* the translation shown for that word */
} dict;

#endif /* __DICT_H__ */

先定义一个函数GetWord()，读取dict.txt文件，将单词和翻译分别存进结构体中，不需要参数，返回值为读取到单词的总数

#define COUNT 7989					/* the word file has 7989 lines, so at most 7989 entries are stored */
dict* list = NULL;					/* word table; a global so every function can reach it */

/*
 * Load "dict.txt" into the global word table `list`.
 *
 * Each line is split with sscanf into a word and a translation (the first
 * two whitespace-separated tokens); both are stored in heap memory owned
 * by `list`. At most COUNT lines are read. The first 20 loaded entries are
 * printed as a quick check.
 *
 * Returns the number of entries loaded, or -1 when the file cannot be
 * opened or the table cannot be allocated. If memory runs out while
 * loading, reading stops and the entries loaded so far are kept.
 */
int GetWord(){
    FILE* fp = fopen("dict.txt","r");
    if(!fp){
        printf("加载单词库失败\n");
        return -1;
    }

    /* calloc zero-fills the table, so slots that are never filled hold
     * NULL pointers, which are safe to pass to free() later. */
    list = (dict*)calloc(COUNT,sizeof(dict));
    if(!list){
        fclose(fp);
        printf("加载单词库失败\n");
        return -1;
    }

    char line[1024];		/* one raw line of the file */
    char word[1024];
    char trans[1024];
    int i = 0;			/* next free slot in list */

    /* Stop at COUNT so a longer file cannot write past the table, and let
     * fgets' return value (not feof) decide when the input is exhausted. */
    while(i<COUNT && fgets(line,sizeof line,fp)){
        word[0] = '\0';
        trans[0] = '\0';
        /* A line that does not contain two tokens leaves the missing
         * part as an empty string. */
        (void)sscanf(line,"%1023s   %1023s",word,trans);

        list[i].word = (char*)malloc(strlen(word)+1);
        list[i].trans = (char*)malloc(strlen(trans)+1);
        if(!list[i].word || !list[i].trans){
            free(list[i].word);
            free(list[i].trans);
            list[i].word = NULL;
            list[i].trans = NULL;
            break;
        }
        strcpy(list[i].word,word);
        strcpy(list[i].trans,trans);
        i++;
    }

    /* Preview: never read beyond the entries that were actually loaded. */
    for(int k = 0;k<20 && k<i;k++){
        printf("%s   %s\n",list[k].word,list[k].trans);
    }

    fclose(fp);
    return i;
}

在main函数中，单独调用一下GetWord函数，查看一下前20行的打印

/* Load the dictionary once; GetWord itself prints a preview of the table. */
int main(void)
{
    (void)GetWord();	/* the entry count is not needed here */
    return 0;
}

运行结果：
在这里插入图片描述
然后开始写查找单词的函数SearchWord，返回值为int类型，根据返回值判断是否查找成功，参数为指针类型的单词和翻译

/*
 * Look up `word` in the global word table and copy its translation.
 *
 * word:  the word to search for.
 * trans: output buffer that receives the translation via strcpy; the
 *        caller must make it large enough for any stored translation.
 *
 * Returns 0 when the word was found, 1 when it was not found (or the
 * table has not been loaded), and -1 when either argument is NULL.
 */
int SearchWord(char* word,char* trans){
    if(!word || !trans){
        printf("输入发生异常\n");
        return -1;
    }
    /* Nothing can be found when the table was never allocated. */
    if(!list){
        return 1;
    }
    for(int i = 0;i<COUNT;i++){
        /* Skip slots that hold no word instead of handing NULL to strcmp. */
        if(!list[i].word){
            continue;
        }
        if(strcmp(word,list[i].word) == 0){
            /* hand the stored translation back through the output buffer */
            strcpy(trans,list[i].trans);
            return 0;
        }
    }
    return 1;
}

查完需要回收一下内存，先把回收资源函数写一下，无返回值无参数void DestorySpace()

/*
 * Release the global word table.
 *
 * Frees the word and translation strings of all COUNT slots, then the
 * table itself, and resets `list` to NULL so that a second call is a
 * no-op. Does nothing when the table was never allocated.
 * (The stray comment terminator that followed this function was removed;
 * it had no matching opener and would not compile.)
 */
void DestorySpace(){
    if(!list){
        return;
    }
    for(int i=0;i<COUNT;i++){
        free(list[i].word);
        free(list[i].trans);
    }
    free(list);
    list = NULL;
}

以上，函数代码就写好了，在主函数中调用一下

/*
 * Interactive lookup loop: load the dictionary, then repeatedly read one
 * word from standard input and print its translation, until the user
 * types "comm=exit" or the input ends.
 *
 * Returns 0 on normal exit, -1 when the dictionary could not be loaded.
 */
int main()
{
    /* Without a loaded table every lookup would fail, so stop early. */
    if(GetWord()<0){
        return -1;
    }

    char word[1024];
    char trans[1024];
    while(1){
        memset(word,0,sizeof word);
        memset(trans,0,sizeof trans);
        /* The field width keeps long input inside the buffer, and the
         * return value ends the loop at end-of-input instead of spinning. */
        if(scanf("%1023s",word) != 1){
            break;
        }
        if(!strcmp(word,"comm=exit")){
            break;
        }
        if(!SearchWord(word,trans)){
            printf("%s\n",trans);
        }else{
            printf("未找到该单词\n");
        }
    }

    DestorySpace();
    return 0;
}

运行结果：
在这里插入图片描述
程序写到这里，功能算是实现了，但是有没有办法优化一下呢？或者说，程序哪里有不够合理的地方呢？
整个程序是将7800多个单词加载到内存中，如果我要找z开头的单词，需要在循环中走7800多次，有点太多次了，可以不可以优化一下呢？
我们可以考虑给单词表加一个索引
单词表的特点是单词都是按照字母顺序a b c d排下来的，我们可以新定义一个结构体数组，一共26个元素，结构体内容为起始位置和结束位置，也就是说，记录下a b c 等字母开头的第一个单词的位置和最后一个单词的位置，查找单词的时候，把第一个字母也传进来，直接在这个范围内寻找，循环次数就会少很多
首先，新建一个结构体，内容为开始位置和结束位置

/* Position range inside the word table for the words that share one
 * initial letter. */
typedef struct INDEX {
    int start;  /* position where the range begins */
    int end;    /* position where the range ends */
} index;

在GetWord函数中，读取文件的时候，顺便将索引也加进去

index* pos = NULL;		//全局变量索引

int GetWord(){
    FILE* fp = fopen("dict.txt","r");
    if(!fp){
        printf("加载单词库失败\n");
        return -1;
    }
    list = (dict*)malloc(sizeof(dict)*COUNT);
    pos = (index*)malloc(sizeof(index)*26);			//给索引数组分配地址 26个结构体
    char* temp = (char*)malloc(sizeof(char)*1024);
    int i = 0;
    int posindex = 0;					//索引下标
    pos[posindex].start = 0;			//初始化第一个字母索引的值 都为0
    pos[posindex].end = 0;
    int flag = 'a';						//根据这个flag来添加索引
    while(!feof(fp)){
        memset(temp,0,1024);
        fgets(temp,1024,fp);//#a\n0
        if(!strlen(temp))
            break;
        list[i].word = (char*)malloc(sizeof(char)*1024);
        list[i].trans = (char*)malloc(sizeof(char)*1024);
        memset(list[i].word,0,1024);memset(list[i].trans,0,1024);
        sscanf(temp,"%s   %s",list[i].word,list[i].trans);
        
        if(list[i].word[0]==flag){		//如果单词的第一个字母是a
            pos[posindex].end++;		//a开头的单词的索引的结束值+1
        }else{
            posindex++;					//索引下标值+1 以后开始算b开头的
            flag++;						//a变成b
            pos[posindex].start = pos[posindex-1].end+1;	//b开头的单词，索引开头初始化为 a的结尾+1
            pos[posindex].end = pos[posindex-1].end+1;//b开头的单词，索引结束标志初始化为  a的结尾+1
        }
        i++;
    }
    //简单打印一下 每个字母的索引值范围
    for(int i = 'a';i<='z';i++){
        printf("%c  %d-%d\n",i,pos[i-'a'].start,pos[i-'a'].end);
    }
    free(temp);
    fclose(fp);
    return i;
}

查找的时候就可以根据索引值来查找单词了

/*
 * Look up `word` inside the index range of its initial letter and copy
 * the translation.
 *
 * word:  the word to search for.
 * trans: output buffer that receives the translation via strcpy; the
 *        caller must make it large enough for any stored translation.
 * ch:    initial letter used to select the index entry pos[ch-'a'].
 *
 * Positions start <= i < end of that entry are compared. The range and
 * the number of comparisons are printed.
 *
 * Returns 0 when found, 1 when not found (including when `ch` is not a
 * lowercase letter or the tables are not loaded), -1 when `word` or
 * `trans` is NULL.
 */
int SearchWord(char* word,char* trans,char ch){
    if(!word || !trans){
        printf("输入发生异常\n");
        return -1;
    }
    /* pos has exactly 26 entries, so anything outside 'a'..'z' would
     * index out of bounds; such input simply cannot match. */
    if(!list || !pos || ch<'a' || ch>'z'){
        return 1;
    }

    int start = pos[ch-'a'].start;
    int end = pos[ch-'a'].end;
    int count = 0;			/* number of comparisons made */
    printf("查询索引在%d-%d\n",start,end);
    for(int i = start;i<end;i++){
        count++;
        if(list[i].word && strcmp(word,list[i].word) == 0){
            strcpy(trans,list[i].trans);
            printf("查询次数=%d\n",count);
            return 0;
        }
    }

    return 1;
}

关闭资源

/*
 * Release the global word table `list` and the letter index `pos`.
 *
 * The two tables are released independently, so one of them being NULL
 * no longer prevents the other from being freed. Both globals are reset
 * to NULL, which makes a repeated call harmless.
 */
void DestorySpace(){
    if(list){
        for(int i=0;i<COUNT;i++){
            free(list[i].word);
            free(list[i].trans);
        }
        free(list);
        list = NULL;
    }
    /* free(NULL) is a no-op, so no guard is needed here. */
    free(pos);
    pos = NULL;
}

运行结果：
在这里插入图片描述
下面是完整代码：
dict.h

#ifndef __DICT_H__
#define __DICT_H__

/* One dictionary entry: a word and its translation, both held as
 * pointers to C strings. */
typedef struct DICT{
    char* word;
    char* trans;
}dict;

/* Position range inside the word table for the words that share one
 * initial letter. */
typedef struct INDEX{
    int start;
    int end;
}index;

/* Load the word file into memory; returns the number of entries read,
 * or -1 when the file cannot be opened. Declared with (void) so the
 * compiler rejects calls that pass arguments. */
int GetWord(void);

/* Search for `word` using the index entry selected by `ch` and copy the
 * translation into `trans`. Returns 0 when found, 1 when not found and
 * -1 when `word` or `trans` is NULL. */
int SearchWord(char* word,char* trans,char ch);

/* Free everything that GetWord allocated. */
void DestorySpace(void);

#endif // __DICT_H__

main.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "dict.h"

/* Number of entries allocated for the word table, and the number of
 * slots that DestorySpace walks when freeing it. */
#define COUNT 7989

/* Word table, allocated by GetWord with room for COUNT entries. */
dict* list = NULL;
/* Per-letter position index, allocated by GetWord with 26 entries. */
index* pos = NULL;
/*
 * Interactive lookup loop: load the dictionary, then repeatedly read one
 * word from standard input and print its translation, until the user
 * types "comm=exit" or the input ends. The first character of the input
 * selects the index range that SearchWord scans.
 *
 * Returns 0 on normal exit, -1 when the dictionary could not be loaded.
 */
int main()
{
    /* Without a loaded table every lookup would fail, so stop early. */
    if(GetWord()<0){
        return -1;
    }

    char word[1024];
    char trans[1024];
    while(1){
        memset(word,0,sizeof word);
        memset(trans,0,sizeof trans);
        /* The field width keeps long input inside the buffer, and the
         * return value ends the loop at end-of-input instead of spinning. */
        if(scanf("%1023s",word) != 1){
            break;
        }
        if(!strcmp(word,"comm=exit")){
            break;
        }
        if(!SearchWord(word,trans,word[0])){
            printf("%s\n",trans);
        }else{
            printf("未找到该单词\n");
        }
    }

    DestorySpace();

    return 0;
}
int GetWord(){
    FILE* fp = fopen("dict.txt","r");
    if(!fp){
        printf("加载单词库失败\n");
        return -1;
    }
    list = (dict*)malloc(sizeof(dict)*COUNT);
    pos = (index*)malloc(sizeof(index)*26);
    char* temp = (char*)malloc(sizeof(char)*1024);
    int i = 0;
    int posindex = 0;
    pos[posindex].start = 0;
    pos[posindex].end = 0;
    int flag = 'a';
    while(!feof(fp)){
        memset(temp,0,1024);
        fgets(temp,1024,fp);//#a\n0
        if(!strlen(temp))
            break;
        list[i].word = (char*)malloc(sizeof(char)*1024);
        list[i].trans = (char*)malloc(sizeof(char)*1024);
        memset(list[i].word,0,1024);memset(list[i].trans,0,1024);
        sscanf(temp,"%s   %s",list[i].word,list[i].trans);

        if(list[i].word[0]==flag){
            pos[posindex].end++;
        }else{
            posindex++;
            flag++;
            pos[posindex].start = pos[posindex-1].end+1;
            pos[posindex].end = pos[posindex-1].end+1;
        }
        i++;
    }
    for(int i = 'a';i<='z';i++){
        printf("%c  %d-%d\n",i,pos[i-'a'].start,pos[i-'a'].end);
    }
    free(temp);
    fclose(fp);
    return i;
}

/*
 * Look up `word` inside the index range of its initial letter and copy
 * the translation.
 *
 * word:  the word to search for.
 * trans: output buffer that receives the translation via strcpy; the
 *        caller must make it large enough for any stored translation.
 * ch:    initial letter used to select the index entry pos[ch-'a'].
 *
 * Positions start <= i < end of that entry are compared. The range and
 * the number of comparisons are printed.
 *
 * Returns 0 when found, 1 when not found (including when `ch` is not a
 * lowercase letter or the tables are not loaded), -1 when `word` or
 * `trans` is NULL.
 */
int SearchWord(char* word,char* trans,char ch){
    if(!word || !trans){
        printf("输入发生异常\n");
        return -1;
    }
    /* pos has exactly 26 entries, so anything outside 'a'..'z' would
     * index out of bounds; such input simply cannot match. */
    if(!list || !pos || ch<'a' || ch>'z'){
        return 1;
    }

    int start = pos[ch-'a'].start;
    int end = pos[ch-'a'].end;
    int count = 0;			/* number of comparisons made */
    printf("查询索引在%d-%d\n",start,end);
    for(int i = start;i<end;i++){
        count++;
        if(list[i].word && strcmp(word,list[i].word) == 0){
            strcpy(trans,list[i].trans);
            printf("查询次数=%d\n",count);
            return 0;
        }
    }

    return 1;
}

/*
 * Release the global word table `list` and the letter index `pos`.
 *
 * The two tables are released independently, so one of them being NULL
 * no longer prevents the other from being freed. Both globals are reset
 * to NULL, which makes a repeated call harmless.
 */
void DestorySpace(){
    if(list){
        for(int i=0;i<COUNT;i++){
            free(list[i].word);
            free(list[i].trans);
        }
        free(list);
        list = NULL;
    }
    /* free(NULL) is a no-op, so no guard is needed here. */
    free(pos);
    pos = NULL;
}