Linux 文件操作

[Pokemon]大猫猫

已于 2023-06-25 16:56:22 修改

阅读量1k

点赞数 22

分类专栏： Linux 文章标签： linux

于 2023-06-23 21:54:09 首次发布

本文链接：https://blog.csdn.net/qq_70793373/article/details/131273059

版权

Linux 专栏收录该内容

10 篇文章 0 订阅

订阅专栏

文章目录

一、task_struct 和 file 的关系
二、文件操作的系统调用
三、进程默认打开的三个文件
四、文件重定向
五、Linux 下一切皆文件
六、缓冲区

文件是在磁盘上创建出来的，当我们想进行文件操作时，根据冯诺依曼体系结构，CPU 只和内存交互，为了可以进行文件操作，应将磁盘中的文件先加载到内存中，加载什么呢？至少要加载文件的某些属性

一个用户可能同时会操作多个文件，并且存在多个用户，因此在某一时间内会存在大量的内存文件，为了管理这些内存文件，需要先描述，再组织：操作系统为这些内存文件创建了结构体 file，file 中的属性便从磁盘中加载而来，结构体 file 中还可以存在 struct file* 属性，于是便可以将这些 file 结构体链接起来组成某种数据结构，这样对内存文件的管理，也就转换成了对存储数据类型为结构体 file 的某种数据结构的增删查改

进行文件操作时，用户不是自己操作，而是通过进程对文件操作，因此文件操作的本质其实就是进程的 task_struct 对文件结构体 file 的操作

一、task_struct 和 file 的关系

在这里插入图片描述

task_struct 和 file 的关系中采用了 files_struct 作为中间结构，files_struct 中包含了一个指针数组 fd_array，数组元素存储的是进程加载到内存的文件对应的 file 地址

进程在加载一个文件时，首先操作系统会创建文件结构体 file(如果 file 还未加载到内存)，然后会在进程指向的 files_struct 中的 fd_array 中找到一个下标最小的且没有被使用的位置填充该 file 的地址，进程加载文件完成后，为了能让进程还可以找到该 file，操作系统给进程提供的是 file 地址在 fd_array 中的下标，这个下标我们称之为文件描述符，fd_array 也就称作文件描述符表

二、文件操作的系统调用

打开文件：在进程的文件描述符表中填充对应的 file 地址

系统调用 open，头文件 sys/types.h、sys/stat.h、fcntl.h

int open(const char *pathname, int flags);
int open(const char *pathname, int flags, mode_t mode);

返回值：成功返回文件描述符，失败返回 -1

参数：

pathname：文件路径 + 文件名 + 文件后缀
flags：位图，传递多个标志位时，标志位之间用 | 连接
mode：新建文件的权限，受进程的 umask 影响

常用的标志位：

O_RDONLY：以读的方式打开文件，从文件开始处读取
O_WRONLY：以写的方式打开文件，从文件开始处写入，不会清空文件
O_APPEND：需要和写方式的标志（O_WRONLY 或 O_RDWR）一起传递，从文件末尾处写入
O_TRUNC：需要和写方式的标志（O_WRONLY 或 O_RDWR）一起传递，打开时将文件清空
O_CREAT：如果文件不存在，则会创建文件，如果传递这个标志，最好也传递 mode 参数

系统调用 umask，头文件 sys/types.h、sys/stat.h，设置当前进程的 umask 为 mask，该系统调用总是成功，返回设置之前的 umask

// mode_t 类型就是 unsigned int 的 typedef
mode_t umask(mode_t mask);

系统调用 close/ read / write，头文件 unistd.h

// 关闭文件：在进程的文件描述符表中删除对应的 file 地址
// 成功返回 0，失败返回 -1
int close(int fd);

// 从文件描述符 fd 对应的文件中最多读取 count 个字节到 buf 中，文件偏移量会增加实际读取到的字节数
// 成功返回读取到的字节数，返回 0 表示已经读取到文件结尾（未读取到任何内容），错误返回 -1
ssize_t read(int fd, void *buf, size_t count);

// 将 buf 中最多 count 个字节写入文件描述符 fd 对应的文件
// 成功返回写入的字节数，0 表示未写入任何内容，错误返回 -1
ssize_t write(int fd, const void *buf, size_t count);

#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Demo: print log.txt line by line using the raw open/read/close system calls.
 *
 * Each line is printed followed by a newline, with a one-second pause between
 * lines. Lines longer than sizeof(buf) - 1 bytes are printed in several
 * pieces instead of overflowing the buffer. Blank lines are printed as blank
 * lines; only end-of-file stops the loop.
 *
 * Returns 0 on success; exits with status 1 if open() or read() fails.
 */
int main()
{
    // Flag cheat-sheet for open():
    //   O_WRONLY  write from the start of the file, without truncating it
    //   O_APPEND  write at the end of the file
    //   O_RDONLY  read from the start of the file
    // Alternatives for the write demo kept at the bottom of this function:
    // int fd = open("log.txt", O_WRONLY | O_CREAT, 0666);
    // int fd = open("log.txt", O_WRONLY | O_CREAT | O_APPEND, 0666);
    int fd = open("log.txt", O_RDONLY);
    if (fd == -1)
    {
        printf("open error : %s\n", strerror(errno));
        exit(1);
    }

    // R
    // Block-wise alternative to the line reader below:
    // int count = read(fd, buf, sizeof(buf) - 1);
    // if (count <= 0) break;
    // buf[count] = '\0';
    // printf("%s", buf);
    char buf[128];
    int at_eof = 0;
    while (!at_eof)
    {
        size_t len = 0;

        // Read one byte at a time until newline, EOF, or the buffer is full
        // (one byte is always reserved for the terminating '\0').
        while (len < sizeof(buf) - 1)
        {
            char ch;
            ssize_t n = read(fd, &ch, 1);
            if (n == -1)
            {
                // A signal interrupted the call before any data arrived.
                if (errno == EINTR) continue;

                printf("read error : %s\n", strerror(errno));
                close(fd);
                exit(1);
            }
            if (n == 0)
            {
                at_eof = 1;
                break;
            }
            if (ch == '\n') break;

            buf[len++] = ch;
        }

        // EOF with nothing pending: there is no further line to print.
        if (at_eof && len == 0) break;

        buf[len] = '\0';
        printf("%s\n", buf);
        sleep(1);
    }

    // W
    // char buf[128];
    // int cnt = 10;
    // while (cnt)
    // {
    //     snprintf(buf, sizeof(buf), "%s, %d\n", "hello world", cnt);
    //     ssize_t ret = write(fd, buf, strlen(buf));
    //     assert(ret != (ssize_t)-1);
    //     (void)ret;
    //     cnt--;
    // }

    // Close the file
    close(fd);

    return 0;
}

三、进程默认打开的三个文件

在这里插入图片描述

进程默认打开的三个文件：

0：标准输入，对应于键盘文件
1：标准输出，对应于显示器文件
2：标准错误，对应于显示器文件

#include <stdio.h>
#include <unistd.h>

/*
 * Demo: read lines from file descriptor 0 (standard input) with read() and
 * echo each one back through printf.
 *
 * The loop ends on an empty line, on end-of-input, or on a read error.
 * Input longer than sizeof(buf) - 1 bytes without a newline is echoed in
 * several pieces instead of overflowing the buffer.
 */
int main()
{
    // Read from the keyboard file
    for (;;)
    {
        char buf[128];
        size_t len = 0;

        // One byte is always reserved for the terminating '\0'.
        while (len < sizeof(buf) - 1)
        {
            char ch;
            ssize_t n = read(0, &ch, 1);

            // n == 0 is end-of-input, n == -1 is an error; in both cases ch
            // holds no valid data and must not be inspected.
            if (n <= 0) break;
            if (ch == '\n') break;

            buf[len++] = ch;
        }

        if (len == 0) break;

        buf[len] = '\0';
        printf("从键盘文件中按行读取 : %s\n", buf);
    }

    return 0;
}

在这里插入图片描述

#include <stdio.h>
#include <string.h>
#include <unistd.h>

/*
 * Demo: write the same greeting to file descriptor 1 and file descriptor 2
 * with write(), counting down from 5 to 1, and emit a blank line through
 * stdio after each pair.
 */
int main()
{
    char line[128];

    for (int remaining = 5; remaining != 0; --remaining)
    {
        // Message for fd 1
        snprintf(line, sizeof(line), "写入到 fd 1 : %s, %d\n", "hello world", remaining);
        write(1, line, strlen(line));

        // Message for fd 2
        snprintf(line, sizeof(line), "写入到 fd 2 : %s, %d\n", "hello world", remaining);
        write(2, line, strlen(line));

        // Separator goes through the stdio stdout stream, not write()
        putchar('\n');
    }

    return 0;
}

在这里插入图片描述

在进程中打开的第一个文件，返回的文件描述符是 3

#include <stdio.h>
#include <assert.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Demo: open (creating if needed) log.txt for writing and print the file
 * descriptor that open() returned.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main()
{
    // Open the file
    int fd = open("log.txt", O_WRONLY | O_CREAT, 0666);
    if (fd == -1)
    {
        // An explicit check is used instead of assert(): assert is compiled
        // out under NDEBUG, which would let the failure go unnoticed.
        perror("open");
        return 1;
    }

    printf("fd : %d\n", fd);

    // Close the file
    close(fd);

    return 0;
}

在这里插入图片描述

四、文件重定向

进程打开新文件时，返回的文件描述符为：文件描述符表中最小并且没有被使用的下标

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Demo: open file.txt five times and print the five descriptors, then close
 * the third one, open the file once more and print the descriptor returned
 * by that new open() call.
 */
int main()
{
    int fds[5];

    for (int i = 0; i < 5; ++i)
    {
        fds[i] = open("file.txt", O_WRONLY | O_CREAT, 0666);
    }

    printf("%d %d %d %d %d\n", fds[0], fds[1], fds[2], fds[3], fds[4]);

    // Free the third descriptor, then open again and show what comes back
    close(fds[2]);
    int reopened = open("file.txt", O_WRONLY | O_CREAT, 0666);
    printf("%d\n", reopened);

    // Release everything that is still open
    close(fds[0]);
    close(fds[1]);
    close(fds[3]);
    close(fds[4]);
    close(reopened);

    return 0;
}

在这里插入图片描述

重定向的原理：更改进程的文件描述符表中特定下标指向的文件

在这里插入图片描述

实现输出重定向：正常信息输出到 log.normal，异常信息输出到 log.error

#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

/*
 * Demo: output redirection done by hand. Descriptor 1 is closed and
 * log.normal is opened, then descriptor 2 is closed and log.error is opened;
 * afterwards the program prints through printf and perror.
 *
 * The close/open pairs must stay in this order: each open() is issued right
 * after the close() whose slot it is meant to take.
 */
int main()
{
    // Replace standard output with log.normal
    close(1);
    int out_fd = open("log.normal", O_WRONLY | O_CREAT, 0666);

    // Replace standard error with log.error
    close(2);
    int err_fd = open("log.error", O_WRONLY | O_CREAT, 0666);

    // printf writes to standard output
    printf("log.normal fd : %d\n", out_fd);
    printf("log.error  fd : %d\n", err_fd);

    // Provoke an error message: open a file for reading that is expected not
    // to exist, and report the failure with perror (standard error)
    if (open("abc.txt", O_RDONLY) == -1)
    {
        perror("open");
    }

    return 0;
}

在这里插入图片描述

系统调用 dup2，头文件 unistd.h

// 将 oldfd 拷贝给 newfd，必要时先关闭 newfd
// 即 fd_array[newfd] = fd_array[oldfd]
// 成功返回新的文件描述符，失败返回 -1
int dup2(int oldfd, int newfd);

在这里插入图片描述

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/*
 * Demo: redirect descriptor 1 to log.txt with dup2(), then write the value
 * dup2() returned through both descriptor 1 and the original descriptor.
 *
 * Returns 0 on success, 1 if open() or dup2() fails. A failed write() is
 * reported on standard error but does not change the exit status.
 */
int main()
{
    int fd = open("log.txt", O_WRONLY | O_CREAT, 0666);
    if (fd == -1)
    {
        perror("open");
        return 1;
    }

    // Redirect: fd_array[1] = fd_array[fd]
    int ret_fd = dup2(fd, 1);
    if (ret_fd == -1)
    {
        perror("dup2");
        close(fd);
        return 1;
    }

    char buf[128];
    snprintf(buf, sizeof(buf), "dup2 返回值 : %d\n", ret_fd);
    size_t len = strlen(buf);

    // Both writes use the same message, once per descriptor
    if (write(1, buf, len) == -1)
    {
        perror("write");
    }
    if (write(fd, buf, len) == -1)
    {
        perror("write");
    }

    close(fd);

    return 0;
}

在这里插入图片描述

五、Linux 下一切皆文件

通过 file 结构体用户可以像文件一样看待 Linux 中的硬件
在这里插入图片描述

六、缓冲区

在调用向文件写入数据的库函数时，数据并不是立刻写入到文件，而是先写到语言提供的缓冲区中

#include <stdio.h>
#include <unistd.h>

/*
 * Demo: print the same newline-free message three times, pausing one second
 * after each, then finish with a newline.
 */
int main()
{
    int remaining = 3;

    // Write to the display file; no '\n' is printed inside the loop
    while (remaining-- > 0)
    {
        printf("you can see me? ");
        sleep(1);
    }

    printf("\n");

    return 0;
}

程序并不会一秒一秒的显示 you can see me?，而是 3 秒后全部显示
在这里插入图片描述

缓冲区的作用是避免频繁地调用系统调用而花费太多时间：最好能用尽量少的系统调用，就将所有数据写入到 file 的缓冲区中
在这里插入图片描述

对于不同的文件，语言的缓冲区采用的刷新策略有三种：

无缓冲：不提供缓冲区，直接调用系统调用写到 file 的缓冲区
行缓冲：遇到 \n 时，调用系统调用写到 file 的缓冲区
全缓冲：缓冲区写满时，调用系统调用写到 file 的缓冲区

语言中对于显示器文件采用的是行缓冲，普通文件采用的是全缓冲，而 file 的缓冲区的刷新策略是由操作系统自主决定的

#include <stdio.h>
#include <string.h>
#include <unistd.h>

/*
 * Demo: emit one message through the C library (fprintf on stdout) and one
 * through the write() system call on descriptor 1, then call fork() just
 * before returning.
 *
 * The order of the three calls is the point of the demo: both messages are
 * issued before fork().
 */
int main()
{
    // C library function
    fprintf(stdout, "hello fprintf\n");
    
    // System call
    const char* msg = "hello write\n";
    write(1, msg, strlen(msg));

    // Create a child process; the return value is deliberately ignored, so
    // parent and child both fall through to the return below
    fork();

    return 0;
}

重定向前后输出的数据不一样
在这里插入图片描述

输出重定向前，fprintf 写入到显示器文件中，此时采用的是行缓冲，因此在 fork 之前，缓冲区的数据已经被刷新到 file 的缓冲区了
输出重定向后，fprintf 写入到普通文件中，此时采用的是全缓冲，因此在 fork 之前，缓冲区的数据并没有刷新，于是 fork 之后，父子进程结束时都会刷新缓冲区也就造成了这种现象
write 直接写入到 file 的缓冲区中，不经过语言提供的缓冲区，因此 write 的输出不受 fork 和输出重定向的影响