习题 4.6
编写类似cp(1)的程序,它复制包含空洞的文件,但是不将字节0写到输出文件中去。
- 空洞文件的特性:“在UNIX文件操作中,文件位移量可以大于文件的当前长度,在这种情况下,对该文件的下一次写将延长该文件,并在文件中构成一个空洞,这一点是允许的。位于文件中但没有写过的字节都被设为 0。” 特点表述为:offset > 实际文件大小。
- 制作空洞文件:在linux下,利用lseek人为的修改offset可以获得一个空洞文件。具体操作方法为,在写入数据的过程中使用lseek移动文件读写指针,使之后移但不应写入数据。使得中间存在不含数据但是占用大小的部分。
- 生成空洞文件
/**
* @file createhole.c
* @author wangs7
* @brief 生成64k的空洞文件 空洞块大小分别为 holesize[i]
* @version 0.1
* @date 2021-04-22
*
* @copyright Copyright (c) 2021
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <limits.h>
#include <dirent.h>
#include <fcntl.h>
int holesize[]={1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32*1024};
int filesize = 64*1024;
int main()
{
int i = 0;
int count = 0;
int ret = 0, fd = 0;
char filename[32]={0};
unsigned char buf[32*1024]={0};
memset(buf, 1, 32*1024);
/* 每个文件交叉写入 blocksize[i] 的空洞和数据 直到写满 64k*/
for (; i< sizeof(holesize)/ sizeof(int); ++i) {
count = 0;
memset(filename, 0, 32);
sprintf(filename, "%s%d", "holesize", holesize[i]);
fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
if(fd < 0) {
fprintf(stderr, "open file fail\n");
exit(1);
}
while(count < filesize) {
ret = lseek(fd, holesize[i], SEEK_CUR);
if(ret < 0) {
fprintf(stderr, "lseek fail\n");
exit(1);
}
int remain = holesize[i];
while(remain) {
ret = write(fd, buf, remain);
if(ret < 0 ) {
fprintf(stderr, "write fail\n");
exit(1);
}
remain -= ret;
}
count += holesize[i] * 2;
}
close(fd);
}
return 0;
}
运行结果
linux@ubuntu:~/UNIX_learning/Unit_4/4.6$ ll -s holesize*
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize1
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize1024
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize128
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize16
32 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize16384
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize2
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize2048
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize256
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize32
32 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize32768
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize4
36 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize4096
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize512
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize64
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize8
32 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize8192
- mycp
/**
* @file mycp.c
* @author wangs7
* @brief Write a program similar to CP (1), which copies the empty file,
* but does not write byte 0 to the output file.
* @version 0.1
* @date 2021-04-22
*
* @copyright Copyright (c) 2021
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <limits.h>
#include <dirent.h>
#include <fcntl.h>
#include <errno.h>
/* 读函数 */
ssize_t read_ex(int fd, void *buf, size_t nbyte){
size_t read_remain = nbyte;
unsigned char *read_start = (unsigned char*)buf;
ssize_t read_num = -1;
ssize_t total_num = 0;
while(read_remain) {
read_num = read(fd, read_start, read_remain);
if(-1 == read_num){
return -1;
}
else if(0 == read_num){
break;
}
else{
read_remain -= read_num;
read_start += read_num;
total_num += read_num;
}
}
return total_num;
}
/* 写函数 */
ssize_t write_ex(int fd, const void *buf, size_t nbyte){
size_t write_remain = nbyte;
unsigned char *write_start = (unsigned char*)buf;
ssize_t write_num = -1;
ssize_t total_num = 0;
while(write_remain) {
write_num = write(fd, write_start, write_remain);
if(-1 == write_num){
return -1;
}
else{
write_remain -= write_num;
write_start += write_num;
total_num += write_num;
}
}
return total_num;
}
int my_cp(const char *sf, const char *df)
{
int fd1 = -1, fd2 = -1;
int rev = -1;
unsigned char *buffer = NULL, *buffer_zero = NULL;
long pagesize = 0;
long long blocks, blksize, size;
int read_num, write_num, write_remain, have_holes = 0;
struct stat st;
fd1 = open(sf, O_RDONLY);
if(-1 == fd1){
fprintf(stderr, "open file1 faild");
goto err;
}
if(fstat(fd1, &st) !=0) {
fprintf(stderr, "fstat: ");
goto err;
}
else{
#ifdef _SC_PAGESIZE
pagesize = sysconf(_SC_PAGESIZE);
if (pagesize < 0) {
if (errno != 0) {
if (errno == EINVAL) {
fprintf(stderr, " (not supported)\n");
pagesize = st.st_blksize;
}
else {
fprintf(stderr, "sysconf error\n");
goto err;
}
} else {
fprintf(stderr, " (no limit)\n");
pagesize = st.st_blksize;
}
}
printf("pagesize: %ld\n", pagesize);
#else
pagesize = st.st_blksize;
#endif
blocks = st.st_blocks;
blksize = st.st_blksize;
size = st.st_size;
printf("st.st_blocks: %lld\n", blocks);
printf("st.st_blksize: %lld\n", blksize);
printf("st.st_size: %lld\n", size);
/* 块大小512,在不同平台上可能不兼容 Ubuntu上使用命令 sudo fdisk -l /dev/sda1 查看*/
/* && 后边的内容说明 : */
/* > 右侧为内存中实际读到的文件大小 满页页数+一个或不加不满页 再乘页大小 */
/* > 左侧为磁盘块大小 乘 磁盘上占用块数量 */
/* 比对即可知文件是否含有空洞 */
/* 空洞必须从一页的起始位置开始计算,并且等于或超过pagesize,才不占用实际磁盘空间 */
if(S_ISREG(st.st_mode) && (size / pagesize + (size%pagesize?1:0)) * pagesize > 512 * blocks) {
have_holes = 1;
printf("%s is a sparse-block file!\n", sf);
} else{
have_holes = 0;
printf("%s is not a sparse-block file!\n", sf);
}
}
/* 以一页的大小读写 */
buffer = malloc(pagesize);
buffer_zero = malloc(pagesize);
if(buffer == NULL || buffer_zero == NULL) {
perror ("malloc fail");
goto err;
}
memset(buffer, '\0', pagesize);
memset(buffer_zero, '\0', pagesize);
fd2 = open(df, O_WRONLY | O_CREAT | O_TRUNC, S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH);
if (-1 == fd2) {
fprintf(stderr, "open file2 faild\n");
goto err;
}
/* 一页一页读写 */
while((read_num = read_ex(fd1, buffer, pagesize)) > 0) {
/* 读取到空洞 */
if(have_holes && !memcmp(buffer_zero, buffer, read_num)){
if(-1 == lseek(fd2, read_num, SEEK_CUR)){
fprintf(stderr, "lseek file2 fail\n");
goto err;
}
}
/* 非空洞 */
else{
write_num = write_ex(fd2, buffer, read_num);
if (-1 == write_num){
fprintf(stderr, "write file2 error\n");
goto err;
}
}
}
if(-1 == read_num){
fprintf(stderr, "read file1 error\n");
goto err;
}
rev = 0;
err:
if(buffer) free(buffer);
if(buffer_zero) free(buffer_zero);
close(fd1);
close(fd2);
return rev;
}
int main(int argc, char *argv[])
{
if(argc < 3) {
printf("Usage: %s file1 file2\n", argv[0]);
return -1;
}
my_cp(argv[1], argv[2]);
return 0;
}
运行
linux@ubuntu:~/UNIX_learning/Unit_4/4.6$ ./mycp holesize1 holesize1.mycp
pagesize: 4096
st.st_blocks: 128
st.st_blksize: 4096
st.st_size: 65536
holesize1 is not a sparse-block file!
linux@ubuntu:~/UNIX_learning/Unit_4/4.6$ cp holesize1 holesize1.cp
linux@ubuntu:~/UNIX_learning/Unit_4/4.6$ ll -s holesize1 holesize1.*
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize1
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:48 holesize1.cp
64 -rw-r--r-- 1 linux linux 65536 Apr 23 01:48 holesize1.mycp
linux@ubuntu:~/UNIX_learning/Unit_4/4.6$ ./mycp holesize4096 holesize4096.mycp
pagesize: 4096
st.st_blocks: 72
st.st_blksize: 4096
st.st_size: 65536
holesize4096 is a sparse-block file!
linux@ubuntu:~/UNIX_learning/Unit_4/4.6$ cp holesize4096 holesize4096.cp
linux@ubuntu:~/UNIX_learning/Unit_4/4.6$ ll -s holesize4096 holesize4096.*
36 -rw-r--r-- 1 linux linux 65536 Apr 23 01:35 holesize4096
32 -rw-r--r-- 1 linux linux 65536 Apr 23 01:52 holesize4096.cp
32 -rw-r--r-- 1 linux linux 65536 Apr 23 01:52 holesize4096.mycp