常见的写法是一行一行地读、一行一行地写(甚至还有一个字节一个字节地读的,那样更慢)。这样写代码虽然简单,但速度很慢,而且当一行的长度超过缓冲区大小时就会出问题。
更快的方法分三步走:
第一步,整块 buffer 整块 buffer 地读,边读边找换行符,直到定位到要删除的那一行的开头;第二步,继续往后找到这一行的结尾;第三步,从该行结尾之后开始,只需要整块 buffer 地读写即可。经过测试,1000 万行、86M 的文本文件,瞬间处理完。
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fcntl.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <unistd.h>
#define BUFFER_SIZE 0x400

/* Write exactly len bytes to fd, retrying after short writes and EINTR.
 * Returns 0 on success, -1 on any other write error. */
static int write_all(int fd, const unsigned char *data, size_t len)
{
    while (len > 0) {
        ssize_t written = write(fd, data, len);

        if (written < 0) {
            if (errno == EINTR) {
                continue;
            }
            return -1;
        }
        data += written;
        len -= (size_t)written;
    }
    return 0;
}

/* read() wrapper that restarts the call when it is interrupted (EINTR). */
static ssize_t read_retry(int fd, void *buf, size_t len)
{
    ssize_t got;

    do {
        got = read(fd, buf, len);
    } while (got < 0 && errno == EINTR);
    return got;
}

/**
 * Copy fd0 to fd1 while dropping one line.
 *
 * Lines are delimited by '\n'. `index` is zero-based: 0 removes the first
 * line, 1 the second, and so on. The removed line's terminating '\n' (if
 * any) is removed with it. Lines may be longer than BUFFER_SIZE; data is
 * processed one buffer at a time.
 *
 * @param fd0    descriptor to read from (current offset to end of input)
 * @param fd1    descriptor to write the filtered data to
 * @param index  zero-based number of the line to drop
 * @return 0 if the line was found and dropped. Non-zero otherwise: the
 *         number of line terminators that were still missing when the input
 *         ended (1 if the input ended exactly where the requested line would
 *         have started), or -1 on a read/write error. When the line is not
 *         found, the whole input has still been copied to fd1.
 */
int remove_file_line(int fd0, int fd1, unsigned int index)
{
    unsigned char buffer[BUFFER_SIZE];
    int target_seen = 0; /* at least one byte of the target line was read */
    int target_done = 0; /* the end of the target line has been passed */
    ssize_t got;

    while ((got = read_retry(fd0, buffer, sizeof buffer)) > 0) {
        size_t len = (size_t)got;
        size_t pos = 0;
        const unsigned char *nl;

        /* Phase 1: count down newlines until the target line starts.
         * Once index has reached 0 this loop is a no-op and pos stays 0. */
        while (index > 0 && pos < len) {
            nl = memchr(buffer + pos, '\n', len - pos);
            if (nl == NULL) {
                pos = len;
            } else {
                pos = (size_t)(nl - buffer) + 1;
                index--;
            }
        }
        /* Everything before pos belongs to lines that are kept. */
        if (pos > 0 && write_all(fd1, buffer, pos) != 0) {
            return -1;
        }

        /* Phase 2: discard bytes of the target line up to and including its
         * newline. The line may span any number of buffers. */
        if (index == 0 && !target_done && pos < len) {
            target_seen = 1;
            nl = memchr(buffer + pos, '\n', len - pos);
            if (nl == NULL) {
                continue; /* the rest of this buffer is inside the line */
            }
            pos = (size_t)(nl - buffer) + 1;
            target_done = 1;
        }

        /* Phase 3: whatever follows the removed line is copied verbatim. */
        if (pos < len && write_all(fd1, buffer + pos, len - pos) != 0) {
            return -1;
        }
    }

    if (got < 0) {
        return -1;
    }
    if (index == 0 && !target_seen) {
        /* All preceding lines exist, but the input ended before the
         * requested line began: nothing was removed. */
        return 1;
    }
    return (int)index;
}
/**
 * Command-line entry point: <input> <output> <line-index>.
 *
 * Copies <input> to <output> with the zero-based line <line-index> removed
 * by remove_file_line(), prints "delete success" or "delete failed" on
 * stdout, and exits with EXIT_SUCCESS only when the line was removed and
 * the output was closed without error.
 */
int main(int argc, char *argv[])
{
    struct stat in_stat;
    struct stat out_stat;
    unsigned long requested;
    char *end;
    int fd0;
    int fd1;
    int removed;

    if (argc <= 3) {
        fprintf(stderr, "usage: %s <input> <output> <line-index>\n",
                argc > 0 ? argv[0] : "remove_line");
        return EXIT_FAILURE;
    }

    /* Require a plain decimal number: strtoul would otherwise accept "-1"
     * and silently wrap it to a huge value. */
    errno = 0;
    requested = strtoul(argv[3], &end, 10);
    if (argv[3][0] < '0' || argv[3][0] > '9' || *end != '\0' ||
        errno == ERANGE || requested > UINT_MAX) {
        fprintf(stderr, "invalid line index: %s\n", argv[3]);
        return EXIT_FAILURE;
    }

    fd0 = open(argv[1], O_RDONLY);
    if (fd0 == -1) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }

    /* The output is truncated on open, so refuse to run when it is the very
     * file being read: that would destroy the input before it is copied. */
    if (fstat(fd0, &in_stat) == 0 && stat(argv[2], &out_stat) == 0 &&
        in_stat.st_dev == out_stat.st_dev &&
        in_stat.st_ino == out_stat.st_ino) {
        fprintf(stderr, "input and output must be different files\n");
        close(fd0);
        return EXIT_FAILURE;
    }

    /* O_TRUNC: without it a longer pre-existing output file would keep its
     * old trailing bytes after the copy. */
    fd1 = open(argv[2], O_CREAT | O_WRONLY | O_TRUNC,
               S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
    if (fd1 == -1) {
        perror(argv[2]);
        close(fd0);
        return EXIT_FAILURE;
    }

    removed = (remove_file_line(fd0, fd1, (unsigned int)requested) == 0);

    /* close() may be where a deferred write error is finally reported. */
    if (close(fd1) != 0) {
        removed = 0;
    }
    close(fd0);

    if (removed) {
        printf("delete success\n");
    } else {
        printf("delete failed\n");
    }
    return removed ? EXIT_SUCCESS : EXIT_FAILURE;
}