这里尝试用 mmap 来重写文件。我不想填满我的硬盘,所以没有用大文件做过测试。这是一个可以直接运行的例子,但是你需要把开头和结尾那些我仅用于测试的代码去掉。
这会先把每个文件的前 50% 写入新文件,然后再把这部分从原文件中裁掉。不确定这是不是你想要的顺序!
import mmap
import shutil
import os
from glob import glob
# Glob pattern selecting the files whose leading part is moved to 'output'.
files_to_trim = 'deleteme*'
# Fraction of each file (by size) that marks where the cut is searched from.
fraction_to_keep = .5
# Number of bytes copied to the output per write call.
blocksize = 128*1024


def move_head(filename, out, fraction=.5, chunk_size=128*1024):
    """Move the leading part of *filename* into *out*, trimming the file.

    The cut point starts at ``int(size * fraction)`` bytes and is pushed
    forward to just past the next ``\\n`` so that only whole lines are
    moved.  When no newline follows the cut point the whole file is moved
    and, if its last byte is not a newline, one is appended to *out* so the
    data written next starts on a fresh line.

    The bytes that were not moved are shifted to the start of the file and
    the file is truncated to that length.

    Args:
        filename: Path of the file to trim in place.
        out: Writable binary file object receiving the moved bytes.
        fraction: Fraction of the file size where the cut is searched from.
        chunk_size: Maximum number of bytes handed to ``out.write`` at once.

    Returns:
        The number of bytes removed from the front of *filename*.
    """
    st_size = os.stat(filename).st_size
    if st_size == 0:
        # mmap refuses to map an empty file, and there is nothing to move.
        return 0
    sample_size = int(st_size * fraction)
    with open(filename, 'r+b') as infile:
        memfile = mmap.mmap(infile.fileno(), 0)
        try:
            # find next line ending
            count = memfile.find(b'\n', sample_size)
            if count >= 0:
                count += 1  # account for \n
                need_newline = False
            else:
                count = st_size
                # Slice (not index) so the comparison is bytes-vs-bytes.
                need_newline = memfile[-1:] != b'\n'
            # copy blocks to outfile
            for rpos in range(0, count, chunk_size):
                out.write(memfile[rpos:min(rpos + chunk_size, count)])
            if need_newline:
                out.write(b'\n')
            # trim infile: shift the tail to the front inside the mapping,
            # without materialising it as a separate bytes object
            remaining = st_size - count
            if remaining:
                memfile.move(0, count, remaining)
            memfile.flush()
        finally:
            # The mapping must be closed before the file can be truncated.
            memfile.close()
        infile.truncate(remaining)
        infile.flush()
    return count


# make test file
with open('deleteme1', 'w') as fixture:
    fixture.writelines('all work and no play {}\n'.format(i)
                       for i in range(6))
with open('deleteme2', 'w') as fixture:
    fixture.writelines('all work and no play {}\n'.format(i)
                       for i in range(10, 18))

with open('output', 'wb') as out:
    for filename in sorted(glob(files_to_trim)):
        move_head(filename, out, fraction_to_keep, blocksize)

# validate test file
for shown in ('deleteme1', 'deleteme2', 'output'):
    print('{}:'.format(shown))
    with open(shown) as result:
        print(result.read())