1 文件操作
① 有一个jsonline格式的文件file.txt大小约为10k
def get_lines():
    """Return every line of ``file.txt`` as a list of ``bytes``.

    The whole file is materialised in memory by ``readlines()``, so this
    baseline version is only suitable when the file comfortably fits in RAM.

    Returns:
        list[bytes]: the lines of the file, each keeping its line terminator.
    """
    # Binary mode: lines come back as raw bytes, no decoding is performed.
    with open('file.txt', 'rb') as f:
        return f.readlines()
# Script entry point: feed every line produced by get_lines() to process().
# The guard must compare against '__main__' (double underscores on both
# sides); with '_main_' the condition is never true and nothing runs.
# NOTE(review): process() is not defined in the visible part of this file;
# it is presumably supplied elsewhere - confirm.
if __name__ == '__main__':
    for e in get_lines():
        process(e)  # process each line data
如果处理大小为10G的文件,但是内存只有4G,只能修改get_lines函数,应该如何实现,需要考虑的问题有哪些?
#1
def get_lines():
    """Lazily yield the lines of ``file.txt`` one at a time.

    Iterating the file object reads through its internal buffer, so only the
    current line (plus that buffer) is held in memory instead of the whole
    file. The caller can keep using ``for e in get_lines(): ...`` unchanged.

    Yields:
        bytes: the next line of the file, including its line terminator.
    """
    # Binary mode: lines are yielded as raw bytes, no decoding is performed.
    with open('file.txt', 'rb') as f:
        for line in f:
            yield line
#2 每次按大小提示(约60000字节)批量读取若干行,避免读取次数太多。
def get_lines(size_hint=60000):
    """Yield the lines of ``file.txt``, reading them from disk in batches.

    Each ``readlines(size_hint)`` call returns complete lines whose combined
    size is roughly ``size_hint`` bytes, which keeps memory bounded while
    avoiding one read call per line. The batches are flattened so the caller
    still receives one line per iteration.

    Args:
        size_hint: approximate number of bytes to read per batch. This is a
            hint in bytes, not a line count; a value of ``None`` or <= 0
            makes ``readlines`` read the whole file in one batch.

    Yields:
        bytes: the next line of the file, including its line terminator.
    """
    with open('file.txt', 'rb') as f:
        while True:
            batch = f.readlines(size_hint)
            # An empty batch means end of file; a single readlines() call
            # only covers the first ~size_hint bytes, so keep looping.
            if not batch:
                break
            yield from batch
#3
import os
from mmap import ACCESS_READ, mmap


def get_lines(fp):
    """Yield the decoded lines of the file at ``fp`` using a memory map.

    The file is mapped read-only and scanned for newline bytes with
    ``mmap.find``; only the slice for the current line is copied out and
    decoded.

    Args:
        fp: path of the file to read.

    Yields:
        str: the next line, decoded with the default UTF-8 codec and keeping
        its trailing newline when the file has one. A final line without a
        trailing newline is yielded as well.
    """
    with open(fp, "rb") as f:
        # mmap raises ValueError for a zero-length file, so treat an empty
        # file as "no lines".
        if os.fstat(f.fileno()).st_size == 0:
            return
        # Length 0 maps the whole file; ACCESS_READ avoids needing write
        # permission on the file.
        with mmap(f.fileno(), 0, access=ACCESS_READ) as m:
            size = len(m)
            start = 0
            while start < size:
                newline_at = m.find(b"\n", start)
                # No further newline: the rest of the file is the last line.
                end = size if newline_at == -1 else newline_at + 1
                yield m[start:end].decode()
                start = end
if __name__ == "_main_":
for i in get_lines("fp_some_h