Read a large file line by line
def do_readline(path: str):
    r"""
    :param path: file path
    """
    with open(path, "r", encoding="utf-8") as file_handler:
        line = file_handler.readline()
        while line:
            print(line)  # each line keeps its trailing newline
            line = file_handler.readline()
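Since a Python file object is itself an iterator over its lines, the explicit readline() loop can be written more compactly; do_readline_iter below is a hypothetical name for this minimal sketch:
def do_readline_iter(path: str):
    r"""
    :param path: file path
    """
    with open(path, "r", encoding="utf-8") as file_handler:
        # iterating the file object yields one line at a time,
        # using buffered I/O instead of loading the whole file
        for line in file_handler:
            print(line, end="")  # lines already end with "\n"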
Read a large file in chunks
def do_read_chunks(path, chunk_size=1024 * 1024):
    r"""
    :param path: file path
    :param chunk_size: chunk size (characters, since the file is opened in text mode)
    """
    # use a context manager so the file is closed even if the
    # consumer of this generator stops early
    with open(path, "r", encoding="utf-8") as file_object:
        while True:
            chunk_data = file_object.read(chunk_size)
            if not chunk_data:
                break
            yield chunk_data
if __name__ == "__main__":
    file_path = 'D:/file_path.txt'
    for chunk in do_read_chunks(file_path):
        print(chunk)
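On Python 3.8+ the read-check-break pattern can be collapsed with the walrus operator; a minimal sketch under that version assumption (do_read_chunks_walrus is a hypothetical name):
def do_read_chunks_walrus(path: str, chunk_size: int = 1024 * 1024):
    with open(path, "r", encoding="utf-8") as file_object:
        # := assigns and tests in one expression; the loop ends
        # when read() returns an empty string at EOF
        while chunk_data := file_object.read(chunk_size):
            yield chunk_data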
Read all lines at once
def do_readlines(path: str):
    r"""
    :param path: file path
    """
    # readlines() loads the entire file into memory, so this is
    # only suitable for files that fit in RAM
    with open(path, "r", encoding="utf-8") as file_handler:
        lines = file_handler.readlines()
        for line in lines:
            print(line)          # with the trailing newline
            print(line.strip())  # with surrounding whitespace removed
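For small files, pathlib in the standard library gives a one-call equivalent; a minimal sketch (the path below is a placeholder reused from the example above):
from pathlib import Path

# read_text() loads the whole file; splitlines() drops the newlines
for line in Path('D:/file_path.txt').read_text(encoding='utf-8').splitlines():
    print(line)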
Read all bytes at once (binary mode)
def do_read_bytes(path: str):
    r"""
    :param path: file path
    """
    with open(path, "rb") as file_handler:
        data = file_handler.read()  # the whole file as a bytes object
        print(str(data, encoding='utf-8'))
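If the file is not guaranteed to be valid UTF-8, bytes.decode can be told how to handle bad input; a minimal sketch, assuming replacement characters are acceptable (do_read_text_safe is a hypothetical name):
def do_read_text_safe(path: str) -> str:
    with open(path, "rb") as file_handler:
        # errors="replace" substitutes U+FFFD for undecodable bytes
        # instead of raising UnicodeDecodeError
        return file_handler.read().decode("utf-8", errors="replace")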
Binary read via the file iterator
Inside the for loop, file_handler is treated as an iterator; Python applies buffered I/O and memory management automatically, so this also works for large files.
def do_read_bytes_itr(path: str):
    r"""
    :param path: file path
    """
    with open(path, "rb") as file_handler:
        # even in binary mode, iteration splits on b"\n"
        for data in file_handler:
            print(data)
            print(str(data, encoding='utf-8'))
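For binary data with no line structure, splitting on newlines is a poor fit; the two-argument form of the iter() built-in yields fixed-size chunks instead. A minimal sketch (do_read_bytes_chunks is a hypothetical name):
from functools import partial

def do_read_bytes_chunks(path: str, chunk_size: int = 8192):
    with open(path, "rb") as file_handler:
        # iter(callable, sentinel) keeps calling read(chunk_size)
        # until it returns the sentinel b"" at EOF
        for block in iter(partial(file_handler.read, chunk_size), b""):
            yield block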
Download a large file
import requests
import shutil

def download_file(url):
    local_filename = url.split('/')[-1]
    with requests.get(url, stream=True) as rps:
        rps.raise_for_status()
        # rps.raw is the undecoded urllib3 stream; ask it to apply
        # Content-Encoding (e.g. gzip) so the saved bytes match the
        # original file rather than the compressed transfer
        rps.raw.decode_content = True
        with open(local_filename, 'wb') as f:
            shutil.copyfileobj(rps.raw, f)
    return local_filename
def download_file_stream(url):
    local_filename = url.split('/')[-1]
    with requests.get(url, stream=True) as resp:
        resp.raise_for_status()
        with open(local_filename, 'wb') as f:
            # iter_content yields decoded chunks as they arrive,
            # so the whole body never has to fit in memory
            for chunk in resp.iter_content(chunk_size=4096):
                f.write(chunk)
    return local_filename
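A hypothetical usage sketch; the URL below is a placeholder, not a real endpoint:
if __name__ == "__main__":
    saved = download_file_stream("https://example.com/big_file.zip")
    print(f"saved as {saved}")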