# Python 基础学习
# Python 大文件分割 - 增加进度条
# -*- coding: utf-8 -*-
from datetime import datetime
class _NoProgress:
    """Fallback progress reporter used when ``tqdm`` cannot be imported.

    Provides only the pieces of the ``tqdm`` interface that this module
    uses: the context-manager protocol and ``update``.
    """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Never suppress an exception raised inside the ``with`` body.
        return False

    def update(self, n=1):
        """Accept a progress increment and ignore it."""


def _progress(total):
    """Return a progress-bar context manager sized for ``total`` steps."""
    try:
        # Imported lazily so the progress bar stays a cosmetic, optional
        # dependency instead of a hard requirement for splitting a file.
        import tqdm
    except ImportError:
        return _NoProgress()
    return tqdm.tqdm(total=total)


def _write_chunk(path, lines):
    """Write ``lines`` to ``path``, stripped and joined by single newlines."""
    # 'w' (not 'a+'): a re-run must replace earlier output rather than glue
    # new data onto the last line of an existing file.
    with open(path, mode='w', newline='', encoding='utf-8') as f_target:
        with _progress(len(lines)) as bar:
            for index, line in enumerate(lines):
                text = line.strip()
                # The separator goes *before* every line but the first, so
                # the file never ends with a trailing newline.
                f_target.write(text if index == 0 else '\n' + text)
                bar.update(1)


def main(source_dir, target_dir, chunk_size=300000):
    """Split a large UTF-8 text file into numbered chunk files.

    The source is read ``chunk_size`` lines at a time. Each chunk is written
    to ``target_dir + "NNN.txt"`` where ``NNN`` is a 1-based, zero-padded
    counter (``001``, ``002``, ...). Every line is stripped of surrounding
    whitespace, lines are separated by a single newline character, and no
    trailing newline is written. Existing chunk files are overwritten.

    Args:
        source_dir: Path of the text file to split.
        target_dir: String prefix for the output files. It is concatenated
            with the file name as-is, so a directory must end with a path
            separator (for example ``'F:/'``).
        chunk_size: Maximum number of source lines per output file.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    import time
    from itertools import islice

    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    print("开始。。。。。")
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))

    with open(source_dir, mode='r', newline='', encoding='utf-8') as f_source:
        name = 1
        while True:
            # Pull at most one chunk into memory. The final, shorter chunk
            # takes the same path, and an exhausted source yields an empty
            # list so no empty trailing file is created.
            chunk = list(islice(f_source, chunk_size))
            if not chunk:
                break
            label = str(name).zfill(3)
            print(label)
            _write_chunk(target_dir + label + ".txt", chunk)
            name += 1

    print("完成。。。。。")
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
# Script entry point: split the sample file into numbered chunks on drive F:.
if __name__ == "__main__":
    source_dir = 'F:/250.txt'
    # Used as a plain string prefix for the output names, hence the trailing
    # slash.
    target_dir = 'F:/'
    main(source_dir, target_dir)
# 参考:http://www.zzvips.com/article/159519.html