前言
拆分txt文件
- 根据行数拆分txt文件
- 根据文件大小拆分txt文件
- 提取txt文件的奇数行,偶数行
一、根据行数拆分
def spilt_1(file_path, output_file, limit):
    """
    Split a text file into numbered parts of at most ``limit`` lines each.

    Parts are written to ``output_file + "<n>.txt"`` with ``n`` starting at 1.
    Every input line ends up in exactly one part, in the original order; the
    last part may hold fewer than ``limit`` lines. An empty input file
    produces no output files.

    :param file_path: path of the UTF-8 text file to split
    :param output_file: prefix (leading part) of the generated file names
    :param limit: maximum number of lines per output file, must be >= 1
    :return: None
    :raises ValueError: if ``limit`` is smaller than 1
    """
    if limit < 1:
        raise ValueError("limit must be a positive integer")

    part_index = 1  # numeric suffix of the next output file
    pending = []  # lines collected for the part currently being filled
    with open(file_path, 'r', encoding='UTF-8') as fp:
        for line in fp:
            pending.append(line)
            if len(pending) < limit:
                continue
            # The buffer is full: flush all of it so no line is dropped.
            with open(output_file + str(part_index) + '.txt', 'w', encoding='UTF-8') as fo:
                fo.writelines(pending)
            pending = []
            part_index += 1

    # Flush the final, shorter group (if any) in full.
    if pending:
        with open(output_file + str(part_index) + '.txt', 'w', encoding='UTF-8') as fo:
            fo.writelines(pending)
    print("done!")
二、根据文件大小拆分
def spilt_2(file_path, output_file, limit, *, chunks_per_file=1024):
    """
    Split a text file into numbered parts of bounded size.

    The input is read ``limit`` characters at a time, and each output file
    receives up to ``chunks_per_file`` such reads, i.e. at most
    ``limit * chunks_per_file`` characters. Parts are written to
    ``output_file + "<n>.txt"`` with ``n`` starting at 1. An empty input file
    produces no output files.

    The file is opened in text mode, so ``limit`` counts characters rather
    than bytes; with multi-byte UTF-8 text the on-disk size of a part can be
    larger than the character count.

    :param file_path: path of the UTF-8 text file to split
    :param output_file: prefix (leading part) of the generated file names
    :param limit: number of characters fetched per read
        (e.g. limit=100000 with the default chunks_per_file gives parts of
        up to 100000 * 1024 characters)
    :param chunks_per_file: number of reads written into each output file,
        must be >= 1 (default 1024, the previously hard-coded value)
    :return: None
    :raises ValueError: if ``chunks_per_file`` is smaller than 1
    """
    if chunks_per_file < 1:
        # With zero iterations the read position would never advance and the
        # outer loop would create empty files forever.
        raise ValueError("chunks_per_file must be a positive integer")

    part_index = 1  # numeric suffix of the next output file
    with open(file_path, 'r', encoding='UTF-8') as fp:
        read_data = fp.read(limit)
        while read_data != '':
            with open(output_file + str(part_index) + '.txt', 'w', encoding='UTF-8') as fo:
                for _ in range(chunks_per_file):
                    fo.write(read_data)
                    # Fetch the next chunk before deciding whether to go on,
                    # so a new part is only opened when there is data for it.
                    read_data = fp.read(limit)
                    if read_data == '':
                        break
            part_index += 1
    print("done!")
三、提取奇偶行
def spilt_3(path, odd_path, even_path):
    """
    Copy the odd-numbered and even-numbered lines of a text file into two files.

    Lines are counted from 1: lines 1, 3, 5, ... go to ``odd_path`` and lines
    2, 4, 6, ... go to ``even_path``. Each line is written with leading and
    trailing whitespace stripped and a single newline appended; blank lines
    are kept (they are written as empty lines). The input is read completely
    before the output files are opened.

    :param path: path of the UTF-8 text file to split
    :param odd_path: destination file for the odd-numbered lines
    :param even_path: destination file for the even-numbered lines
    :return: None
    """
    with open(path, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Context managers guarantee both outputs are closed even if a write fails.
    with open(odd_path, 'w', encoding='utf-8') as odd_file:
        with open(even_path, 'w', encoding='utf-8') as even_file:
            for num, line in enumerate(lines):
                # enumerate() is 0-based, so an even index is an odd line number.
                target = odd_file if num % 2 == 0 else even_file
                print(line.strip(), file=target)