#!/usr/bin/python
import sys,os,commands
USAGE = 'usage:*.py file2seg num_file des_folder'


def count_lines(path):
    """Return the number of lines in the file at *path*.

    The file is read in binary mode so no decoding is attempted. A final
    line without a trailing newline is counted as a line.
    """
    with open(path, 'rb') as src:
        return sum(1 for _ in src)


def part_path(dest, index):
    """Return the output path of part number *index*.

    If *dest* is an existing directory the part is placed inside it;
    otherwise *dest* is used as a plain filename prefix (so "out/" and
    "prefix_" both behave as simple string concatenation).
    """
    name = str(index) + ".txt"
    if os.path.isdir(dest):
        return os.path.join(dest, name)
    return dest + name


def split_file(src_path, num_files, dest):
    """Split *src_path* into about *num_files* parts, cutting at blank lines.

    A new part is started only when the running line number has crossed
    into the next slice AND the current line is blank, so records that are
    separated by blank lines are never cut in half. The blank line that
    triggers the cut becomes the first line of the new part. If no blank
    line shows up for a whole slice, that part number is skipped, so the
    part numbers may have gaps and fewer than *num_files* files may result.

    Args:
        src_path: path of the file to split.
        num_files: requested number of parts; must be >= 1.
        dest: existing directory, or filename prefix, for the parts
            (named "<dest>0.txt", "<dest>1.txt", ...).

    Returns:
        The list of part paths that were written, in order.

    Raises:
        ValueError: if *num_files* is smaller than 1.
        IOError/OSError: if the source or a part file cannot be opened.
    """
    if num_files < 1:
        raise ValueError('num_file must be a positive integer')

    total_lines = count_lines(src_path)
    # At least one line per slice, otherwise the division below would fail
    # for inputs that have fewer lines than requested parts.
    lines_per_part = max(1, total_lines // num_files)
    print(total_lines)
    print('per_file_line about: %d' % lines_per_part)

    last_index = num_files - 1
    current_index = 0
    written = [part_path(dest, current_index)]
    out = open(written[0], 'wb')
    try:
        with open(src_path, 'rb') as src:
            for line_no, line in enumerate(src, 1):
                # Clamp so a blank line near the end of the input cannot
                # open an extra part beyond the requested number.
                target_index = min(line_no // lines_per_part, last_index)
                if target_index != current_index and not line.strip():
                    # Blank line reached after crossing a slice boundary:
                    # this is a safe place to cut.
                    out.close()
                    current_index = target_index
                    written.append(part_path(dest, current_index))
                    out = open(written[-1], 'wb')
                out.write(line)
    finally:
        out.close()
    return written


def main(argv):
    """Command-line entry point; returns the process exit status.

    Prints the usage text when help is requested (exit status 0) or when
    too few arguments are given (exit status 1).
    """
    if len(argv) < 4 or argv[1] == 'help':
        print(USAGE)
        return 0 if argv[1:2] == ['help'] else 1
    try:
        num_files = int(argv[2])
        if num_files < 1:
            raise ValueError(argv[2])
    except ValueError:
        sys.stderr.write('num_file must be a positive integer: %r\n' % argv[2])
        print(USAGE)
        return 1
    split_file(argv[1], num_files, argv[3])
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))
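# File splitter: divides one file into k parts (modelled on the Linux `split` command, but only cuts where a blank line is encountered).
# Page footer from the source article: "latest recommended article published 2023-12-23 15:58:55".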