#!/usr/bin/python
#coding=utf-8
import os
import re
#
# Ask which day's data to merge. The answer names a sub-directory of baseDir.
# Surrounding whitespace is stripped so a stray space or newline does not
# produce a non-existent directory name.
dataDir = raw_input("Please enter data dir(eg:20150419):").strip()
# Maximum size of a single merged file, in megabytes.
fileSize = 200
# Name of the source data directory (for example '20150419').
dayDir = dataDir
baseDir = "/home/prod/Huqin/data"
# Full path of the source data directory, derived from baseDir so the source
# and result locations cannot drift apart.
fullDir = baseDir + "/" + dayDir
# os.listdir returns entries in arbitrary order; sorting makes the merge
# order reproducible from run to run.
listDir = sorted(os.listdir(fullDir))
# Directory that receives the merged files.
resultDir = baseDir + '/result'
# Create the result directory on first use.
if not os.path.exists(resultDir):
    os.mkdir(resultDir)
#++++++++++++++++++ Functions +++++++++++++++++++++
# Size check / rollover helper for the numbered output files.
def isBig(fileDir, maxSizeMb=None):
    """Return the path that new data should be appended to.

    ``fileDir`` is expected to end in a decimal sequence number, for
    example ``.../2015041907.txt3``.  While the file is smaller than the
    limit it is returned unchanged.  Once it has reached the limit, the
    file carrying the next sequence number is created if it does not exist
    yet, and that path is returned instead.

    Args:
        fileDir: Path of the current output file; it must exist.
        maxSizeMb: Size limit in megabytes (1024 * 1024 bytes).  When
            omitted, the module-level ``fileSize`` setting is used.

    Returns:
        ``fileDir`` itself, or the path of the next numbered file.

    Raises:
        ValueError: A rollover is needed but ``fileDir`` does not end in
            digits.
        OSError: ``fileDir`` cannot be inspected or the next file cannot
            be created.
    """
    if maxSizeMb is None:
        maxSizeMb = fileSize
    if os.path.getsize(fileDir) < maxSizeMb * 1024 * 1024:
        return fileDir

    print("Big")
    # Take the whole trailing run of digits as the sequence number; looking
    # at the last character only would turn "...txt19" into "...txt110".
    stem = fileDir.rstrip("0123456789")
    digits = fileDir[len(stem):]
    if not digits:
        raise ValueError("no numeric suffix in %r" % (fileDir,))
    nextFile = stem + str(int(digits) + 1)
    # Append mode creates the file when it is missing and leaves an existing
    # one untouched, so a repeated call does not fail the way os.mknod does
    # on an already existing path.
    open(nextFile, "a").close()
    return nextFile
#++++++++++++++++++++++++++++++++++++++++++++
def _appendFile(srcPath, dstPath, chunkBytes=1024 * 1024):
    """Append the bytes of srcPath to dstPath, creating dstPath if needed.

    The data is copied in chunks of ``chunkBytes`` so that a large input
    file is never held in memory as a whole, and both handles are closed
    even when the copy fails part-way.
    """
    with open(srcPath, "rb") as src:
        with open(dstPath, "ab") as dst:
            while True:
                chunk = src.read(chunkBytes)
                if not chunk:
                    break
                dst.write(chunk)


# Merge the day's files hour by hour.  Every source file whose name contains
# "<dayDir><HH>" is appended to "<resultDir>/<dayDir><HH>.txt<N>", where N
# starts at 1 for each hour and is increased whenever the current output file
# has reached the size limit.
_limitBytes = fileSize * 1024 * 1024
for hour in range(24):
    fname = dayDir + "%02d" % hour
    outPrefix = resultDir + "/" + fname + ".txt"
    flag = 1
    for fileName in listDir:
        # Literal substring match: the directory name comes from user input
        # and must not be interpreted as a regular expression.
        if fname not in fileName:
            continue
        # Skip past every output file that is already full.  The sequence
        # number is tracked in `flag` rather than parsed from the last
        # character of the path, so it stays correct beyond 19.
        while (os.path.exists(outPrefix + str(flag))
               and os.path.getsize(outPrefix + str(flag)) >= _limitBytes):
            flag += 1
        _appendFile(fullDir + "/" + fileName, outPrefix + str(flag))
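# A self-written Python script that merges files by hour, keeping each merged file under 200 MB.
# (Page footer of the original post: latest recommended article published 2024-08-12 14:30:00.)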