使用 Python 的多线程、多进程来处理硬盘测试中产生的批量数据,分析 duplicate 数量。代码可运行,结果正确,只是速度一般。
解析每一行数据的过程仅供本工程使用,不具有代表性。
from sys import argv
from os.path import exists
from multiprocessing import Pool
from time import sleep
import os
import multiprocessing
import threading
import time
"""refer link:
http://www.cnblogs.com/rollenholt/archive/2012/04/23/2466179.html
http://www.cnblogs.com/vamei/archive/2012/10/12/2721484.html
"""
def getFileList(directryName):
    """Return the names of the entries directly inside *directryName*.

    The result is whatever ``os.listdir`` returns when the path is an
    existing directory; for any other path the function returns ``None``.
    """
    if not os.path.isdir(directryName):
        return None
    return os.listdir(directryName)
def threadTest(str, lock):
    """Print *str* while holding *lock*.

    Args:
        str: the value to print (the name shadows the builtin; it is kept
            unchanged so existing keyword callers keep working).
        lock: a lock object supporting the context-manager protocol,
            e.g. ``threading.Lock()``.
    """
    # ``with`` guarantees the lock is released even if printing raises,
    # which the bare acquire()/release() pair did not.
    with lock:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(str)
#Multi_thread
def multiThread():
    """Smoke test: run five threads that each print their index via threadTest.

    All threads share one ``threading.Lock``; the function returns once
    every thread has finished.
    """
    shared_lock = threading.Lock()
    workers = [
        threading.Thread(target=threadTest, args=(index, shared_lock))
        for index in range(5)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
def multiThreadForDuplicateCheck():
    """Run DuplicateCheck on every ``.txt`` file under a directory, one thread per file.

    Prompts on stdin for the directory name, walks it recursively with
    ``os.walk``, starts one thread per file whose extension is exactly
    ``.txt``, waits for all threads and prints the elapsed wall-clock time.
    """
    # NOTE: raw_input is the Python 2 spelling, matching the rest of the file.
    directory = raw_input("Enter directory name\n")

    # Start timing after the prompt so the user's typing time is not counted.
    # time.time() is wall-clock on every platform; time.clock() was CPU time
    # on Unix and no longer exists in Python 3.8+.
    start = time.time()

    record = []
    for (path, dirs, files) in os.walk(directory):
        for fileName in files:
            if os.path.splitext(fileName)[1] != '.txt':
                continue
            # os.path.join instead of a hard-coded '\\' keeps this portable.
            fullName = os.path.join(path, fileName)
            thread = threading.Thread(target=DuplicateCheck, args=(fullName,))
            thread.start()
            record.append(thread)

    for thread in record:
        thread.join()

    elapsed = time.time() - start
    # Format explicitly: print (a, b) shows a tuple repr on Python 2.
    print("Muliti thread Time used: %s" % (elapsed,))
#Multi_process
def multiProcessForDuplicateCheck():
    """Run DuplicateCheck on every ``.txt`` file under a directory, one process per file.

    Prompts on stdin for the directory name, walks it recursively with
    ``os.walk``, starts one ``multiprocessing.Process`` per file whose
    extension is exactly ``.txt``, waits for all of them and prints the
    elapsed wall-clock time.
    """
    # NOTE: raw_input is the Python 2 spelling, matching the rest of the file.
    directory = raw_input("Enter directory name\n")

    # Start timing after the prompt so the user's typing time is not counted.
    # time.time() is wall-clock; time.clock() was CPU time of the parent only
    # on Unix (so it ignored the children) and is gone in Python 3.8+.
    start = time.time()

    record = []
    for (path, dirs, files) in os.walk(directory):
        for fileName in files:
            if os.path.splitext(fileName)[1] != '.txt':
                continue
            # os.path.join instead of a hard-coded '\\' keeps this portable.
            fullName = os.path.join(path, fileName)
            process = multiprocessing.Process(target=DuplicateCheck, args=(fullName,))
            process.start()
            record.append(process)

    for process in record:
        process.join()

    elapsed = time.time() - start
    # Format explicitly: print (a, b) shows a tuple repr on Python 2.
    print("Muliti process Time used: %s" % (elapsed,))
#duplicate check function
def DuplicateCheck(fileName):
    """Parse one log file and print its write/read G-log counts and their ratio.

    The file is read line by line using fixed character columns:

    * the serial number is characters 7-15 of the first line;
    * every line containing ``"F:47"`` counts as a write entry, and a key
      built from columns 0-11, 13-14 and 23-25 is remembered;
    * the first two lines containing ``"0x2e"`` mark the end of the write
      phase (the original notes call them "write increase done" and
      "write decrease done");
    * after that, until a line containing ``"0x2b"`` is seen, each line
      whose columns 23-25 equal ``'10'`` is matched against the remembered
      write entries; a match is counted once and the entry is removed.

    Prints ``SN``, both counters and ``100 * read / write`` (0 when there
    were no write entries). Returns ``None``.

    Args:
        fileName: path of the text log to analyse.
    """
    SN = ''               # stays empty for an empty file instead of being unbound
    asatWriteDone = 0     # number of "0x2e" markers seen so far, capped at 2
    asatWriteGLogCnt = 0
    asatWriteCylList = []
    asatReadDone = 0      # becomes 1 once "0x2b" is seen in the read phase
    asatReadGLogCnt = 0

    # ``with`` closes the file even if a malformed line raises below.
    with open(fileName, 'r') as f:
        for lineNo, eachLine in enumerate(f):
            if lineNo == 0:
                SN = eachLine[7:16].strip()

            cyl = eachLine[0:12].strip()
            sub = eachLine[13:15].strip()   # presumably the head number -- confirm
            flag = eachLine[23:26].strip()

            if "F:47" in eachLine:
                asatWriteGLogCnt += 1
                asatWriteCylList.append(cyl + sub + flag)

            if "0x2e" in eachLine and asatWriteDone < 2:
                asatWriteDone += 1

            if asatWriteDone != 2:
                continue
            if "0x2b" in eachLine:
                asatReadDone = 1
            if asatReadDone != 0 or flag != '10':
                continue

            # Stored keys are split back by position: everything but the last
            # three characters is the cylinder, the third-from-last is the
            # one-character middle field. This assumes that field is one
            # character wide and the trailing field two.
            for cylIndex, entry in enumerate(asatWriteCylList):
                if cyl == entry[0:-3] and sub == entry[-3]:
                    asatReadGLogCnt += 1
                    # Each write entry may be matched only once.
                    del asatWriteCylList[cylIndex]
                    break

    if asatWriteGLogCnt != 0:
        # 100.0 forces true division; on Python 2 the integer form truncated
        # the ratio so the four printed decimals were always zero.
        ratio = 100.0 * asatReadGLogCnt / asatWriteGLogCnt
    else:
        ratio = 0.0
    print('SN:%s,Write G log:%d Read G log:%d Ratio:%.4f%%\n' % (SN, asatWriteGLogCnt, asatReadGLogCnt, ratio))
#directry = raw_input("Enter directry:D:/LXK/Your directry\n")
#print getFileList(directry)
if __name__ == "__main__":
    # An earlier, unfinished experiment dispatched DuplicateCheck through a
    # 4-worker multiprocessing.Pool (one apply_async call per file); it was
    # never enabled.
    #
    # Other entry points that can be swapped in here:
    #   multiThread()                    -- thread/lock smoke test
    #   multiProcessForDuplicateCheck()  -- one process per file
    #
    # The multi-thread run is the active one; the original note describes it
    # as "lower than" (presumably slower than) the multi-process run.
    multiThreadForDuplicateCheck()