Python 基于正则表达式的多进程（multiprocessing）文本替换功能实现

最新推荐文章于 2024-09-15 22:31:42 发布

bairao8167

最新推荐文章于 2024-09-15 22:31:42 发布

阅读量117

点赞数

文章标签： python

原文链接：http://www.cnblogs.com/fangfu/p/3974270.html

版权

#!/usr/bin/env python

import os
import os.path
import sys
import re
import shutil
import csv
from multiprocessing import Pool #support muti thread map reduce function


# Characters that have a special meaning inside a regular expression.
specialChars = [
    '.', '^', '$', '*', '+', '?',
    '\\', '[', ']', '|', '(', ')',
]

# Upper bound on the number of workers created for the multiprocessing pool.
g_max_thread_pool_size = 30

def dealSpecialChars(str):
    """Return *str* escaped so that it matches literally when used as a regex.

    Wrapping each metacharacter in a character class (``[c]``) is not safe:
    it yields an invalid pattern for ``^`` and for a backslash.  ``re.escape``
    escapes every metacharacter correctly.

    Args:
        str: Plain text to convert into a literal regular-expression pattern.

    Returns:
        A pattern string that matches exactly the characters of *str*.
    """
    return re.escape(str)

def readDictsFromCsv(filePath):
    """Load the ``srcName -> newName`` replacement table from a CSV file.

    The first column of each row is the text/pattern to look for and the
    second column is its replacement; ``,`` is the delimiter and ``|`` the
    quote character.  Columns beyond the second are not used.

    Rows without a second column, or with an empty first column, are skipped:
    a ``None`` replacement makes ``re.sub`` fail and an empty pattern matches
    at every position of the text.

    Args:
        filePath: Path of the CSV file to read.

    Returns:
        dict mapping every ``srcName`` to its ``newName``.  When a name occurs
        more than once, the last row wins.
    """
    # The csv module expects a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        csvfile = open(filePath, 'rb')
    else:
        csvfile = open(filePath, 'r', newline='')

    dicts = dict()
    with csvfile:
        dictsReader = csv.DictReader(csvfile, fieldnames=['srcName', 'newName'], restkey=None, delimiter=',', quotechar='|')
        for d in dictsReader:
            srcName = d['srcName']
            newName = d['newName']
            if not srcName or newName is None:
                continue  # incomplete row, nothing usable to replace
            dicts[srcName] = newName
    return dicts

# Extensions (lower-case, leading dot) of presumably binary file formats.
# NOTE(review): not referenced by any code visible in this file.
BinaryExtList = [
    '.bmp', '.avi', '.res', '.xls', '.doc',
    '.dll', '.lib', '.bpl', '.exe', '.chm',
]

# Built-in pattern -> replacement table.
replaceDicts = {r"RNC820V400R008C00SPC500": r"93"}

def ApplyReplace(str, keys, replaceDicts):
    """Apply every pattern in *keys* to *str*, in order, and return the result.

    Each pattern is substituted with ``replaceDicts[pattern]`` via ``re.sub``.
    Substitutions are chained: a later pattern sees the text produced by the
    earlier ones.  A pattern that cannot be applied (invalid regex, missing
    or non-string replacement) is reported and skipped so that the remaining
    patterns are still applied.

    Args:
        str: Text to transform.
        keys: Iterable of regular-expression patterns, in application order.
        replaceDicts: Mapping from pattern to replacement string.

    Returns:
        The transformed text; *str* unchanged when nothing matched.
    """
    ret = str
    for pattern in keys:
        try:
            ret = re.sub(pattern, replaceDicts[pattern], ret)
        except (re.error, KeyError, TypeError) as err:
            # Best effort: report the offending pattern and keep going.
            print("Unexpected error ApplyReplace(str, keys, replaceDicts): %r pattern=%r (%s)"
                  % (str, pattern, err))
    return ret

def NeedReplace(str, keys):
    """Tell whether at least one pattern in *keys* matches somewhere in *str*.

    A pattern that cannot be evaluated (invalid regex, non-string pattern) is
    reported and skipped; the remaining patterns are still tried.

    Args:
        str: Text to inspect.
        keys: Iterable of regular-expression patterns.

    Returns:
        True as soon as one pattern matches, otherwise False.
    """
    for pattern in keys:
        try:
            if re.search(pattern, str):
                return True
        except (re.error, TypeError) as err:
            # Best effort: report the offending pattern and keep going.
            print("Unexpected error NeedReplace(str, keys): %r pattern=%r (%s)"
                  % (str, pattern, err))
    return False
    

# Extensions (lower-case, leading dot) processed when no filter is given.
defaultExtList = ['.txt', '.xml']


def findFile(srcDir, filter=None):
    """Recursively collect the files below *srcDir* whose extension is allowed.

    Args:
        srcDir: Directory to scan.
        filter: Collection of lower-case extensions including the dot
            (e.g. ``['.txt']``).  ``None`` selects ``defaultExtList``.

    Returns:
        A flat list of paths (each built from *srcDir*) of every matching
        file in *srcDir* and all of its sub-directories.  Extensions are
        compared case-insensitively.
    """
    if filter is None:
        filter = defaultExtList
    filelist = []
    for name in os.listdir(srcDir):
        # Build the path from the directory being scanned, not from a global,
        # so that recursion into sub-directories yields correct paths.
        fullPath = os.path.join(srcDir, name)
        if os.path.isdir(fullPath):
            # extend (not append) keeps the result flat; forward the filter so
            # sub-directories obey the same extension list.
            filelist.extend(findFile(fullPath, filter))
        elif os.path.splitext(fullPath)[1].lower() in filter:
            filelist.append(fullPath)

    return filelist
                

def ReplaceAllStrInFile(file, dicts, keys, filter=None):
    """Rewrite *file* in place, applying the *keys* patterns to every line.

    The transformed content is first written to ``<file>temp``; only after
    both files are closed is the original removed and the temp file renamed
    over it.

    Args:
        file: Path of the file to convert.
        dicts: Mapping from pattern to replacement string.
        keys: Patterns (keys of *dicts*) to apply to this file.
        filter: Collection of lower-case extensions (with dot) that may be
            converted.  ``None`` selects ``defaultExtList``.

    Returns:
        True when the file was rewritten, False when reading, writing or
        swapping the files failed, None when the file's extension is not in
        *filter* (the file is left untouched).
    """
    if filter is None:
        filter = defaultExtList
    print('ReplaceAllStrInFile:file- %s begin!' % (file,))

    fullPath = file
    if os.path.splitext(fullPath)[1].lower() not in filter:
        return None

    tempPath = fullPath + 'temp'
    try:
        # 'with' guarantees both handles are closed even when a read or write
        # fails half-way through.
        with open(fullPath, 'r') as srcFile:
            with open(tempPath, 'w') as destFile:
                for line in srcFile:
                    if NeedReplace(line, keys):
                        line = ApplyReplace(line, keys, dicts)
                    destFile.write(line)
        # os.rename does not overwrite an existing file on every platform,
        # so the original is removed first.
        os.remove(fullPath)
        os.rename(tempPath, fullPath)
    except (IOError, OSError, UnicodeError) as err:
        print('convert file: %s failed! (%s)' % (fullPath, err))
        # Discard the temp copy only while the original still exists; if the
        # original is already gone the temp file is the sole copy of the data.
        if os.path.exists(fullPath) and os.path.exists(tempPath):
            try:
                os.remove(tempPath)
            except OSError:
                pass  # best-effort cleanup; the failure was already reported
        return False

    print('convert file: %s success!' % (fullPath,))
    return True

def ReplaceAllStrInFileByRows(srcfile, csvfilePath, maxRow=10):
    """Convert *srcfile* using the CSV replacement table, *maxRow* keys at a time.

    The table is loaded with ``readDictsFromCsv``; its keys are ordered from
    longest to shortest (presumably so that a long name is replaced before a
    shorter name contained in it) and handed to ``ReplaceAllStrInFile`` in
    consecutive batches of at most *maxRow* keys.  The keys of each batch are
    printed after the batch has been processed.

    Args:
        srcfile: Path of the file to convert.
        csvfilePath: Path of the CSV file holding the replacement table.
        maxRow: Maximum number of keys per batch; must be at least 1.

    Returns:
        False when *maxRow* is smaller than 1 (nothing is done), otherwise
        True once every batch has been processed.
    """
    if maxRow < 1:
        return False
    dicts = readDictsFromCsv(csvfilePath)
    keys = sorted(dicts.keys(), key=len, reverse=True)
    # Stepping by maxRow avoids integer/float division differences between
    # Python versions and never produces an empty trailing batch.
    for start in range(0, len(keys), maxRow):
        batch = keys[start:start + maxRow]
        ReplaceAllStrInFile(srcfile, dicts, batch)
        print(' '.join(batch))
    return True

def f(x):
    """Pool work function: process one ``[file path, csv path]`` work item.

    Returns whatever ``ReplaceAllStrInFileByRows`` returns for that item.
    """
    srcfile = x[0]
    csvfilePath = x[1]
    return ReplaceAllStrInFileByRows(srcfile, csvfilePath)

if __name__ == "__main__":
    # Command-line entry point:
    #   argv[1] - CSV file holding the replacement table
    #   argv[2] - directory whose files are converted
    args = sys.argv
    if len(args) != 3:
        # Raw string: otherwise "\t" in the sample paths is printed as a tab.
        print(r'''
        usage: python testcsv.py D:\ss\temp.csv D:\ss\test\
        ''')
        sys.exit(1)
    csvfilePath = args[1]
    srcPath = args[2]

    filelist = findFile(srcPath)
    if not filelist:
        # A pool cannot be created with zero workers.
        print('no file to convert under: %s' % (srcPath,))
        sys.exit(0)

    # One work item per file: [file path, csv path].
    dataItems = [[file, csvfilePath] for file in filelist]

    # Never start more workers than there are files to process.
    pool_size = min(g_max_thread_pool_size, len(filelist))
    pool = Pool(processes=pool_size)
    try:
        pool.map(f, dataItems)
    finally:
        # Release the worker processes whether or not the map succeeded.
        pool.close()
        pool.join()