Python遍历文件，正则选取汉字便于多语言支持

最新推荐文章于 2024-09-13 17:18:19 发布

振长策而御宇内

最新推荐文章于 2024-09-13 17:18:19 发布

阅读量1.2k

点赞数

分类专栏： Python 文章标签： python 正则 遍历 多语言支持

本文链接：https://blog.csdn.net/zhenyu5211314/article/details/50437699

版权

Python 专栏收录该内容

25 篇文章 0 订阅

订阅专栏

遍历文件夹中的所有文件，用正则找出包含中文的字符串，将其抽取到本地化表中并替换为对应的引用，便于多语言翻译。

#!/usr/bin/python
# -*- coding: utf-8 -*-
import re
import os
import sys
# Python 2-only workaround: reloading sys makes setdefaultencoding() callable
# again (site.py removes it at startup), so implicit str <-> unicode
# conversions anywhere in the process use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding('utf8')
# Echo the active default encoding so the run shows the override took effect.
print sys.getdefaultencoding()

#list files

def listFiles(dirPath):
	"""Collect the path of every file below dirPath, recursing into subfolders.

	Each returned entry is the directory reported by os.walk joined with the
	file name, in the order os.walk yields them. Directories themselves are
	not included.
	"""
	return [
		os.path.join(parent, name)
		for parent, _subdirs, names in os.walk(dirPath)
		for name in names
	]

# A quoted string literal ('...' or "..."); a backslash escapes the next char.
_QUOTED_LITERAL_RE = re.compile(r"(['\"])(?:\\.|.)*?\1")
# One or more characters in the range U+4E00..U+9FA5 (Chinese ideographs).
_CHINESE_RE = re.compile(u"[\u4e00-\u9fa5]+")


def _to_bytes(text):
	"""Return text encoded as UTF-8; byte strings are passed through as-is."""
	if isinstance(text, bytes):
		return text
	return text.encode('utf8')


def _localize_line(text, firstIndex):
	"""Replace the quoted literals containing Chinese in one decoded line.

	Lines containing "print" or "cclog" are returned untouched. Otherwise
	the i-th matching literal (counting from 0) is replaced, at its exact
	position, by 'localization.str<firstIndex + i>'.

	Returns a (newText, literals) tuple where literals lists the replaced
	quoted strings (quotes included) in order of appearance.
	"""
	if "print" in text or "cclog" in text:
		return text, []

	pieces = []
	literals = []
	tail = 0
	for match in _QUOTED_LITERAL_RE.finditer(text):
		literal = match.group(0)
		if not _CHINESE_RE.search(literal):
			continue
		# Splice by match span rather than str.replace(), so only this
		# occurrence is rewritten and repeated literals stay in sync with
		# the entries added to the localization table.
		pieces.append(text[tail:match.start()])
		pieces.append('localization.str%s' % (firstIndex + len(literals)))
		literals.append(literal)
		tail = match.end()
	pieces.append(text[tail:])
	return "".join(pieces), literals


def main():
	"""Extract Chinese string literals from the Lua files under ./retry1.

	For every "*.lua" file below <cwd>/retry1 (except paths containing
	"protobuf" and the files named CCLocalization / bit), each quoted
	literal that contains Chinese is replaced by 'localization.strN'.
	Files with at least one replacement are written to the same relative
	path under <cwd>/target2, prefixed with a require line, and their
	literals are appended as a '<fileName> = { ... },' table to
	./Localization.lua. Progress is written to ./log.txt.

	Both output files are created or overwritten; Localization.lua does
	not need to exist beforehand. All files are read and written as UTF-8
	bytes, so the result does not depend on the interpreter's default
	encoding, and source line endings are preserved exactly (generated
	lines always end in a bare newline).

	Raises:
		UnicodeDecodeError: if a processed Lua file is not valid UTF-8.
	"""
	fileDir = os.path.join(os.getcwd(), "retry1")
	fileTarget = os.path.join(os.getcwd(), "target2")
	fileList = listFiles(fileDir)

	with open("./log.txt", 'wb') as logData, open("Localization.lua", 'wb') as fData:
		def log(message):
			logData.write(_to_bytes(message + "\n"))

		fData.write(_to_bytes('local Localization = {\n'))

		for fileObj in fileList:
			baseName = os.path.basename(fileObj)
			log("\nopen file :" + baseName)

			nameParts = baseName.split('.')
			fileName = nameParts[0]
			# Files without any '.' have no extension; treat them as non-Lua
			# instead of failing on a missing second component.
			extension = nameParts[1] if len(nameParts) > 1 else ""
			# Only look below retry1, so the location of the working
			# directory can never trigger the "protobuf" exclusion.
			relPath = os.path.relpath(fileObj, fileDir)

			skip = (
				("protobuf" in relPath)
				or (extension != "lua")
				or (fileName == "CCLocalization")
				or (fileName == "bit")
			)
			if skip:
				log("do nothing ... ... ...")
				continue
			log("analysising ... ... ...")

			with open(fileObj, 'rb') as f:
				rawLines = f.readlines()

			strTable = []                       # rewritten source lines
			numStrTable = [fileName + ' = {\n']  # Lua table for this file
			strCounter = 1                      # next free strN index
			for num, rawLine in enumerate(rawLines, 1):
				newLine, literals = _localize_line(rawLine.decode('utf8'), strCounter)
				for literal in literals:
					log('replace at:%s' % num + " " + literal + " ------> " + 'localization.str%s' % strCounter)
					numStrTable.append('str%s' % strCounter + ' = ' + literal + ',\n')
					strCounter += 1
				strTable.append(newLine)
			numStrTable.append('},\n')

			# Nothing was replaced: emit neither a target file nor a table.
			if strCounter == 1:
				continue

			targetFile = os.path.join(fileTarget, relPath)
			targetDir = os.path.dirname(targetFile)
			if not os.path.isdir(targetDir):
				# makedirs: intermediate folders may not exist yet when no
				# file in a parent folder needed rewriting.
				os.makedirs(targetDir)

			with open(targetFile, 'wb') as targetF:
				targetF.write(_to_bytes('local localization = require("CCLocalization").' + fileName + "\n"))
				for line in strTable:
					targetF.write(_to_bytes(line))

			# write Localization data
			for entry in numStrTable:
				fData.write(_to_bytes(entry))

		fData.write(_to_bytes('}\nreturn Localization'))

# Run the extraction only when this file is executed as a script, not when
# it is imported as a module.
if __name__=='__main__':
	main() 

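# Usage: keep Localization.lua in the same directory as this script (instead.py), copy the LuaScripts folder into the retry1 folder next to it, then run the script; the modified files are written to the target2 folder in that same directory.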