cocos扫描lua文件中中文工具

扫描中文工具

将 lua 代码中的中文字符串导出到 Excel 中。Excel 表包含三列:ID、String、Count。

使用
  1. 修改root,指定自己需要的目录,如 root = "c:/"
  2. python版本:2.7
  3. 强烈推荐 regexr,能看到正则表达式的错误
代码
# Export the Chinese strings found in Lua code to Excel; the sheet has ID, STRING and COUNT columns.
# -*- coding:utf-8-*-
# Handles Chinese characters found in the scanned source files.
import codecs
import re
import os
import sys
import chardet
import xlsxwriter
# Python 2-only: make utf-8 the process-wide default codec used for implicit
# str/unicode conversions (the usage notes above target Python 2.7).
reload(sys)
sys.setdefaultencoding('utf-8')

# Registry of every extracted string: maps the string text to its StringAttr.
m_str = {}

extensions = {'.lua'} # file extensions that are scanned

ignoresDir = {".git", ".gitattributes", "config"} # directory names that are skipped
ignoresFile = {}# file names that are skipped

# Ignore rules: text matching any of these is masked before extraction
ignorerules = {ur'print\(\".*[\u4e00-\u9fa5]+.*\".*\)',  # ignore Chinese inside print(...)
               ur'\-\-.*[\u4e00-\u9fa5]+.*'} # ignore Chinese inside comments
# Match rules, applied in order. Each entry is [pattern, n]; n characters are
# stripped from both ends of a match to obtain the string content.
rules = ([ur'\"[\u4e00-\u9fa5]+\"', 1], # select "Chinese"
         [ur'\[\[[\u4e00-\u9fa5]+\]\]', 2], # select [[Chinese]]
         [ur'\[\[.*[\u4e00-\u9fa5]+.*\]\]', 2], # select [[da Chinese da]]
         [ur'\".*[\u4e00-\u9fa5]+.*\"', 1]) # select "da Chinese da"

# Only referenced from commented-out code in the visible source.
extrules = {ur'[\u4e00-\u9fa5]+',}

index = 100000  # ID counter start; getString_ID increments it before returning

class StringAttr:
    """Record for one string: its text, its assigned ID and a counter."""

    # Class-level defaults; __init__ overrides all three on every instance.
    _string = ""
    _id = -1
    _count = 0

    def __init__(self, string, ID):
        # The counter always starts at zero; callers bump it themselves.
        self._string, self._id, self._count = string, ID, 0


def main():
    """Scan the configured root directory, then export the results to Excel."""
    # root = raw_input("Enter a path: ")
    root = r''  # edit this to the directory that should be scanned
    root = root.replace("\r", "")
    # Resolve to a canonical absolute path before walking the tree.
    foreachPath(os.path.realpath(root))
    output2Excel()

# Walk a directory tree
def foreachPath(curPath):
    """Visit every entry directly under ``curPath``.

    Regular files are handed to ``oprateFile``; every other entry is handed
    to ``oprateDir``, which decides whether to recurse back into this
    function.

    Args:
        curPath: Path of the directory to list.

    Returns:
        None. A missing path or a path that is not a directory is reported
        on stdout and skipped instead of raising from ``os.listdir``.
    """
    if not os.path.exists(curPath):
        print("the file is not exists! file path = " + curPath)
        return
    if not os.path.isdir(curPath):
        # Reached when the root is a plain file, or when a non-regular entry
        # (FIFO, socket, ...) is passed down by oprateDir; os.listdir would
        # raise on it.
        print("the path is not a directory! file path = " + curPath)
        return

    for entryName in os.listdir(curPath):
        fullPath = os.path.join(curPath, entryName)
        if os.path.isfile(fullPath):
            oprateFile(fullPath, entryName)
        else:
            oprateDir(fullPath, entryName)


def oprateDir(fullPath, fileName):
    """Recurse into ``fullPath`` unless ``fileName`` is in ``ignoresDir``."""
    if any(ignore == fileName for ignore in ignoresDir):
        return
    foreachPath(fullPath)


def oprateFile(fullPath, fileName):
    """Process ``fullPath`` when its extension is listed in ``extensions``.

    Files whose name appears in ``ignoresFile`` are skipped entirely.
    """
    if any(ignore == fileName for ignore in ignoresFile):
        return

    fileType = os.path.splitext(fullPath)[1]
    for ext in extensions:  # match the extension
        if ext == fileType:
            marryChinese(fullPath)


# Match Chinese text
def marryChinese(fullPath):
    """Replace Chinese string literals in one file with GET_STRING(id) calls.

    Steps:
      1. Read the file; ``result`` keeps the untouched text while
         ``fileData`` is a working copy used only for matching.
      2. Mask every match of ``ignorerules`` in ``fileData`` with a dash
         filler so the extraction rules cannot see it.
      3. For each entry of ``rules`` (in order), register every match in
         ``m_str`` (new ID on first sight, counter bumped every time) and
         remember the matched literal together with its ID.
      4. Apply the remembered replacements to ``result`` and, if there was
         at least one, write ``result`` back to ``fullPath``.
    """
    print("marryChinese = {}\n".format(fullPath))
    fileData = openFile(fullPath)
    result = fileData

    for rule in ignorerules:
        pattern = re.compile(rule)
        searchs = pattern.findall(fileData)
        if searchs:
            # This assignment has no effect: flag is reset to False below
            # before it is ever read.
            flag = True
            for search in searchs:
                # Filler text that replaces the ignored span in the working copy.
                get_string_id = u"--------------------------------------"
                fileData = fileData.replace(search, get_string_id)

    # Ordered record of replacements: running index -> StringAttr(matched literal, id).
    exchanges = {}
    i = 0
    for rule in rules:
        pattern = re.compile(rule[0])
        searchs = pattern.findall(fileData)
        if searchs:
            
            for search in searchs:
                # Strip rule[1] delimiter characters from the front, then from the back.
                chineseStr = search[rule[1]:len(search)]
                chineseStr = chineseStr[0:len(chineseStr) - rule[1]]
                if not m_str.has_key(chineseStr):
                    m_str[chineseStr] = StringAttr(chineseStr, getString_ID())
                temp = m_str[chineseStr]
                temp._count = temp._count + 1
                m_str[chineseStr] = temp
                get_string_id = u"GET_STRING({})".format(
                    temp._id)  # drop text already consumed by a higher-priority rule
                fileData = fileData.replace(search, get_string_id)
                exchanges[i] = StringAttr(search, temp._id)
                i = i + 1
    # From here on, flag records whether any replacement was applied to result.
    flag = False
    # for rule in extrules:
    #     flag = True
    #     pattern = re.compile(rule)
    #     searchs = pattern.findall(fileData)
    #     if searchs:
    #         for search in searchs:
    #             result = result.replace(search, "-----------------------------------------")
    
    # NOTE(review): result never had the ignore-rule masking applied, so
    # replace() below also rewrites identical literals that sit inside spans
    # matched by ignorerules - confirm this is intended.
    for strKey in exchanges:  # this is where the file content is really changed
        flag = True
        strV = exchanges[strKey]
        get_string_id = u"GET_STRING({})".format(strV._id)
        result = result.replace(strV._string, get_string_id)
        print("replace path = {} old = {} new = {}\n".format(
            fullPath, strV._string, get_string_id))

    if flag:
        output2File(fullPath, result)


def opStr(str):
    """Register ``str`` in ``m_str`` with a fresh ID unless already present."""
    if str not in m_str:
        m_str[str] = StringAttr(str, getString_ID())


def getString_ID():
    """Advance the module-level ``index`` counter and return its new value."""
    global index
    index += 1
    return index


def openFile(fullPath):
    """Read ``fullPath`` and return its whole content as text.

    Args:
        fullPath: Path of the file to read.

    Returns:
        The file content. A byte string returned by ``read()`` is decoded
        as UTF-8.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the bytes read are not valid UTF-8.
    """
    # `with` closes the handle even when read() raises; the original only
    # closed it after a successful read and decode.
    with open(fullPath) as source:
        filedata = source.read()
    # Python 2's open() yields a byte string that must be decoded. Skip the
    # decode when read() already produced text so the helper does not fail
    # on an interpreter whose open() returns decoded text.
    if not isinstance(filedata, type(u'')):
        filedata = filedata.decode('utf-8')
    print("openFile = {}\n".format(fullPath))
    return filedata


def output2File(fullPath, fileData):
    """Overwrite ``fullPath`` with ``fileData``.

    Args:
        fullPath: Path of the file to (re)write; existing content is truncated.
        fileData: Text to write.

    Raises:
        IOError/OSError: If the file cannot be opened or written.
    """
    print("outputFile = " + fullPath)
    # `with` guarantees the handle is flushed and closed even if write()
    # raises (for example on an encoding error).
    with open(fullPath, "w+") as target:
        target.write(fileData)

# Export to Excel


def output2Excel():
    """Write every string collected in ``m_str`` to ``string.xlsx``.

    The sheet starts with a header row (ID, STRING, COUNT) followed by one
    row per entry. Rows are ordered by ascending ID so the output is the
    same on every run instead of following dict iteration order. Finally
    prints the row cursor (number of data rows plus one for the header).
    """
    workbook = xlsxwriter.Workbook('string.xlsx')  # create the file

    # NOTE(review): "sheel1" looks like a typo for "sheet1"; kept unchanged
    # because anything reading the workbook may look the sheet up by name.
    worksheet = workbook.add_worksheet("sheel1")

    worksheet.write('A1', 'ID')
    worksheet.write('B1', 'STRING')
    worksheet.write('C1', 'COUNT')

    strIndex = 1  # row 0 holds the header
    for strAttr in sorted(m_str.values(), key=lambda attr: attr._id):
        worksheet.write(strIndex, 0, strAttr._id)
        worksheet.write(strIndex, 1, strAttr._string.encode("utf-8"))
        worksheet.write(strIndex, 2, strAttr._count)
        strIndex = strIndex + 1
    print(strIndex)
    workbook.close()

# Output to code (NOTE(review): no code for this heading is visible in this file)


# Run the scan only when the file is executed as a script.
if __name__ == '__main__':
    main()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值