“当码农好像也七八年了,从屁颠屁颠的愣头青到现在小弟喊帮忙都叫[大神]了,开个博客记点东西吧。”
以上
__起因比较复杂,算是历史遗留问题吧。服务端使用的是java,protobuf的编译选择了每个文件转出java_outer_class,之后内部message爱怎么重名都没关系,但是前端就蛋疼了——每个message对应proto命名空间下的一个类,绝对要冲突。__ __一开始还不清楚这个问题,因为做lua的哥们从上个公司继承来的优良传统,每个proto文件改写包名为文件名,我一开始问为什么这么纠结,那哥们说他们原来就是每个文件一个包名,这边不这么做就报错,估计是pbc的问题…后来才闹明白。既然已经这样了,那就继续用吧…__ __因为受不了手动改,写shell,写mac的console工具也都不够通用,于是服务端的哥们提议python,我也就是基本知道语法的程度,大概能用。__
* *系统库头引用*
import os
import re
import shutil
import subprocess
import sys
* *获取传入参数列表,这里是用了`-p`作为参数名指示,空格后接参数值*
def cmd_getargs():
    """Collect ``-key value`` pairs from ``sys.argv`` into a dict.

    An argument starting with ``-`` names a key (the text after the dash).
    The next argument that does not start with ``-`` becomes its value.
    Arguments that are not preceded by a key are ignored, and a key that is
    followed by another key (or by nothing) is dropped.

    Returns:
        dict mapping key (without the leading dash) to its string value.
    """
    arg_dict = {}
    pending_key = ""
    # argv[0] is the script path, never an option, so skip it.
    for single_arg in sys.argv[1:]:
        # startswith() is safe on an empty argument, unlike indexing [0].
        if single_arg.startswith("-"):
            pending_key = single_arg[1:]
        elif pending_key:
            # A value consumes the pending key; empty values are not stored.
            if single_arg:
                arg_dict[pending_key] = single_arg
            pending_key = ""
    return arg_dict
* *读取文件夹下所有proto文件,这脚本里面裁切文件路径用了好几种方式…我也不知道哪种比较好。*
def protos_in_dir(path):
    """Return the paths of all ``.proto`` files under ``path``, recursively.

    Args:
        path: directory to search.

    Returns:
        list of file paths, each built from the walked directory and the
        file name. The order follows ``os.walk`` and is not sorted. A
        missing directory yields an empty list.
    """
    path_array = []
    # os.walk already lists each directory's files; no extra listdir needed.
    for root, _dirs, files in os.walk(path):
        for fn in files:
            fpath = os.path.join(root, fn)
            # isfile() filters out entries such as broken symlinks.
            if fn.endswith(".proto") and os.path.isfile(fpath):
                path_array.append(fpath)
    return path_array
- 从文件中获取正则匹配到第一个字符串
def regex_find_first(path, rgstr):
    """Return the first match of regex ``rgstr`` in the file at ``path``.

    The whole file is read into memory. Matching uses ``re.findall``, so a
    pattern with capture groups returns the group content rather than the
    full match.

    Args:
        path: file to read.
        rgstr: regular expression source.

    Returns:
        The first match, or ``""`` when nothing matches.
    """
    # "with" closes the handle even if read() raises.
    with open(path, "r") as f:
        content = f.read()
    results = re.findall(rgstr, content)
    return results[0] if results else ""
- 这个函数和逻辑相关了,获取所有使用import 的文件引用
def regex_find_all_import(path):
    """Return the file names referenced by ``import "...";`` statements.

    Args:
        path: ``.proto`` file to scan.

    Returns:
        list of the quoted import targets (e.g. ``"common.proto"``), in
        order of appearance.
    """
    with open(path, "r") as f:
        content = f.read()
    # The capture group yields the text between the quotes directly.
    # Underscores are accepted in file names; the commas in the earlier
    # character class were separators written by mistake and are dropped.
    return re.findall(r'import[ ]+"([A-Za-z0-9_.]*)"[ ]*;', content)
- 查找并返回文件内所有的message和enum
def regex_find_all_message(path):
    """Return the names of all ``message`` and ``enum`` declarations.

    Args:
        path: ``.proto`` file to scan.

    Returns:
        list of names: every message name in order of appearance, followed
        by every enum name in order of appearance.

    NOTE(review): comments are not stripped, so the word "message" or "enum"
    followed by an identifier inside a comment is also reported.
    """
    with open(path, "r") as f:
        content = f.read()
    names = []
    for keyword in ("message", "enum"):
        # \b keeps words such as "submessage" from matching. Capturing the
        # identifier directly copes with repeated spaces, tabs, a "{" glued
        # to the name, and single-letter names.
        names += re.findall(
            r"\b" + keyword + r"[ \t]+([A-Za-z_][A-Za-z0-9_]*)", content)
    return names
- 使用正则替换所有字符串
def regex_replace(path, rgstr, rpstr):
    """Replace every match of regex ``rgstr`` in a file with ``rpstr``.

    The file is read fully, substituted with ``re.sub`` and written back
    in place.

    Args:
        path: file to modify.
        rgstr: regular expression source.
        rpstr: replacement, interpreted by ``re.sub`` (backreferences work).
    """
    with open(path, "r") as f:
        content = f.read()
    new_content = re.sub(rgstr, rpstr, content)
    # Skip the write when nothing matched, so the file is left untouched.
    if new_content != content:
        with open(path, "w") as f:
            f.write(new_content)
- 使用特殊占位字符串中转,替换所有引用的message或enum的package,没做错误处理,出错重来的节奏
这里做了修改,前天有个引用的proto文件使用了其他包名然后引起了替换报错,才发现我直接判断使用的是proto为源包名进行的替换,github已更新,这里留着当警示吧…
def regex_replace_import(path, old_import, old_msg, new_msg):
    """Qualify references to an imported message or enum inside a file.

    The ``import "<old_import>.proto";`` statement and the
    ``option java_outer_classname = "...";`` line are swapped for
    placeholders first, so the rename cannot touch them, and are restored
    afterwards. If the file contains ``proto.<old_msg>`` references, only
    those are replaced with ``new_msg``; otherwise bare ``old_msg``
    references are replaced. The file is rewritten in place.

    If either the import statement or the java_outer_classname option is
    missing, a message is printed and the file is left unchanged.

    Args:
        path: ``.proto`` file to modify.
        old_import: imported file name without the ``.proto`` extension,
            taken literally.
        old_msg: message or enum name to look for, taken literally.
        new_msg: replacement text, inserted literally.
    """
    with open(path, "r") as f:
        content = f.read()

    # Find the two lines that must survive the rename untouched.
    import_match = re.search(
        r'import[ ]+"' + re.escape(old_import) + r'\.proto"[ ]*;', content)
    if import_match is None:
        print("find [" + old_import + "] in [" + path + "] failed!")
        return
    java_match = re.search(
        r'option[ ]+java_outer_classname[ ]*=[ ]*"[A-Za-z0-9_.]*";', content)
    if java_match is None:
        # Name the thing that is actually missing, not the import.
        print("find [java_outer_classname] in [" + path + "] failed!")
        return
    old_str_import = import_match.group(0)
    old_str_java = java_match.group(0)

    place_holder_import = "---place_holder_import---"
    place_holder_java = "---place_holder_java---"
    # Plain str.replace: the matched text is literal, not a regex.
    content = content.replace(old_str_import, place_holder_import)
    content = content.replace(old_str_java, place_holder_java)

    msg = re.escape(old_msg)
    qualified = re.compile(r"\bproto\." + msg + r"\b")
    # The lookbehind skips names already qualified ("pkg.Msg") and the
    # trailing \b skips longer identifiers that start with the name.
    bare = re.compile(r"(?<![\w.])" + msg + r"\b")
    target = qualified if qualified.search(content) else bare
    # A function replacement inserts new_msg without escape processing.
    content = target.sub(lambda _match: new_msg, content)

    content = content.replace(place_holder_import, old_str_import)
    content = content.replace(place_holder_java, old_str_java)

    with open(path, "w") as f:
        f.write(content)
- 主函数…不太懂python,他们说直接放
`__main__()`
里面就会执行…但不清楚为毛我的脚本必须手动调用main函数。(补注:Python不会自动执行名为 `__main__` 的函数;惯用写法是在文件末尾写 `if __name__ == "__main__":`,再在其中调用入口函数。)
def __main__():
    """Rewrite proto packages per file, qualify imports, compile to ``.pb``.

    Reads the proto directory from the ``-p`` command-line option, then:

    1. Changes into that directory and runs ``git reset --hard`` and
       ``git clean -fd``. WARNING: this discards all uncommitted changes
       and untracked files there.
    2. Replaces ``package proto;`` in every ``.proto`` file with a package
       named after the file, and remembers the files that contain imports.
    3. For each remembered file, prefixes references to the messages and
       enums of each imported file with that imported file's package name.
    4. Runs ``protoc <file> -o <name>.pb`` for every file.
    5. Recreates ``<proto dir>/pb`` and moves every ``.pb`` file into it.

    Exit codes of the git and protoc commands are not checked.
    """
    arg_dict = cmd_getargs()
    # "in" replaces dict.has_key(), which no longer exists in Python 3.
    if "p" not in arg_dict:
        print("please use -p [proto path] cmd to pass param")
        return
    # Resolve before chdir so a relative -p is not re-resolved against the
    # new working directory further down.
    proto_path = os.path.abspath(arg_dict["p"])
    os.chdir(proto_path)
    os.system("git reset --hard")
    os.system("git clean -fd")

    # Pass 1: rename packages and pick out the files that import others.
    import_pattern = r'import[ ]+"[A-Za-z0-9_.]*"[ ]*;'
    all_files = protos_in_dir(proto_path)
    need_import_files = []
    for spath in all_files:
        package_name = os.path.splitext(os.path.basename(spath))[0]
        has_import = regex_find_first(spath, import_pattern) != ""
        regex_replace(spath, "package[ ]+proto;", "package " + package_name + ";")
        if has_import:
            need_import_files.append(spath)

    # Pass 2: qualify references to imported messages and enums.
    for spath in need_import_files:
        ffolder = os.path.dirname(spath)
        for import_msg in regex_find_all_import(spath):
            import_path = os.path.join(ffolder, import_msg)
            if not os.path.isfile(import_path):
                # Imports are looked up next to the importing file; report
                # and skip rather than crash on a missing one.
                print("imported file [" + import_path + "] not found, skipped")
                continue
            package_name = os.path.splitext(import_msg)[0]
            for msg in regex_find_all_message(import_path):
                regex_replace_import(spath, package_name, msg, package_name + "." + msg)

    # Pass 3: compile each file from its own folder, so files found in
    # sub-directories are compiled too. The argument list avoids shell quoting.
    for spath in all_files:
        ffolder, base_name = os.path.split(spath)
        package_name = os.path.splitext(base_name)[0]
        try:
            subprocess.call(["protoc", base_name, "-o", package_name + ".pb"], cwd=ffolder)
        except OSError as err:
            print("failed to run protoc: " + str(err))
            return

    # Pass 4: gather the compiled descriptors in a fresh "pb" folder.
    pb_path = os.path.join(proto_path, "pb")
    if os.path.isdir(pb_path):
        shutil.rmtree(pb_path)
    os.makedirs(pb_path)
    for root, dirs, files in os.walk(proto_path):
        # Do not descend into the output folder being filled.
        dirs[:] = [d for d in dirs if os.path.join(root, d) != pb_path]
        for fn in files:
            if fn.endswith(".pb"):
                shutil.move(os.path.join(root, fn), os.path.join(pb_path, fn))


# Python does not call a function automatically because of its name; run
# the entry point only when the file is executed as a script.
if __name__ == "__main__":
    __main__()
大概就这么多,除了可以说It works!
之外好像没啥可说的。读写文件是直接全文读取到变量,幸好proto文件都不大;查找和替换也是…api不熟…不清楚对不对。
不过也是好在能用,proto编译也通过,暂时没发现错误,省了不少功夫。
再次叹服,几十个文件一个个改过来的…坚韧精神。