# Reposted from http://www.pianshen.com/article/4142315326/
# -*- coding:utf-8 -*-
import codecs
import xml.etree.ElementTree as ET
import sys
# Encoding that implicit str <-> unicode conversions should use.
defaultencoding = 'utf-8'

# Python 2 workaround: the interpreter default there is normally 'ascii', so
# implicit conversions of non-ASCII text fail. ``reload(sys)`` brings back
# ``sys.setdefaultencoding`` (removed from the module namespace at startup) so
# the default can be switched. On Python 3 the default is already 'utf-8', the
# condition is false, and the Python 2-only calls below are never reached.
if sys.getdefaultencoding() != defaultencoding:
    reload(sys)  # noqa: F821 -- builtin on Python 2 only
    sys.setdefaultencoding(defaultencoding)
def tmx_process(file, target_file):
    """Flatten a TMX-style XML document into a ``zh ||| bs`` text file.

    The second child of the XML root is taken to be the body element. For
    every entry in the body, the text of the first grandchild under the
    entry's first child (``bs``) and under its second child (``zh``) is read,
    and the line ``zh ||| bs`` is written to *target_file* encoded as UTF-8.
    Entries that lack one of those elements, or whose text is empty, are
    reported on stdout and skipped instead of aborting the whole run.

    The number of entries in the body is printed before processing starts.

    Args:
        file: Path (or file object) of the XML document, handed to
            ``ET.parse``. The name shadows the Python 2 builtin but is kept
            so existing keyword callers continue to work.
        target_file: Path of the text file to create or overwrite.

    Returns:
        The string ``'Done'``.

    Raises:
        IndexError: If the XML root has fewer than two children.
        xml.etree.ElementTree.ParseError: If *file* is not well-formed XML.
    """
    tree = ET.parse(file)
    root = tree.getroot()
    # The body is addressed by position: root[0] is expected to be the header.
    body = root[1]
    print(len(body))

    # ``with`` guarantees the output file is closed even if a write fails.
    with codecs.open(target_file, 'w+', 'utf-8') as txtfile:
        for item in body:
            try:
                bs = item[0][0].text
                zh = item[1][0].text
                # ElementTree already returns decoded text on Python 3 (and
                # for non-ASCII content on Python 2); only raw byte strings
                # still need decoding.
                if isinstance(bs, bytes):
                    bs = bs.decode('utf-8')
                if isinstance(zh, bytes):
                    zh = zh.decode('utf-8')
                # A missing/empty segment has text None, which makes the
                # concatenation raise TypeError and the entry gets skipped.
                line = zh + ' ||| ' + bs + '\n'
            except (IndexError, TypeError, AttributeError, UnicodeError) as ex:
                # Best effort: report the malformed entry and move on.
                print(ex)
                continue
            txtfile.write(line)
    return 'Done'