import sys
reload(sys)
sys.setdefaultencoding("gbk")
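# Usage (original note): `python 2k.py myinput.txt myoutput.txt`; myinput.txt holds one entry per line
# and the final translations are written to the output file.
# NOTE(review): the code below does not read the command line; it uses the fixed names
# myinput.txt -> myoutput.txt / myoutput1.txt -> myfanyi.txt / myfanyi1.txt.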
import httplib
import md5
import random
import hashlib
import json #导入json模块
import urllib #导入urllib模块
from urllib2 import Request, urlopen, URLError, HTTPError #导入urllib2模块
sys.path.append("../")
import jieba
def splitSentence(inputFile, outputFile):
    """Segment a GBK text file with jieba, writing one word per paragraph.

    Every input line is stripped, decoded from GBK (undecodable bytes are
    dropped) and cut into words.  The words of a line are joined with a blank
    line followed by a single space, and the result is written GBK-encoded
    with a trailing newline.

    Args:
        inputFile: path of the GBK-encoded text file to read.
        outputFile: path of the file to create or overwrite.
    """
    # Same separator the file has always produced: two line feeds + a space.
    separator = u'\n\n '
    # `with` guarantees both handles are closed even if jieba or the codec
    # raises part-way through the file.
    with open(inputFile, 'r') as fin, open(outputFile, 'w') as fout:
        for eachLine in fin:
            line = eachLine.strip().decode('gbk', 'ignore')
            outStr = separator.join(jieba.cut(line))
            fout.write(outStr.strip().encode('gbk') + '\n')
def splitSentence1(inputFile, outputFile):
    """Segment a GBK text file with jieba, writing space-separated words.

    Every input line is stripped, decoded from GBK (undecodable bytes are
    dropped) and cut into words.  The words are joined with single spaces and
    written GBK-encoded, one output line per input line.

    Args:
        inputFile: path of the GBK-encoded text file to read.
        outputFile: path of the file to create or overwrite.
    """
    # `with` guarantees both handles are closed even if jieba or the codec
    # raises part-way through the file.
    with open(inputFile, 'r') as fin, open(outputFile, 'w') as fout:
        for eachLine in fin:
            line = eachLine.strip().decode('gbk', 'ignore')
            outStr = u' '.join(jieba.cut(line))
            fout.write(outStr.strip().encode('gbk') + '\n')
# Module-level side effect: both segmentation passes run as soon as this file
# is executed or imported, i.e. outside the `__main__` guard.
# myoutput1.txt is later read by translate1(); myoutput.txt by translate().
splitSentence1('myinput.txt', 'myoutput1.txt')
splitSentence('myinput.txt', 'myoutput.txt')
def _translate_file(inputFile, outputFile, fromLang, toLang):
    """Translate a GBK text file line by line through the Baidu Translate API.

    One HTTP request is issued per non-blank input line and a row of the form
    ``<source> <translation>`` is written to ``outputFile`` (GBK-encoded).
    If the response carries no ``trans_result`` the source text is written in
    place of the translation.  Lines whose request or JSON decoding fails are
    reported on stdout and skipped.

    Args:
        inputFile: path of the GBK-encoded file to read.
        outputFile: path of the file to create or overwrite.
        fromLang: source language code sent to the API.
        toLang: target language code sent to the API.
    """
    # NOTE(review): these credentials are committed in source; consider
    # loading them from the environment or an untracked config file.
    appid = '20160117000009037'
    secretKey = 'HLiuzw_fNZMxgoZNj0pX'
    endpoint = 'http://api.fanyi.baidu.com/api/trans/vip/translate'

    # `with` closes both files even when a request or codec error escapes.
    with open(inputFile, 'r') as fin, open(outputFile, 'w') as fout:
        for eachLine in fin:
            text = eachLine.strip().decode('gbk', 'ignore')
            if not text:
                # An empty query cannot produce a translation, so write the
                # row the no-result fallback would give and skip the request.
                fout.write(' \n')
                continue

            # The signature and the `q` parameter must be built from the same
            # UTF-8 bytes; `q` is URL-quoted only after signing.
            query = text.encode('utf-8')
            salt = str(random.randint(32768, 65536))
            sign = hashlib.md5(appid + query + salt + secretKey).hexdigest()
            url = (endpoint + '?appid=' + appid + '&q=' + urllib.quote(query)
                   + '&from=' + fromLang + '&to=' + toLang
                   + '&salt=' + salt + '&sign=' + sign)

            # Every failure path skips the line; falling through would read
            # from an undefined or stale response object.
            try:
                resultPage = urlopen(url)
            except HTTPError as e:
                print('The server couldn\'t fulfill the request.')
                print('Error code: %s' % (e.code,))
                continue
            except URLError as e:
                print('We failed to reach a server.')
                print('Reason: %s' % (e.reason,))
                continue
            except Exception as e:
                print('translate error.')
                print(e)
                continue

            try:
                resultJason = resultPage.read().decode('utf-8')
            finally:
                resultPage.close()
            print(resultJason)

            try:
                js = json.loads(resultJason)
            except ValueError as e:
                print('loads Json error.')
                print(e)
                continue

            results = js.get(u"trans_result") if isinstance(js, dict) else None
            if results:
                outStr = results[0]["dst"]
                print(outStr)
            else:
                # No translation in the response: fall back to the source.
                outStr = text

            # Encode explicitly instead of relying on the process-wide default
            # encoding; 'replace' keeps one unmappable character from
            # aborting the whole run.
            row = text + u' ' + outStr.strip() + u'\n'
            fout.write(row.encode('gbk', 'replace'))


def translate(inputFile='myoutput.txt', outputFile='myfanyi.txt'):
    """Translate ``inputFile`` from Chinese to English into ``outputFile``.

    The defaults reproduce the original hard-coded file names.
    """
    _translate_file(inputFile, outputFile, 'zh', 'en')


def translate1(inputFile='myoutput1.txt', outputFile='myfanyi1.txt'):
    """Translate ``inputFile`` from Chinese to English into ``outputFile``.

    The defaults reproduce the original hard-coded file names.
    """
    _translate_file(inputFile, outputFile, 'zh', 'en')
if __name__ == '__main__':
    # translate(sys.argv[1], sys.argv[2])
    # Disabled alternative (kept from the original): take the input and
    # output file names from the command line for convenience.
    translate1()
    translate()
#//
#-*- coding:utf-8 -*-
import sys
reload(sys)
sys.setdefaultencoding("gbk")
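# Usage (original note): `python 2k.py myinput.txt myoutput.txt`; myinput.txt holds one entry per line
# and the final translations are written to the output file.
# NOTE(review): the code below does not read the command line; it uses the fixed names
# myinput.txt -> myoutput.txt / myoutput1.txt -> myfanyi.txt / myfanyi1.txt.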
import httplib
import md5
import random
import hashlib
import json #导入json模块
import urllib #导入urllib模块
from urllib2 import Request, urlopen, URLError, HTTPError #导入urllib2模块
sys.path.append("../")
import jieba
def splitSentence(inputFile, outputFile):
    """Segment a GBK text file with jieba, writing one word per paragraph.

    Every input line is stripped, decoded from GBK (undecodable bytes are
    dropped) and cut into words.  The words of a line are joined with a blank
    line followed by a single space, and the result is written GBK-encoded
    with a trailing newline.

    Args:
        inputFile: path of the GBK-encoded text file to read.
        outputFile: path of the file to create or overwrite.
    """
    # Same separator the file has always produced: two line feeds + a space.
    separator = u'\n\n '
    # `with` guarantees both handles are closed even if jieba or the codec
    # raises part-way through the file.
    with open(inputFile, 'r') as fin, open(outputFile, 'w') as fout:
        for eachLine in fin:
            line = eachLine.strip().decode('gbk', 'ignore')
            outStr = separator.join(jieba.cut(line))
            fout.write(outStr.strip().encode('gbk') + '\n')
def splitSentence1(inputFile, outputFile):
    """Segment a GBK text file with jieba, writing space-separated words.

    Every input line is stripped, decoded from GBK (undecodable bytes are
    dropped) and cut into words.  The words are joined with single spaces and
    written GBK-encoded, one output line per input line.

    Args:
        inputFile: path of the GBK-encoded text file to read.
        outputFile: path of the file to create or overwrite.
    """
    # `with` guarantees both handles are closed even if jieba or the codec
    # raises part-way through the file.
    with open(inputFile, 'r') as fin, open(outputFile, 'w') as fout:
        for eachLine in fin:
            line = eachLine.strip().decode('gbk', 'ignore')
            outStr = u' '.join(jieba.cut(line))
            fout.write(outStr.strip().encode('gbk') + '\n')
# Module-level side effect: both segmentation passes run as soon as this file
# is executed or imported, i.e. outside the `__main__` guard.
# myoutput1.txt is later read by translate1(); myoutput.txt by translate().
splitSentence1('myinput.txt', 'myoutput1.txt')
splitSentence('myinput.txt', 'myoutput.txt')
def _translate_file(inputFile, outputFile, fromLang, toLang):
    """Translate a GBK text file line by line through the Baidu Translate API.

    One HTTP request is issued per non-blank input line and a row of the form
    ``<source> <translation>`` is written to ``outputFile`` (GBK-encoded).
    If the response carries no ``trans_result`` the source text is written in
    place of the translation.  Lines whose request or JSON decoding fails are
    reported on stdout and skipped.

    Args:
        inputFile: path of the GBK-encoded file to read.
        outputFile: path of the file to create or overwrite.
        fromLang: source language code sent to the API.
        toLang: target language code sent to the API.
    """
    # NOTE(review): these credentials are committed in source; consider
    # loading them from the environment or an untracked config file.
    appid = '20160117000009037'
    secretKey = 'HLiuzw_fNZMxgoZNj0pX'
    endpoint = 'http://api.fanyi.baidu.com/api/trans/vip/translate'

    # `with` closes both files even when a request or codec error escapes.
    with open(inputFile, 'r') as fin, open(outputFile, 'w') as fout:
        for eachLine in fin:
            text = eachLine.strip().decode('gbk', 'ignore')
            if not text:
                # An empty query cannot produce a translation, so write the
                # row the no-result fallback would give and skip the request.
                fout.write(' \n')
                continue

            # The signature and the `q` parameter must be built from the same
            # UTF-8 bytes; `q` is URL-quoted only after signing.
            query = text.encode('utf-8')
            salt = str(random.randint(32768, 65536))
            sign = hashlib.md5(appid + query + salt + secretKey).hexdigest()
            url = (endpoint + '?appid=' + appid + '&q=' + urllib.quote(query)
                   + '&from=' + fromLang + '&to=' + toLang
                   + '&salt=' + salt + '&sign=' + sign)

            # Every failure path skips the line; falling through would read
            # from an undefined or stale response object.
            try:
                resultPage = urlopen(url)
            except HTTPError as e:
                print('The server couldn\'t fulfill the request.')
                print('Error code: %s' % (e.code,))
                continue
            except URLError as e:
                print('We failed to reach a server.')
                print('Reason: %s' % (e.reason,))
                continue
            except Exception as e:
                print('translate error.')
                print(e)
                continue

            try:
                resultJason = resultPage.read().decode('utf-8')
            finally:
                resultPage.close()
            print(resultJason)

            try:
                js = json.loads(resultJason)
            except ValueError as e:
                print('loads Json error.')
                print(e)
                continue

            results = js.get(u"trans_result") if isinstance(js, dict) else None
            if results:
                outStr = results[0]["dst"]
                print(outStr)
            else:
                # No translation in the response: fall back to the source.
                outStr = text

            # Encode explicitly instead of relying on the process-wide default
            # encoding; 'replace' keeps one unmappable character from
            # aborting the whole run.
            row = text + u' ' + outStr.strip() + u'\n'
            fout.write(row.encode('gbk', 'replace'))


def translate(inputFile='myoutput.txt', outputFile='myfanyi.txt'):
    """Translate ``inputFile`` from English to Chinese into ``outputFile``.

    The defaults reproduce the original hard-coded file names.
    """
    _translate_file(inputFile, outputFile, 'en', 'zh')


def translate1(inputFile='myoutput1.txt', outputFile='myfanyi1.txt'):
    """Translate ``inputFile`` from English to Chinese into ``outputFile``.

    The defaults reproduce the original hard-coded file names.
    """
    _translate_file(inputFile, outputFile, 'en', 'zh')
if __name__ == '__main__':
    # translate(sys.argv[1], sys.argv[2])
    # Disabled alternative (kept from the original): take the input and
    # output file names from the command line for convenience.
    translate1()
    translate()