用浏览器使用百度翻译时,抓取浏览器发送的 HTTP 请求。
然后在程序中模拟发送这个请求,获取翻译结果。
python代码:
#!/usr/bin/env python
#coding:utf-8
#Author:liushsh
import sys
import logging
import logging.config
import re
import cookielib
import urllib,urllib2
import random
import json
# Configure the logging system from the configuration file conf/logger.conf.
# The path is relative, so it is resolved against the current working
# directory at import time.
logging.config.fileConfig("conf/logger.conf")
# Module-level logger requested under the name 'root'; Translation() and
# main() write their messages through it.
logger = logging.getLogger('root')
def Translation(en_str, zh_str, from_lang='en', to_lang='zh'):
    """Translate ``en_str`` by replaying the browser's request to Baidu Fanyi.

    Sends a POST request to http://fanyi.baidu.com/v2transapi with the same
    form fields the browser uses and stores the first translated segment in
    ``zh_str[0]``.

    Args:
        en_str: Text to translate; sent as the ``query`` form field.
        zh_str: Mutable sequence used as an output parameter. On success
            ``zh_str[0]`` is overwritten with the translated text; on
            failure it is left untouched.
        from_lang: Value of the ``from`` form field (source language).
            Defaults to ``'en'``.
        to_lang: Value of the ``to`` form field (target language).
            Defaults to ``'zh'``.
            NOTE(review): the set of language codes the service accepts is
            not visible here -- confirm before relying on other values.

    Returns:
        True if a translation was found and stored in ``zh_str[0]``,
        False if the response contained no translation.

    Raises:
        urllib2.URLError: The HTTP request failed.
        ValueError: The response body is not valid JSON.
    """
    # Build an opener that keeps cookies and install it process-wide, exactly
    # as before, so any later urllib2.urlopen() call keeps using it.
    cj = cookielib.CookieJar()
    cookie_support = urllib2.HTTPCookieProcessor(cj)
    opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
    urllib2.install_opener(opener)

    # Equivalent GET form of the request, for reference:
    # http://fanyi.baidu.com/v2transapi?from=en&to=zh&query=hello%20world&transtype=hash
    baidu_translation = 'http://fanyi.baidu.com/v2transapi'
    post_param = {
        'from': from_lang,
        'to': to_lang,
        'query': en_str,
        'transtype': 'hash',
    }
    post_data = urllib.urlencode(post_param)

    # Passing a data argument makes urllib2 issue a POST.
    request = urllib2.Request(baidu_translation, post_data)
    response = urllib2.urlopen(request)
    try:
        html = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()

    payload = json.loads(html)
    try:
        dst = payload['trans_result']['data'][0]['dst']
    except (KeyError, IndexError, TypeError):
        # The response has no usable 'trans_result' structure; report
        # "no translation" through the boolean result instead of leaking
        # a lookup error to the caller.
        logger.error("[no trans_result:%s]" % (en_str))
        return False

    if dst is None:
        return False

    zh_str[0] = dst
    logger.info("[%s]==>[%s]" % (en_str, zh_str[0]))
    return True
def main():
    """Translate every command-line argument and print each result.

    With no arguments, prints a usage line and exits with status 0.
    Otherwise each argument is passed to Translation(); a successful
    translation is printed as ``[source]==>[translation]``. Failures are
    written to the module logger and the loop continues with the next
    argument.
    """
    if len(sys.argv) < 2:
        # A single parenthesized argument prints the same text as the
        # Python 2 print statement.
        print("Usage:%s 'hello world'" % (sys.argv[0]))
        # sys.exit instead of the site-provided exit(), which is meant for
        # the interactive shell and may be absent (e.g. python -S).
        sys.exit(0)

    for en_str in sys.argv[1:]:
        try:
            zh_str = [""]
            if Translation(en_str, zh_str):
                print("[%s]==>[%s]" % (en_str, zh_str[0]))
            else:
                # Do not print an empty translation as if it were a result.
                logger.error("[no translation:%s]" % (en_str))
        except Exception as e:
            # Top-level boundary: one failed item (network error, bad
            # response, output encoding problem) must not stop the rest.
            # urllib2.URLError is an Exception subclass, so it is covered.
            logger.error("[except:%s]" % (e))
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == '__main__':
    main()
运行结果:
liushsh@liushsh:~/python/http_operate/baidu$ python baidu_en_zh_v1.0.py 'hello world' 'world hello'
[hello world]==>[你好世界]
[world hello]==>[世界你好]
现在只实现了英文翻译成中文。稍微修改下面这一行:
post_param={'from':'en','to':'zh','query':en_str,'transtype':'hash'}
中 from 和 to 的取值,应该就可以用于其他语言之间的翻译。