首先,我在网上搜到了一篇博客,其中的代码原本是 Python 2 的,我把它改成了 Python 3,但运行时出现了错误。
import re
import urllib
import urllib.request
import urllib.parse
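# urllib: in Python 3 this is a package that collects urllib.request,
# urllib.parse and urllib.error.
# urllib2 (Python 2 only; in Python 3 its functionality lives in
# urllib.request and urllib.error): The urllib2 module defines functions and
# classes which help in opening URLs (mostly HTTP) in a complex world — basic
# and digest authentication, redirections, cookies and more.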
def translate(text):
'''''模拟浏览器的行为,向Google Translate的主页发送数据,然后抓取翻译结果 '''
#text 输入要翻译的英文句子
text_1=text
#'langpair':'en'|'zh-CN'从英语到简体中文
values={'hl':'zh-CN','ie':'UTF-8','text':text_1,'langpair':"'en'|'zh-CN'"}
chaper_url='http://translate.google.cn'
#headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
data = urllib.parse.urlencode(values).encode(encoding='UTF8')
#req = urllib.request.Request(url,data,headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'} )
#模拟一个浏览器
#browser='Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 2.0.50727)'
#req.add_header('User-Agent',browser)
#向谷歌翻译发送请求
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
req = urllib.request.Request(url=chaper_url, headers=headers)
response=urllib.request.urlopen(req).read()
response=response.decode('utf-8')
#response = urllib.request.urlopen(req)
#读取返回页面
#html=response
#从返回页面中过滤出翻译后的文本
#使用正则表达式匹配
#翻译后的文本是'TRANSLATED_TEXT='等号后面的内容
#.*? non-greedy or minimal fashion
#(?<=...)Matches if the current position in the string is preceded
#by a match for ... that ends at the current position
p=re.compile(r"(?<=TRANSLATED_TEXT=).*?;")