Python 3.7 Crawler (2): Translating Text

Getting the JSON response

import urllib.request
import urllib.parse

# URL captured with a packet-capture tool -- not the URL shown in the browser address bar
url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&sessionFrom="

# Form data to send to the web server
data = {}
data['i'] = 'I love fish'
data['from'] = 'AUTO'
data['to'] = 'AUTO'
data['smartresult'] = 'dict'
data['client'] = 'fanyideskweb'
data['salt'] = '1503581407033'
data['sign'] = '67472a1b3638989677f7aca9af3be0aa'
data['doctype'] = 'json'
data['version'] = '2.1'
data['keyfrom'] = 'fanyi.web'
data['action'] = 'FY_BY_CLICKBUTTION'
data['typoResult'] = 'true'
data = urllib.parse.urlencode(data).encode('utf-8')

response = urllib.request.urlopen(url, data)

html = response.read().decode('utf-8')

print(html)
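
The script above simply prints the raw JSON string. The exact fields depend on the server, but the indexing used in the next section (translateResult[0][0]['tgt']) implies a structure roughly like the sketch below; the field values here are illustrative assumptions, not captured output:

import json

# Illustrative response shape only -- the values are assumed, not real server output
sample = '{"type": "EN2ZH_CN", "errorCode": 0, "translateResult": [[{"src": "I love fish", "tgt": "我爱鱼"}]]}'
target = json.loads(sample)
print(target['translateResult'][0][0]['tgt'])  # -> 我爱鱼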

Improving the translation

import urllib.request
import urllib.parse
import json

url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&sessionFrom="

content = input("Enter the text to translate: ")

# Form data to send to the web server
data = {}
data['i'] = content
data['from'] = 'AUTO'
data['to'] = 'AUTO'
data['smartresult'] = 'dict'
data['client'] = 'fanyideskweb'
data['salt'] = '1503581407033'
data['sign'] = '67472a1b3638989677f7aca9af3be0aa'
data['doctype'] = 'json'
data['version'] = '2.1'
data['keyfrom'] = 'fanyi.web'
data['action'] = 'FY_BY_CLICKBUTTION'
data['typoResult'] = 'true'
data = urllib.parse.urlencode(data).encode('utf-8')

response = urllib.request.urlopen(url, data)
html = response.read().decode('utf-8')
target = json.loads(html)

print("Translation result: %s" % target['translateResult'][0][0]['tgt'])

 

Modifying the User-Agent

Method 1: pass headers to the Request constructor; headers must be a dictionary

import urllib.request
import urllib.parse
import json

url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&sessionFrom="

def main():
    while True:
        content = input("Enter the text to translate (q to quit): ")
        if content == 'q':
            break

        # Form data to send to the web server
        data = {}
        data['i'] = content
        data['from'] = 'AUTO'
        data['to'] = 'AUTO'
        data['smartresult'] = 'dict'
        data['client'] = 'fanyideskweb'
        data['salt'] = '1503581407033'
        data['sign'] = '67472a1b3638989677f7aca9af3be0aa'
        data['doctype'] = 'json'
        data['version'] = '2.1'
        data['keyfrom'] = 'fanyi.web'
        data['action'] = 'FY_BY_CLICKBUTTION'
        data['typoResult'] = 'true'
        data = urllib.parse.urlencode(data).encode('utf-8')

        ## Method 1
        headers = {}
        headers['Referer'] = 'http://fanyi.youdao.com'
        headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
        req = urllib.request.Request(url, data, headers)  # pass the headers dict when creating the Request object

        response = urllib.request.urlopen(req)

        html = response.read().decode('utf-8')
        tar = json.loads(html)
        print("Translation result: %s" % tar['translateResult'][0][0]['tgt'])

if __name__ == "__main__":
    main()

Method 2: after the Request object has been created, append the headers with the add_header() method:

import urllib.request
import urllib.parse
import json

url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&sessionFrom="

def main():
    while True:
        content = input("Enter the text to translate (q to quit): ")
        if content == 'q':
            break

        # Form data to send to the web server
        data = {}
        data['i'] = content
        data['from'] = 'AUTO'
        data['to'] = 'AUTO'
        data['smartresult'] = 'dict'
        data['client'] = 'fanyideskweb'
        data['salt'] = '1503581407033'
        data['sign'] = '67472a1b3638989677f7aca9af3be0aa'
        data['doctype'] = 'json'
        data['version'] = '2.1'
        data['keyfrom'] = 'fanyi.web'
        data['action'] = 'FY_BY_CLICKBUTTION'
        data['typoResult'] = 'true'
        data = urllib.parse.urlencode(data).encode('utf-8')

        ## Method 1
        '''headers = {}
        headers['Referer'] = 'http://fanyi.youdao.com'
        headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'
        req = urllib.request.Request(url, data, headers)  # pass the headers dict when creating the Request object
        '''
        ## Method 2: add the headers with add_header() after the Request object is created
        req = urllib.request.Request(url, data)
        req.add_header('Referer', 'http://fanyi.youdao.com')
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36')

        response = urllib.request.urlopen(req)

        html = response.read().decode('utf-8')
        tar = json.loads(html)
        print("Translation result: %s" % tar['translateResult'][0][0]['tgt'])

if __name__ == "__main__":
    main()
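
Both methods end up storing the same headers on the Request object. If you want to check what has been attached before sending, the Request class provides header_items() and get_header(); a minimal offline sketch (no request is sent, and the URL is only a placeholder):

import urllib.request

req = urllib.request.Request('http://fanyi.youdao.com/translate')  # placeholder URL, nothing is sent here
req.add_header('Referer', 'http://fanyi.youdao.com')
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36')
print(req.header_items())            # (name, value) pairs attached to this Request so far
print(req.get_header('User-agent'))  # urllib capitalizes stored header names: 'User-agent'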

Accessing through a proxy IP

1) proxy_support = urllib.request.ProxyHandler({})

# The argument is a dictionary: each key is the protocol to handle (e.g. http, ftp, or https) and each value is the proxy's IP address and port.

2) opener = urllib.request.build_opener(proxy_support)

# An opener can be thought of as a custom-built requester. When you open a page with urlopen(), you are using the default opener; that opener can be customized, for example with special headers or with a designated proxy IP.

3) urllib.request.install_opener(opener)

Installing the customized opener into the system is the once-and-for-all approach: from then on, ordinary calls to urlopen() work through the customized opener. Note that installing it replaces the default opener. If you do not want to replace the default opener, you can instead open the page with opener.open() whenever you need the custom behaviour (a combined sketch follows the two lines below):

req = urllib.request.Request(url,data)
response = opener.open(req)
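
Putting steps 1) to 3) together without install_opener(), here is a minimal sketch that sends one request through opener.open() only; the proxy address is a placeholder to be replaced with a working one, and for a real translation you would reuse the full form data from the programs above:

import urllib.request
import urllib.parse

url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&sessionFrom="
# Trimmed form data just for the sketch; reuse the full data dict from above for a real request
data = urllib.parse.urlencode({'i': 'I love fish', 'doctype': 'json'}).encode('utf-8')

proxy_support = urllib.request.ProxyHandler({'http': '127.0.0.1:8888'})  # placeholder proxy address and port
opener = urllib.request.build_opener(proxy_support)
opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36')]

# opener.open() routes only this call through the proxy; the default opener is left untouched
response = opener.open(url, data)
print(response.read().decode('utf-8'))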

The following program translates text through a proxy IP.

import urllib.request
import urllib.parse
import urllib.error
import json
import time  # time module (not used in this example)

url = "http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule&sessionFrom="

def main():
    while True:
        content = input("Enter the text to translate (q to quit): ")
        if content == 'q':
            break

        # Form data to send to the web server
        data = {}
        data['i'] = content
        data['from'] = 'AUTO'
        data['to'] = 'AUTO'
        data['smartresult'] = 'dict'
        data['client'] = 'fanyideskweb'
        data['salt'] = '1503581407033'
        data['sign'] = '67472a1b3638989677f7aca9af3be0aa'
        data['doctype'] = 'json'
        data['version'] = '2.1'
        data['keyfrom'] = 'fanyi.web'
        data['action'] = 'FY_BY_CLICKBUTTION'
        data['typoResult'] = 'true'
        data = urllib.parse.urlencode(data).encode('utf-8')

        # ProxyHandler sets the proxy server; the dict key must match the URL scheme (http here)
        proxy_support = urllib.request.ProxyHandler({"http": "222.161.16.10:9999"})
        opener = urllib.request.build_opener(proxy_support)
        opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36')]
        urllib.request.install_opener(opener)

        try:
            print("Trying to access through the proxy IP...")
            req = urllib.request.Request(url, data)
            #req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36')
            response = urllib.request.urlopen(req)
        except urllib.error.URLError:
            print("Access failed...")
        else:
            print("Access through the proxy server succeeded...")
            html = response.read().decode('utf-8')
            tar = json.loads(html)
            print("Translation result: %s" % tar['translateResult'][0][0]['tgt'])

if __name__ == "__main__":
    main()

 
