python 爬图和搞翻译

urllib(package)---URL + lib
URL三部分
1.协议;http,https,ftp,file,ed2k...
2.存放资源的服务器域名系统或IP地址(有时候要包含端口号,各个传输协议都有默认的端口号,如http的默认端口号为80)
3.资源的具体地址,如目录或者文件名等
import urllib.request

# Fetch the Baidu homepage and print it twice: first the raw bytes,
# then the UTF-8 decoded text.
# Using the response as a context manager closes the underlying socket
# when done — the original left the connection open (resource leak).
with urllib.request.urlopen('http://www.baidu.com') as response:
    html = response.read()
print(html)                   # raw bytes of the HTML
html = html.decode('utf-8')   # decode bytes -> str
print(html)

# Download a kitten image (placekitten serves a cat photo at the
# requested pixel size) and save it to disk.
import urllib.request

with urllib.request.urlopen("http://placekitten.com/g/500/500") as response:
    cat_img = response.read()

with open('cat_500_500.jpg', 'wb') as f:
    f.write(cat_img)

# urlopen also accepts a Request object instead of a plain URL string.
req = urllib.request.Request("http://placekitten.com/g/1000/1000")
response = urllib.request.urlopen(req)
cat_img = response.read()

with open('cat_1000_1000.jpg', 'wb') as f:
    f.write(cat_img)

url = response.geturl()
print(url)
print(response.info())      # response headers sent by the remote server
print(response.getcode())   # HTTP status code, 200 == OK
response.close()            # release the connection (was leaked before)

#有道翻译
HEADERS
General #审查元素 Network
    Request URL:http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule#实现翻译的机制
    Request Method:POST#请求方法
    Status Code:200 OK #状态码
    Remote Address:220.181.76.84:80 #IP
Response Headers
    Connection:keep-alive
    Content-Encoding:gzip
    Content-Type:application/json; charset=utf-8
    Date:Wed, 16 May 2018 13:51:49 GMT
    Server:nginx
    Transfer-Encoding:chunked
    Vary:Accept-Encoding
Request Headers#浏览器(客户端)
    Accept:application/json, text/javascript, */*; q=0.01
    Accept-Encoding:gzip, deflate
    Accept-Language:zh-CN,zh;q=0.8
    Connection:keep-alive
    Content-Length:209
    Content-Type:application/x-www-form-urlencoded; charset=UTF-8
    Cookie:OUTFOX_SEARCH_USER_ID=811929914@10.169.0.84; JSESSIONID=aaaqdrksg4RUz_vQPnPnw; OUTFOX_SEARCH_USER_ID_NCOO=849759353.5900192; fanyi-ad-id=44545; fanyi-ad-closed=1; ___rl__test__cookies=1526478709532
    Host:fanyi.youdao.com
    Origin:http://fanyi.youdao.com
    Referer:http://fanyi.youdao.com/?keyfrom=fanyi.logo
    User-Agent:Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36 #服务端用来判断非人类访问 屏蔽
    X-Requested-With:XMLHttpRequest
Query String Parameters
    smartresult:dict
    smartresult:rule
Form Data#post提交的主要内容
    i:I like AV
    from:AUTO
    to:AUTO
    smartresult:dict
    client:fanyideskweb
    salt:1526478709534
    sign:3baf4e6690a45a3fe7ed1b10bbdcc21d
    doctype:json
    version:2.1
    keyfrom:fanyi.web
    action:FY_BY_CLICKBUTTION
    typoResult:false
import urllib.request
import urllib.parse
import json

# Interactive loop: read a line of text, POST it to Youdao's translate
# API, and print the translation. Enter 'q' to quit.
while True:
    content = input('请输入需要翻译的内容(q退出):')
    if content == 'q':
        break

    # NOTE: this is the plain `translate` endpoint (no `_o` suffix),
    # which does not validate the salt/sign fields, so the stale
    # hard-coded values below are simply ignored by the server.
    url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

    # Spoof a browser User-Agent so the server does not reject the
    # request as an obvious non-human client.
    head = {
        'User-Agent': ('Mozilla/5.0 (Windows NT 6.1; WOW64) '
                       'AppleWebKit/537.36 (KHTML, like Gecko) '
                       'Chrome/55.0.2883.87 Safari/537.36'),
    }

    # POST form fields as captured from the browser's network inspector.
    data = {
        'i': content,
        'from': 'AUTO',
        'to': 'AUTO',
        'smartresult': 'dict',
        'client': 'fanyideskweb',
        'salt': '1526478709534',
        'sign': '3baf4e6690a45a3fe7ed1b10bbdcc21d',
        'doctype': 'json',
        'version': '2.1',
        'keyfrom': 'fanyi.web',
        'action': 'FY_BY_CLICKBUTTION',
        'typoResult': 'false',
    }
    # Servers expect a URL-encoded byte string, not a Python dict:
    # urlencode -> ASCII query string, then encode to UTF-8 bytes.
    data = urllib.parse.urlencode(data).encode('utf-8')

    # Passing `data` makes this a POST request; `head` supplies the
    # spoofed headers (equivalent to Request.add_header calls).
    req = urllib.request.Request(url, data, head)
    with urllib.request.urlopen(req) as response:
        html = response.read().decode('utf-8')
    # Expected JSON shape:
    # "translateResult":[[{"src":"...","tgt":"..."}]]

    target = json.loads(html)  # parse the JSON payload into Python objects
    try:
        result = target['translateResult'][0][0]['tgt']
    except (KeyError, IndexError):
        # API error responses (e.g. {"errorCode":50}) lack the
        # translateResult key — report instead of crashing the loop.
        print('翻译失败,服务器返回:%s' % html)
        continue
    print('翻译结果:%s' % (result))
阅读更多
文章标签: Python
个人分类: python爬虫
想对作者说点什么? 我来说一句

没有更多推荐了,返回首页

关闭
关闭
关闭