python爬取百度图片_python 爬取百度图片

import requests

from bs4 import BeautifulSoup

import re

import os

import json

from urllib import parse

# Raw "Name: value" header text, one header per line.  It is handed to
# DownBaiDuImg(...) at the bottom of the file, which parses it with
# gen_headers() and stores the result on the instance.
# NOTE(review): these fields (Content-Length, ETag, Server, Age, ...) look like
# HTTP *response* headers copied from an image response rather than request
# headers, and no request in this file sends the parsed result -- confirm
# whether this constant is still needed.
headers='''

Accept-Ranges: bytes

Access-Control-Allow-Origin: *

Age: 570820

Cache-Control: max-age=2628000

Connection: keep-alive

Content-Length: 45163

Content-Type: image/jpeg

Date: Sat, 11 May 2019 06:17:00 GMT

ETag: 3448023fd5dc275ff4088c50d1da7d5f

Expires: Tue, 04 Jun 2019 01:43:20 GMT

Last-Modified: Thu, 01 Jan 1970 00:00:00 GMT

Ohc-Response-Time: 1 0 0 0 0 0

Server: JSP3/2.0.14

'''

class DownBaiDuImg(object):
    """Fetch Baidu image-search result pages for a keyword and save the images.

    Typical use: ``DownBaiDuImg(header_text, keyword).doing(offset)`` for each
    result offset.  Files are written to ``save_dir`` and named with a running
    counter (``1.jpg``, ``2.jpg``, ...).
    """

    # Request headers (raw "Name: value" text) sent with the listing request.
    # NOTE(security): the Cookie line embeds what appears to be a logged-in
    # session (BDUSS).  It is kept verbatim so behaviour is unchanged, but a
    # credential like this should be loaded from configuration, not source.
    listheader='''

Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8

Accept-Encoding: gzip, deflate

Accept-Language: zh-CN,zh;q=0.9,en;q=0.8

Cache-Control: max-age=0

Connection: keep-alive

Cookie: BDIMGISLOGIN=0; winWH=%5E6_1366x631; BDqhfp=%E6%AF%94%E5%9F%BA%E5%B0%BC%26%26-10-1undefined%26%260%26%261; BAIDUID=ED5602028E2013468035151C8C3C3A53:FG=1; BIDUPSID=ED5602028E2013468035151C8C3C3A53; PSTM=1552569672; BDSFRCVID=ZoFOJeC62GC4q3c9ZolNh5mNHGcamB3TH6aoUWSSBZNRGvSy07o7EG0PqU8g0Kub55HBogKK0mOTHv8F_2uxOjjg8UtVJeC6EG0P3J; H_BDCLCKID_SF=tJAq_D0hfIP3fP36q45Mq4tHen6y0fRZ5mAqoq3nJPD5HITLhPvFM5LDX47x5-oL0J7naIQqaM5RVUOtWxTCQnK92H0f25b43bRTQxKy5KJvfJ_Gjf7IhP-UyN3LWh37bJblMKoaMp78jR093JO4y4Ldj4oxJp8eWJQ2QJ8BJI02MDJP; BDUSS=k5MTWt1V2RvRHRBMVBrUVFMeURRY243ZWRMNDEtMkg1Mm94VnNYcVp5cUh5cmxjQVFBQUFBJCQAAAAAAAAAAAEAAAA64oOWs8y36rPYMQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAIc9klyHPZJcW; uploadTime=1557547291054; cleanHistoryStatus=0; BDRCVFR[Tp5-T0kH1pb]=mk3SLVN4HKm; delPer=0; PSINO=1; BDRCVFR[CCf63Vmik7b]=mk3SLVN4HKm; BDRCVFR[dG2JNJb_ajR]=mk3SLVN4HKm; H_PS_PSSID=1441_28939_28981_21126_28519_28775_28723_28963_28836_28585_26350_22157; indexPageSugList=%5B%22%E6%AF%94%E5%9F%BA%E5%B0%BC%22%2C%22%E7%BE%8E%E5%A5%B3%22%5D

Host: image.baidu.com

Upgrade-Insecure-Requests: 1

User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.75 Safari/537.36

'''

    # Listing endpoint; {kw} is the already URL-quoted keyword.
    _SEARCH_URL = (
        'http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj'
        '&ct=201326592&is=&fp=result&queryWord={kw}&cl=2&lm=-1&ie=utf-8'
        '&oe=utf-8&adpicid=&st=-1&z=&ic=&hd=&latest=&copyright=&word={kw}'
        '&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=&fr=&expermode='
        '&force=&cg=girl&pn={pn}&rn={rn}'
    )

    # Download endpoint; {url} and {thumb} are URL-quoted image addresses.
    _DOWNLOAD_URL = (
        'http://image.baidu.com/search/down?tn=download&ipn=dwnl'
        '&word=download&ie=utf8&fr=result&url={url}&thumburl={thumb}'
    )

    # Directory the images are written to (created on demand).
    save_dir = '../images/'

    def __init__(self, header, kw):
        """Create a downloader.

        Args:
            header: raw "Name: value" header text, one header per line.
            kw: search keyword (unquoted); it is URL-quoted here once.
        """
        super().__init__()
        # Attribute name (sic) kept for backward compatibility.  No method of
        # this class reads it.
        self.heades = self.gen_headers(header)
        # Running counter used to name downloaded files.
        self.num = 0
        self.kw = parse.quote(kw)

    def gen_headers(self, s):
        """Parse raw header text into a ``{name: value}`` dict.

        Blank lines and lines without a colon are skipped.  Each line is split
        on the first colon only, so values that themselves contain ``': '``
        are preserved in full.

        Args:
            s: text with one ``Name: value`` pair per line (may be empty).

        Returns:
            dict mapping header names to values, both whitespace-stripped.
        """
        headers = {}
        for line in (s or '').splitlines():
            name, sep, value = line.partition(':')
            name = name.strip()
            if not sep or not name:
                continue
            headers[name] = value.strip()
        return headers

    @staticmethod
    def _guess_ext(url):
        """Return the file extension of *url*'s path, or ``'.jpg'`` if unclear."""
        ext = os.path.splitext(parse.urlparse(url).path)[-1]
        # Reject "extensions" that are really part of a query-like tail,
        # e.g. ".5&fm=26".
        if 1 < len(ext) <= 6 and ext[1:].isalnum():
            return ext
        return '.jpg'

    def downimg(self, url, name):
        """Download *url* and store it as ``save_dir + name``.

        Args:
            url: address to fetch.
            name: file name (no directory part) to write.

        Returns:
            True when the file was written, False on a network/HTTP error or
            a filesystem error.
        """
        try:
            response = requests.get(url, timeout=2)
            # Do not save an HTML error page under an image file name.
            response.raise_for_status()
            os.makedirs(self.save_dir, exist_ok=True)
            with open(os.path.join(self.save_dir, name), 'wb') as f:
                f.write(response.content)
        except (requests.RequestException, OSError):
            return False
        return True

    def doing(self, page, rn=30):
        """Fetch one page of search results and download every image in it.

        Args:
            page: offset of the first result to request (0, 30, 60, ...).
            rn: number of results to request per page.

        Prints '下载成功' / '下载失败' for each image attempted.
        """
        listheader = self.gen_headers(self.listheader)
        # The offset is sent as ``pn`` and the page size as ``rn``; the
        # previous code had them swapped (fixed pn=60, offset in rn), so every
        # call asked for the same window of results.
        search_url = self._SEARCH_URL.format(kw=self.kw, pn=page, rn=rn)
        response = requests.get(search_url, headers=listheader, timeout=10)
        payload = json.loads(response.text)

        for item in payload.get('data') or []:
            # Entries without a thumbnail (e.g. a trailing empty dict) carry
            # nothing to download.
            if not isinstance(item, dict) or 'thumbURL' not in item:
                continue
            thumb = item['thumbURL'] or ''
            hover = item.get('hoverURL') or ''
            imgurl = self._DOWNLOAD_URL.format(
                url=parse.quote(hover), thumb=parse.quote(thumb))

            self.num += 1
            filename = str(self.num) + self._guess_ext(thumb)

            if self.downimg(imgurl, filename):
                print('下载成功')
            else:
                print('下载失败')

# Script driver: build one downloader for the keyword and call doing() with
# the arguments 0, 30, 60, 90 and 120 in turn.
obj = DownBaiDuImg(headers, '绿色护眼壁纸大全')

for offset in range(0, 150, 30):
    obj.doing(offset)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值