Python crawler: downloading wallpaper images
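The script below crawls 1366x768 landscape wallpapers from desk.zol.com.cn. It walks the category index pages, opens each wallpaper album, follows the 1366x768 link on every picture to the full-size image, and saves the JPEGs into a 壁纸 ("wallpaper") folder named after the album. It uses requests plus BeautifulSoup, rotates User-Agent strings, and sleeps between requests.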

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Author: ss

from bs4 import BeautifulSoup
import requests
import time
import random
import os

# A small pool of real browser User-Agent strings; one is picked at random
# so the crawler's traffic looks less uniform.
my_headers = [
    "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:30.0) Gecko/20100101 Firefox/30.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14",
    "Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; Win64; x64; Trident/6.0)",
]

headers = {'User-Agent': random.choice(my_headers)}
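Downloads fetches a single image URL and writes the raw bytes to disk under 壁纸\<album title>\<n>.jpg (Windows-style paths):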

def Downloads(url, abc, title1):
    data = requests.get(url, headers=headers)
    time.sleep(1)
    # Save as .\壁纸\<title1>\<abc>.jpg; the backslashes must be escaped
    # inside the string literal.
    with open('.\\壁纸\\' + title1 + '\\' + str(abc) + '.jpg', 'wb+') as f:
        f.write(data.content)
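get_image opens the full-size display page, grabs the src of the first <img> tag, and hands it to Downloads: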

def get_image(url, abc, title1):
    # e.g. url = 'http://desk.zol.com.cn/showpic/1366x768_89338_102.html'
    data = requests.get(url, headers=headers)
    soup = BeautifulSoup(data.text, 'lxml')
    # The full-size picture is the first <img> on the page.
    url = soup.select('img')[0].get('src')
    print('Downloading {}, image {}'.format(title1, abc))
    Downloads(url, abc, title1)
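get_image_url opens one picture's page, follows its 1366x768 size link, reads the album title, and makes sure the album's folder exists before downloading: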

def get_image_url(url, abc):
    # e.g. url = 'http://desk.zol.com.cn/bizhi/7254_89744_2.html'
    data = requests.get(url, headers=headers)
    soup = BeautifulSoup(data.text, 'lxml')
    time.sleep(2)
    # Link to the 1366x768 version. An attribute selector is used because a
    # CSS id selector is not allowed to start with a digit.
    url = 'http://desk.zol.com.cn' + soup.select('dd#tagfbl > a[id="1366x768"]')[0].get('href')
    title1 = soup.select('a#titleName')[0].text
    if not os.path.exists('.\\壁纸\\' + title1):
        os.makedirs('.\\壁纸\\' + title1)  # also creates the 壁纸 parent if needed
    get_image(url, abc, title1)
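get_one_urls walks the thumbnail list of a single album, numbering the pictures and feeding each picture page to get_image_url: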

def get_one_urls(url):
    # e.g. url = 'http://desk.zol.com.cn/bizhi/7211_89338_2.html'
    data = requests.get(url, headers=headers)
    soup = BeautifulSoup(data.text, 'lxml')
    urlss = soup.select('div.photo-list-box > ul.clearfix')
    title1 = soup.select('a#titleName')[0].text
    print('Downloading {}'.format(title1))
    abc = 0
    for urls in urlss:
        urls = urls.select('li > a')
        for url in urls:
            try:
                url = 'http://desk.zol.com.cn' + url.get('href')
                time.sleep(1)
                abc += 1
                get_image_url(url, abc)
            except Exception:
                continue
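get_urls collects the album links on one category index page. Note the re-encoding step: requests guesses ISO-8859-1 for this page, so the text is re-encoded and decoded as GB18030 to keep the Chinese titles intact: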

def get_urls(url):
    # e.g. url = 'http://desk.zol.com.cn/fengjing/1366x768/'
    data = requests.get(url, headers=headers)
    soup = BeautifulSoup(data.text.encode('ISO-8859-1').decode('GB18030'), 'lxml')
    urls = soup.select('li.photo-list-padding > a')
    # titles = soup.select('li.photo-list-padding > a.pic > span > em')
    # urls = soup.select('body > div.wrapper.top-main.clearfix > div.main > ul > li > a')[0].get('href')
    for url in urls:
        try:
            time.sleep(1)
            url = 'http://desk.zol.com.cn' + url.get('href')
            get_one_urls(url)
        except Exception:
            continue
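Finally, urls drives the crawl over the first ten index pages of the landscape (fengjing) category; a page that doesn't exist is simply skipped by the try/except: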

def urls():
    for i in range(10):
        try:
            url = 'http://desk.zol.com.cn/fengjing/1366x768/' + str(i) + '.html'
            time.sleep(1)
            get_urls(url)
        except Exception:
            continue


urls()
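The bare try/except blocks make the crawler resilient but also silently swallow every failure. If you want something sturdier, here is a minimal sketch, not part of the original script (make_session and save_image are hypothetical helpers), that adds automatic retries via requests.Session and replaces the Windows-only paths with pathlib:

from pathlib import Path

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


def make_session(user_agent):
    # One shared session: keep-alive connections plus automatic retries
    # with exponential backoff on transient server errors.
    session = requests.Session()
    session.headers['User-Agent'] = user_agent
    retry = Retry(total=3, backoff_factor=1,
                  status_forcelist=[500, 502, 503, 504])
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('http://', adapter)
    session.mount('https://', adapter)
    return session


def save_image(session, url, folder, name):
    # Cross-platform path handling; '壁纸' means "wallpaper".
    target = Path('壁纸') / folder
    target.mkdir(parents=True, exist_ok=True)
    resp = session.get(url, timeout=10)
    resp.raise_for_status()  # fail loudly instead of writing an error page
    (target / '{}.jpg'.format(name)).write_bytes(resp.content)

Here raise_for_status() turns a failed download into a visible exception instead of saving an HTML error page with a .jpg extension.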
