python爬取妹子图片2_利用爬虫爬取清纯妹子图片

weixin_39640090

于 2020-12-07 23:55:00 发布

阅读量58

点赞数

文章标签： python爬取妹子图片2

该楼层疑似违规已被系统折叠隐藏此楼查看此楼

from bs4 import BeautifulSoup

import requests

import time

import os

def get_html(url):
    """Download a page and return its text decoded as GB2312.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text when the server answers with HTTP 200.
        ``None`` when the request fails (connection error, timeout, ...)
        or the status code is anything other than 200.
    """
    try:
        # Without a timeout a single stalled connection would block the
        # whole crawl; a timeout is reported as a RequestException.
        response = requests.get(url, timeout=10)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    # Force the decoding used for ``response.text`` instead of relying on
    # the encoding guessed from the response headers.
    response.encoding = 'gb2312'
    print('成功连接！网址为' + url)
    return response.text

def get_url_and_name(url):
    """Collect gallery links and gallery titles from one listing page.

    Args:
        url: Address of the listing (index) page.

    Returns:
        A two-element list ``[links, names]``: ``links`` holds the ``href``
        of each gallery and ``names`` the matching ``title`` attribute, in
        page order. Both lists are empty when the page cannot be downloaded.
    """
    html = get_html(url)
    if html is None:
        # get_html reports failure with None; feeding that to BeautifulSoup
        # would raise, so report "no galleries" instead.
        return [[], []]

    soup = BeautifulSoup(html, 'lxml')
    links = []
    names = []
    for position, item in enumerate(soup.find_all(class_='t'), start=1):
        # The 1st and 42nd class="t" elements are skipped.
        # NOTE(review): presumably these are not gallery entries on this
        # site's layout -- confirm against the live markup.
        if position in (1, 42):
            continue
        link = item.find('a').get('href')
        title = item.find(class_='title').find('a').get('title')
        links.append(link)
        names.append(title)
    return [links, names]

def get_pic_url(url):
    """Return the image URLs found on every page of one gallery.

    Args:
        url: Address of the gallery's first page (ends in ``.html``).

    Returns:
        A list with the ``src`` of every ``<img>`` inside the
        ``class="content"`` element of each gallery page, in page order.
        The list is empty when the first page cannot be downloaded.
    """
    first_html = get_html(url)
    if first_html is None:
        return []

    first_soup = BeautifulSoup(first_html, 'lxml')
    pager = first_soup.find(class_='page')
    page_links = pager.find_all('a') if pager is not None else []

    # NOTE(review): the original loop condition was truncated in this copy
    # of the file (it reads just ``while(temp``). Using the number of
    # pagination links as an upper bound is a reconstruction -- confirm it
    # against the site's markup. Pages that fail to load end the loop, so
    # an over-estimate only costs one extra request.
    max_pages = max(len(page_links), 1)

    address = []
    for page in range(1, max_pages + 1):
        if page == 1:
            # The first page is already downloaded; do not fetch it twice.
            page_soup = first_soup
        else:
            # Follow-up pages are named <gallery>_<n>.html.
            page_url = url.replace('.html', '_' + str(page) + '.html')
            page_html = get_html(page_url)
            if page_html is None:
                break
            page_soup = BeautifulSoup(page_html, 'lxml')

        content = page_soup.find(class_='content')
        if content is None:
            break
        for img in content.find_all('img'):
            src = img.get('src')
            # Skip <img> tags without a src; there is nothing to download.
            if src:
                address.append(src)
    return address

def pic_download(url, name, path):
    """Download every image of one gallery into ``<path>/<name>/``.

    Images are saved as ``1.jpg``, ``2.jpg``, ... in the order given.

    Args:
        url: List of image URLs belonging to one gallery.
        name: Gallery name; used as the name of the target sub-folder.
        path: Directory under which the gallery folder is created.

    Raises:
        requests.RequestException: If an image request fails.
        OSError: If the folder or a file cannot be created.
    """
    target_dir = os.path.join(path, name)
    # makedirs(exist_ok=True) lets the crawl be re-run without failing on
    # folders created by an earlier run (plain mkdir raises in that case).
    os.makedirs(target_dir, exist_ok=True)
    print('正在下载的图包名为' + name)

    for index, image_url in enumerate(url, start=1):
        filename = os.path.join(target_dir, str(index) + '.jpg')
        # Download first, then open the file, so a failed request does not
        # leave an empty .jpg behind.
        img = requests.get(image_url, timeout=30).content
        with open(filename, 'wb') as f:
            f.write(img)
        # Pause between images to avoid hammering the server.
        time.sleep(2)

    print(name + '下载完毕!')

def main(i, path=r'N:\pic_download'):
    """Download every gallery linked from one listing page of the site.

    Args:
        i: Number of the listing page to crawl (which page of the index).
        path: Directory that receives one sub-folder per gallery. Defaults
            to the directory that was previously hard-coded here.
    """
    url = 'https://www.keke234.com/gaoqing/list_5_' + str(i) + '.html'
    information = get_url_and_name(url)
    gallery_links, gallery_names = information[0], information[1]
    # Links and names are parallel lists; walk them together.
    for gallery_link, gallery_name in zip(gallery_links, gallery_names):
        address = get_pic_url(gallery_link)
        pic_download(address, gallery_name, path)

if __name__ == '__main__':
    # Crawl listing page 1 only; widen the range to fetch more pages.
    for page_number in range(1, 2):
        main(page_number)

weixin_39640090

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。