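# NOTE: forum-page residue, not part of the script ("this post was flagged as a suspected violation and collapsed by the system / hide post / view post").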
from bs4 import BeautifulSoup
import requests
import time
import os
def get_html(url, timeout=10):
    """Download ``url`` and return its body decoded as GB2312.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The page text when the server answers with HTTP 200. ``None`` when
        the request raises ``requests.RequestException`` or the server
        answers with any other status code.
    """
    try:
        # Without a timeout a single stalled connection blocks the whole crawl.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException:
        return None
    if response.status_code != 200:
        return None
    # The encoding is forced here instead of being taken from the response.
    response.encoding = 'gb2312'
    print('成功连接!网址为' + url)
    return response.text
def get_url_and_name(url):
    """Collect gallery links and gallery titles from one listing page.

    Args:
        url: Link of the listing (index) page.

    Returns:
        A two-element list ``[links, names]``. ``links`` holds the ``href``
        of each gallery and ``names`` holds the matching ``title`` text, in
        the same order. Both lists are empty when the page cannot be fetched.
    """
    html = get_html(url)
    if html is None:
        # The page could not be downloaded; BeautifulSoup(None) would raise.
        return [[], []]

    soup = BeautifulSoup(html, 'lxml')
    links = []
    names = []
    for position, item in enumerate(soup.find_all(class_='t'), start=1):
        # The 1st and 42nd matches are skipped, exactly as the original
        # counter did. Presumably they are not gallery entries - TODO confirm
        # against the live page layout.
        if position in (1, 42):
            continue
        link_tag = item.find('a')
        title_box = item.find(class_='title')
        title_tag = title_box.find('a') if title_box is not None else None
        if link_tag is None or title_tag is None:
            # Entry does not have the expected markup; skip it instead of
            # failing with AttributeError.
            continue
        links.append(link_tag.get('href'))
        names.append(title_tag.get('title'))
    return [links, names]
def get_pic_url(url):
    """Return the image links found on every page of one gallery.

    Args:
        url: Link of the gallery's first page. Later pages are addressed by
            replacing ``.html`` with ``_<page>.html``.

    Returns:
        A list with the ``src`` of every ``<img>`` inside the ``content``
        element of each page. Empty when the first page cannot be fetched.
    """
    address = []
    first_html = get_html(url)
    if first_html is None:
        return address

    pager = BeautifulSoup(first_html, 'lxml').find(class_='page')
    # NOTE(review): the original loop condition was cut off in the pasted
    # source ("while(temp"), so the intended bound is unknown. The page count
    # is reconstructed here as the largest numeric label among the pager
    # links, falling back to a single page - confirm against the live site.
    labels = [a.get_text(strip=True) for a in pager.find_all('a')] if pager is not None else []
    page_count = max((int(label) for label in labels if label.isdecimal()), default=1)

    for page in range(1, page_count + 1):
        if page == 1:
            # Reuse the page already downloaded above.
            html = first_html
        else:
            html = get_html(url.replace('.html', '_' + str(page) + '.html'))
        if html is None:
            continue
        content = BeautifulSoup(html, 'lxml').find(class_='content')
        if content is None:
            continue
        for img in content.find_all('img'):
            src = img.get('src')
            if src:
                address.append(src)
    return address
def pic_download(url, name, path):
    """Download every image of one gallery into ``<path>/<name>/``.

    Images are saved as ``1.jpg``, ``2.jpg``, ... following their position
    in ``url``.

    Args:
        url: List of image links belonging to one gallery.
        name: Gallery name, used as the folder name.
        path: Directory under which the gallery folder is created.
    """
    target_dir = os.path.join(path, name)
    # makedirs with exist_ok=True tolerates a folder left over from an
    # earlier run; plain mkdir raised an error in that case.
    os.makedirs(target_dir, exist_ok=True)
    print('正在下载的图包名为' + name)
    for index, link in enumerate(url, start=1):
        filename = os.path.join(target_dir, str(index) + '.jpg')
        try:
            # Fetch before opening the file so a failed request does not
            # leave an empty image behind.
            img = requests.get(link, timeout=30).content
        except requests.RequestException:
            print('下载失败,已跳过: ' + str(link))
            continue
        with open(filename, 'wb') as f:
            f.write(img)
        # Pause between images to avoid hammering the server.
        time.sleep(2)
    print(name + '下载完毕!')
def main(i, path=r'N:\pic_download'):
    """Download every gallery linked from listing page ``i``.

    Args:
        i: Number of the listing page on the site (which page to crawl).
        path: Directory that receives one sub-folder per gallery. Defaults
            to the location the script originally hard-coded.
    """
    url = 'https://www.keke234.com/gaoqing/list_5_' + str(i) + '.html'
    pack_urls, pack_names = get_url_and_name(url)
    # Links and names are parallel lists; walk them together.
    for pack_url, pack_name in zip(pack_urls, pack_names):
        address = get_pic_url(pack_url)
        pic_download(address, pack_name, path)
if __name__ == '__main__':
    # range(1, 2) covers only listing page 1; widen it to crawl more pages.
    for i in range(1, 2):
        main(i)