A homework assignment

Crawl scenery images and save them to a local folder.

http://pic.fengjing.com/
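
The core of the task is small: fetch a page, collect the <img> src attributes, and write the image bytes to disk. Below is a minimal sketch against the assignment URL. It only assumes the front page exposes <img> tags; the real album structure of pic.fengjing.com needs to be inspected in the browser (and note that the reference code further down actually crawls a different gallery site). The helper name save_front_page_images is introduced here for illustration.

from urllib.parse import urljoin
import os

import requests
from bs4 import BeautifulSoup

def save_front_page_images(base_url='http://pic.fengjing.com/', folder='fengjing'):
    # Grab whatever <img> tags the front page exposes and save them locally.
    os.makedirs(folder, exist_ok=True)
    response = requests.get(base_url, timeout=10)
    response.encoding = response.apparent_encoding  # the site may be GBK/GB2312-encoded
    soup = BeautifulSoup(response.text, 'lxml')
    for index, img in enumerate(soup.find_all('img'), start=1):
        src = img.get('src')
        if not src:
            continue
        img_url = urljoin(base_url, src)            # handle relative src paths
        data = requests.get(img_url, timeout=10).content
        with open(os.path.join(folder, str(index) + '.jpg'), 'wb') as f:
            f.write(data)

if __name__ == '__main__':
    save_front_page_images()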

Reference code

from bs4 import BeautifulSoup
import requests
import time
import os
import threading

def get_html(url):
    "Fetch a page and return its text; the target site is GB2312-encoded."
    try:
        response = requests.get(url)
        response.encoding = 'gb2312'
        if response.status_code == 200:
            print('Connected successfully: ' + url)
            return response.text
    except requests.RequestException:
        return None

def get_url_and_name(url):
    "Takes the list-page URL; returns a 2-element list: element 1 is the album links, element 2 is the album names."
    html = get_html(url)
    soup = BeautifulSoup(html, 'lxml')
    name = []
    url_1 = []
    list2 = soup.find_all(class_='t')
    sign = 1
    for item in list2:
        # The 1st and 42nd 't' blocks on the list page are not albums, so skip them.
        if sign != 1 and sign != 42:
            url_temp = item.find('a').get('href')
            name_temp = item.find(class_='title').find('a').get('title')
            url_1.append(url_temp)
            name.append(name_temp)
        sign = sign + 1
    temp = [url_1, name]
    return temp

def pic_down(url, name, path):
    "url is the album link, name is the album name, path is the save location."
    address = []
    file_folder_name = os.path.join(path, name)
    html1 = get_html(url)
    soup = BeautifulSoup(html1, 'lxml')
    # The pagination bar tells us how many pages the album has.
    list4 = soup.find(class_='page').find_all('a')
    temp = 1
    while temp < len(list4):
        if temp == 1:
            url_3 = url
        else:
            # From page 2 onwards the URL pattern is xxx_2.html, xxx_3.html, ...
            url_3 = url.replace('.html', '_' + str(temp) + '.html')
        temp = temp + 1
        html2 = get_html(url_3)
        soup1 = BeautifulSoup(html2, 'lxml')
        list3 = soup1.find(class_='content').find_all('img')
        for item in list3:
            address.append(item.get('src'))
    # Skip albums that have already been downloaded.
    if os.path.exists(file_folder_name):
        return 0
    os.makedirs(file_folder_name)
    print('Downloading album: ' + name)
    index = 1
    for i1 in address:
        filename = os.path.join(file_folder_name, str(index) + '.jpg')
        with open(filename, 'wb') as f:
            img = requests.get(i1).content
            f.write(img)
        index += 1
        time.sleep(2)
    print(name + ' finished downloading!')

def main(i):
    # Note: this reference code crawls keke234.com rather than the assignment site.
    url = 'https://www.keke234.com/gaoqing/list_5_' + str(i) + '.html'
    path = r'C:\Users\Heisenberg\Desktop\tupian'
    information = get_url_and_name(url)
    print('Albums found on this page:\n', information)
    num = 0
    for item in information[0]:
        # One thread per album; sleep briefly so requests are not all fired at once.
        threading.Thread(target=pic_down, args=(item, information[1][num], path)).start()
        num = num + 1
        time.sleep(2)

if __name__ == '__main__':
    for i in range(2, 5):
        main(i)
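
main() starts one bare thread per album with a fixed 2-second stagger. If you would rather cap how many albums download at once, a bounded thread pool keeps the same flow. This is only a sketch that reuses get_url_and_name and pic_down from the reference code above; main_pooled and max_workers are names introduced here, not part of the original.

from concurrent.futures import ThreadPoolExecutor

def main_pooled(i, max_workers=4):
    # Same flow as main(), but at most max_workers albums download at a time.
    url = 'https://www.keke234.com/gaoqing/list_5_' + str(i) + '.html'
    path = r'C:\Users\Heisenberg\Desktop\tupian'
    urls, names = get_url_and_name(url)
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        for album_url, album_name in zip(urls, names):
            pool.submit(pic_down, album_url, album_name, path)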

 
