bs4爬取橘子图片

# -*- coding: utf-8 -*-
"""
Created on Mon Jul  8 15:57:13 2019

@author: HY
"""
import requests
from bs4 import BeautifulSoup
import re
import random
import datetime
from urllib import error
import urllib
import lxml
import os

def mm(url, total_page=12):
    """Collect the ``<img src="...">`` values from the gallery's listing pages.

    Args:
        url: Accepted for backward compatibility but not used; every page URL
            is built from the hard-coded ivsky "juzi" gallery address below.
        total_page: Number of listing pages to fetch, starting at page 1.
            A value of 0 (or less) fetches nothing.

    Returns:
        A list with one entry per fetched page; each entry is the list of
        ``src`` strings matched on that page, in the order they appear.
    """
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36'}
    # Compiled once: the pattern is the same for every page.
    img_src_pattern = re.compile(r'<img src="(.*?)"')

    src_list = []
    for page in range(1, total_page + 1):
        page_url = "https://www.ivsky.com/tupian/juzi_t22164/index_" + str(page) + ".html"
        # Without a timeout a stalled connection would block the crawl forever.
        req = requests.get(url=page_url, headers=header, timeout=10)
        src_list.append(img_src_pattern.findall(req.text))

    return src_list

def download(url,sl):
    """Download ``url`` and save it as ``img/<sl>.jpg`` under the current directory.

    Args:
        url: Address of the resource to fetch.
        sl: File name stem (without extension) for the saved image.

    The ``img`` directory is created if it does not exist yet.
    """
    # Imported here because a bare ``import urllib`` does not guarantee that
    # the ``request`` submodule has been loaded.
    import urllib.request

    os.makedirs('img', exist_ok=True)
    # os.path.join gives a portable separator; the old 'img\%s.jpg' literal
    # only produced a directory path on Windows.
    path = os.path.join('img', '%s.jpg' % sl)  # destination path
    urllib.request.urlretrieve(url, path)      # fetch the image
            
if __name__=='__main__':
    # Entry point: gather the image addresses from every listing page, then
    # download each image exactly once.
    url="https://www.ivsky.com/tupian/juzi_t22164/index_0.html"
    src_list = mm(url)
    for page_no, page_srcs in enumerate(src_list):
        for img_no, src in enumerate(page_srcs):
            # The underscore keeps names unique; plain digit concatenation
            # could map different (page, image) pairs to the same file name.
            sl = '%d_%d' % (page_no, img_no)
            # Only protocol-relative addresses ("//host/...") need a scheme.
            img_url = 'http:' + src if src.startswith('//') else src
            download(img_url, sl)

    print('All Done!')
    
    
    

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值