本文出处:http://blog.csdn.net/qq_27512671/article/details/78022625
效果图
都让让都让让,老司机先来一发效果图,源码在文章最下方。
实现思路分为三步走:
1. 获取网页源数据
2. 解析网页源数据,获得所有的图片地址列表
3. 遍历列表,并将图片保存到本地
实现步骤
获取网页数据
def gethemltext(url, timeout=10):
    """Download a page and return its body as decoded text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as ``str``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    # Decode with the encoding guessed from the body rather than the one
    # declared in the response headers.
    r.encoding = r.apparent_encoding
    return r.text
解析网页源数据,获得所有的图片地址列表
def getImageList(html, lst):
    """Collect the ``src`` attribute of every ``<img>`` tag in ``html``.

    Args:
        html: HTML markup to parse.
        lst: List that the found ``src`` values are appended to, in the order
            the tags are returned by the parser. It is modified in place;
            nothing is returned.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for img in soup.find_all('img'):
        # Tags without the attribute are skipped explicitly instead of relying
        # on a bare ``except`` that would also hide unrelated errors.
        if 'src' in img.attrs:
            lst.append(img.attrs['src'])
遍历列表,并将图片保存到本地
# Download every collected image. ``list``, ``root`` and ``tmp`` are expected
# to be defined by the surrounding code before this loop runs.
for src in list:
    try:
        print(root + src)
        urllib.request.urlretrieve(root + src, r'D:\pythonPath\%s.jpg' % tmp)
        # The counter only advances after a successful download.
        tmp = tmp + 1
        print('成功')
    except Exception as exc:
        # Catch ordinary failures only: a bare ``except`` would also swallow
        # KeyboardInterrupt and make the loop impossible to stop with Ctrl-C.
        print('失败', exc)
print('下载完毕')
实现案例
获取全景网首页所有图片数据
import os
import re
import urllib
import uuid
import requests
from bs4 import BeautifulSoup
from requests import request
# Base URL of the target site (quanjing.com).
urlPath = 'http://www.quanjing.com/'
# Windows directory intended to hold the downloaded images.
localPath = 'd:\\pythonPath'
def gethemltext(url, timeout=10):
    """Download a page and return its body as decoded text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as ``str``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    # Decode with the encoding guessed from the body rather than the one
    # declared in the response headers.
    r.encoding = r.apparent_encoding
    return r.text
def getImageList(html, lst):
    """Collect the ``src`` attribute of every ``<img>`` tag in ``html``.

    Args:
        html: HTML markup to parse.
        lst: List that the found ``src`` values are appended to, in the order
            the tags are returned by the parser. It is modified in place;
            nothing is returned.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for img in soup.find_all('img'):
        # Tags without the attribute are skipped explicitly instead of relying
        # on a bare ``except`` that would also hide unrelated errors.
        if 'src' in img.attrs:
            lst.append(img.attrs['src'])
def start():
    """Download every image referenced on the quanjing.com home page.

    Fetches the page, collects the ``<img>`` addresses through
    ``getImageList`` and saves each image into ``localPath`` as ``0.jpg``,
    ``1.jpg``, ... The counter only advances after a successful download.
    Progress is reported on stdout; a failed download is reported and skipped.
    """
    # Imported explicitly: ``import urllib`` alone does not guarantee that the
    # ``request`` and ``parse`` submodules are loaded.
    import urllib.parse
    import urllib.request

    root = "http://www.quanjing.com/"
    html = gethemltext("http://www.quanjing.com/?audience=151316")
    srcs = []  # named ``srcs`` to avoid shadowing the built-in ``list``
    getImageList(html, srcs)

    # urlretrieve does not create missing directories, so make sure the
    # target exists before the first download.
    os.makedirs(localPath, exist_ok=True)

    tmp = 0
    for src in srcs:
        try:
            # urljoin handles relative, root-relative and absolute ``src``
            # values; plain concatenation yields malformed URLs for the
            # latter two.
            url = urllib.parse.urljoin(root, src)
            print(url)
            urllib.request.urlretrieve(url, os.path.join(localPath, '%s.jpg' % tmp))
            tmp = tmp + 1
            print('成功')
        except Exception as exc:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C still
            # interrupts the loop; the cause is printed for diagnosis.
            print('失败', exc)
    print('下载完毕')
# Start fetching.
start()
获取斗鱼神秘主播间头像
import os
import re
import urllib
import uuid
import requests
from bs4 import BeautifulSoup
from requests import request
# NOTE(review): this points at quanjing.com although this script scrapes
# douyu.com; no code visible here reads it.
urlPath = 'http://www.quanjing.com/'
# Windows directory intended to hold the downloaded images.
localPath = 'd:\\pythonPath'
def gethemltext(url, timeout=10):
    """Download a page and return its body as decoded text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely on a stalled
            connection.

    Returns:
        The response body as ``str``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    # Decode with the encoding guessed from the body rather than the one
    # declared in the response headers.
    r.encoding = r.apparent_encoding
    return r.text
def getImageList(html, lst):
    """Collect the ``data-original`` attribute of every ``<img>`` tag.

    Args:
        html: HTML markup to parse.
        lst: List that the found ``data-original`` values are appended to, in
            the order the tags are returned by the parser. It is modified in
            place; nothing is returned.
    """
    soup = BeautifulSoup(html, 'html.parser')
    for img in soup.find_all('img'):
        # Tags without the attribute are skipped explicitly instead of relying
        # on a bare ``except`` that would also hide unrelated errors.
        if 'data-original' in img.attrs:
            lst.append(img.attrs['data-original'])
def start():
    """Download the images listed on the douyu.com "yz" directory page.

    Fetches the page, collects the image addresses through ``getImageList``
    and saves each image into ``localPath`` as ``0.jpg``, ``1.jpg``, ... The
    counter only advances after a successful download. Progress is reported
    on stdout; a failed download is reported and skipped.
    """
    # Imported explicitly: ``import urllib`` alone does not guarantee that the
    # ``request`` and ``parse`` submodules are loaded.
    import urllib.parse
    import urllib.request

    page_url = "https://www.douyu.com/directory/game/yz"
    html = gethemltext(page_url)
    srcs = []  # named ``srcs`` to avoid shadowing the built-in ``list``
    getImageList(html, srcs)

    # urlretrieve does not create missing directories, so make sure the
    # target exists before the first download.
    os.makedirs(localPath, exist_ok=True)

    tmp = 0
    for src in srcs:
        try:
            # Resolve against the page that was actually fetched, so the
            # printed address is the one downloaded. Absolute addresses pass
            # through unchanged; protocol-relative and relative ones are
            # completed.
            url = urllib.parse.urljoin(page_url, src)
            print(url)
            urllib.request.urlretrieve(url, os.path.join(localPath, '%s.jpg' % tmp))
            tmp = tmp + 1
            print('成功')
        except Exception as exc:
            # ``Exception`` rather than a bare ``except`` so Ctrl-C still
            # interrupts the loop; the cause is printed for diagnosis.
            print('失败', exc)
    print('下载完毕')
# Start fetching.
start()