import urllib.request
import os
import re
# Home page of the Kaituo Mingtian site (up72.com)
url = r'https://www.up72.com/'
# Request the page and read it as UTF-8 text; the context manager closes the
# HTTP response as soon as the body has been read.
with urllib.request.urlopen(url) as response:
    html = response.read().decode('utf-8')
# Switch into the output folder. Create it first so that a first run does not
# fail with FileNotFoundError when 'kaituo' does not exist yet.
os.makedirs('kaituo', exist_ok=True)
os.chdir('kaituo')
# Three shapes of image path were observed in the page:
#   /kt-picture/process.gif                  (also png / jpg)
#   kt-picture/kt-case-01.jpg                (also gif / png)
#   /upload/20180427/vTuWkjPI0wRwyduE.jpeg
# One regular expression per shape.
# NOTE(review): pattern02 can also match the tail of a path that pattern01
# already matched (same text minus the leading '/'), so some images may be
# listed twice -- confirm whether that is intended.
pattern01 = re.compile(r'(/kt-picture.*?gif|/kt-picture.*?jpg|/kt-picture.*?png)')
pattern02 = re.compile(r'(kt-picture/kt.*?gif|kt-picture/kt.*?jpg|kt-picture/kt.*?png)')
pattern03 = re.compile(r'(/upload/.*?jpeg|/upload/.*?jpg)')
# Apply the three patterns to get the lists of image paths
result01 = pattern01.findall(html)
result02 = pattern02.findall(html)
result03 = pattern03.findall(html)
def url_photo(list):
    """Download each image path in *list* from www.up72.com.

    Every entry is a site-relative path (with or without a leading ``/``).
    The image is fetched and written, in binary mode, to a file in the
    current working directory named after the last path component.
    Empty entries are skipped because they carry no file name.

    Args:
        list: iterable of path strings. The name shadows the builtin
            ``list``; it is kept so existing callers are unaffected.

    Returns:
        None.

    Raises:
        urllib.error.URLError: propagated from ``urlopen`` when a request
            fails (``HTTPError`` for HTTP error statuses).
        OSError: propagated from ``open`` when the file cannot be written.
    """
    # Built once, outside the loop. The key must be spelled 'User-Agent';
    # a misspelled key is sent as an unrelated header and the browser
    # identification never takes effect.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
    }
    for i in list:
        if not i:
            # Nothing to download and no file name to save under.
            continue
        # Build the absolute image URL, adding the separating '/' when needed
        if i.startswith('/'):
            url_ph = r'https://www.up72.com' + i
        else:
            url_ph = r'https://www.up72.com/' + i
        # Prepare the request with the browser-like headers
        req = urllib.request.Request(url_ph, headers=headers)
        # Send the request; the context manager closes the connection
        with urllib.request.urlopen(req) as response:
            data = response.read()
        # Save under the last path component, e.g. 'process.gif'
        filename = i.split('/')[-1]
        with open(filename, 'wb') as f:
            f.write(data)
# Run the downloader over each list of matched paths, in the same order
for _matched_paths in (result01, result02, result03):
    url_photo(_matched_paths)
# 爬取“开拓明天”图片实例
# 最新推荐文章于 2020-11-28 14:25:38 发布