# coding=utf-8
import urllib.request
import re
# URL of the page whose .jpg images are downloaded. The name ``html`` is kept
# from the original script even though it holds a URL, not markup.
html = 'https://coding.imooc.com/'

# Protocol-relative image URL: starts at "//" and runs lazily up to the first
# ".jpg". Quotes, whitespace, angle brackets and parentheses are excluded so a
# single match can never span two attributes, e.g.
#   data-original="//host/a.jpg" src="//host/a.jpg"
# A greedy ``.+`` would fuse those into one invalid URL and make urlopen()
# raise "ValueError: unknown url type".
JPG_URL_PATTERN = re.compile(r'''//[^\s"'<>()]+?\.jpg''')


def find_image_urls(page_text, scheme='http:'):
    """Return the absolute .jpg URLs referenced in *page_text*.

    Args:
        page_text: Decoded page source (``str``) to scan.
        scheme: Prefix put in front of every protocol-relative match
            (``//host/path.jpg``) so urlopen() accepts it.

    Returns:
        A list of URLs in order of first appearance. Repeated references to
        the same image are collapsed so each image is fetched only once.
    """
    # dict.fromkeys() de-duplicates while preserving first-seen order.
    unique_paths = dict.fromkeys(JPG_URL_PATTERN.findall(page_text))
    return [scheme + path for path in unique_paths]


def download_images(urls):
    """Save every URL in *urls* as ``<index>.jpg`` in the current directory.

    Files are numbered from 0 in the order the URLs are given.
    """
    for index, url in enumerate(urls):
        with urllib.request.urlopen(url) as response:
            data = response.read()
        # Binary mode is required: the response body is bytes, and writing
        # bytes to a text-mode file raises TypeError.
        with open(str(index) + '.jpg', 'wb') as image_file:
            image_file.write(data)


def main():
    """Fetch the page at ``html`` and download every .jpg it references."""
    with urllib.request.urlopen(html) as response:
        # Decode to str first: a str regex pattern cannot be applied to bytes.
        page_text = response.read().decode('utf-8')
    download_images(find_image_urls(page_text))


if __name__ == '__main__':
    main()