1.环境:Python 3.6,PyCharm
2.分析URL,得到URL=https://list.jd.com/list.html?cat=9987,653,655&page=1 这种格式
3.观察图片链接:以 img 开头,以 .jpg 结尾
4.代码
import re
import urllib.request
import traceback
import os
# Category listing to crawl; a single page is selected with "&page=<n>".
LIST_URL = "https://list.jd.com/list.html?cat=9987,653,655"
# Root folder that receives one "<n>page" sub-folder per listing page.
IMAGE_ROOT = "F:/Python/img/"
# Listing pages FIRST_PAGE..LAST_PAGE (inclusive) are crawled.
FIRST_PAGE = 1
LAST_PAGE = 19
# Protocol-relative image links: "//" followed by "img" and everything up to
# the nearest ".jpg". Only the part after "//" is captured.
IMAGE_URL_PATTERN = re.compile(r'\/\/(img.*?\.jpg)')


def build_page_url(page, list_url=LIST_URL):
    """Return the URL of listing page number *page*."""
    return list_url + "&page=" + str(page)


def fetch_page(page_url):
    """Download *page_url* and return its body decoded as UTF-8 text.

    Network and decoding errors are not caught here; they propagate to the
    caller and stop the crawl.
    """
    # The context manager closes the HTTP response even if read/decode fails.
    with urllib.request.urlopen(page_url) as response:
        return response.read().decode('utf-8')


def find_image_urls(content):
    """Return every ``//img....jpg`` link found in *content* as an http URL.

    Links are returned in document order and duplicates are kept. An empty
    list is returned when nothing matches.
    """
    return ["http://" + link for link in IMAGE_URL_PATTERN.findall(content)]


def download_images(image_urls, target_dir):
    """Save each URL in *image_urls* to *target_dir* as 1.jpg, 2.jpg, ...

    The directory is created on demand, and only when there is at least one
    URL to download. A failed download prints its traceback and is skipped;
    the file number advances only after a successful download, so the saved
    files are numbered without gaps.

    Returns the number of images saved.
    """
    if not image_urls:
        return 0
    os.makedirs(target_dir, exist_ok=True)
    saved = 0
    for image_url in image_urls:
        file_name = os.path.join(target_dir, str(saved + 1) + ".jpg")
        try:
            urllib.request.urlretrieve(image_url, file_name)
        except Exception:
            # Best effort: report the failure and move on to the next image.
            # KeyboardInterrupt/SystemExit are deliberately not caught so the
            # crawl can still be stopped with Ctrl-C.
            print(traceback.format_exc())
        else:
            saved += 1
    return saved


def main():
    """Crawl every configured listing page and download its images."""
    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        content = fetch_page(build_page_url(page))
        target_dir = os.path.join(IMAGE_ROOT, str(page) + "page")
        download_images(find_image_urls(content), target_dir)


if __name__ == "__main__":
    main()