ucun
2018-05-21 21:34:40 +08:00
@soho176 #1 urlretrieve 下载图片坑多。图片模糊、打不开等等
```python
#!/usr/bin/env python
# -*- coding:utf-8 -*-
from urllib.request import Request,urlopen,urlretrieve
from urllib.error import HTTPError
import re
import os
def getHTMLText(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    The request is sent with a browser-like ``User-Agent`` header.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``None`` when the server answers with an
        HTTP error status (the status code is printed in that case).

    Raises:
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"}
    # Use the names imported at the top of the file: only ``Request`` and
    # ``urlopen`` are bound there, the bare ``urllib`` package name is not.
    req = Request(url=url, headers=headers)
    try:
        with urlopen(req) as f:
            return f.read().decode('utf-8')
    except HTTPError as e:
        print('Error code:', e.code)
        return None
def getURLList(html):
    """Extract image URLs from *html*.

    A URL is any run starting with ``http:`` or ``https:`` and ending in
    ``.jpg``, ``.gif`` or ``.png``.

    Args:
        html: Page text to scan. ``None`` or an empty string yields an
            empty list.

    Returns:
        A list of the matched URL strings with duplicates removed, in order
        of first appearance.
    """
    if not html:
        return []
    # No leading space before the scheme: URLs inside attributes such as
    # src="http://..." must match, and the match must not start with a blank.
    regex = r"https?:[/|.\w\s-]*\.(?:jpg|gif|png)"
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(m.group() for m in re.finditer(regex, html)))
def download(lst,filepath='img'):
    """Download every URL in *lst* into the directory *filepath*.

    Each file is saved under the last ``/``-separated component of its URL,
    so two URLs sharing that component overwrite each other.

    Args:
        lst: Iterable of URL strings to fetch.
        filepath: Target directory; created if it does not exist.

    Raises:
        urllib.error.URLError: If a request fails (this includes
            ``HTTPError``). The error propagates and stops the batch.
        OSError: If the directory or a file cannot be written.
    """
    import shutil

    os.makedirs(filepath, exist_ok=True)
    # Attach the header to each request instead of installing a process-wide
    # opener on every iteration.
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36"}
    for url in lst:
        filename = os.path.join(filepath, url.split('/')[-1])
        req = Request(url=url, headers=headers)
        # Stream the body to disk in chunks rather than holding it in memory.
        with urlopen(req) as resp, open(filename, 'wb') as out:
            shutil.copyfileobj(resp, out)
if __name__ == '__main__':
    # Script entry point: ask for a page URL and a target directory, then
    # download every image URL found in that page.
    url = input('please input the image url:').strip()
    # An empty answer falls back to the same default directory download() uses.
    filepath = input('please input the download path:').strip() or 'img'
    html = getHTMLText(url)
    if html is None:
        # getHTMLText has already printed the HTTP error code; nothing to scan.
        raise SystemExit(1)
    lst = getURLList(html)
    download(lst, filepath)
```