import requests
from lxml import etree
from multiprocessing import Pool
class imgs():
    """Scrape light-curve images from the Fermi LAT 10-year catalog site.

    Instantiating the class runs the whole pipeline: collect the paging
    URLs from the first catalog page, extract (image URL, source name)
    pairs from each page with a worker pool, then download every image
    in parallel into the current directory.
    """

    @staticmethod
    def download(url, name):
        """Download one image from *url* and save it as '<name>.jpg'.

        Failures (network errors, timeouts, HTTP errors) are reported on
        stdout and swallowed so one bad image does not abort the batch.
        """
        try:
            response = requests.get(url, timeout=10)
            # Without this, an error page (e.g. 404 HTML) would be
            # silently written out as a .jpg file.
            response.raise_for_status()
            with open(name + '.jpg', 'wb') as file:
                file.write(response.content)
            print(name + "下载成功")
        except Exception:
            print(name + "下载失败")

    def __init__(self):
        # Entry page listing links to all the paginated catalog tables.
        self._firsturl = 'https://fermi.gsfc.nasa.gov/ssc/data/access/lat/10yr_catalog/ap_lcs.php'
        # Scrape every page's (image url, name) pairs in parallel; the
        # 'with' block closes/terminates the pool (the original leaked it).
        with Pool(5) as pool1:
            page_url_list = pool1.map(self._get_imgUrlName, self._get_pageUrl())
        # One reusable pool for every download — the original created a
        # fresh Pool(5) per page, paying process-startup cost each time.
        with Pool(5) as pool2:
            for page in page_url_list:
                for img_url, img_name in page:
                    pool2.apply_async(self.download, args=(img_url, img_name))
            pool2.close()
            pool2.join()

    def _get_imgUrlName(self, url):
        """Return a list of (image_url, source_name) tuples scraped from *url*."""
        # timeout added so a stalled server cannot hang a worker forever.
        response = requests.get(url, timeout=10)
        html = etree.HTML(response.text)
        base = "https://fermi.gsfc.nasa.gov/ssc/data/access/lat/10yr_catalog/"
        # First <a> in each table cell is the (relative) image link; the
        # <b> text in the same table is the source name.
        urls = [base + href
                for href in html.xpath('//table[@class="styled-table"]//tr/td/a[1]/@href')]
        names = html.xpath('//table[@class="styled-table"]//tr/td/b/text()')
        urlname = list(zip(urls, names))
        print(url + "获取完毕")
        return urlname

    def _get_pageUrl(self):
        """Return the absolute URLs of all paginated catalog pages."""
        response = requests.get(self._firsturl, timeout=10)
        html = etree.HTML(response.text)
        # Positional XPath — brittle by nature; tied to the page layout
        # as it existed when this scraper was written.
        page_url = ['https://fermi.gsfc.nasa.gov/ssc/data/access/lat/10yr_catalog/' + href
                    for href in html.xpath('/html/body/div[2]/div[4]/div[2]/p[8]/a/@href')]
        return page_url
if __name__ == "__main__":
    # Running this module as a script kicks off the full scrape-and-download
    # pipeline (all work happens inside imgs.__init__).
    imgs()
# 效果图 (result screenshot — caption left over from the original blog post)