import os
import re
import urllib
import urllib.request
def get_save(url, picname):
    """Download the resource at *url* and store it as the file *picname*.

    Args:
        url: Address of the resource to fetch; anything accepted by
            ``urllib.request.urlopen``.
        picname: Path of the file to write. The file is opened in binary
            mode and any existing content is overwritten.

    Raises:
        urllib.error.URLError: If the resource cannot be fetched.
        OSError: If *picname* cannot be opened for writing.
    """
    # Context managers close the response and the file even when the read
    # or the write fails part-way through.
    with urllib.request.urlopen(url) as response:
        data = response.read()
    with open(picname, 'wb') as f:
        f.write(data)
# Matches the relative path of a forum attachment JPEG inside a src="..."
# attribute; the path is captured without the surrounding quotes.
_PIC_SRC_PATTERN = re.compile(r'src="(data/attachment/forum/.{20,40}\.jpg)')


def get_all_picurl(url):
    """Return the relative forum-attachment image paths found on a page.

    The page at *url* is fetched, decoded as UTF-8, and searched for
    ``src="data/attachment/forum/....jpg"`` attributes.

    Args:
        url: Address of the HTML page to scan.

    Returns:
        A list of the captured relative image paths, in document order.
        The list is empty when nothing matches.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the page is not valid UTF-8.
    """
    with urllib.request.urlopen(url) as response:
        raw = response.read()
    # Decode in memory instead of round-tripping through a scratch file on
    # disk, which needed a writable hard-coded path and leaked file handles
    # when an error occurred.
    html = raw.decode('utf-8')
    # Reading the scratch file in text mode translated every line ending to
    # '\n'; do the same here so the pattern sees identical text.
    html = html.replace('\r\n', '\n').replace('\r', '\n')
    return _PIC_SRC_PATTERN.findall(html)
# Script entry: download every attachment image referenced by the first page
# of the gallery and save them as 1.jpg, 2.jpg, ... in the target directory.
url_header = 'http://bbs.chinanews.com/'
url_first_page = 'http://bbs.chinanews.com/picview-185-4977929-1.shtml#'
save_dir = 'C:\\Users\\Administrator\\Desktop\\chinanews\\'

# Create the destination up front; otherwise the first save fails with
# FileNotFoundError when the directory does not exist yet.
os.makedirs(save_dir, exist_ok=True)

picurls = get_all_picurl(url_first_page)
# Number the images from 1 so the file names match the progress messages.
for count, a_url in enumerate(picurls, start=1):
    print(f'processing {count}:{a_url}...\n')
    # The scraped paths are relative to the site root.
    url_pic = url_header + a_url
    name_pic = save_dir + str(count) + '.jpg'
    get_save(url_pic, name_pic)
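# Title: python获取某网址下所有图片 (Python: fetch all images from a given URL)
# Page footer: 最新推荐文章于 2023-09-25 15:56:31 发布 (latest recommended article published 2023-09-25 15:56:31)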