# 站长素材网站图片自动下载教程 (Tutorial: automatically downloading images from the 站长素材 / sc.chinaz.com website)
import os
import urllib.parse
import urllib.request

from lxml import etree
# HTTP headers attached to every listing-page request built by creat_request.
# NOTE(review): the User-Agent value is a placeholder string (it reads
# "put your own UA here"); presumably it must be replaced with a real browser
# User-Agent before running -- confirm.
headers = {'User-Agent': '#放入自己的ua'}
def creat_request(page):
    """Build the request object for one listing page.

    Page 1 uses the bare ``siwameinvtupian.html`` URL; any other page number
    is inserted into the ``siwameinvtupian_<page>.html`` pattern.

    Args:
        page: Page number of the listing to request.

    Returns:
        A ``urllib.request.Request`` for the page URL, carrying the
        module-level ``headers``.
    """
    first_page_url = 'http://sc.chinaz.com/tupian/siwameinvtupian.html'
    numbered_page_url = 'http://sc.chinaz.com/tupian/siwameinvtupian_{}.html'
    target = first_page_url if page == 1 else numbered_page_url.format(str(page))
    return urllib.request.Request(url=target, headers=headers)
def get_content(request):
    """Fetch a page and return its body decoded as UTF-8 text.

    Args:
        request: A URL string or ``urllib.request.Request`` object, passed
            straight to ``urllib.request.urlopen``.

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If ``urlopen`` cannot complete the request.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response (and its underlying connection)
    # even when read() or decode() raises; the original never closed it.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def downlod(page, content):
    """Download every image referenced on one listing page into ``./imag/``.

    Image titles and image URLs are extracted with two separate XPath
    queries and paired up by position; each image is saved as
    ``./imag/<title>.jpg``.

    Args:
        page: Page number being processed. Not used by the body; kept so
            existing callers keep working.
        content: HTML source of the listing page.
    """
    tree = etree.HTML(content)
    name_list = tree.xpath('/html/body/div[3]/div/div/div/a/text()')
    downlod_url = tree.xpath('/html/body/div[3]/div[2]/div/img/@data-original')

    # urlretrieve does not create missing directories, so make sure the
    # target folder exists before the first download.
    os.makedirs('./imag', exist_ok=True)

    # zip() stops at the shorter list, so a page where the two XPath queries
    # match a different number of nodes no longer raises IndexError.
    for name, src in zip(name_list, downlod_url):
        # NOTE(review): the 'http:' prefix presumably exists because the
        # data-original values are protocol-relative ("//host/path") -- confirm.
        url = 'http:' + src
        urllib.request.urlretrieve(url=url, filename='./imag/' + name + '.jpg')
if __name__ == '__main__':
    # Ask for the inclusive page range to crawl (prompts: "start page" and
    # "end page").
    start_page = int(input("请输入起始页:"))
    end_page = int(input("请输入结束页:"))

    # For each page: build the request, fetch the HTML, download its images.
    for page in range(start_page, end_page + 1):
        downlod(page, get_content(creat_request(page)))