直接上代码!!!
这个爬虫还算成功,有没有大佬指导一下?
# -*- coding: utf-8 -*-
import requests,re,os,time
from lxml import etree
# Make sure the download root exists before any crawling starts.
# os.makedirs(..., exist_ok=True) also creates missing parent folders and does
# not fail when the directory is already present, which the previous
# exists()/mkdir() pair could not guarantee (mkdir raises when a parent folder
# is missing or when the folder appears between the check and the call).
os.makedirs('F:/python/7-100个爬虫/2-峰鸟网图', exist_ok=True)
def picture_url():
    """Crawl the forum index page and download the images of every listed thread.

    Fetches ``forum_101.html`` from bbs.fengniao.com, reads the title and link
    of every thread entry, creates one folder per thread below the download
    root and passes the thread page to ``download``.

    Thread titles are cleaned before being used as folder names, and the
    cleaned name is what ``download`` receives, so both sides agree on the
    target folder.

    Raises:
        requests.RequestException: if the index page cannot be fetched
            (including a timeout after 10 seconds).
    """
    from urllib.parse import urljoin  # local import: only needed here

    save_root = 'F:/python/7-100个爬虫/2-峰鸟网图'
    urls = 'http://bbs.fengniao.com/forum/forum_101.html'
    # A timeout keeps the crawler from hanging forever on a stalled connection.
    page_text = requests.get(url=urls, timeout=10).text
    tree = etree.HTML(page_text)
    # Read title and href from the same <a> element so that a link missing one
    # of the two attributes cannot shift the title/link pairing.
    for link in tree.xpath('/html/body/div[8]/ul/li/h3/a'):
        title = link.get('title')
        href = link.get('href')
        if not title or not href:
            continue
        # Titles are free text; replace the characters Windows forbids in file
        # names (and control characters) so creating the folder cannot fail,
        # then drop trailing dots/spaces, which Windows silently strips.
        folder = re.sub(r'[\\/:*?"<>|\x00-\x1f]', '_', title).strip().rstrip(' .')
        folder = folder or 'untitled'
        os.makedirs(f'{save_root}/{folder}', exist_ok=True)
        # urljoin handles both site-relative and already-absolute hrefs.
        download(folder, urljoin(urls, href))
    print('下载完成!!!')
def download(i,url):
    """Download every JPEG linked from one thread page into the thread's folder.

    Args:
        i: Name of the sub-folder (below the download root) that receives the
            images.
        url: Absolute URL of the thread page to scan for image links.

    Images are saved as ``1.jpg``, ``2.jpg``, ... in the order in which their
    links first appear in the page. An image that cannot be fetched is
    reported and skipped, so one broken link does not abort the whole thread.

    Raises:
        requests.RequestException: if the thread page itself cannot be fetched
            (including a timeout after 10 seconds).
    """
    headers = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36'}
    target_dir = f'F:/python/7-100个爬虫/2-峰鸟网图/{i}'
    # Do not rely on the caller having created the folder already.
    os.makedirs(target_dir, exist_ok=True)
    page_text = requests.get(url=url, headers=headers, timeout=10).text
    # Dots are escaped so they only match literal dots, and the character class
    # (no whitespace, quotes or angle brackets; \x27 is the single quote) stops
    # a match from running across unrelated markup to a later ".jpg?".
    found = re.findall(r'(https://bbs\.qn\.img-space\.com[^\s"\x27<>]*?\.jpg)\?', page_text)
    # The same picture can be referenced several times with different query
    # strings; keep the first occurrence of each URL, preserving page order.
    data_url = list(dict.fromkeys(found))
    for a, j in enumerate(data_url, start=1):
        try:
            response = requests.get(url=j, headers=headers, timeout=10)
            # Avoid saving an HTML error page under a .jpg name.
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'{a}.jpg下载失败: {exc}')
        else:
            with open(f'{target_dir}/{a}.jpg','wb') as wj:
                wj.write(response.content)
            print(f'{a}.jpg下载完成!!!')
        # Pause between requests, successful or not, to stay polite to the server.
        time.sleep(0.5)
# Script entry point: start the crawl only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    picture_url()