# -*- coding: utf-8 -*-
import requests
from lxml import etree
import os
# Script entry point: download the listing page, parse it, and prepare the
# local output directory. The body is indented under the guard; without that
# indentation Python rejects the file with an IndentationError.
if __name__ == '__main__':
    # Browser-like User-Agent header sent with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
    }
    url = 'http://sc.chinaz.com/jianli/free.html'

    # A timeout keeps the script from blocking forever on a stalled
    # connection, and raise_for_status() stops on an HTTP error response
    # instead of parsing an error page as if it were the listing.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()
    # Keep requests' own decoding (.text) so the string handed to the parser
    # is produced the same way as before.
    page_text = response.text

    tree = etree.HTML(page_text)
    # Every <div> two levels below the element with id="main".
    div_list = tree.xpath('//div[@id="main"]/div/div')

    # exist_ok=True replaces the separate exists()/mkdir() pair, which could
    # fail if the directory appeared between the check and the creation.
    os.makedirs('./muban', exist_ok=True)

    # Accumulators; nothing in this part of the script fills them
    # (presumably populated by the code that follows -- verify).
    url_list = []
    name_list = []
# Walk the listing entries and fetch the page each entry links to.
# NOTE(review): the statements after the `for` line are its intended body but
# carry no indentation here, so Python rejects the loop as written -- the
# indentation must be restored before this can run.
for div in div_list:
# First href of an <a> directly under this div; the [0] assumes the xpath
# matched at least one link.
muban_href= div.xpath('./a/@href')[0]
#print(muban_href)
# Despite its name, download_url holds the response body (HTML text) of the
# page at muban_href, not a URL.
download_url = requests.get(url=muban_href,headers=headers).text
#print(download_url)
# Parse the fetched HTML so it can be queried with xpath.
download_tree = etree.HTML(download_url)
# NOTE(review): `download` is not defined anywhere in the visible source; this
# line looks cut off (presumably an expression on download_tree) -- TODO
# recover the rest of the statement.
name=download
python爬虫学习(十五)xpath模板下载
最新推荐文章于 2023-08-04 10:44:49 发布