#!/usr/bin/env python
# -*- coding:utf-8 -*-
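# Goal: collect the download links of the free resume templates listed at
# http://sc.chinaz.com/jianli/free.html
# 1. Parse the template listing (index) page.
# 2. Get the detail page of every resume template.
# 3. Parse the download link from each detail page.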
import requests
from lxml import etree
if __name__ == '__main__':
    # Script entry point: crawl the listing page of free resume templates,
    # visit every template's detail page, and print the first download link
    # found on each one.

    # Local imports keep this block self-contained.
    import sys
    from urllib.parse import urljoin

    # Seconds to wait on each request; without a timeout a stalled
    # connection would block the script indefinitely.
    timeout = 10
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    url = 'http://sc.chinaz.com/jianli/free.html'

    # Fetch and parse the listing page.
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail fast: without the listing page there is nothing to crawl.
    response.raise_for_status()
    tree = etree.HTML(response.text)

    # Detail-page links of every template entry, in document order.
    href_list = tree.xpath('//div[@class="main_list jl_main"]/div/a/@href')
    for href in href_list:
        # Resolve relative or scheme-less hrefs against the listing URL;
        # absolute URLs pass through unchanged.
        detail_url = urljoin(url, href)
        try:
            detail_response = requests.get(
                url=detail_url, headers=headers, timeout=timeout
            )
            detail_response.raise_for_status()
        except requests.RequestException as exc:
            # One unreachable detail page should not abort the whole crawl.
            print('skipping {}: {}'.format(detail_url, exc), file=sys.stderr)
            continue

        detail_tree = etree.HTML(detail_response.text)
        # href of the first entry in the page's download list; xpath()
        # returns a list, which is empty when nothing matches.
        download_url = detail_tree.xpath(
            '//div[@class="clearfix mt20 downlist"]/ul/li[1]/a/@href'
        )
        print(download_url)
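# python 爬取简历模板的下载地址 (Python: scraping the download addresses of resume templates)
# 最新推荐文章于 2021-08-30 15:38:30 发布 (latest recommended article published 2021-08-30 15:38:30)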