简介:爬取“个人简历网”(http://www.gerenjianli.com/moban/index.html)中的简历模板,并保存到本地。
代码:
import requests
from lxml import etree
import os
if __name__ == '__main__':
# Single-page variant (kept commented out below): crawls only one listing page (index.html).
# url = 'http://www.gerenjianli.com/moban/index.html'
#
# headers = {
# 'User-Agent':'这里放自己浏览器的UA就行啦'
# }
# # page_text = requests.get(url=url,headers=headers).text
# response = requests.get(url=url, headers=headers)
# # response.encoding = 'utf-8'
# page_text = response.text
#
# tree = etree.HTML(page_text)
# li_list = tree.xpath('//div[@class="list_boby"]/ul[@class="prlist"]/li')
# # print(li_list)
#
# #创建文件夹
# if not os.path.exists('./resumeLibs'):
# os.mkdir('./resumeLibs')
# for li in li_list:
# a = li.xpath('./div/a/@href')[0]