Python 爬取 51job 一级职类等数据

一、一级职类、二级职类

代码如下:

# 网站:http://baike.51job.com/zhiwei/all/
# Target page: http://baike.51job.com/zhiwei/all/
# Scrapes first-level and second-level job-category names and writes each
# list to a one-column CSV file.
import requests
from lxml import etree

url = 'http://baike.51job.com/zhiwei/all/'
# timeout: fail fast instead of hanging forever on a stalled connection;
# raise_for_status: don't silently parse an HTTP error page.
req = requests.get(url, timeout=10)
req.raise_for_status()
req.encoding = 'gbk'  # the page is GBK-encoded; needed so req.text decodes correctly
xhtml = etree.HTML(req.content)


"""  Using XPath  """
# First-level job categories
title = xhtml.xpath("/html/body/div[2]/div/div[2]/div[2]/p[@class='s_jname']")
# Explicit encoding: the platform default (e.g. gbk on Chinese Windows) is
# not guaranteed to round-trip every character; utf-8 is deterministic.
with open('first.csv', 'w', encoding='utf-8') as f:
    f.write("一级职类" + "\n")
    for elem in title:  # iterate elements directly, not range(len(...))
        f.write(str(elem.text) + '\n')

# Second-level job categories
node = xhtml.xpath("/html/body/div[2]/div/div[2]/div[2]/div[@class='lts']/a")
with open('second.csv', 'w', encoding='utf-8') as f:
    f.write("二级职类" + "\n")
    for elem in node:
        f.write(str(elem.text) + '\n')


"""  Alternative: extract first-level categories with bs4  """
# from bs4 import BeautifulSoup
# soup = BeautifulSoup(req.text, 'lxml')
# title1 = soup.findAll('p', {'class': 's_jname'})
# with open('xxx.csv', 'w', encoding='utf-8') as fp:
#     for i in title1:
#         fp.write(str(i.text) + '\n')

二、二级职类以及对应的职位信息

代码如下:

# 网站:  http://baike.51job.com/zhiwei/01071/
# Target page: http://baike.51job.com/zhiwei/01071/
# Scrapes the job names, job descriptions, and career-guidance text for one
# second-level job category and prints them to stdout.
import requests
from lxml import etree
from bs4 import BeautifulSoup


url = 'http://baike.51job.com/zhiwei/01071/'
# timeout: fail fast instead of hanging forever on a stalled connection;
# raise_for_status: don't silently parse an HTTP error page.
req = requests.get(url, timeout=10)
req.raise_for_status()
req.encoding = 'gbk'  # the page is GBK-encoded; needed so req.text decodes correctly
soup = BeautifulSoup(req.text, 'lxml')
xhtml = etree.HTML(req.content)

# Job names
job_title = soup.findAll('p', {'class': 'job_name'})
for name in job_title:
    print("岗位名称:", name.text)


# Job descriptions
job_description = soup.findAll('p', {'class': 'j_exp'})
for description in job_description:
    print("岗位介绍:", description.text)


# Career guidance: education/skill/major requirements for each job level
for line in xhtml.xpath('//*[@id="job0"]/p/text()'):
    print(line)


# Career guidance: job duties for each years-of-experience bracket
for line in xhtml.xpath('//*[@id="content1"]/p/text()'):
    print(line)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值