Python 爬取 51job 一级职类等数据

一、一级职类、二级职类

代码如下:

# 网站:http://baike.51job.com/zhiwei/all/
# Target page: http://baike.51job.com/zhiwei/all/
# Scrapes first-level and second-level job-category names and writes each
# list to a one-column CSV file.
import requests
from lxml import etree

url = 'http://baike.51job.com/zhiwei/all/'
# timeout: fail fast instead of hanging forever on a stalled connection;
# raise_for_status: don't silently parse an HTTP error page.
req = requests.get(url, timeout=10)
req.raise_for_status()
req.encoding = 'gbk'  # the page is GBK-encoded; needed so req.text decodes correctly
xhtml = etree.HTML(req.content)


"""  Using XPath  """
# First-level job categories
title = xhtml.xpath("/html/body/div[2]/div/div[2]/div[2]/p[@class='s_jname']")
# Explicit encoding: the platform default (e.g. gbk on Chinese Windows) is
# not guaranteed to round-trip every character; utf-8 is deterministic.
with open('first.csv', 'w', encoding='utf-8') as f:
    f.write("一级职类" + "\n")
    for elem in title:  # iterate elements directly, not range(len(...))
        f.write(str(elem.text) + '\n')

# Second-level job categories
node = xhtml.xpath("/html/body/div[2]/div/div[2]/div[2]/div[@class='lts']/a")
with open('second.csv', 'w', encoding='utf-8') as f:
    f.write("二级职类" + "\n")
    for elem in node:
        f.write(str(elem.text) + '\n')


"""  Alternative: extract first-level categories with bs4  """
# from bs4 import BeautifulSoup
# soup = BeautifulSoup(req.text, 'lxml')
# title1 = soup.findAll('p', {'class': 's_jname'})
# with open('xxx.csv', 'w', encoding='utf-8') as fp:
#     for i in title1:
#         fp.write(str(i.text) + '\n')

二、二级职类以及对应的职位信息

代码如下:

# 网站:  http://baike.51job.com/zhiwei/01071/
# Target page: http://baike.51job.com/zhiwei/01071/
# Scrapes the job names, job descriptions, and career-guidance text for one
# second-level job category and prints them to stdout.
import requests
from lxml import etree
from bs4 import BeautifulSoup


url = 'http://baike.51job.com/zhiwei/01071/'
# timeout: fail fast instead of hanging forever on a stalled connection;
# raise_for_status: don't silently parse an HTTP error page.
req = requests.get(url, timeout=10)
req.raise_for_status()
req.encoding = 'gbk'  # the page is GBK-encoded; needed so req.text decodes correctly
soup = BeautifulSoup(req.text, 'lxml')
xhtml = etree.HTML(req.content)

# Job names
job_title = soup.findAll('p', {'class': 'job_name'})
for name in job_title:
    print("岗位名称:", name.text)


# Job descriptions
job_description = soup.findAll('p', {'class': 'j_exp'})
for description in job_description:
    print("岗位介绍:", description.text)


# Career guidance: education/skill/major requirements for each job level
for line in xhtml.xpath('//*[@id="job0"]/p/text()'):
    print(line)


# Career guidance: job duties for each years-of-experience bracket
for line in xhtml.xpath('//*[@id="content1"]/p/text()'):
    print(line)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值