爬取前程无忧网

 这种写法会报错误

"""
目标内容:
职位名称--公司名称--地点--薪资--发布时间
内容处理:
地点只要市,不要区
薪资转成数字类型
"""
import csv
import re
import time

import requests
from lxml import etree

# Characters that delimit the numeric parts of a salary such as "1.5-2万/月".
SALARY_SPLIT = re.compile('[-万/月千]')

# Output file; rows are appended so repeated runs accumulate.
CSV_PATH = '前程无忧.csv'

# Seconds to wait for a page before giving up on it.
REQUEST_TIMEOUT = 10

# The page number is inserted between these two halves of the search URL.
URL_PREFIX = ('https://search.51job.com/list/'
              '010000%252C020000%252C030200%252C040000%252C180200,'
              '000000,0000,00,9,99,python,2,')
URL_SUFFIX = ('.html?lang=c&postchannel=0000&workyear=99&cotype=99'
              '&degreefrom=99&jobterm=99&companysize=99&ord_field=0'
              '&dibiaoid=0&line=&welfare=')


def first_or(values, default):
    """Return the first item of *values*, or *default* when it is empty."""
    return values[0] if values else default


def parse_salary(money):
    """Convert a salary string into a ``(min, max)`` pair of monthly amounts.

    Only ranges (strings containing ``-``) expressed in ``万`` (x10000) or
    ``千`` (x1000) are converted; yearly figures (containing ``年``) are
    floor-divided by 12. Anything else, including a range whose numbers
    cannot be parsed, yields ``(0, 0)``.
    """
    if '-' not in money:
        return 0, 0
    if '万' in money:
        unit = 10000
    elif '千' in money:
        unit = 1000
    else:
        # A range in some other unit (e.g. a daily rate) is not converted.
        return 0, 0

    parts = SALARY_SPLIT.split(money)
    try:
        low = float(parts[0]) * unit
        high = float(parts[1]) * unit
    except ValueError:
        # Malformed range: fall back to "unknown" rather than abort the crawl.
        return 0, 0

    if '年' in money:
        low //= 12
        high //= 12
    return low, high


def parse_job(job):
    """Extract ``(name, company, city, min_salary, max_salary, date)`` strings.

    *job* is one result-row element; every field falls back to a placeholder
    when the corresponding node is missing.
    """
    # Job title
    name = first_or(job.xpath("p/span/a/@title"), '暂不显示')
    # Company name
    company = first_or(job.xpath("span[@class='t2']/a/text()"), '暂不显示')
    # Work location: keep only the part before the first '-' (city, not district)
    address = first_or(job.xpath("span[@class='t3']/text()"), '暂不显示')
    address = address.split('-')[0]
    # Salary
    money = first_or(job.xpath("span[@class='t4']/text()"), '面议')
    min_money, max_money = parse_salary(money)
    # Publication date
    date = first_or(job.xpath("span[@class='t5']/text()"), '暂不显示')
    return name, company, address, str(min_money), str(max_money), date


def main():
    """Fetch result pages 1-100 and append one CSV row per job listing."""
    # csv.writer quotes fields that contain commas, so such names no longer
    # shift the columns. The file is opened once and closed even on error.
    with open(CSV_PATH, 'a', encoding='gb18030', newline='') as file:
        writer = csv.writer(file, lineterminator='\n')
        for page in range(1, 101):
            url = URL_PREFIX + str(page) + URL_SUFFIX
            try:
                response = requests.get(url, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as err:
                # One unreachable page should not abort the whole crawl.
                print('page', page, 'failed:', err)
                continue

            root = etree.HTML(response.content)
            if root is None:
                # Nothing parseable came back for this page.
                continue

            for job in root.xpath("//div[@id='resultList']/div[@class='el']"):
                name, company, address, min_money, max_money, date = parse_job(job)
                print(name, company, address, min_money + '-' + max_money, date)
                writer.writerow([name, company, address, min_money, max_money, date])


if __name__ == '__main__':
    main()

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值