# 前程无忧 (51job) scraper: collects job listings (name, company, area, link, requirements) into an Excel sheet.

from urllib.request import Request, urlopen
import bs4
import requests
import re
import json
import xlwt

# One workbook with a single sheet; each scraped job posting becomes one row.
workbook = xlwt.Workbook(encoding='utf-8')
worksheet = workbook.add_sheet('1')

# Row 0 holds the column headers, written in column order.
_HEADER_TITLES = ('序号', '工作名称', '公司名称', '工作地区', '公司属性', '职位要求', '工作链接', '职责要求')
for _col, _title in enumerate(_HEADER_TITLES):
    worksheet.write(0, _col, label=_title)

# Index of the next data row to fill (row 0 is the header row).
z = 1


# Scrape pages 1-5 of the 51job search results for keyword "大数据" (URL-encoded
# in the path) and write one spreadsheet row per job, plus the job-detail text.
for page in range(1, 6):
    url = ('https://search.51job.com/list/080200,000000,0000,00,9,99,%25E5%25A4%25A7%25E6%2595%25B0%25E6%258D%25AE,2,' + str(page)) + '.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare='
    header = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'}
    req = Request(url=url, headers=header)
    html = urlopen(req)
    soup = bs4.BeautifulSoup(html, 'html.parser')

    # The search results are embedded as JSON in a <script> tag assigned to
    # window.__SEARCH_RESULT__; other text/javascript tags are not JSON, so
    # guard on the marker instead of letting json.loads raise on them.
    marker = 'window.__SEARCH_RESULT__ = '
    for script in soup.find_all('script', type="text/javascript"):
        text = script.get_text()
        if marker not in text:
            continue
        payload = json.loads(text.replace(marker, ''))
        jobs = payload["engine_search_result"]

        # NOTE: the original code used range(1, len(jobs)), silently dropping
        # the first job on every page; iterate the full list instead.
        for job in jobs:
            worksheet.write(z, 0, z)                        # 序号 (sequence number)
            worksheet.write(z, 1, job['job_name'])          # 工作名称
            worksheet.write(z, 2, job['company_name'])      # 公司名称
            worksheet.write(z, 3, job['workarea_text'])     # 工作地区
            worksheet.write(z, 4, job['companytype_text'])  # 公司属性
            worksheet.write(z, 5, job['attribute_text'])    # 职位要求
            worksheet.write(z, 6, job['job_href'])          # 工作链接

            # Fetch the detail page and pull the responsibilities text.
            detail_req = Request(job['job_href'], headers=header)
            detail_soup = bs4.BeautifulSoup(urlopen(detail_req), 'html.parser')
            try:
                words = detail_soup.find('div', {"class": 'bmsg job_msg inbox'}).get_text().split()
                worksheet.write(z, 7, label="".join(words))
            except AttributeError:
                # find() returned None — detail page has a different layout;
                # leave the cell effectively blank rather than aborting.
                worksheet.write(z, 7, label=' ')
            z += 1
    # Save after each page so partial results survive a mid-run failure.
    workbook.save('qcwygz.xls')

# (removed: unrelated blog-page boilerplate — comment/payment-widget text captured during web extraction)