zhaopin

A Zhaopin (智联招聘) job scraper: it reads the province list out of the search page's __INITIAL_STATE__, queries the sou API for "大数据" jobs in each province, then pulls each job's details and appends them to a CSV file.

import requests
from lxml import etree
import re
import json
import csv
import time
header = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36',
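    # NOTE: this cookie was copied from a logged-in browser session; it expires,
    # so replace it with your own before running.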
    "cookie":"urlfrom=121113803; urlfrom2=121113803; adfbid=0; adfbid2=0; x-zp-client-id=8b91112b-10f9-4f3a-9a79-4bd8a65d2cda; sts_deviceid=17aaa2c6e37e5-00a2a5f9c93e51-3d385d08-2073600-17aaa2c6e38e6; sts_sg=1; sts_sid=17aaa2c6e3e2e-01c204e0824154-3d385d08-2073600-17aaa2c6e3f67; sts_chnlsid=121113803; zp_src_url=https%3A%2F%2Fwww.baidu.com%2Fother.php%3Fsc.Kf00000TzIOILY9n8fGUSPnvCjXz91Lzvs6CTU8a9qXQHnXh304oKtchuMdbk0h58txcjCiA43eZogbpDqhsejLaZ4NjTKUk0RIjnVr23jt4iPIloCHYeEkgElqClG0pz9t_jJkpHlyDWNfUn7r2vu-irLpSqAISdpA6QjJ8N3lhSgiycfJs2ZUZcIX1wueINkg6CT08BwndmxiosuLmVe7Zzgos.7Y_NR2Ar5Od669BCXgjRzeASFDZtwhUVHf632MRRt_Q_DNKnLeMX5Dkgbooo3eQr5gKPwmJCRnTxOoKKsTZK4TPHQ_U3bIt7jHzk8sHfGmEukmnTr59l32AM-YG8x6Y_f3lZgKfYt_QCJamJjArZZsqT7jHzs_lTUQqRHArZ5Xq-dKl-muCyrMWYv0.TLFWgv-b5HDkrfK1ThPGujYknHb0THY0IAYqd_xKJ6KdTvNzgLw4TARqn0K9u7qYXgK-5Hn0IvqzujLyktAJ0ZFWIWYk0ZNzU7qGujYkPHcLnHcsPjbz0Addgv-b5HDYnW6LnH630AdxpyfqnH04PHDYPjb0UgwsU7qGujYknWcLnsKsI-qGujYs0A-bm1dcHbc0TA-b5HDv0APGujY1P1D0mLFW5Hf1P1Ts%26ck%3D5303.1.84.394.155.394.155.87%26dt%3D1626352621%26wd%3D%25E6%2599%25BA%25E8%2581%2594%26tpl%3Dtpl_12273_25457_20875%26l%3D1527120492%26us%3DlinkName%253D%2525E6%2525A0%252587%2525E9%2525A2%252598-%2525E4%2525B8%2525BB%2525E6%2525A0%252587%2525E9%2525A2%252598%2526linkText%253D%2525E3%252580%252590%2525E6%252599%2525BA%2525E8%252581%252594%2525E6%25258B%25259B%2525E8%252581%252598%2525E3%252580%252591%2525E5%2525AE%252598%2525E6%252596%2525B9%2525E7%2525BD%252591%2525E7%2525AB%252599%252520%2525E2%252580%252593%252520%2525E5%2525A5%2525BD%2525E5%2525B7%2525A5%2525E4%2525BD%25259C%2525EF%2525BC%25258C%2525E4%2525B8%25258A%2525E6%252599%2525BA%2525E8%252581%252594%2525E6%25258B%25259B%2525E8%252581%252598%2525EF%2525BC%252581%2526linkType%253D; sajssdk_2015_cross_new_user=1; at=7a097eef75d4468993fb77354277e4d6; rt=8232518f51c14b1b9b791b0a0429e77b; acw_tc=276082a816263529833152876e9fce81f07c590bb3f5b105448f1519351320; ZP-ENV-FLAG=gray; sts_evtseq=3; ZP_OLD_FLAG=false; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221103913743%22%2C%22first_id%22%3A%2217aaa2c70f79d-0e47f11c840521-3d385d08-2073600-17aaa2c70f89ba%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E4%BB%98%E8%B4%B9%E5%B9%BF%E5%91%8A%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_utm_source%22%3A%22baidupcpz%22%2C%22%24latest_utm_medium%22%3A%22cpt%22%7D%2C%22%24device_id%22%3A%2217aaa2c70f79d-0e47f11c840521-3d385d08-2073600-17aaa2c70f89ba%22%7D; _uab_collina=162635303430102616818484; Hm_lvt_38ba284938d5eddca645bb5e02a02006=1626353034; Hm_lpvt_38ba284938d5eddca645bb5e02a02006=1626353034; LastCity=%E6%AD%A6%E6%B1%89; LastCity%5Fid=736"
}
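Hard-coding a session cookie means the script silently starts failing once it expires. A minimal sketch of reading it from an environment variable instead (ZHAOPIN_COOKIE is a name assumed here, not anything the site defines):

import os

# override the hard-coded cookie when ZHAOPIN_COOKIE is set in the shell
env_cookie = os.environ.get('ZHAOPIN_COOKIE')
if env_cookie:
    header['cookie'] = env_cookie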
def get_context(number):
    url = "https://fe-api.zhaopin.com/c/i/similar-positions?number=" + number
    detail_url = 'https://jobs.zhaopin.com/' + number + '.htm'
    html = requests.get(url=url, headers=header)
    # print(html.json()['data']['data']['list'])
    companyName = companyNumber = companySize = salary60 = workCity = ""
    education = workingExp = company_property = companyUrl = positionURL = ""
    name = job_number = cityId = cityDistrict = applyType = score = tag = ""
    try:
        # NOTE: the loop overwrites these variables on every pass, so only the
        # last entry in the similar-positions list ends up in the CSV row
        for i in html.json()['data']['data']['list']:
            companyName = i['companyName']  # company name
            companyNumber = i['companyNumber']  # company ID
            companySize = i['companySize']  # company size
            salary60 = i['salary60']  # salary
            workCity = i['workCity']  # city
            education = i['education']  # education requirement
            workingExp = i['workingExp']  # work experience
            company_property = i['property']  # company ownership type ('property' shadows a built-in, hence the rename)
            companyUrl = i['companyUrl']  # company URL
            positionURL = i['positionURL']  # job posting URL
            name = i['name']  # job title
            # welfareLabel = i['welfareLabel']  # benefits (joined into tag below)
            job_number = i['number']  # job number
            cityId = i['cityId']  # city id
            cityDistrict = i['cityDistrict']  # city district
            applyType = i['applyType']  # apply type
            score = i['score']  # score
            tag = []  # benefit tags
            for j in i['welfareLabel']:
                tag.append(j['value'])
            tag = "/".join(tag)
    except (KeyError, TypeError, ValueError):
        pass  # the API returned no usable data; keep the empty defaults

    html = requests.get(url=detail_url, headers=header)
    html_xpath = etree.HTML(html.text)
    # miaosu = re.findall('<div class="describtion__detail-content">(.*?)</div></div><div class="job-address clearfix">', html.text)
    miaosu = html_xpath.xpath('string(//*[@class="describtion__detail-content"])')  # extract all text under the description node
    print("----------------------" + miaosu)
    # time.sleep(1)
    with open('智联招聘_大数据.csv', 'a', newline='', encoding='utf-8') as fp:
        write = csv.writer(fp)
        row = (companyName, name, tag, companyNumber, companySize, salary60, workCity,
               education, workingExp, company_property, companyUrl, positionURL,
               job_number, cityId, cityDistrict, applyType, score, miaosu)
        write.writerow(row)
    print('Writing job data for ----' + workCity + '----------' + name)
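The rows above go into the file with no header, and append mode means repeated runs just keep adding to it. A small sketch that writes a header row the first time the file is created (the column names are labels chosen here to match the order written in get_context, not field names from the API):

import os

if not os.path.exists('智联招聘_大数据.csv'):
    with open('智联招聘_大数据.csv', 'w', newline='', encoding='utf-8') as fp:
        csv.writer(fp).writerow([
            'companyName', 'name', 'tag', 'companyNumber', 'companySize',
            'salary60', 'workCity', 'education', 'workingExp', 'property',
            'companyUrl', 'positionURL', 'number', 'cityId', 'cityDistrict',
            'applyType', 'score', 'description',
        ])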

# Web front-end

def get_url(city):
    key = '大数据'  # search keyword ("big data"); it must stay in Chinese to match listings

    url = 'https://fe-api.zhaopin.com/c/i/sou?pageSize=4000&cityId=' + city + '&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1' \
          '&kw=' + key + '&kt=3&lastUrlQuery=%7B%22pageSize%22:%2260%22,%22jl%22:%22489%22,%22kw%22:%22%E5%A4%A7%E6%95%B0%E6%8D%AE%22,%22kt%22:%223%22%7D'

    html = requests.get(url=url, headers=header)
    try:
        for i in html.json()['data']['results']:
            print("-----------" + i['number'])
            get_context(i['number'])  # scrape this job's details
    except (KeyError, TypeError, ValueError):
        pass  # no results for this city; move on


# Fetch the search page once, pull the province list out of __INITIAL_STATE__,
# and crawl every province.
url = 'https://sou.zhaopin.com/?jl=852&sf=0&st=0&kw=%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90%E5%B8%88&kt=3'
html = requests.get(url=url, headers=header).text
data = re.findall('<script>__INITIAL_STATE__=(.*?)</script>', html)
datas = json.loads(data[0])
try:
    for i in datas["basic"]["dict"]["location"]["province"]:
        get_url(i["code"])
except (KeyError, IndexError, TypeError):
    pass  # page layout changed or the request was blocked; nothing to crawl
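There is no throttling anywhere in the script (the time.sleep(1) in get_context is commented out), so crawling every province in a tight loop is likely to trip rate limiting. A minimal sketch of a delay-plus-retry wrapper; the 1-second delay and 3 attempts are arbitrary choices, not values the API documents:

def get_with_retry(url, retries=3, delay=1.0):
    # try the request a few times, sleeping between attempts
    for attempt in range(retries):
        try:
            resp = requests.get(url=url, headers=header, timeout=10)
            if resp.status_code == 200:
                return resp
        except requests.RequestException:
            pass
        time.sleep(delay)
    return None

get_context and get_url could then call get_with_retry(url) instead of requests.get and skip the job whenever it returns None.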
