# 【爬虫】大街网爬虫 — Dajie.com job-listing crawler

import requests,csv,time

def save_data(row):
    """Append one row to the output CSV file.

    The file ``大街网职位爬虫.csv`` is opened in append mode, relative to the
    current working directory, and written as GBK. Characters that GBK cannot
    represent are silently dropped (``errors='ignore'``) rather than raising.

    Args:
        row: Iterable of field values written as a single CSV record.
    """
    # newline='' leaves line-ending handling to the csv module.
    # The context manager closes the handle even when writerow() raises.
    with open('大街网职位爬虫.csv', 'a', encoding='GBK', newline='',
              errors='ignore') as f:
        csv.writer(f).writerow(row)

def ParserResponse(response):
    """Extract job rows from one decoded search response and save them.

    Each entry of ``response['data']['list']`` becomes a row
    ``[corpId, pubCity, jobName, compName, salary, industryName, pubEx, pubEdu]``
    that is appended to the CSV through ``save_data`` and echoed with
    ``print``. All entries present are processed, so short or empty pages
    are handled without error.

    Args:
        response: Decoded JSON mapping containing ``data`` -> ``list``, a
            sequence of per-posting mappings.

    Raises:
        KeyError: If ``data``/``list`` or one of the required fields
            (``corpId``, ``jobName``, ``compName``, ``pubCity``, ``salary``)
            is missing.
    """
    for item in response['data']['list']:
        # Required fields: a missing key is a real error and propagates.
        corpId = item['corpId']        # ID field (labelled "job ID" in the original notes)
        jobName = item['jobName']      # posting title
        compName = item['compName']    # company name
        pubCity = item['pubCity']      # city
        salary = item['salary']        # salary

        # Optional fields: fall back to a placeholder when the key is absent.
        pubEdu = item.get('pubEdu', "无学历要求")              # education requirement
        pubEx = item.get('pubEx', "无经验要求")                # experience requirement
        industryName = item.get('industryName', '无行业分类')  # industry

        a = [corpId, pubCity, jobName, compName, salary, industryName, pubEx, pubEdu]
        save_data(a)
        print(a)

def main(city='441900', pages=23):
    """Crawl the job search result pages and store every posting.

    Visits the search landing page once so the session picks up whatever
    cookies the site sets, then requests result pages ``0 .. pages - 1`` from
    the AJAX filter endpoint and hands each decoded JSON body to
    ``ParserResponse``.

    Args:
        city: Value for the ``city`` query parameter. The default ``441900``
            is the code the original script used for its Dongguan crawl;
            the original's alternative nationwide URL left it empty (``''``).
        pages: Number of result pages to request, starting at page 0.
    """
    firstUrl = 'https://so.dajie.com/job/search'
    search_url = (
        'https://so.dajie.com/job/ajax/search/filter?keyword=&order=0'
        '&city={city}&recruitType=&salary=&experience=&page={page}'
        '&positionFunction=&_CSRFToken=&ajax=1'
    )
    timeout = 10  # seconds; without it a stalled connection blocks forever

    # One session for the whole crawl: cookies persist across pages and the
    # connection pool is released when the block exits.
    with requests.session() as session:
        # Actually send the browser User-Agent (the header has to live on the
        # session to take effect).
        session.headers['User-Agent'] = (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.87 Safari/537.36'
        )
        session.get(firstUrl, timeout=timeout)
        session.headers['referer'] = firstUrl

        for page in range(pages):
            url = search_url.format(city=city, page=page)
            response = session.get(url, timeout=timeout).json()
            ParserResponse(response)


# Run the crawl only when executed as a script, not when imported.
if "__main__" == __name__:
    main()



  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值