Python 爬取鹅厂(腾讯)招聘职位和要求(包含大多数城市)

简单地爬取了鹅厂(腾讯)在不同城市的招聘岗位和任职要求,覆盖了大多数城市。该接口没有什么反爬措施,所以连伪装浏览器(设置请求头)的步骤都省了。

import json
import os
import time

import requests

class TxJob():
    """Fetch job postings from the careers.tencent.com API and save them.

    For one city and one search keyword, the listing endpoint is queried
    page by page; every posting found is then looked up individually and
    its name, responsibilities and requirements are appended to a text file.

    Args:
        page: Number of listing pages to fetch (pages 1..page, 10 posts each).
        direction: Search keyword sent as the ``keyword`` query parameter.
        city: City name; must be a key of ``cityid``.
    """

    # Listing endpoint (paged search) and detail endpoint (single posting).
    LIST_URL = 'https://careers.tencent.com/tencentcareer/api/post/Query'
    DETAIL_URL = 'https://careers.tencent.com/tencentcareer/api/post/ByPostId'

    # File that save_data() appends to.
    OUTPUT_PATH = './tx岗位招聘/python岗位.txt'

    # Seconds to wait for each HTTP request before giving up, so a stalled
    # connection cannot hang the crawl forever.
    REQUEST_TIMEOUT = 10

    # City name -> numeric value sent as the ``cityId`` query parameter.
    CITY_IDS = {
        '北京': 2,
        '清远': 2601,
        '深圳': 1,
        '张家口': 2602,
        '广州': 5,
        '上海': 3,
        '成都': 8,
        '武汉': 6,
        '香港': 37,
        '台湾': 46,
        '长沙': 18,
        '重庆': 14,
        '长春': 25,
        '天津': 31,
        '大连': 39,
        '福州': 4,
        '贵阳': 17,
        '哈尔滨': 20,
        '杭州': 7,
        '合肥': 30,
        '呼和浩特': 23,
        '济南': 24,
        '昆明': 16,
        '兰州': 21,
        '南宁': 27,
        '南昌': 19,
        '南京': 11,
        '沈阳': 10,
        '石家庄': 28,
        '太原': 26,
        '乌鲁木齐': 22,
        '西安': 9,
        '西宁': 15,
        '厦门': 29,
        '郑州': 12,
        '青岛': 47,
        '无锡': 48,
        '烟台': 49,
        '苏州': 50,
        '海口': 53,
        '淄博': 54,
        '宁波': 55,
        '银川': 56,
        '扬州': 57,
        '汕尾': 58,
        '顺德': 84,
        '桂林': 86,
        '澳门': 87,
        '珠海': 89,
        '贵安': 91,
        '佛山': 93,
        '保定': 94,
        '雄安新区': 95,
        '南通': 96,
        '拉萨': 97,
    }

    def __init__(self, page, direction, city):
        self.page = page
        self.direction = direction
        self.city = city
        # Per-instance copy so callers may still read or extend
        # ``instance.cityid`` without touching the shared class table.
        self.cityid = dict(self.CITY_IDS)

    def _fetch_json(self, url, params):
        """GET *url* with *params* and return the decoded JSON body.

        Raises:
            requests.RequestException: on timeout, connection failure or a
                non-2xx HTTP status.
            ValueError: if the response body is not valid JSON.
        """
        resp = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
        resp.raise_for_status()
        return json.loads(resp.text)

    def get_data(self):
        """Fetch every posting on pages 1..``self.page`` and save each one.

        Raises:
            KeyError: if ``self.city`` is not a key of ``self.cityid``.
        """
        try:
            city_id = self.cityid[self.city]
        except KeyError:
            raise KeyError('unsupported city: {!r}'.format(self.city)) from None

        for index in range(1, self.page + 1):
            # Passing the query via ``params`` lets requests URL-encode the
            # keyword. The keyword comes from this instance rather than from
            # a module-level global.
            listing = self._fetch_json(self.LIST_URL, {
                # Current time in milliseconds, the same magnitude as the
                # fixed value this query used before.
                'timestamp': int(time.time() * 1000),
                'cityId': city_id,
                'keyword': self.direction,
                'pageIndex': index,
                'pageSize': 10,
                'language': 'zh-cn',
                'area': 'cn',
            })
            # Guard against a missing or null ``Data``/``Posts`` so an empty
            # result page is skipped instead of raising TypeError.
            posts = (listing.get('Data') or {}).get('Posts') or []
            for post in posts:
                detail = self._fetch_json(self.DETAIL_URL, {
                    'timestamp': str(int(time.time())),
                    'postId': post['PostId'],
                    'language': 'zh-cn',
                })['Data']
                self.save_data(
                    detail['RecruitPostName'],
                    detail['Responsibility'],
                    detail['Requirement'],
                )

    def save_data(self, RecruitPostName, Responsibility, Requirement):
        """Append one posting to ``OUTPUT_PATH`` as UTF-8 text.

        The output directory is created when missing. File-system errors are
        printed rather than raised, so one failed write does not abort the
        whole crawl. ``None`` fields are written as empty strings.
        """
        def text(value):
            return '' if value is None else str(value)

        # Build the record first and write it in one call, so a failure
        # cannot leave a half-written entry in the file.
        record = ''.join([
            text(RecruitPostName), '\n',
            '岗位职责:\n',
            text(Responsibility), '\n\n',
            '任职要求:\n',
            text(Requirement), '\n\n',
        ])
        try:
            folder = os.path.dirname(self.OUTPUT_PATH)
            if folder:
                os.makedirs(folder, exist_ok=True)
            # Explicit encoding: the platform default may be unable to
            # encode Chinese text.
            with open(self.OUTPUT_PATH, 'a', encoding='utf-8') as f:
                f.write(record)
        except OSError as e:
            print(e)

    def main(self):
        """Run the crawl; thin entry point around get_data()."""
        self.get_data()

if __name__ == '__main__':
    # Script entry point: ask for the crawl settings on stdin, in this order:
    # page count, search keyword, city name. A non-integer page count makes
    # int() raise ValueError.
    page = int(input('要爬取的页数:'))
    direction = input('要从事什么方向:')
    city = input('工作城市:')

    # The answers are kept as module-level names, as before, so that any
    # other code in this file reading them keeps working.
    txjob = TxJob(page=page, direction=direction, city=city)
    txjob.main()


  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值