# A simple crawler for Tencent job postings and requirements across most major
# cities. The site has no real anti-scraping measures, so browser spoofing
# (custom User-Agent headers) is omitted.
import json
import os
import time

import requests
class TxJob():
    """Crawl Tencent career postings for one city/keyword and append them to a text file.

    Uses the public careers.tencent.com JSON API: one "Query" call per result
    page (10 postings each), then one "ByPostId" call per posting for details.
    """

    def __init__(self, page, direction, city):
        """
        :param page: number of result pages to fetch (10 postings per page)
        :param direction: search keyword for the job direction, e.g. 'python'
        :param city: Chinese city name; must be a key of ``self.cityid``
        """
        self.page = page
        self.direction = direction
        self.city = city
        # Mapping from Chinese city name to Tencent's internal cityId
        # query parameter (values observed from the careers site).
        self.cityid = {
            '北京': 2,
            '清远': 2601,
            '深圳': 1,
            '张家口': 2602,
            '广州': 5,
            '上海': 3,
            '成都': 8,
            '武汉': 6,
            '香港': 37,
            '台湾': 46,
            '长沙': 18,
            '重庆': 14,
            '长春': 25,
            '天津': 31,
            '大连': 39,
            '福州': 4,
            '贵阳': 17,
            '哈尔滨': 20,
            '杭州': 7,
            '合肥': 30,
            '呼和浩特': 23,
            '济南': 24,
            '昆明': 16,
            '兰州': 21,
            '南宁': 27,
            '南昌': 19,
            '南京': 11,
            '沈阳': 10,
            '石家庄': 28,
            '太原': 26,
            '乌鲁木齐': 22,
            '西安': 9,
            '西宁': 15,
            '厦门': 29,
            '郑州': 12,
            '青岛': 47,
            '无锡': 48,
            '烟台': 49,
            '苏州': 50,
            '海口': 53,
            '淄博': 54,
            '宁波': 55,
            '银川': 56,
            '扬州': 57,
            '汕尾': 58,
            '顺德': 84,
            '桂林': 86,
            '澳门': 87,
            '珠海': 89,
            '贵安': 91,
            '佛山': 93,
            '保定': 94,
            '雄安新区': 95,
            '南通': 96,
            '拉萨': 97,
        }

    def get_data(self):
        """Fetch every posting on the requested pages and save each one to disk."""
        for index in range(1, self.page + 1):
            # BUG FIX: the original formatted the module-level global
            # ``direction`` here instead of ``self.direction``, so the class
            # only worked when driven by this exact script.  Also use a fresh
            # timestamp, consistent with the detail request below (the
            # original hard-coded a stale one).
            url1 = ('https://careers.tencent.com/tencentcareer/api/post/Query'
                    '?timestamp={}&cityId={}&keyword={}&pageIndex={}'
                    '&pageSize=10&language=zh-cn&area=cn').format(
                str(int(time.time())), self.cityid[self.city],
                self.direction, index)
            resp1 = requests.get(url1)
            data1 = json.loads(resp1.text)
            for post in data1['Data']['Posts']:
                postid = post['PostId']
                url2 = ('https://careers.tencent.com/tencentcareer/api/post/'
                        'ByPostId?timestamp={}&postId={}&language=zh-cn').format(
                    str(int(time.time())), postid)
                resp2 = requests.get(url2)
                data2 = json.loads(resp2.text)
                self.save_data(data2['Data']['RecruitPostName'],
                               data2['Data']['Responsibility'],
                               data2['Data']['Requirement'])

    def save_data(self, RecruitPostName, Responsibility, Requirement):
        """Append one posting (title, responsibilities, requirements) to the output file."""
        try:
            # Create the output directory on first use; the original open()
            # failed (and merely printed the error) whenever it was missing.
            os.makedirs('./tx岗位招聘', exist_ok=True)
            # Explicit UTF-8 so the Chinese text is written correctly on every
            # platform (the default encoding is locale-dependent, e.g. GBK).
            with open('./tx岗位招聘/python岗位.txt', 'a', encoding='utf-8') as f:
                f.write(RecruitPostName)
                f.write('\n')
                f.write('岗位职责:\n')
                f.write(Responsibility)
                f.write('\n\n')
                f.write('任职要求:\n')
                f.write(Requirement)
                f.write('\n\n')
        except OSError as e:
            # Narrowed from ``except Exception``: only filesystem errors are
            # expected here; anything else should surface, not be swallowed.
            print(e)

    def main(self):
        """Entry point: run the crawl with the configured parameters."""
        self.get_data()
if __name__ == '__main__':
    # Gather crawl parameters interactively.  NOTE: these must stay
    # module-level globals with these exact names — the class reads the
    # global ``direction`` from get_data().
    page = int(input('要爬取的页数:'))
    direction = input('要从事什么方向:')
    city = input('工作城市:')
    # Build the crawler and kick it off in one expression.
    TxJob(page, direction, city).main()