获取某厂招聘岗位信息

原创已于 2025-02-18 14:37:05 修改 · 1.5w 阅读

0 ·

CC 4.0 BY-SA版权

文章标签：

#爬虫 #pymysql #阿里招聘岗位信息 #大厂招聘岗位

于 2025-02-18 14:31:49 首次发布

今天分享一个爬虫案例：爬取某厂招聘岗位信息数据。通过这个程序可以学习 pymysql 的使用——在 PyCharm 中运行程序获取数据，并将数据导入 MySQL 数据库中。

1 导入必要的包

import requests
import pymysql

2 主体代码


class Baidu(object):
    """Scrape job postings from the Alibaba talent site into a MySQL table.

    The class name is kept for backward compatibility; the endpoint that is
    actually queried is ``https://talent.alibaba.com/position/search``.

    Typical use is ``Baidu().run()``, which creates the target table if
    needed, fetches the result pages one by one and inserts one row per
    posting.
    """

    # Single source of truth for the table name, so that create_table() and
    # save_data() can never target different tables.
    TABLE_NAME = 'ali_quarter_bill'

    # Seconds to wait for the HTTP endpoint before giving up; without a
    # timeout ``requests`` may block forever on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Open the MySQL connection and prepare the HTTP request settings."""
        # NOTE(review): credentials are hard-coded; consider loading them from
        # the environment or a config file before using this outside a demo.
        self.db = pymysql.connect(host="127.0.0.1", user="root", password="88888888", db="test_db")
        self.cursor = self.db.cursor()
        self.url = 'https://talent.alibaba.com/position/search'
        self.headers = {
            # Placeholder: must be replaced with the caller's own cookie.
            'cookie': '自己的cookie',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/547.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/547.36'
        }
        # Sent as a query-string parameter with every request.
        # NOTE(review): presumably this token must match the cookie's
        # session; verify against the site.
        self.params = {
            "_csrf": "09d5fe8f-08a2-4d3c-a43f"
        }

    def get_data(self, page):
        """Request one page of job postings and return the decoded JSON.

        Args:
            page: Page index sent to the server as ``pageIndex``.

        Returns:
            The JSON body of the response, decoded into Python objects.

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx HTTP status.
            ValueError: If the response body is not valid JSON.
        """
        payload = {
            "channel": "group_official_site",
            "language": "zh",
            "batchId": "",
            "categories": "",
            "deptCodes": [],
            "key": "",
            "pageIndex": page,
            "pageSize": 19,
            "regions": "",
            "subCategories": ""
        }
        response = requests.post(
            url=self.url,
            params=self.params,
            headers=self.headers,
            json=payload,
            timeout=self.REQUEST_TIMEOUT,
        )
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        response.raise_for_status()
        return response.json()

    def parse_data(self, response):
        """Extract the postings from a decoded response and store each one.

        Args:
            response: Decoded JSON as returned by :meth:`get_data`; postings
                are read from ``response["content"]["datas"]``.

        A missing or null ``content`` / ``datas`` entry is treated as an
        empty page rather than an error.
        """
        content = response.get("content") or {}
        postings = content.get('datas') or []
        for node in postings:
            # workLocations is a list of names; store it as one
            # comma-separated string. A null/missing list becomes ''.
            workLocations = ','.join(node.get('workLocations') or [])
            name = node['name']
            requirement = node.get('requirement')
            self.save_data(workLocations, name, requirement)

    def create_table(self):
        """Create the target table if it does not exist yet.

        Failures are reported on stdout and do not raise.
        """
        # TABLE_NAME is a class constant, not user input, so formatting it
        # into the statement is safe.
        sql = f'''
                CREATE TABLE IF NOT EXISTS {self.TABLE_NAME}(
                    id int primary key auto_increment not null,
                    workLocations VARCHAR(255) NOT NULL,
                    name VARCHAR(255) NOT NULL,
                    requirement TEXT)
        '''
        try:
            self.cursor.execute(sql)
            print("CREATE TABLE SUCCESS.")
        except Exception as ex:
            print(f"CREATE TABLE FAILED,CASE:{ex}")

    def save_data(self, workLocations, name, requirement):
        """Insert one posting and commit; roll back and report on failure.

        Args:
            workLocations: Comma-separated work locations.
            name: Job title.
            requirement: Free-text job requirements (may be ``None``).
        """
        # Insert into the same table that create_table() creates. The id
        # column is omitted so MySQL assigns the AUTO_INCREMENT value itself;
        # this also works when the NO_AUTO_VALUE_ON_ZERO SQL mode is enabled.
        sql = (
            f'INSERT INTO {self.TABLE_NAME}(workLocations, name, requirement) '
            'values(%s, %s, %s)'
        )
        try:
            # Values are passed separately so the driver escapes them.
            self.cursor.execute(sql, (workLocations, name, requirement))
            self.db.commit()
            print('数据插入成功...')
        except Exception as e:
            # Best effort per row: report, undo the failed statement, go on.
            print(f'数据插入失败: {e}')
            self.db.rollback()

    def run(self):
        """Create the table, scrape pages 1 through 18 and store the results.

        The database connection is closed when the run ends, including when
        a request or parsing step raises.
        """
        try:
            self.create_table()
            for page in range(1, 19):
                self.parse_data(self.get_data(page))
        finally:
            # Always release the connection, even on failure.
            self.db.close()
if __name__ == '__main__':
    # Script entry point: build the scraper (this opens the database
    # connection) and run the whole crawl.
    scraper = Baidu()
    scraper.run()

结果：
在这里插入图片描述