#!/usr/bin/env python3
# -*- coding: utf-8 -*-
__author__ = 'xxx'
import requests
import re
import json
# Endpoint templates; ``{}`` receives the page index / the post id.
_LIST_URL = 'https://careers.tencent.com/tencentcareer/api/post/Query?timestamp=1571759757407&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=&pageIndex={}&pageSize=10&language=zh-cn&area=cn'
_DETAIL_URL = 'https://careers.tencent.com/tencentcareer/api/post/ByPostId?timestamp=1571760048441&postId={}&language=zh-cn'

# File that every scraped record is appended to.
_OUTPUT_PATH = 'feizhai3.json'

# Seconds to wait on each HTTP request; without a timeout a stalled
# connection would block the whole crawl indefinitely.
_REQUEST_TIMEOUT = 10

# Patterns are applied to the raw response text, compiled once for reuse.
_POST_ID_RE = re.compile(r'"PostId":"(\d+)","RecruitPostId"', re.S)
_POSITION_RE = re.compile(r'"RecruitPostName":"(.*?)","LocationId"', re.S)
_DETAIL_RE = re.compile(
    r'"Responsibility":"(.*?)","Requirement":"(.*?)","LastUpdateTime"', re.S)


def _clean_text(raw):
    r"""Remove escape residue from a field captured out of the raw response.

    The field is matched against the undecoded response body, so line breaks
    show up as the two-character sequences ``\n`` and ``\r``; those are
    dropped first, then any remaining backslashes are removed.
    """
    return raw.replace('\\n', '').replace('\\r', '').replace('\\', '')


def _fetch_text(url):
    """GET ``url`` and return the response body decoded as UTF-8."""
    return requests.get(url, timeout=_REQUEST_TIMEOUT).content.decode('utf-8')


def handle(page):
    """Scrape one listing page and append every posting on it to the output file.

    Fetches the listing for ``page``, extracts each post id, fetches the
    detail document for that id, prints the extracted fields and appends
    them as one JSON object to ``feizhai3.json``.

    Args:
        page: Page index substituted into the listing URL.

    Returns:
        None. Results are printed and appended to the output file.

    Raises:
        requests.exceptions.RequestException: if a request fails or exceeds
            the timeout (propagated from ``requests.get``).
    """
    listing = _fetch_text(_LIST_URL.format(page))

    for post_id in _POST_ID_RE.findall(listing):
        detail = _fetch_text(_DETAIL_URL.format(post_id))

        # Kept as the list returned by findall so the stored value has the
        # same shape as before.
        position = _POSITION_RE.findall(detail)

        # With two groups, findall yields (responsibility, requirement)
        # tuples. Read the fields directly instead of splitting the list's
        # repr on commas, which truncated text and left repr punctuation.
        matches = _DETAIL_RE.findall(detail)
        if matches:
            responsibility, requirement = (
                _clean_text(field) for field in matches[0])
        else:
            # No match in this document: store empty fields, not '[]'.
            responsibility = requirement = ''

        print(position, responsibility, requirement)
        data = {
            '职责': position,
            '义务': responsibility,
            '需求': requirement,
        }
        with open(_OUTPUT_PATH, 'a', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=4)
            # Separate consecutive records so they do not fuse as '}{'.
            f.write('\n')
if __name__ == '__main__':
    # Script entry point: process listing pages 1 through 411 in order.
    page_number = 1
    while page_number < 412:
        handle(page_number)
        page_number += 1
# Page tags: 抓包 (packet capture), 代码 (code)
# 最新推荐文章于 2024-03-16 22:10:22 发布 (latest recommended article published 2024-03-16 22:10:22)