# -*- coding: utf-8 -*-
import scrapy
import json
class HrSpider(scrapy.Spider):
    """Crawl job postings from the careers.tencent.com JSON API.

    The crawl has two stages:

    1. Listing pages (``one_url``) are fetched and each post's category
       and responsibilities are copied into a partially filled item.
    2. For every post the detail endpoint (``two_url``) is fetched to add
       the requirement text, after which the finished item is yielded.

    Items are plain dicts with the keys ``'工作性质'`` (taken from
    ``CategoryName``), ``'工作职责'`` (from ``Responsibility``) and
    ``'要求'`` (from ``Requirement``).
    """

    name = 'hr'
    allowed_domains = ['careers.tencent.com']
    # Listing endpoint; the ``{}`` placeholder receives the pageIndex value.
    one_url = 'https://careers.tencent.com/tencentcareer/api/post/Query?timestamp=1587436273920&countryId=&cityId=&bgIds=&productId=&categoryId=&parentCategoryId=&attrId=&keyword=&pageIndex={}&pageSize=10&language=zh-cn&area=cn'
    # Detail endpoint; the ``{}`` placeholder receives a post's PostId.
    two_url = 'https://careers.tencent.com/tencentcareer/api/post/ByPostId?timestamp=1587444657148&postId={}&language=zh-cn'
    start_urls = [one_url.format(0)]
    # Number of listing pages to crawl (pageIndex 0 .. max_pages - 1).
    # NOTE(review): the API may treat pageIndex as 1-based - confirm
    # whether index 0 and index 1 return the same page.
    max_pages = 10

    @staticmethod
    def _one_line(text):
        """Return ``text`` with line breaks replaced by spaces.

        Both ``\\n`` and ``\\r`` are each replaced by a single space.
        A missing value (``None``) yields an empty string instead of
        raising ``AttributeError``.
        """
        return (text or '').replace('\n', ' ').replace('\r', ' ')

    def parse(self, response):
        """Handle the start response and schedule the remaining pages.

        ``response`` is the listing page for pageIndex 0 (the only entry
        in ``start_urls``), so it is processed here directly instead of
        being discarded and requested a second time.
        """
        for result in self.parse_one(response):
            yield result
        for page in range(1, self.max_pages):
            yield scrapy.Request(
                url=self.one_url.format(page),
                callback=self.parse_one,
            )

    def parse_one(self, response):
        """Parse one listing page and request the detail of every post.

        Yields one ``scrapy.Request`` per post; the partially filled item
        travels to ``parse_two`` through ``meta['item']``.
        """
        payload = json.loads(response.text)
        # Guard against a null/missing post list so a page without
        # postings produces no requests instead of a TypeError.
        posts = (payload.get('Data') or {}).get('Posts') or []
        for post in posts:
            item = {
                '工作性质': post['CategoryName'],
                '工作职责': self._one_line(post.get('Responsibility')),
            }
            yield scrapy.Request(
                url=self.two_url.format(post['PostId']),
                callback=self.parse_two,
                meta={'item': item},
            )

    def parse_two(self, response):
        """Add the requirement text from the detail page and yield the item."""
        item = response.meta['item']
        detail = json.loads(response.text).get('Data') or {}
        # Flattened the same way as the responsibilities in parse_one, so
        # carriage returns are stripped here as well.
        item['要求'] = self._one_line(detail.get('Requirement'))
        yield item
scrapy 爬取腾讯招聘
最新推荐文章于 2021-09-26 22:53:59 发布