判断网页对应信息的数据类型
json
先在浏览器开发者工具 Network 面板的 XHR 标签中查找对应请求,找不到再到 All 标签中查找
单线程爬取腾讯招聘网站json信息实战
import time
import requests
from user_agent import headers
import cchardet
from retrying import retry
import json
import jsonpath
# https://careers.tencent.com/tencentcareer/api/post/Query?&keyword=python&pageIndex=5&pageSize=10
class Tenxun(object):
def __init__(self, url=None, proxies=None):
    """Store the API endpoint and proxy mapping on the instance.

    Args:
        url: Base URL of the query endpoint. When omitted, the Tencent
            careers ``post/Query`` endpoint is used; it ends with ``?``
            and carries no query parameters of its own.
        proxies: Dict mapping a URL scheme to a proxy address. When
            omitted, a single hard-coded ``http`` proxy is used. An
            empty dict is stored as given.
    """
    if url is None:
        url = 'https://careers.tencent.com/tencentcareer/api/post/Query?'
    if proxies is None:
        # Build a fresh dict per instance so instances never share state.
        # NOTE(review): the default endpoint is https but the only proxy
        # key is 'http'; if this mapping is handed to requests, which
        # picks a proxy by URL scheme, the entry would presumably not
        # apply to that endpoint -- confirm the intent with the caller.
        proxies = {'http': '121.237.88.178:3000'}
    self.url = url
    self.proxies = proxies
def get_url(self):
'''生成url列表'''
# 网址的其他参数靠传参的方式生成
params = []
for i in range(1,5):
param = {
'keyword':'python',
'pageIndex': i,
'pageSize':'10'
}
params.append(param)