由于该接口的反爬措施不多,这里只提供采集思路,具体的数据解析逻辑可以根据自己的需求编写。
注:
翻页时,需要从前一次请求的响应中取出 ID(loadMoreKey),作为下一页请求的翻页标记。
def her():
    """Build the base HTTP headers that imitate the Jike Android client.

    A new dict is created on every call, so callers can add their own
    entries (for example the token headers) without affecting each other.

    :return: dict mapping header names to string values.
    """
    device_id = '48b8e268-8013-42a0-abdd-db9ae9b2041b'
    device_properties = {
        "uuid": device_id,
        "android_id": "9097d6e661481661",
        "oaid": "",
        "vaid": "",
        "aaid": "",
    }
    return {
        'os-version': '23',
        'model': 'MuMu',
        'resolution': '900x1440',
        'x-jike-app-id': 'XeITUMa6kGKF',
        'app-buildno': '2241',
        'applicationid': 'com.ruguoapp.jike',
        'market': 'wandoujiahuihui',
        'manufacturer': 'Netease',
        'os': 'Android',
        'x-jike-device-id': device_id,
        'app-version': '7.12.0',
        'source': '',
        'app-permissions': '4',
        # str() of a dict yields the Python repr (single quotes), not JSON;
        # kept as-is because that is what has always been sent.
        'x-jike-device-properties': str(device_properties),
        'king-card-status': 'unknown',
        'accept-encoding': 'gzip',
        'cookie': 'abtest_info.sig=TpMSLxutJSIb6SX-RcpsEJ9rvBM; abtest_info={}; jike_lbs_location=0.0,0.0',
        'user-agent': 'okhttp/4.9.0',
    }
def refresh_token(refresh_token):
    """Exchange a refresh token for a new access/refresh token pair.

    :param refresh_token: current refresh token; sent as the
        ``x-jike-refresh-token`` request header.
    :return: tuple ``(access_token, refresh_token)`` taken from the
        ``x-jike-access-token`` and ``x-jike-refresh-token`` fields of the
        JSON response.
    :raises requests.HTTPError: if the server answers with an error status.
    :raises KeyError: if the response body lacks either token field.
    """
    url = "https://api.ruguoapp.com/1.0/app_auth_tokens.refresh"
    headers = her()
    headers["x-jike-refresh-token"] = str(refresh_token)
    # NOTE(review): hard-coded access token, presumably captured from an
    # earlier session; confirm the endpoint still accepts it.
    token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJkYXRhIjoiUGdiMjNOczh3akRBVHVSVkJUQ2UyXC8yVTJ4dkN0ajVwaG5PZ0VmZ3dYdnRWaFVlTTZTdFlDNWNvc1c5WUlJWmdvSnhVY2lYRHc3VmpENGVpVytlMWR0dnc1VjN6ZzVmb0RNbVwvMDZtOUVndWFTdTBHSEIrSGVEeENlVzlHZlBoQ2E5bzA3VUhcL1l3U2hneGZINnBCSndya2t3WUxENlNKSk1Hd2lLT3pNOUZ5T0xsQnlIWUZpZlBFc1wvY2xWTVZaZjR3Q1JUNkk2cWJsYUJPa1RBcGtvUHdoYUc5NU9IMnZlVG1PaXFRUEJ1OGdHQjZGSWkxMUdSdG5nZXdBamJ3SGRycXRibGNlQ1RsXC9rUm5RVm5ZM1BqdlJLTlBzaW4rdmt0Mm54MWg4cnJ2eGZHZnFJXC9wblE0QzlRNTI0TXViTWZkZHBkdHB3YVE5cHZcLzJsZ0RFZzBlTFNma3Y0TWtacXlLNkJuemRjcFlUekVxWnp5MlwvREdsNkcrU2xieXJRSEJ0RlhMQU1Kc2lRblJNMEE5RkE4MGdBPT0iLCJ2IjozLCJpdiI6InBoeFBhUEZtOEVZMW1RT1dFbGFYcEE9PSIsImlhdCI6MTYxNTI2NjU0OS44OTV9.HQn4rSGSANioRm7Uy01PGSyygtci2hNngHWLHOU9k6c"
    headers['x-jike-access-token'] = str(token)
    # NOTE(review): verify=False turns off TLS certificate verification; kept
    # from the original, but confirm it is still wanted.
    # The timeout keeps a stalled connection from blocking the script forever.
    r = requests.get(url, headers=headers, verify=False, timeout=10)
    # Fail with a clear HTTP error instead of an opaque KeyError below.
    r.raise_for_status()
    content = r.json()
    return content['x-jike-access-token'], content['x-jike-refresh-token']
def get_data(token1, token2, topic_id="5a5c55c0805cb70016874cf4"):
    """Fetch the square feed of a topic ("circle") and print each item's content.

    :param token1: access token, sent as ``x-jike-access-token``.
    :param token2: refresh token, sent as ``x-jike-refresh-token``.
    :param topic_id: id of the topic to query; defaults to the topic the
        script originally hard-coded.
    :return: None. Each item's ``content`` field is printed; nothing is
        printed when the response status is not 200.
    """
    url = 'https://api.ruguoapp.com/1.0/topics/tabs/square/feed'
    # Unlike her(), this header set carries no 'model' entry; that difference
    # is preserved from the original.
    headers = {
        'os-version': '23',
        'resolution': '900x1440',
        'x-jike-app-id': 'XeITUMa6kGKF',
        'app-buildno': '2241',
        'applicationid': 'com.ruguoapp.jike',
        'market': 'wandoujiahuihui',
        'manufacturer': 'Netease',
        'os': 'Android',
        'x-jike-device-id': '48b8e268-8013-42a0-abdd-db9ae9b2041b',
        'app-version': '7.12.0',
        'source': '',
        'app-permissions': '4',
        'x-jike-device-properties': str({"uuid":"48b8e268-8013-42a0-abdd-db9ae9b2041b","android_id":"9097d6e661481661","oaid":"","vaid":"","aaid":""}),
        'king-card-status': 'unknown',
        'content-type': 'application/json;charset=utf-8',
        # No explicit content-length: the hard-coded value did not match the
        # body json.dumps produces, and requests derives the header from the
        # body itself.
        'accept-encoding': 'gzip',
        'cookie': 'abtest_info.sig=TpMSLxutJSIb6SX-RcpsEJ9rvBM; abtest_info={}; jike_lbs_location=0.0,0.0',
        'user-agent': 'okhttp/4.9.0',
    }
    headers["x-jike-refresh-token"] = str(token2)
    headers["x-jike-access-token"] = str(token1)
    payload = json.dumps({"topicId": topic_id, "debug": False})
    # The timeout keeps a stalled connection from blocking the script forever.
    r = requests.post(url, headers=headers, data=payload, verify=False, timeout=10)
    if r.status_code != 200:
        return
    # A response without a 'data' field is treated as an empty feed instead
    # of crashing on iteration over None.
    for item in r.json().get('data') or []:
        print(item.get('content'))
def get_quan(token1, token2, username="01E7DE69-A362-45D8-8C66-1CD50D55A959",
             limit=25, max_pages=3):
    """Page through a user's personal updates and print how many ids were collected.

    Each response's ``loadMoreKey`` is sent back with the next request as the
    pagination marker.

    :param token1: access token, sent as ``x-jike-access-token``.
    :param token2: refresh token, sent as ``x-jike-refresh-token``.
    :param username: user whose updates are requested; defaults to the
        originally hard-coded user.
    :param limit: ``limit`` value sent with every request.
    :param max_pages: maximum number of pages to request.
    :return: None. The number of collected ids is printed.
    :raises requests.HTTPError: if a page request returns an error status.
    """
    url = 'https://api.ruguoapp.com/1.0/personalUpdate/single'
    # Same base headers as her() plus the JSON content type. Content-Length
    # is left to requests because the body grows once loadMoreKey is added.
    header = her()
    header['content-type'] = 'application/json;charset=utf-8'
    header["x-jike-refresh-token"] = str(token2)
    header["x-jike-access-token"] = str(token1)
    payload = {"username": username, "limit": limit}
    ids = []
    for _ in range(max_pages):
        # The timeout keeps a stalled connection from blocking the script.
        res = requests.post(url, headers=header, data=json.dumps(payload),
                            verify=False, timeout=10)
        res.raise_for_status()
        page = json.loads(res.text)
        # A page without a 'data' field contributes nothing instead of
        # crashing on iteration over None.
        ids.extend(item.get('id') for item in page.get('data') or [])
        load_more_key = page.get('loadMoreKey')
        if not load_more_key:
            # No marker for a following page, so stop rather than send a
            # null loadMoreKey.
            break
        payload['loadMoreKey'] = load_more_key
    print(len(ids))
# Script entry: obtain an access/refresh token pair from refresh_token(), then
# pass both tokens to get_quan().
# NOTE(review): `re_token` is not defined in the visible part of this file; it
# must be assigned before this line runs, otherwise this raises NameError.
t1,t2 = refresh_token(re_token)
get_quan(t1,t2)