最近帮朋友写一个推特的登录功能。普通账号购买比较便宜,但是容易被封;有2fa认证的账号可以采集的数据更多,也不容易被封禁,只是登录的流程更麻烦一些,于是朋友找到了我。前面我写2fa认证的文章就是为这个做准备,参见:《2fa(双因素身份验证) 是什么?应用场景和算法原理及实现》(2fa接码-CSDN博客)。
之前我也写过x的抓取,但是没有自动登录相关的内容,这里就是把上面的代码整合到一起。参见:《x的authorization获取 登录和请求接口的限制》(x-authorization-CSDN博客)。
第一步,获取头部信息通过发送请求尝试可以发现,请求会验证x-guest-token和authorization字段。
通过查找x-guest-token的值,发现它在第一次请求页面的源代码中,位于一段设置cookie值的代码里。
链接:https://x.com/?mx=2 发现请求首页后会跳转一次。可以直接请求这个页面用正则获取。
authorization经查找位于一个js文件中,而这个js文件的文件名在上一个请求返回的源码中。
用三个正则抽取 可以获取上面两个参数,拼接成完整头。
第二步,输入用户名和密码登录。
点击登录后弹出输入账号的页面。
flow_token0 的获取:可以看到页面会请求 https://api.x.com/1.1/onboarding/task.json 这个地址,请求参数是固定的,如图。
返回的参数是输入用户名时所需要的,即返回结果最后的flow_token,如下图。
第二次请求https://api.x.com/1.1/onboarding/task.json
需要用flow_token0的值,如下图。
返回flow_token1的值。
上面这些参数是有时效性的。
账号正确,会返回flow_token7,用作账号验证成功。
密码正确会返回flow_token13,用于接下来的2fa验证;2fa验证没问题的话就可以成功登录,获取到cookie了。
#!/usr/bin/env python3
import json
import os
import ssl
import pyotp
import re
# 初始化驱动
class x_token(object):
    """Log in to X (Twitter) through the web onboarding flow, including 2FA.

    Typical use: call :meth:`get_token_json` once so that ``self.headers``
    carries ``x-guest-token`` and ``authorization``, then call
    :meth:`get_cookies` with the account credentials and the TOTP secret.
    Every request goes through the single ``httpx.Client`` stored on
    ``self.client``.
    """

    # Endpoint that every step of the login flow posts to.
    TASK_URL = "https://api.x.com/1.1/onboarding/task.json"

    def __init__(self):
        """Create the HTTP client and the default browser-like headers."""
        # The original code used httpx without importing it anywhere; import
        # it here so the class works without touching the file's import block.
        import httpx

        self.proxies = {
            'http': 'proxyMeta',
            'https': 'proxyMeta',
        }
        self.IS_PROXY = 1  # 0 means "use the proxy"; any other value connects directly
        # The original referenced an undefined `ssl_context` name (NameError);
        # build a default verifying context instead.
        ssl_context = ssl.create_default_context()
        if self.IS_PROXY == 0:
            # NOTE(review): recent httpx releases replaced `proxies=` with
            # `proxy=` / `mounts=` -- confirm against the installed version.
            self.client = httpx.Client(http2=True, proxies=self.proxies, verify=ssl_context)
        else:
            self.client = httpx.Client(http2=True, verify=ssl_context)
        # Populated by get_token_json(); defined here so the extractor
        # methods do not raise AttributeError when called too early.
        self.h = ""
        self.html = ""
        self.headers = {
            "accept": "*/*",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "content-type": "application/json",
            "origin": "https://x.com",
            "pragma": "no-cache",
            "priority": "u=1, i",
            "referer": "https://x.com/",
            "sec-ch-ua": "\"Not A(Brand\";v=\"8\", \"Chromium\";v=\"132\", \"Microsoft Edge\";v=\"132\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",
            "x-twitter-active-user": "yes",
            "x-twitter-client-language": "zh-cn"
        }

    def get_token_json(self, homepage_url=None):
        """Fetch ``x-guest-token`` and ``authorization`` and store them in the headers.

        Args:
            homepage_url: Kept for backward compatibility only. As in the
                original code the value is ignored and the fixed URL
                ``https://x.com/?mx=2`` is always requested.

        Returns:
            ``self.headers`` with ``x-guest-token``, ``authorization`` and
            ``referer`` filled in.

        Raises:
            RuntimeError: if the guest token or the bearer token cannot be
                extracted from the responses.
        """
        homepage_url = 'https://x.com/?mx=2'
        req = self.client.get(homepage_url, headers=self.headers)
        self.h = req.headers
        self.html = req.text
        guest_token = self.get_guest_token()
        print("guest_token", guest_token)
        js_url = self.get_token_js_url()
        print('js_url', js_url)
        # From here on self.html holds the JS bundle, not the home page.
        self.html = self.client.get(js_url, headers=self.headers).text
        authorization = self.get_authorization()
        print(authorization)
        if guest_token is None:
            raise RuntimeError("could not extract the guest token (gt) from the home page response")
        if authorization is None:
            raise RuntimeError("could not extract the bearer token from " + js_url)
        self.headers["x-guest-token"] = guest_token
        self.headers["authorization"] = 'Bearer ' + authorization
        # Overwrite the existing lowercase key; adding a second "Referer"
        # key would make the client send the header twice.
        self.headers["referer"] = homepage_url
        print(self.headers)
        return self.headers

    def get_guest_token(self):
        """Return the ``gt`` guest token, or ``None`` when it is not found.

        The response headers (``self.h``) are searched first; the page source
        (``self.html``) is the fallback, where the token appears inside a
        ``document.cookie="gt=...;"`` assignment.
        """
        in_headers = re.findall(r"gt=([\s\S]*?);", str(self.h))
        if in_headers:
            return in_headers[0]
        in_page = re.findall(r"document\.cookie=\"gt=([\s\S]*?);", str(self.html))
        if in_page:
            return in_page[0]
        return None

    def get_token_js_url(self):
        """Return the URL of the ``main.*.js`` bundle referenced by ``self.html``.

        Falls back to a hard-coded bundle URL when the page does not
        reference one.
        """
        js_url = 'https://abs.twimg.com/responsive-web/client-web-legacy/main.686f535a.js'
        js_url_list = re.findall(r'"(https://abs\.twimg\.com/responsive-web/client-web-legacy/main\.[\s\S]*?\.js)"', self.html)
        if js_url_list:
            js_url = js_url_list[0]
        return js_url

    def get_authorization(self):
        """Extract the bearer token from the JS bundle held in ``self.html``.

        Returns:
            The token string, or ``None`` when none of the patterns match.
        """
        authorization = None
        authorization_list = re.findall(r'":"(AAAAAA[A-Za-z0-9%]{30,})"', self.html)
        if authorization_list:
            authorization = authorization_list[0]
        # The original evaluated len(None) here whenever the first pattern
        # missed, so the fallbacks below crashed instead of running.
        if authorization is None or len(authorization) < 20:
            authorization_list = re.findall(r'i\="ACTION_REFRESH"[\s\S]*?,l\="([\s\S]*?)"', self.html)
            if authorization_list:
                authorization = authorization_list[0]
            else:
                authorization_list = re.findall(r'"(AAAAAA[A-Za-z0-9%]{30,})"', self.html)
                if authorization_list:
                    authorization = authorization_list[0]
        return authorization

    def _post_task(self, data, params=None):
        """POST one onboarding step and return the ``flow_token`` of the reply.

        Raises:
            KeyError: if the reply carries no ``flow_token`` (for example
                when the step was rejected).
        """
        response = self.client.post(self.TASK_URL, headers=self.headers, params=params, json=data)
        print('get_flow_token', response)
        json_data = response.json()
        if 'flow_token' not in json_data:
            raise KeyError("flow_token missing from onboarding response: %r" % (json_data,))
        flow_token = json_data['flow_token']
        print('flow_token', flow_token)
        return flow_token

    def get_flow_token(self):
        """Start the ``login`` flow and return its first flow token."""
        params = {
            "flow_name": "login"
        }
        data = {
            "input_flow_data": {
                "flow_context": {
                    "debug_overrides": {},
                    "start_location": {
                        "location": "splash_screen"
                    }
                }
            },
            "subtask_versions": {
                "action_list": 2,
                "alert_dialog": 1,
                "app_download_cta": 1,
                "choice_selection": 3,
                "contacts_live_sync_permission_prompt": 0,
                "cta": 7,
                "email_verification": 2,
                "end_flow": 1,
                "enter_date": 1,
                "enter_email": 2,
                "enter_password": 5,
                "enter_phone": 2,
                "enter_recaptcha": 1,
                "enter_text": 5,
                "enter_username": 2,
                "generic_urt": 3,
                "in_app_notification": 1,
                "interest_picker": 3,
                "js_instrumentation": 1,
                "menu_dialog": 1,
                "notifications_permission_prompt": 2,
                "open_account": 2,
                "open_home_timeline": 1,
                "open_link": 1,
                "phone_verification": 4,
                "privacy_options": 1,
                "security_key": 3,
                "select_avatar": 4,
                "select_banner": 2,
                "settings_list": 7,
                "show_code": 1,
                "sign_up": 2,
                "sign_up_review": 4,
                "tweet_selection_urt": 1,
                "update_users": 1,
                "upload_media": 1,
                "user_recommendations_list": 4,
                "user_recommendations_urt": 1,
                "wait_spinner": 3,
                "web_modal": 1
            }
        }
        return self._post_task(data, params=params)

    def get_flow_token_1(self):
        """Answer the JS-instrumentation subtask and return the next flow token."""
        flow_token = self.get_flow_token()
        data = {
            "flow_token": flow_token,
            "subtask_inputs": [
                {
                    "subtask_id": "LoginJsInstrumentationSubtask",
                    "js_instrumentation": {
                        # Hard-coded instrumentation answer captured from a
                        # browser session.
                        # NOTE(review): it may stop being accepted over time
                        # -- confirm.
                        "response": "{\"rf\":{\"\":0,\"f525e590bfe7548a82e68ba05b82a59cfa9cdcaec0a46d6e013d2dca9b91cd01\":0,\"a9c73eb9860a844bea24b3a31b1433f4ecc2630a6c3b7686d3ec5121e4c91ad6\":0,\"\":0},\"s\":\"xu0S_KtvdvRf3XjJJP1EgZ7yQEFSnMcg4HLL8zZwHw7mCuUrpNeH5vZg9vMkgXmYkN9iESBg-IthEfbHZDOfWi1baMeNVKypG17HYkLUiVzJazl-4D-p6JvvJUUBdhjsD48WB7CAHLUtcDpckjPKFNQMOMbT4dRqJIsCWDOdrBhXXtFx-8pDVkwEOV9Bb03JGVrGcUTh9Mo1CxNZYqyDbMnd7gEqdnLHUFZ3kJxhDrKLQzBA5WdSqmEf-i0VijOzB1qRAEPj-wQIueq6CCDITO-pBUPWwfUfyGGAxRN49b4K4x6IwQI903zW9tmx6j4K88wyM9btrcmV4gTJ9XJXWQAAAZSWNFJs\"}",
                        "link": "next_link"
                    }
                }
            ]
        }
        return self._post_task(data)

    def get_flow_token_7(self, user_name):
        """Submit the user name and return the flow token for the password step."""
        flow_token_1 = self.get_flow_token_1()
        data = {
            "flow_token": flow_token_1,
            "subtask_inputs": [
                {
                    "subtask_id": "LoginEnterUserIdentifierSSO",
                    "settings_list": {
                        "setting_responses": [
                            {
                                "key": "user_identifier",
                                "response_data": {
                                    "text_data": {
                                        "result": user_name
                                    }
                                }
                            }
                        ],
                        "link": "next_link"
                    }
                }
            ]
        }
        return self._post_task(data)

    def x_login(self, user_name, password):
        """Submit user name and password; return the flow token for the 2FA step."""
        flow_token_7 = self.get_flow_token_7(user_name)
        data = {
            "flow_token": flow_token_7,
            "subtask_inputs": [
                # The original posted an empty list here, so `password` was
                # never sent.
                # NOTE(review): the endpoint is undocumented; this payload
                # shape is the one commonly used for the password step --
                # confirm against a captured browser request.
                {
                    "subtask_id": "LoginEnterPassword",
                    "enter_password": {
                        "password": password,
                        "link": "next_link"
                    }
                }
            ]
        }
        return self._post_task(data)

    def get_cookies(self, user_name, password, secret_key):
        """Run the whole login, answer the 2FA challenge and return the cookies.

        Args:
            user_name: Account identifier entered in the user-name step.
            password: Account password.
            secret_key: TOTP secret of the account's 2FA.

        Returns:
            The cookies set by the final (2FA) response, as a plain dict.
        """
        totp = pyotp.TOTP(secret_key)
        flow_token_9 = self.x_login(user_name, password)
        # Generate the one-time code only now: the login steps above take
        # several round-trips and a code created earlier may have expired.
        current_otp = totp.now()
        data = {
            "flow_token": flow_token_9,
            "subtask_inputs": [
                {
                    "subtask_id": "LoginTwoFactorAuthChallenge",
                    "enter_text": {
                        "text": current_otp,
                        "link": "next_link"
                    }
                }
            ]
        }
        response = self.client.post(self.TASK_URL, headers=self.headers, json=data)
        print(response.text)
        print(response, dict(response.cookies))
        return dict(response.cookies)
这是我写的相关代码,有用到的可以去尝试。写的不好,异常啥的都没处理。
大家如果不用2fa的认证,可以参考别人开源的代码。如下