说明
最近最热门的话题莫过于“互联网寒冬”“裁员”等关键词,于是脉脉、领英这类平台的热度也随之上升,所以我写了脉脉和领英的爬虫。
操作
脉脉的模拟登录难度不大,只是需要处理验证码。
验证码是数字加减法的算式,我使用云打码平台来识别。
1、通过抓包分析可知,登录地址为 https://acc.maimai.cn/login ,是一个 POST 请求,请求参数包含用户名、密码和验证码。其中验证码并不是每次都需要:有时需要,有时不需要。
data = {
    "m": self.username,
    "p": self.password,
    "v": "",
    "to": "",
    "pa": "+86"
}
2、登录成功之后即可保存 cookie,在后续的爬取过程中直接使用。
import requests
from yundamahttp import YDMHttp
class Login:
    """Log in to maimai.cn through a shared HTTP session and expose its cookies.

    The login form is first posted without a captcha answer. If the response
    does not contain the success marker, the captcha image is downloaded to
    ``code.jpg``, solved through ``YDMHttp``, and the form is posted again
    with the answer. The whole cycle is repeated at most ``max_attempts``
    times so that a persistently failing login cannot recurse or loop forever.

    Attributes:
        username: Value sent as the ``m`` form field.
        password: Value sent as the ``p`` form field.
        max_attempts: Upper bound on login cycles per ``login_normal`` call.
        logged_in: Whether the most recent login POST contained the marker.
        session: HTTP session that accumulates the login cookies.
        headers: Headers sent with every login POST.
    """

    LOGIN_URL = "https://acc.maimai.cn/login"
    CODE_URL = "https://acc.maimai.cn/code"
    # Substring whose presence in the response body is treated as success.
    SUCCESS_MARKER = "share_data"
    # Seconds to wait on each HTTP call; without a timeout a stalled
    # connection would block the crawler indefinitely.
    REQUEST_TIMEOUT = 10

    def __init__(self, max_attempts=3):
        """Create the session and the static request headers.

        Args:
            max_attempts: Maximum number of login cycles (one POST without a
                captcha plus one POST with a captcha) tried by
                ``login_normal``. Values below 1 are treated as 1.
        """
        self.username = ""
        self.password = ""
        self.max_attempts = max_attempts
        self.logged_in = False
        self.session = requests.session()
        # NOTE(review): "br" in accept-encoding presumably needs a brotli
        # decoder installed for the response body to be readable - confirm.
        self.headers = {
            'authority': "acc.maimai.cn",
            'cache-control': "max-age=0,no-cache",
            'origin': "https://acc.maimai.cn",
            'upgrade-insecure-requests': "1",
            'content-type': "application/x-www-form-urlencoded",
            'user-agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
            'accept': "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
            'referer': "https://acc.maimai.cn/login",
            'accept-encoding': "gzip, deflate, br",
            'accept-language': "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7",
        }

    def identify_code(self):
        """Return the captcha answer produced by ``YDMHttp().yundama()``."""
        ydm = YDMHttp()
        return ydm.yundama()

    def _submit(self, code=None):
        """POST the login form and record the outcome in ``self.logged_in``.

        Args:
            code: Captcha answer to send as the ``v`` field; the field is
                omitted entirely when this is ``None``.

        Returns:
            The response object returned by the session.
        """
        form = {"m": self.username, "p": self.password}
        if code is not None:
            form["v"] = code
        form["to"] = ""
        form["pa"] = "+86"
        response = self.session.post(
            self.LOGIN_URL,
            data=form,
            headers=self.headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        self.logged_in = self.SUCCESS_MARKER in response.text
        return response

    def _login_with_code(self):
        """Solve the saved captcha, submit it, and return whether it worked."""
        response = self._submit(self.identify_code())
        if self.logged_in:
            print("登陆成功")
            print(response.text)
        return self.logged_in

    def login_normal(self):
        """Try to log in, falling back to the captcha flow when needed.

        Each cycle posts the form without a captcha; on failure it downloads
        and solves a captcha and posts again. Gives up after
        ``max_attempts`` cycles, leaving ``self.logged_in`` as ``False``.
        """
        for _ in range(max(1, self.max_attempts)):
            response = self._submit()
            if self.logged_in:
                print("没有验证码登陆成功")
                print(response.text)
                return
            self.get_code()
            if self._login_with_code():
                return
        print("登陆失败,已达到最大重试次数")

    def get_code(self):
        """Download the captcha image and save it as ``code.jpg``."""
        r = self.session.get(self.CODE_URL, timeout=self.REQUEST_TIMEOUT)
        with open("code.jpg", "wb") as f:
            f.write(r.content)

    def login_code(self):
        """Log in with a captcha answer; fall back to ``login_normal`` on failure.

        The fallback is bounded because ``login_normal`` retries in a loop
        instead of calling back into this method.
        """
        if not self._login_with_code():
            self.login_normal()

    def cookie(self):
        """Return the session cookies as ``"name=value; "`` pairs joined together.

        Every pair, including the last one, is followed by ``"; "``; an empty
        cookie jar yields an empty string.
        """
        return "".join(c.name + "=" + c.value + "; " for c in self.session.cookies)

    def run(self):
        """Entry point: perform the login flow."""
        self.login_normal()
if __name__ == "__main__":
    # Script entry point: run the login flow, then print the cookie string
    # collected by the session.
    client = Login()
    client.run()
    print(client.cookie())