1、登录人人网
携带 cookies 模拟登录。
要将 cookie 字符串通过字典推导式转化为字典格式,否则无法作为请求的 cookies 参数使用。
import scrapy
class RenSpider(scrapy.Spider):
    """Log in to renren.com by attaching pre-captured session cookies.

    The raw ``Cookie`` header string is converted into a dict, because
    ``scrapy.Request`` expects its ``cookies`` argument as a mapping,
    not as a header string.
    """
    name = 'rrrr'
    allowed_domains = ['renren.com']
    start_urls = ['http://www.renren.com/975006052/profile']
    # NOTE(review): hard-coded session cookies expire — refresh them from a
    # fresh browser login before running this spider.
    cookies='anonymid=kdfgrmqf92dw82; _r01_=1; taihe_bi_sdk_uid=b07af0e4c1fc9a99d0c233f5d1f06229; _de=46FB6C30F9AAB9F9D5E8B9E700EB8CC2; depovince=GW; ick_login=2f7c6358-d906-4aa6-b902-4b45d59f1779; taihe_bi_sdk_session=26744adf0c78fc431fa71656d442bd09; t=11b15dc5a6908a7dc576ee489d61ec062; societyguester=11b15dc5a6908a7dc576ee489d61ec062; id=975006052; xnsid=cdb83c7a; jebecookies=978ed4be-4911-47bd-84ef-2c06f55d66b3|||||; JSESSIONID=abca_eJHU9m2nern8rYqx; ver=7.0; loginfrom=null; wp_fold=0'
    # Split each pair on the FIRST '=' only: cookie values may themselves
    # contain '=' (e.g. base64 padding), which a plain split('=') would
    # silently truncate.
    cookies = {pair.split('=', 1)[0]: pair.split('=', 1)[1]
               for pair in cookies.split('; ')}

    def start_requests(self):
        """Issue the initial request with the login cookies attached."""
        yield scrapy.Request(
            url=self.start_urls[0],
            cookies=self.cookies,
            callback=self.parse,
        )

    def parse(self, response):
        """Dump the profile page to disk so the login result can be inspected."""
        with open('renren.html', 'w', encoding='utf8') as file:
            file.write(response.body.decode())
2、登录 GitHub
2.1 通过 POST 请求携带表单数据登录
# -*- coding: utf-8 -*-
import scrapy
# https://github.com/login 起始的url地址
# https://github.com/session 发送post表单请求
'''
commit: Sign in
authenticity_token: daEuLUefyDXj8MLw/cD5JjYrcqNbZe/FiIgEnDtpKwF1CYefkaus9VGNMQqJMkR2DVAQMW9irgiPbU2a/k+89Q==
ga_id: 287622012.1592305586
login: LogicJerry
password: 123456
webauthn-support: supported
webauthn-iuvpaa-support: supported
return_to:
required_field_84fa:
timestamp: 1598532226351
timestamp_secret: cbc64832cf60571a5dc3649c0cb1b707c5e598ea75887850681ae0183bb3e519
'''
class GithubSpider(scrapy.Spider):
    """Log in to GitHub by POSTing the login form to /session.

    The hidden form fields (CSRF token and the two timestamp fields) are
    scraped from the login page first, then submitted together with the
    credentials, mirroring what a browser sends.
    """
    name = 'github'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Scrape the hidden login-form fields, then POST the credentials."""
        token = response.xpath("//input[@name='authenticity_token']/@value").extract_first()
        ts = response.xpath("//input[@name='timestamp']/@value").extract_first()
        ts_secret = response.xpath("//input[@name='timestamp_secret']/@value").extract_first()
        # Field names and constant values mirror the captured browser POST.
        form = {
            'commit': 'Sign in',
            'authenticity_token': token,
            'login': '账号',
            'password': '密码',
            'webauthn-support': 'supported',
            'webauthn-iuvpaa-support': 'unsupported',
            'timestamp': ts,
            'timestamp_secret': ts_secret,
        }
        # Submit the assembled form to the session endpoint.
        yield scrapy.FormRequest(
            url='https://github.com/session',
            formdata=form,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Persist the post-login page so the result can be verified manually."""
        with open('github3.html', 'w', encoding='utf-8') as f:
            f.write(response.body.decode())
2.2 直接运用 from_response 自动填充表单数据,只需提供用户名和密码即可模拟登录
# -*- coding: utf-8 -*-
import scrapy
class Github2Spider(scrapy.Spider):
    """Log in to GitHub via ``FormRequest.from_response``.

    ``from_response`` pre-fills every hidden input (CSRF token,
    timestamps) from the login page's form, so only the credentials
    need to be supplied in ``formdata``.
    """
    name = 'github2'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        """Fill GitHub's login form and submit it."""
        # BUG FIX: the form's input is named 'login' (see the captured POST
        # data earlier in this file), not 'login_field'. A key that matches
        # no form input is merely ADDED by from_response as an extra
        # parameter, leaving the real 'login' field empty, so the login
        # always failed.
        yield scrapy.FormRequest.from_response(
            response=response,
            formdata={'login': '账号', 'password': '密码'},
            callback=self.after_login,
        )

    def after_login(self, response):
        """Save the post-login page so the result can be inspected."""
        with open('github4.html', 'w', encoding='utf-8') as f:
            f.write(response.body.decode())