发送POST请求第一种方式:scrapy.Request(method="POST")
项目名/spiders/爬虫名.py(爬虫,发送POST请求):
# -*- coding: utf-8 -*-
import scrapy
class Github2Spider(scrapy.Spider):
    """Demonstrates sending a POST request with scrapy.Request(method="POST").

    Yields a bare POST to the GitHub session endpoint. NOTE(review): no
    `body` is supplied here, so a real login would fail — this snippet only
    shows how to switch the HTTP method.
    """
    name = '爬虫名'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        # Unlike FormRequest.from_response, a plain scrapy.Request does NOT
        # inspect the page for a <form>; any form fields would have to be
        # passed explicitly via the `body` argument.
        yield scrapy.Request(
            "https://github.com/session",
            method="POST",  # default is GET
            callback=self.parse,
        )
发送POST请求第二种方式:scrapy.FormRequest()
项目名/spiders/爬虫名.py(爬虫,scrapy.FormRequest发送POST请求):
# -*- coding: utf-8 -*-
import scrapy
class GithubSpider(scrapy.Spider):
    """Demonstrates POSTing form data with scrapy.FormRequest.

    Sends the credentials as a url-encoded POST body to the session
    endpoint, then prints the response of the login attempt.
    """
    name = '爬虫名'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        post_data = dict(
            username="用户名",
            password="密码"
        )
        # FormRequest url-encodes `formdata` and issues a POST request.
        yield scrapy.FormRequest(
            "https://github.com/session",
            formdata=post_data,
            callback=self.after_login,
        )

    def after_login(self, response):
        # Print the decoded body so the login result can be inspected.
        print(response.body.decode())
发送POST请求第三种方式:scrapy.FormRequest.from_response() 自动寻找表单并发送POST请求提交
项目名/spiders/爬虫名.py(爬虫,scrapy.FormRequest.from_response()自动寻找表单并请求):
# -*- coding: utf-8 -*-
import scrapy
class GithubSpider(scrapy.Spider):
    """Demonstrates scrapy.FormRequest.from_response.

    from_response locates the <form> in the downloaded login page, merges
    `formdata` into the form's fields, and POSTs to the form's action URL.
    """
    name = '爬虫名'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        # Automatically find the form in `response` and submit it to the
        # form's action URL.
        yield scrapy.FormRequest.from_response(
            response,
            # formid=None,     # alternatively locate the form by its id attribute
            # formname=None,   # ... or by its name attribute
            # formxpath=None,  # ... or by an XPath expression
            formdata={"username": "用户名", "password": "密码"},
            callback=self.after_login,
        )

    def after_login(self, response):
        # Print the decoded body so the login result can be inspected.
        print(response.body.decode())