发送POST请求第一种方式:scrapy.Request(method="POST")
项目名/spiders/爬虫名.py(爬虫,发送POST请求):
# -*- coding: utf-8 -*-
import scrapy
class Github2Spider(scrapy.Spider):
    """Example spider: send a POST request with scrapy.Request(method="POST").

    Unlike FormRequest.from_response, scrapy.Request does NOT inspect the
    page for a form -- the target URL (and any body) must be given by hand.
    """
    name = '爬虫名'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        # Build the POST request explicitly and route the reply back here.
        yield scrapy.Request(
            "https://github.com/session",
            method="POST",  # default HTTP method is GET; override to POST
            callback=self.parse,
        )
发送POST请求第二种方式:scrapy.FormRequest()
项目名/spiders/爬虫名.py(爬虫,scrapy.FormRequest发送POST请求):
# -*- coding: utf-8 -*-
import scrapy
class GithubSpider(scrapy.Spider):
    """Example spider: send a POST request with scrapy.FormRequest.

    FormRequest form-encodes the given mapping into the request body and
    sets the appropriate Content-Type header.
    """
    name = '爬虫名'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        # Credentials to be form-encoded into the POST body.
        post_data = {
            "username": "用户名",
            "password": "密码",
        }
        yield scrapy.FormRequest(
            "https://github.com/session",
            formdata=post_data,
            callback=self.after_login,
        )

    def after_login(self, response):
        # Print the decoded body so the login result can be inspected.
        print(response.body.decode())
发送POST请求第三种方式:scrapy.FormRequest.from_response() 自动寻找表单并发送POST请求提交
项目名/spiders/爬虫名.py(爬虫,scrapy.FormRequest.from_response()自动寻找表单并请求):
# -*- coding: utf-8 -*-
import scrapy
class GithubSpider(scrapy.Spider):
    """Example spider: POST via scrapy.FormRequest.from_response().

    from_response() locates the <form> element in the given response and
    submits to that form's action URL, so the target does not need to be
    spelled out by hand.
    """
    name = '爬虫名'
    allowed_domains = ['github.com']
    start_urls = ['https://github.com/login']

    def parse(self, response):
        # The form is found automatically in *response*; it can also be
        # pinned down explicitly with formid= / formname= / formxpath=.
        yield scrapy.FormRequest.from_response(
            response,
            formdata={"username": "用户名", "password": "密码"},
            callback=self.after_login,
        )

    def after_login(self, response):
        # Print the decoded body so the login result can be inspected.
        print(response.body.decode())
# -*- coding: utf-8 -*-
import scrapy
from douban.items import DoubanItem
import re
from urllib import parse
class Douban250Spider(scrapy.Spider):
    """Log in to renren.com with a FormRequest, then fetch a profile page
    and save its HTML to dp.html."""
    name = 'login'
    allowed_domains = ['renren.com']
    start_urls = ['https://renren.com']

    def start_requests(self):
        # POST the credentials directly to renren's login endpoint instead
        # of issuing the default GET requests for start_urls.
        url = "http://www.renren.com/PLogin.do"
        data = {"email": "970138074@qq.com", "password": "pythonspider"}
        yield scrapy.FormRequest(url, formdata=data, callback=self.parse_page)

    def parse_page(self, response):
        # BUG FIX: Scrapy invokes callbacks as callback(response); the
        # original signature `def parse_page(self)` raised a TypeError
        # as soon as the login response arrived.
        yield scrapy.Request(
            url='http://www.renren.com/880151247/profile',
            callback=self.parse_profile,
        )

    def parse_profile(self, response):
        # Persist the profile HTML for offline inspection.
        with open('dp.html', 'w', encoding='utf-8') as fp:
            fp.write(response.text)