import scrapy
class ZhihucookieSpider(scrapy.Spider):
    """Request a Zhihu answer page with browser cookies and print its text.

    The spider sends the cookies parsed from ``raw_cookies`` with every
    start request, then extracts the text of the answer's paragraphs with a
    CSS selector and prints the resulting list.
    """

    name = 'zhihucookie'
    # Scrapy expects bare domain names here; a full URL such as
    # 'https://www.zhihu.com' is not a valid entry for offsite filtering.
    allowed_domains = ['zhihu.com']
    start_urls = ['https://www.zhihu.com/question/361173250/answer/968438958']

    # Placeholder: replace with the raw ``Cookie`` header copied from a
    # logged-in browser session, e.g. "k1=v1; k2=v2".
    raw_cookies = '对应的cookies内容'

    @staticmethod
    def _parse_cookies(raw):
        """Turn a raw ``Cookie`` header string into a ``{name: value}`` dict.

        Args:
            raw: Cookie pairs separated by ``;``, each written ``name=value``.

        Returns:
            dict mapping cookie names to values. Fragments without ``=`` or
            with an empty name are skipped instead of raising.
        """
        cookies = {}
        for pair in raw.split(';'):
            # partition splits on the FIRST '=' only, so values that
            # themselves contain '=' (base64, quoted tokens) stay intact.
            key, sep, value = pair.strip().partition('=')
            if sep and key:
                cookies[key] = value
        return cookies

    def start_requests(self):
        """Yield one cookie-carrying request per entry in ``start_urls``."""
        cookies = self._parse_cookies(self.raw_cookies)
        for url in self.start_urls:
            yield scrapy.Request(
                url,
                callback=self.parse,
                cookies=cookies,
            )

    def parse(self, response):
        """Extract the answer's paragraph texts from ``response`` and print them.

        Args:
            response: the downloaded page for one of ``start_urls``.
        """
        # An XPath alternative that was tried for the same content:
        #   //span[@class="RichText ztext CopyrightRichText-richText"]/p/text()
        # Its result was always overwritten by the CSS query below, so it is
        # no longer evaluated.
        res = response.css('div.RichContent-inner span p::text').extract()
        print(res)
# Scrapy 框架 cookies 设置,XPath 以及 CSS 基本使用记录
# 最新推荐文章于 2024-06-22 16:33:22 发布