下面是 Scrapy 的 spider 代码:
import scrapy
from boos.items import BoosItem
class BoosSpiderSpider(scrapy.Spider):
    """Spider that requests a zhipin.com job-search page with a fixed cookie set.

    ``start_requests`` sends a single request for the first result page,
    attaching a hard-coded cookie string. ``parse`` reads the job list from
    the response and prints the fields of the first listing.
    """

    name = 'boos_spider'
    allowed_domains = ['www.zhipin.com']
    # URL template: both "{}" placeholders take the result-page number.
    start_urls = ["https://www.zhipin.com/c101280100/?query=python%E7%88%AC%E8%99%AB%E5%AE%9E%E4%B9%A0%E7%94%9F&page={}&ka=page-{}"]

    def start_requests(self):
        """Yield the initial request for page 1, carrying the cookies below."""
        cookies = "lastCity=101281600; historyState=state; wd_guid=df60089a-ae51-4543-8127-66cae1388d58; Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1652897181,1652901322,1652973789,1653034293; acw_tc=0a099dce16530560173477392e01561bbfeaf9e92f94a3c96f43320cd10279; __zp_seo_uuid__=22549c76-5002-4003-8a3d-f4a1c77f9264; __g=-; __l=r=https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DNF1GQymEXpbZskLCufX1wDwEMzObVy6jauZKZ-CU05E_pM088HhtnccdMzNMdluF%26wd%3D%26eqid%3D9c6fece50001d909000000026287a227&l=%2Fwww.zhipin.com%2Fdongguan%2F&s=1&g=&s=3&friend_source=0; __c=1653034293; __a=73636670.1620838646.1652973790.1653034293.435.28.11.123; Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1653056055; __zp_stoken__=971bdKQwTEC1dO2lDWyAqXVNjL15xNnk5XDFgSg40BChSdiB%2FJzA0OThxFXVbLgVsBUcJMDMRZTU2fgkmTzoIYmQ%2BM39YDBdlRXg8aXBZGjpoIE1jGRd6LiRqXDF7B0EnR347TmAbED8KOg0%3D"
        # Convert the "name=value; name=value" string into a dict.
        # Split on the FIRST "=" only: values such as the "__l" cookie contain
        # further "=" characters and would otherwise be truncated.
        cookie = {}
        for pair in cookies.split("; "):
            key, sep, value = pair.partition("=")
            if sep:  # skip fragments that are not "name=value"
                cookie[key] = value
        yield scrapy.Request(
            # Fill both page placeholders of the template with page 1.
            url=self.start_urls[0].format(1, 1),
            callback=self.parse,
            # headers=header
            cookies=cookie,  # send the cookies along with the request
        )

    def parse(self, response):
        """Extract job fields from the listing page and print the first entry.

        Raises IndexError if the first listing lacks one of the expected
        nodes, because the fields are read by positional index.
        """
        lis = response.xpath("//div[@class='job-list']//ul/li")
        for li in lis:
            position = li.xpath(".//span[@class='job-name']/a/text()").extract()[0]
            # print(position)
            address = li.xpath(".//span[@class='job-area-wrapper']/span[@class='job-area']/text()").extract()[0]
            company = li.xpath(".//div[@class='info-company']//h3/a//text()").extract()[0]
            salary = li.xpath(".//div[@class='job-limit clearfix']/span/text()").extract()[0]
            # The second text node of the <p> is taken as the education field.
            education = li.xpath(".//div[@class='job-limit clearfix']/p/text()").extract()[1]
            scale = li.xpath(".//div[@class='info-company']//p/text()").extract()
            # Only trust the company-scale text when exactly two nodes matched.
            scale = scale[1] if len(scale) == 2 else None
            print(position, address, company, salary, education, scale)
            # item=BoosItem()
            # item["position"]=position
            # item["address"]=address
            # item["company"]=company
            # item["salary"]=salary
            # item["education"]=education
            # item["scale"]=scale
            # yield item
            # Stop after the first listing; this run only checks that the
            # cookie-authenticated request returns data.
            break
得到的结果是添加cookie成功,数据也请求到了
可以看到,红色线所指的位置说明数据可以请求到,也就是 cookie 传入成功。一开始练习的时候,其实在黄色线所指的位置遇到过重定向的问题:重定向直接改变了我传入的 cookie 信息,导致数据请求失败(这个方法有时可以、有时不可以,我也不清楚是什么原因,但测试结果表明 cookie 还是能传入的)。使用这个方法时,settings 中下面这个设置可以不用打开。
当在 settings 中设置 COOKIES_ENABLED = False 时,
cookie 中间件被禁用,Request 的 cookies 参数不会生效,上面的方法请求不到数据;
当在 settings 中设置 COOKIES_ENABLED = True(默认值)时,
结果是可以拿到数据。
希望对你有点帮助。(可以试试用手机热点在限速的情况下测试:运行带 cookie 的请求时拿不到数据,cookie 信息被改变,同时也发生了重定向。我不确定这和网速有没有关系,查了一些关于重定向的资料,还是没有找到解决方法。)