Access session cookie in scrapy spiders
def parse(self, response):
return [FormRequest.from_response(response,
formname='login_form',
formdata={'email': '...', 'pass':'...'},
callback=self.after_login)]
def after_login(self, response):
# process response
.....
# access the cookies of that session to access another URL in the
# same domain with the autehnticated session.
# Something like:
session_cookies = XXX.get_session_cookies()
data = another_function(url,cookies)
def after_login(self, response):
# testing to see if I can get the session cookies
cookieJar = response.meta.setdefault('cookie_jar', CookieJar())
cookieJar.extract_cookies(response, response.request)
cookies_test = cookieJar._cookies
print "cookies - test:",cookies_test
# URL access with authenticated session
url = "http://site.org/?id=XXXX"
request = Request(url=url,callback=self.get_pict)
return [request]
cookies - test: {}
2012-01-02 22:44:39-0800 [myspider] DEBUG: Sending cookies to: <GET http://www.facebook.com/profile.php?id=529907453>
Cookie: xxx=3..........; yyy=34.............; zzz=.................; uuu=44..........
from scrapy.http.cookies import CookieJar
class MySpider(BaseSpider):
def parse(self, response):
cookieJar = response.meta.setdefault('cookie_jar', CookieJar())
cookieJar.extract_cookies(response, response.request)
request = Request(nextPageLink, callback = self.parse2,
meta = {'dont_merge_cookies': True, 'cookie_jar': cookieJar})
cookieJar.add_cookie_header(request) # apply Set-Cookie ourselves
class CookiesMiddleware(object):
def _debug_cookie(self, request, spider):
if self.debug:
cl = request.headers.getlist('Cookie')
if cl:
msg = "Sending cookies to: %s" % request + os.linesep
msg += os.linesep.join("Cookie: %s" % c for c in cl)
log.msg(msg, spider=spider, level=log.DEBUG)
def parse(self, response):
return [FormRequest.from_response(response,
formname='login_form',
formdata={'email': '...', 'pass':'...'},
callback=self.after_login)]
def after_login(self, response):
# process response
.....
# access the cookies of that session to access another URL in the
# same domain with the autehnticated session.
# Something like:
session_cookies = XXX.get_session_cookies()
data = another_function(url,cookies)
def after_login(self, response):
# testing to see if I can get the session cookies
cookieJar = response.meta.setdefault('cookie_jar', CookieJar())
cookieJar.extract_cookies(response, response.request)
cookies_test = cookieJar._cookies
print "cookies - test:",cookies_test
# URL access with authenticated session
url = "http://site.org/?id=XXXX"
request = Request(url=url,callback=self.get_pict)
return [request]
cookies - test: {}
2012-01-02 22:44:39-0800 [myspider] DEBUG: Sending cookies to: <GET http://www.facebook.com/profile.php?id=529907453>
Cookie: xxx=3..........; yyy=34.............; zzz=.................; uuu=44..........
from scrapy.http.cookies import CookieJar
class MySpider(BaseSpider):
def parse(self, response):
cookieJar = response.meta.setdefault('cookie_jar', CookieJar())
cookieJar.extract_cookies(response, response.request)
request = Request(nextPageLink, callback = self.parse2,
meta = {'dont_merge_cookies': True, 'cookie_jar': cookieJar})
cookieJar.add_cookie_header(request) # apply Set-Cookie ourselves
class CookiesMiddleware(object):
def _debug_cookie(self, request, spider):
if self.debug:
cl = request.headers.getlist('Cookie')
if cl:
msg = "Sending cookies to: %s" % request + os.linesep
msg += os.linesep.join("Cookie: %s" % c for c in cl)
log.msg(msg, spider=spider, level=log.DEBUG)