豆瓣爬虫(CookieJar 练习:爬取用户登录后的响应页面)

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import urllib
import urllib.request
import urllib.parse
from http.cookiejar import CookieJar
import ssl

# Disable HTTPS certificate verification globally by replacing the ssl
# module's default-context factory with the unverified one.
# NOTE: assign the factory function itself, WITHOUT parentheses. The original
# author recorded that the called form below raised an error:
# ssl._create_default_https_context = ssl._create_unverified_context()
ssl._create_default_https_context = ssl._create_unverified_context

# Simulated login to douban: the POST target URL and the form body.
post_url = 'https://www.douban.com/accounts/login'

# The login form is URL-encoded and converted to UTF-8 bytes in one step,
# ready to be used as the body of the POST request.
# NOTE(review): the account e-mail and password are embedded in the source.
form_data = urllib.parse.urlencode([
    ('source', 'index_nav'),
    ('form_email', '17887949901@163.com'),
    ('form_password', '794662577.a'),
]).encode('utf-8')

# Request headers for "POST /accounts/login HTTP/1.1" (apparently copied from
# a captured browser request). The Content-Length, Cache-Control and
# Accept-Encoding headers of that request are intentionally not sent.
# No Cookie header is set here either: cookies are supposed to be obtained
# through the CookieJar instead of being hard-coded.
headers = dict([
    ('Host', 'www.douban.com'),
    ('Connection', 'keep-alive'),
    ('Origin', 'https://www.douban.com'),
    ('Upgrade-Insecure-Requests', '1'),
    ('Content-Type', 'application/x-www-form-urlencoded'),
    (
        'User-Agent',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36',
    ),
    (
        'Accept',
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    ),
    ('Referer', 'https://www.douban.com/'),
    ('Accept-Language', 'zh-CN,zh;q=0.9'),
])

# Build an opener whose cookie processor stores cookies in `cookiejar`.
cookiejar = CookieJar()
cookiejar_handler = urllib.request.HTTPCookieProcessor(cookiejar)
opener = urllib.request.build_opener(cookiejar_handler)

# Passing `data` makes urllib issue this request as a POST.
request = urllib.request.Request(url=post_url, data=form_data, headers=headers)

# Read the body inside a `with` so the HTTP response is always closed,
# even if reading fails.
with opener.open(request) as response:
    raw_body = response.read()

# Save the bytes exactly as received. Writing them directly avoids the
# decode/encode round trip, so the page is stored even when the body is not
# valid UTF-8.
with open('./douban.html', mode='wb') as fp:
    fp.write(raw_body)

# Text form of the page, kept under the same module-level name as before.
content = raw_body.decode('utf-8')



 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值