#encoding:utf-8
import urllib
import urllib2
import cookielib
from bs4 import BeautifulSoup
filename = 'cookie_csdn.txt'
#声明一个MozillaCookieJar对象实例来保存cookie,之后写入文件
cookie = cookielib.MozillaCookieJar(filename)
#利用urllib2库的HTTPCookieProcessor对象来创建cookie处理器
handler = urllib2.HTTPCookieProcessor(cookie)
#通过handler来构建opener
opener = urllib2.build_opener(handler)
loginUrl = "https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn"
#登陆前准备:获取lt和exection
response = opener.open(loginUrl)
soup = BeautifulSoup(response.read())
for input in soup.form.find_all("input"):
if input.get("name") == "lt":
lt = input.get("value")
if input.get("name") == "execution":
execution = input.get("value")
#post信息
postdata = {
"username":"xxxxx@qq.com",
"password":"xxxxxx",
"lt":lt,
"execution":execution,
"_eventId":"submit"
}
postdata = urllib.urlencode(postdata)
opener.addheaders = [("User-Agent","Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36")]
#模拟登录,保存cookie到cookie.txt中
result = opener.open(loginUrl, postdata)
#保存cookie
cookie.save(ignore_discard=True, ignore_expires=True)
#登陆后我们随意跳转到博客获取内容
url = "http://blog.csdn.net"
response = opener.open(url)
#读取内容保存到html文件中,方便查看
f = open('csdn_index.html', 'w')
f.write(response.read());
f.close()
print 'ok'
# Python 3 version of the same script
# -*- coding: UTF-8 -*-
import urllib
from urllib import request
from http import cookiejar
from bs4 import BeautifulSoup
def find_hidden_fields(inputs, names=("lt", "execution")):
    """Collect the values of the named form inputs.

    Args:
        inputs: Iterable of tag-like objects exposing ``get(attribute)``,
            such as the result of ``soup.form.find_all("input")``.
        names: ``name`` attributes of the inputs to pick out.

    Returns:
        A dict mapping each requested name that was present to its
        ``value`` attribute. Names that never appear are left out; if a
        name appears more than once, the last occurrence wins.
    """
    found = {}
    for tag in inputs:
        field_name = tag.get("name")
        if field_name in names:
            found[field_name] = tag.get("value")
    return found


def main():
    """Log in to CSDN, save the session cookies, and dump a profile page.

    Writes the cookies to ``cookie.txt`` and the fetched page to
    ``csdn.html`` in the current working directory, then prints ``ok``.

    Raises:
        SystemExit: If the login page has no form, or the form lacks the
            hidden ``lt`` / ``execution`` fields.
    """
    # File the cookies are saved to (relative to the working directory).
    filename = 'cookie.txt'
    cookie = cookiejar.MozillaCookieJar(filename)
    # To reuse a previously saved session instead of logging in again, call
    # cookie.load(filename, ignore_discard=True, ignore_expires=True) here.

    # Route every request through a cookie-aware opener so cookies set by
    # the login response are sent automatically on later requests.
    handler = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(handler)
    # Set the User-Agent before the first request so every request in the
    # session (login page, login POST, profile page) carries the same header.
    opener.addheaders = [("User-Agent",
                          "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36")]
    loginUrl = "https://passport.csdn.net/account/login?from=http://my.csdn.net/my/mycsdn"

    # Before logging in, fetch the login page: the form carries the hidden
    # "lt" and "execution" fields that must be echoed back in the POST.
    with opener.open(loginUrl) as response:
        soup = BeautifulSoup(response.read(), 'lxml')
    if soup.form is None:
        raise SystemExit("login page has no <form>; cannot read lt/execution")

    tokens = find_hidden_fields(soup.form.find_all("input"))
    # Fail with a clear message instead of a NameError further down.
    missing = [name for name in ("lt", "execution") if name not in tokens]
    if missing:
        raise SystemExit("login form is missing hidden field(s): " + ", ".join(missing))

    # Form fields for the login POST.
    postdata = {
        "username": "xxxxx@qq.com",
        "password": "xxxxx",
        "lt": tokens["lt"],
        "execution": tokens["execution"],
        "_eventId": "submit"
    }
    postdata = urllib.parse.urlencode(postdata).encode('utf-8')

    # Submit the login form; the cookie processor stores whatever cookies
    # the response sets. The response body itself is not used.
    opener.open(loginUrl, postdata).close()

    # Persist the cookies, including session-only and expired ones.
    cookie.save(ignore_discard=True, ignore_expires=True)

    # With the logged-in session, fetch the account page.
    url = "http://my.csdn.net/my/mycsdn"
    with opener.open(url) as response:
        html = response.read().decode('utf-8')

    # Write with an explicit encoding: the platform default may be unable
    # to represent characters in the page.
    with open('csdn.html', 'w', encoding='utf-8') as f:
        f.write(html)
    print('ok')


if __name__ == '__main__':
    main()