登录豆瓣
使用requests模块登录豆瓣
前提:
使用pip命令安装requests、beautifulsoup4和lxml(代码中使用lxml作为BeautifulSoup的解析器)
思路:
1.访问登录页面,使用Chrome浏览器自带的开发者工具分析出登录时需要POST的表单字段
2.把验证码图片下载下来,进行手动输入
3.访问登录之后才能查看的页面,测试是否登录成功
# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
def verifyCode(verifyCodeData):
    """Save the captcha image to disk and ask the user to type what it shows.

    The raw image bytes are written to ``verifyCode.jpg`` in the current
    working directory so the user can open the picture, then the answer is
    read from standard input.

    Args:
        verifyCodeData: Raw bytes of the captcha image.

    Returns:
        The captcha text exactly as typed by the user.
    """
    with open("verifyCode.jpg", "wb") as image_file:
        image_file.write(verifyCodeData)
    return raw_input("请输入验证码:")
def doubanLogin():
    """Log in to Douban with a requests session and fetch a login-only page.

    Steps: load the login form, download the captcha it references (if the
    form contains one) and have the user solve it through verifyCode(), POST
    the credentials, then request a profile page with the same session.

    Returns:
        The requests response for the profile page, so the caller can
        inspect it to check whether the login worked.
    """
    # A Session object keeps the cookies between requests.
    sess = requests.Session()
    # Request headers sent with every call.
    headers = {"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"}

    # Fetch and parse the login page.
    html = sess.get("https://www.douban.com/accounts/login", headers=headers).text
    bs = BeautifulSoup(html, "lxml")

    # Form fields that are always posted.
    data = {
        "form_email" : "你的账号",
        "form_password" : "你的密码",
    }

    # find() returns None when the form carries no captcha-id field; only
    # run the captcha steps when the field is actually present.
    captcha_input = bs.find("input", attrs={"name" : "captcha-id"})
    if captcha_input is not None:
        # The captcha image is generated from this id.
        captcha_id = captcha_input.get("value")
        print(captcha_id)
        captcha_url = "https://www.douban.com/misc/captcha?id=" + captcha_id + "&size=s"
        # Response.content is a bytes property, not a method; calling it
        # raises TypeError.
        verifyCodeData = sess.get(captcha_url, headers=headers).content
        data["captcha-solution"] = verifyCode(verifyCodeData)
        data["captcha-id"] = captcha_id

    # Submit the login form; the session stores the cookies it sets.
    sess.post("https://accounts.douban.com/login", data=data, headers=headers)
    # Request a page that needs a logged-in session.
    return sess.get("https://www.douban.com/people/155260137/", headers=headers)
# Run the login flow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    doubanLogin()
使用urllib2模块登录豆瓣
思路与上面的requests版本相似,区别是用cookielib.CookieJar配合urllib2的opener来保存Cookie
# -*- coding:utf-8 -*-
import urllib2
import urllib
import cookielib
from bs4 import BeautifulSoup
def verifyCode(verifyCodeData):
    """Write the captcha image to disk and prompt the user for its text.

    The bytes are stored as ``verifyCode.jpg`` in the current working
    directory so the image can be viewed, then the answer is read from
    standard input.

    Args:
        verifyCodeData: Raw bytes of the captcha image.

    Returns:
        The captcha text exactly as typed by the user.
    """
    with open("verifyCode.jpg", "wb") as captcha_file:
        captcha_file.write(verifyCodeData)
    answer = raw_input("请输入验证码:")
    return answer
def doubanLogin():
    """Log in to Douban with urllib2 and print a login-only page.

    A cookie-aware opener is installed globally so every urllib2.urlopen()
    call in this function shares one cookie jar. The function then loads the
    login form, has the user solve the captcha (if the form contains one)
    through verifyCode(), POSTs the credentials, and prints the body of a
    profile page fetched afterwards.
    """
    # Build an opener that stores and resends cookies, and make it the
    # default for urllib2.urlopen().
    cookie = cookielib.CookieJar()
    cookie_handler = urllib2.HTTPCookieProcessor(cookie)
    opener = urllib2.build_opener(cookie_handler)
    urllib2.install_opener(opener)

    # Request headers sent with every call.
    headers = {"User-Agent" : "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36"}

    # Fetch and parse the login page.
    url = "https://www.douban.com/accounts/login"
    request = urllib2.Request(url, headers=headers)
    html = urllib2.urlopen(request).read()
    bs = BeautifulSoup(html, "lxml")

    # Form fields that are always posted.
    data = {
        "form_email" : "你的账号",
        "form_password" : "你的密码",
    }

    # find() returns None when the form carries no captcha-id field; only
    # run the captcha steps when the field is actually present.
    captcha_input = bs.find("input", attrs={"name" : "captcha-id"})
    if captcha_input is not None:
        # The captcha image is generated from this id.
        captcha_id = captcha_input.get("value")
        print(captcha_id)
        captcha_url = "https://www.douban.com/misc/captcha?id=" + captcha_id + "&size=s"
        request = urllib2.Request(captcha_url, headers=headers)
        verifyCodeData = urllib2.urlopen(request).read()
        data["captcha-solution"] = verifyCode(verifyCodeData)
        data["captcha-id"] = captcha_id

    # Passing data makes urllib2 send a POST request.
    data = urllib.urlencode(data)
    url = "https://accounts.douban.com/login"
    request = urllib2.Request(url, data=data, headers=headers)
    urllib2.urlopen(request)

    # Request a page that needs a logged-in session, sending the same
    # headers as the earlier requests.
    request = urllib2.Request("https://www.douban.com/people/155260137/", headers=headers)
    response = urllib2.urlopen(request)
    print(response.read())
# Run the login flow only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    doubanLogin()