python爬虫模拟与思考_python爬虫模拟登入一站点问题

代码:

PS:代码中的 email 和 password 已用 * 号替换。

#!/usr/bin/python

#coding:utf-8

__Date__ = "2016-10-10 14:55"

__Author__ = 'eyu Fanne'

## Simulate logging in to the seebug site

## Captcha image handling (the image is shown to the user, who types the code)

## https://www.seebug.org/

import requests

import urllib

import os

from bs4 import BeautifulSoup

class seebugLogin(object):
    """Interactive login client for the Seebug SSO page (sso.telnet404.com).

    The flow is: fetch the login page, save its captcha image to
    ``captcha.gif``, ask the user to type the captcha, then POST the login
    form together with the page's CSRF token.  All HTTP traffic goes through
    one ``requests`` session so cookies set by the login page are sent back
    with the captcha request and the login POST.
    """

    def __init__(self, session=None, email="******", password="*****"):
        """Set up the HTTP session, target URLs and browser-like headers.

        Args:
            session: Optional pre-built session object exposing ``get`` and
                ``post``; a fresh ``requests.Session()`` is created when
                omitted.
            email: Account e-mail submitted in the login form.
            password: Account password submitted in the login form.
        """
        self.session = requests.Session() if session is None else session
        self.email = email
        self.password = password

        self.webUrl = r'https://sso.telnet404.com'
        self.loginUrl = r'https://sso.telnet404.com/cas/login/?next=/'

        # Headers for plain page/image GET requests.
        # NOTE(review): this advertises sdch/br encodings; confirm that the
        # installed requests/urllib3 can decode whatever the server returns.
        self.headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, sdch, br",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Connection": "keep-alive",
            "Host": "sso.telnet404.com",
            "Referer": "https://sso.telnet404.com/cas/login/?next=/",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36",
        }

        # Headers for the login form POST.  No fixed Content-Length here:
        # the real body length depends on the submitted values, and requests
        # fills that header in from the encoded form body.
        self.loginHeaders = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br",
            "Accept-Language": "zh-CN,zh;q=0.8",
            "Cache-Control": "max-age=0",
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded",
            "Host": "sso.telnet404.com",
            "Origin": "https://sso.telnet404.com",
            "Referer": "https://sso.telnet404.com/cas/login/?next=/",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.116 Safari/537.36",
        }

    def getImg(self):
        """Download the login page's captcha and read its CSRF token.

        Returns:
            A two-item list ``[captcha_file_name, csrf_token]`` where the
            file name is always ``'captcha.gif'`` (written to the current
            working directory).
        """
        pageHtml = self.session.get(self.loginUrl, headers=self.headers).text
        pageSoup = BeautifulSoup(pageHtml, 'lxml')

        captchaTag = pageSoup.find('img', {'class': 'captcha'})
        captchaUrl = '%s%s' % (self.webUrl, captchaTag.get('src'))

        # Fetch the image through the same session as the login page so the
        # request carries the session cookies.  Presumably the server ties
        # the captcha to that session -- a cookie-less request could then
        # yield an image that does not match the later login POST.
        captchaBytes = self.session.get(captchaUrl, headers=self.headers).content

        # The context manager guarantees the file is flushed and closed
        # before the image viewer is asked to open it.
        with open('captcha.gif', 'wb') as captchaFile:
            captchaFile.write(captchaBytes)

        tokenId = pageSoup.find('input', {'name': 'csrfmiddlewaretoken'}).get('value')
        return ['captcha.gif', tokenId]

    def getData(self):
        """Show the captcha, prompt for its text and build the login form.

        Returns:
            dict with the keys ``csrfmiddlewaretoken``, ``email``,
            ``password`` and ``captcha``.
        """
        captchaPath, tokenId = self.getImg()
        print(tokenId)

        # `call` is a cmd.exe builtin, so the image only pops up on Windows;
        # elsewhere open captcha.gif manually before answering the prompt.
        os.system('call %s' % captchaPath)

        # raw_input exists only on Python 2; fall back to input on Python 3.
        try:
            readLine = raw_input
        except NameError:
            readLine = input
        captchacode = readLine('captcha:')

        return {
            "csrfmiddlewaretoken": tokenId,
            "email": self.email,
            "password": self.password,
            "captcha": captchacode,
        }

    def loginSeebug(self):
        """Submit the login form and print the site's home page afterwards.

        Prints the submitted form data, the POST status code and the HTML of
        ``self.webUrl`` fetched with the same session, so the caller can
        inspect whether the login took effect.
        """
        data = self.getData()
        print(data)

        loginWork = self.session.post(self.loginUrl, headers=self.loginHeaders, data=data)
        print(loginWork.status_code)

        loginText = self.session.get(self.webUrl, headers=self.headers).text
        print(loginText)

if __name__ == '__main__':
    # Script entry point: build the client and run one interactive login.
    client = seebugLogin()
    client.loginSeebug()

最后得到的 loginText 结果如下(原文此处为一张截图:bVD2Tn?w=1261&h=351):

页面仍然显示为未登录状态,这是为什么?

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值