# -*- coding: utf-8 -*-
# @Author : HitoChen
# @Time : 2021/11/11 2:34 下午
# @Function:
import requests
from bs4 import BeautifulSoup
class login():
    """Sign in to so.gushiwen.cn through its captcha-protected login form.

    Intended call order:

    1. ``Get_response`` downloads the login page.
    2. ``Get_viewstate_viewstategenerator`` extracts the form values and the
       captcha image path from that page.
    3. ``Get_code`` downloads the captcha image to ``code.jpg``.
    4. ``Get_login`` submits the form and saves the reply to
       ``gushiwen.html``.

    All requests go through one shared ``requests`` session (created on first
    use and kept in ``self.session``) so that cookies set by one step are sent
    with the next.
    """

    def __init__(self, email='1412335438@qq.com', pwd='******', timeout=10):
        """Store the endpoint, request headers and account settings.

        Args:
            email: Account e-mail submitted in the login form.
            pwd: Account password submitted in the login form.
            timeout: Seconds to wait on each HTTP request before giving up.

        The defaults for ``email`` and ``pwd`` are the values that used to be
        hard-coded in the form payload; pass your own instead of editing them.
        """
        self.url = 'https://so.gushiwen.cn/user/login.aspx?from=http%3a%2f%2fso.gushiwen.cn%2fuser%2fcollect.aspx'
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36'
        }
        self.email = email
        self.pwd = pwd
        self.timeout = timeout
        # Created lazily so that building an instance performs no I/O.
        self.session = None

    def _get_session(self):
        """Return the shared session, creating it on first use."""
        if self.session is None:
            self.session = requests.session()
        return self.session

    @staticmethod
    def _first_attr(soup, selector, attr):
        """Return attribute ``attr`` of the first element matching ``selector``.

        Raises:
            IndexError: If nothing in ``soup`` matches ``selector``.
        """
        matches = soup.select(selector)
        if not matches:
            # Same exception type as the bare ``[0]`` lookup, but it now says
            # which element was missing.
            raise IndexError(
                'no element matching {!r} found in the login page'.format(selector)
            )
        return matches[0].attrs.get(attr)

    def Get_response(self):
        """Download the login page and return its HTML text.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        response = self._get_session().get(
            url=self.url, headers=self.headers, timeout=self.timeout
        )
        response.raise_for_status()
        return response.text

    def Get_viewstate_viewstategenerator(self, content):
        """Extract the form values and captcha path from the login page.

        Args:
            content: HTML text of the login page.

        Returns:
            A tuple ``(viewstate, viewstategenerator, code)``: the ``value``
            attributes of ``#__VIEWSTATE`` and ``#__VIEWSTATEGENERATOR`` and
            the ``src`` attribute of ``#imgCode``. An attribute that is
            absent on its element is returned as ``None``.

        Raises:
            IndexError: If one of the three elements is not in the page.
        """
        soup = BeautifulSoup(content, 'lxml')
        viewstate = self._first_attr(soup, '#__VIEWSTATE', 'value')
        viewstategenerator = self._first_attr(soup, '#__VIEWSTATEGENERATOR', 'value')
        code = self._first_attr(soup, '#imgCode', 'src')
        return viewstate, viewstategenerator, code

    def Get_code(self, code):
        """Download the captcha image and save it as ``code.jpg``.

        Args:
            code: Path of the captcha image, appended to the site root.

        Returns:
            The session that fetched the image. It is also kept in
            ``self.session`` and must be the one used to submit the login
            form.

        Raises:
            requests.HTTPError: If the server answers with an error status.
        """
        code_url = 'https://so.gushiwen.cn' + code
        session = self._get_session()
        response_code = session.get(
            code_url, headers=self.headers, timeout=self.timeout
        )
        # Do not save an error page as if it were the captcha image.
        response_code.raise_for_status()
        with open('code.jpg', 'wb') as fp:
            fp.write(response_code.content)
        return session

    def Get_login(self, viewstate, viewstategenerator, session=None, code_name=None):
        """Submit the login form and save the server's reply.

        Args:
            viewstate: Value taken from the ``#__VIEWSTATE`` element.
            viewstategenerator: Value taken from ``#__VIEWSTATEGENERATOR``.
            session: Session to post with. Defaults to the shared session
                stored on the instance (the one ``Get_code`` returned).
            code_name: Captcha answer. When omitted the user is prompted on
                the console.

        Returns:
            The text of the response, which is also written to
            ``gushiwen.html``.
        """
        if session is None:
            # Use the instance's own session rather than relying on a
            # module-level variable that happens to be called ``session``.
            session = self._get_session()
        if code_name is None:
            code_name = input('请输入你的验证码')
        data_post = {
            '__VIEWSTATE': viewstate,
            '__VIEWSTATEGENERATOR': viewstategenerator,
            'from': 'http://so.gushiwen.cn/user/collect.aspx',
            'email': self.email,
            'pwd': self.pwd,
            'code': code_name,
            'denglu': '登录',
        }
        response_post = session.post(
            url=self.url, headers=self.headers, data=data_post, timeout=self.timeout
        )
        content_post = response_post.text
        # Keep the reply on disk so the outcome can be inspected in a browser.
        with open('gushiwen.html', 'w', encoding='utf-8') as aa:
            aa.write(content_post)
        return content_post
if __name__ == '__main__':
    # Script entry point: run the sign-in flow once, start to finish.
    client = login()

    # Step 1: fetch the login page and pull the form values out of it.
    page_html = client.Get_response()
    form_values = client.Get_viewstate_viewstategenerator(page_html)
    viewstate, viewstategenerator, code = form_values

    # Step 2: download the captcha image. The returned session is bound to
    # the module-level name `session` on purpose, so that code which looks
    # that name up globally keeps working.
    session = client.Get_code(code)

    # Step 3: submit the login form.
    client.Get_login(viewstate, viewstategenerator)
Python requests 爬取登录后的古诗文网
最新推荐文章于 2022-11-25 14:19:17 发布