#!/usr/bin/env python
# -*- coding: utf-8 -*-
# import configparser
import datetime
import sys
import requests
from requests.cookies import requestscookiejar
from bs4 import beautifulsoup
import log_config
import time
import random
import re
def is_form_hash(tag):
    """Return True when *tag* carries a ``name`` attribute equal to ``'formhash'``.

    Intended to be passed as a filter function to ``soup.find``; *tag* only
    needs to provide ``has_attr`` and ``get``.
    """
    if not tag.has_attr('name'):
        return False
    return tag.get('name') == 'formhash'
def is_refer(tag):
    """Return True when *tag* carries a ``name`` attribute equal to ``'referer'``.

    Intended to be passed as a filter function to ``soup.find``; *tag* only
    needs to provide ``has_attr`` and ``get``.
    """
    if not tag.has_attr('name'):
        return False
    return tag.get('name') == 'referer'
class haifeng_crawler:
    """Session-based client that logs in to www.96bbs.com and fetches pages.

    All requests go through one ``requests`` session. Cookies returned by each
    response are additionally mirrored into the plain dict ``self.cookies``.
    """

    def __init__(self, user_name, pass_word):
        """Store the credentials and open a fresh HTTP session.

        Args:
            user_name: Value sent as the ``username`` login form field.
            pass_word: Value sent unchanged as the ``password`` login form
                field. NOTE(review): the site presumably expects the
                already-encrypted password here -- confirm.
        """
        self.cookies = dict()
        self.username = user_name
        self.password = pass_word
        self.session = requests.session()

    def update_cookies(self, new_cookies):
        """Merge *new_cookies* (a name -> value mapping) into ``self.cookies``."""
        self.cookies.update(new_cookies)

    def _sync_session_cookies(self):
        """Copy entries of ``self.cookies`` that the session jar lacks into it."""
        # The jar of the session that actually sends the request must be the
        # target; a freshly created session would be discarded immediately.
        # overwrite=False keeps the jar's own (domain-scoped) cookies intact
        # and only adds names it does not know yet.
        requests.utils.cookiejar_from_dict(
            self.cookies, cookiejar=self.session.cookies, overwrite=False
        )

    def _remember_cookies(self, resp):
        """Mirror the cookies set by *resp* into ``self.cookies``."""
        self.update_cookies(requests.utils.dict_from_cookiejar(resp.cookies))

    def req_get(self, url):
        """GET *url* through the shared session and return the response.

        The accumulated cookie dict is printed after every call.
        """
        self._sync_session_cookies()
        resp = self.session.get(url)
        self._remember_cookies(resp)
        print(self.cookies)
        return resp

    def req_post(self, url, data):
        """POST *data* to *url* through the shared session and return the response."""
        self._sync_session_cookies()
        resp = self.session.post(url, data)
        self._remember_cookies(resp)
        return resp

    def login(self):
        """Fetch the login form, then submit the stored credentials.

        The form's ``formhash`` and ``referer`` values are read from the
        ``<root>`` element of the first response and posted back together with
        the credentials. The text of the ``<root>`` element of the login
        response is printed.

        Returns:
            None. The method also returns early, without raising, when a
            response lacks the ``<root>`` element or the form lacks one of the
            two expected fields.
        """
        # Imported locally so this method does not depend on how the
        # module-level bs4 import is spelled.
        from bs4 import BeautifulSoup

        url = 'http://www.96bbs.com/member.php?mod=logging&action=login&infloat=yes&handlekey=login&inajax=1&ajaxtarget=fwin_content_login'
        page_res = self.req_get(url)
        root = BeautifulSoup(page_res.text, "html.parser").find('root')
        if root is None:
            return None

        # The <root> element's text is itself HTML; parse it again to reach
        # the form fields.
        form_soup = BeautifulSoup(root.text, "html.parser")
        formhash_tag = form_soup.find(is_form_hash)
        referer_tag = form_soup.find(is_refer)
        if formhash_tag is None or referer_tag is None:
            return None
        formhash = formhash_tag.get("value")
        referer = referer_tag.get("value")
        print(formhash)
        print(referer)

        url = 'http://www.96bbs.com/member.php?mod=logging&action=login&loginsubmit=yes&handlekey=login&loginhash=lvcbx&inajax=1'
        data = {
            'formhash': formhash,
            'referer': referer,
            'username': self.username,
            # Send the password given to the constructor instead of a
            # hard-coded placeholder string.
            'password': self.password,
            'questionid': 0,
            'answer': '',
        }
        resp = self.req_post(url, data)
        result_root = BeautifulSoup(resp.text, "html.parser").find('root')
        if result_root is None:
            return None
        print(result_root.text)

    def visit_home(self):
        """GET the forum home page; the response is discarded."""
        url = 'http://www.96bbs.com/forum.php'
        self.req_get(url)

    def visit_attachment(self, url):
        """GET *url*, print its status code and body text, and return the response."""
        resp = self.req_get(url)
        print(resp.status_code)
        print(resp.text)
        return resp
if __name__ == "__main__":
    # Script entry point: log in, then fetch one attachment URL.
    # NOTE(review): both credential strings look like placeholders to be
    # replaced before running -- confirm.
    crawler = haifeng_crawler("你的用户名", "密码需要根据页面取获取加密后的密码")
    crawler.login()
    attachment_url = "http://www.96bbs.com/forum.php?mod=attachment&aid=mji0nzq5ohw3yjnkmwmwy3wxntqwmzyxmzewfdq5nzm5oxwzntm5ntgy"
    crawler.visit_attachment(attachment_url)