Python 自学 DEMO:模拟登录并采集数据
流程:
- 打开登录页,提取表单中的 __token__
- 携带 __token__ 提交登录信息
- 登录成功后,将 Cookie 保存到文件
- 使用已保存的 Cookie 打开列表页,采集信息
附上代码:
from urllib import request
import http.cookiejar
from urllib.request import build_opener, HTTPCookieProcessor
import json
import http.cookiejar
import time
from urllib.parse import urlencode
import re
from configparser import ConfigParser
import winsound
import MySQLdb
# Load the configuration file. The first section is used for file-related
# options (the cookie file name); the second is used for the login credentials.
cp = ConfigParser()
cp.read('data/xun.cfg')
section_file = cp.sections()[0]
section_user = cp.sections()[1]
cookie_filename = cp.get(section_file, "cookie_filename")

# Request headers: `header` for ordinary requests, `xmlheader` for requests
# that additionally identify themselves as XMLHttpRequest calls.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36',
}
xmlheader = {**header, 'X-Requested-With': 'XMLHttpRequest'}

# URL settings; the values below appear to be placeholders for the real
# token page, login endpoint and list page addresses.
r_list = dict(
    token='获取TOKEN的网址',
    login='登录提交网址',
    list='列表页网址',
)
# Logging in requires fetching a TOKEN first.
def run_loign():
    """Log in and persist the session cookies to ``cookie_filename``.

    Steps:
      1. Fetch ``r_list['token']`` and extract the hidden ``__token__`` value.
      2. POST the credentials from the config file plus the token to
         ``r_list['login']``.
      3. If the response contains ``<h1>登录成功!</h1>``, save the cookie jar
         to ``cookie_filename``.

    The outcome is published through the module-level flag ``is_login``
    (``True`` on success, ``False`` otherwise). Nothing is returned.

    Network errors raised by ``opener.open`` are not caught here and
    propagate to the caller.
    """
    global is_login

    print('获取token', '\n---------------------')
    cj = http.cookiejar.MozillaCookieJar(cookie_filename)
    opener = build_opener(HTTPCookieProcessor(cj))

    get_request = request.Request(r_list['token'])
    with opener.open(get_request) as get_response:
        resp = get_response.read().decode('utf-8')

    reg_result = re.search(r'name="__token__" value="(.*?)"', resp)
    if reg_result is None:
        # The page did not contain the token field; without it the login
        # cannot be submitted, so report failure instead of crashing.
        print('登录失败!')
        is_login = False
        return
    token = reg_result.group(1)
    print('token:', token)

    data = urlencode({
        'username': cp.get(section_user, "n_user"),
        'password': cp.get(section_user, "n_pw"),
        '__token__': token,
        'keeplogin': '1',
    }).encode('utf-8')
    # NOTE(review): this prints the url-encoded credentials, including the
    # password; consider removing it outside of a demo.
    print('data:', data)

    req = request.Request(r_list['login'], data, header)
    with opener.open(req) as login_response:
        resp = login_response.read().decode("utf-8")

    # A missing <h1> is treated the same as an unexpected <h1> text.
    reg_result = re.search(r'<h1>(.*?)</h1>', resp)
    if reg_result is not None and reg_result.group(1) == '登录成功!':
        print('登录成功!')
        # Persist the cookies, keeping session (discardable) and expired
        # cookies too, so that a later run can reload them.
        cj.save(ignore_discard=True, ignore_expires=True)
        is_login = True
    else:
        print('登录失败!')
        is_login = False
def get_list(retries=1):
    """Fetch the list page with the saved cookies and parse it as JSON.

    The cookie jar is loaded from ``cookie_filename`` and the request is sent
    to ``r_list['list']`` with the ``xmlheader`` headers. If opening the URL
    raises, the error is printed, ``run_loign()`` is called to log in again,
    and the fetch is retried.

    Args:
        retries: How many times to log in again and retry after a failed
            request. Bounded so that a persistent failure cannot recurse
            forever.

    Returns:
        The decoded JSON document, or ``None`` when the request kept failing,
        the re-login failed, or the server answered with markup (a body
        starting with ``<``), which is reported as "not logged in".

    Raises:
        OSError: If the cookie file cannot be loaded.
        json.JSONDecodeError: If the body is neither markup nor valid JSON
            (including an empty body).
    """
    print('获取list', '\n----------------------')
    cookie = http.cookiejar.MozillaCookieJar(cookie_filename)
    cookie.load(cookie_filename, ignore_discard=True, ignore_expires=True)
    opener = request.build_opener(request.HTTPCookieProcessor(cookie))
    # HTTP method names are case-sensitive and urllib sends them verbatim,
    # so the method must be upper-case.
    get_request = request.Request(r_list['list'], headers=xmlheader, method='GET')

    try:
        get_response = opener.open(get_request)
    except Exception as e:
        # Best-effort recovery: any failure to open the page triggers a fresh
        # login, but only a bounded number of times.
        print(e)
        if retries <= 0:
            return None
        run_loign()
        if not is_login:
            # Retrying without a valid session would fail the same way.
            return None
        return get_list(retries - 1)

    print('no error')
    with get_response:
        resp = get_response.read().decode('utf-8')

    # Markup instead of JSON is reported as "not logged in". startswith()
    # avoids an IndexError on an empty body; lstrip() tolerates leading
    # whitespace.
    if resp.lstrip().startswith('<'):
        print('未登录')
        return None

    print('login')
    return json.loads(resp)