python 自学登录，采集数据DEMO

最新推荐文章于 2024-04-17 21:50:32 发布

阿飞博客

最新推荐文章于 2024-04-17 21:50:32 发布

阅读量482

点赞数 1

分类专栏： python 登录采集

本文链接：https://blog.csdn.net/teiksky/article/details/90376017

版权

python 同时被 3 个专栏收录

1 篇文章 0 订阅

订阅专栏

1 篇文章 0 订阅

订阅专栏

采集

1 篇文章 0 订阅

订阅专栏

python 自学登录，采集数据DEMO

流程：

1. 打开登录网址，从页面中获取 `__token__`
2. 提交登录信息（用户名、密码和 token）
3. 登录成功后，保存 Cookie 到文件
4. 带上已保存的 Cookie 打开列表页，采集信息

附上代码：

from urllib import request
import http.cookiejar
from urllib.request import build_opener, HTTPCookieProcessor
import json
import http.cookiejar
import time
from urllib.parse import urlencode
import re
from configparser import ConfigParser
import winsound
import MySQLdb
# Load the crawler configuration from disk.
cp = ConfigParser()
cp.read('data/xun.cfg')
# Sections are looked up by position: the first one supplies the cookie file
# name, the second one supplies the login credentials.
section_file, section_user = cp.sections()[0], cp.sections()[1]

# Headers for ordinary page requests (browser-like User-Agent).
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.146 Safari/537.36',
}
# Headers for AJAX-style requests: same User-Agent plus the XMLHttpRequest marker.
xmlheader = dict(header)
xmlheader['X-Requested-With'] = 'XMLHttpRequest'

# Endpoint URLs; the values are placeholders for the real addresses.
r_list = dict(
    token='获取TOKEN的网址',
    login='登录提交网址',
    list='列表页网址',
)

# File used to persist the session cookies between runs.
cookie_filename = cp.get(section_file, 'cookie_filename')
#登录需要获取TOKEN
def run_loign():
    """Log in to the site and persist the session cookies.

    Fetches the page at ``r_list['token']``, extracts the hidden
    ``__token__`` form value, then posts the username and password read from
    the config file to ``r_list['login']`` through a cookie-aware opener.
    When the first ``<h1>`` of the reply reads ``登录成功!`` the cookie jar is
    written to ``cookie_filename``.

    The outcome is reported through the module-level ``is_login`` flag
    (``True`` on success, ``False`` otherwise); nothing is returned.
    Errors raised by ``opener.open`` are not caught here.
    """
    global is_login
    # Start from "not logged in" so every early exit (or an exception)
    # leaves the flag in a defined, conservative state.
    is_login = False
    print('获取token','\n---------------------')
    cj = http.cookiejar.MozillaCookieJar(cookie_filename)
    opener = build_opener(HTTPCookieProcessor(cj))

    # Context managers close the HTTP responses even if decoding fails.
    with opener.open(request.Request(r_list['token'])) as get_response:
        resp = get_response.read().decode('utf-8')
    reg_result = re.search(r'name="__token__" value="(.*?)"', resp)
    if reg_result is None:
        # The page did not contain the expected hidden field; without this
        # guard ``.group(1)`` would raise AttributeError on None.
        print('登录失败!')
        return
    token = reg_result.group(1)
    print('token:',token)

    data = urlencode({
        'username': cp.get(section_user, "n_user"),
        'password': cp.get(section_user, "n_pw"),
        '__token__': token,
        'keeplogin': '1'
    }).encode('utf-8')
    # NOTE(review): this prints the url-encoded form body, which includes the
    # password in clear text -- consider removing it outside of debugging.
    print('data:',data)

    req = request.Request(r_list['login'], data, header)
    with opener.open(req) as login_response:
        resp = login_response.read().decode("utf-8")
    reg_result = re.search(r'<h1>(.*?)</h1>', resp)
    # A reply without any <h1> is treated as a failed login instead of crashing.
    if reg_result is not None and reg_result.group(1) == '登录成功!':
        print('登录成功!')
        # Persist the cookies, including session-only and expired ones.
        cj.save(ignore_discard=True, ignore_expires=True)
        is_login = True
    else:
        print('登录失败!')
def get_list(retry=True):
    """Request the list page with the saved cookies and parse its JSON reply.

    Loads the cookies from ``cookie_filename``, sends an AJAX-style GET to
    ``r_list['list']`` and, unless the body starts with ``<`` (which this
    code reports as "not logged in"), decodes the body with ``json.loads``.

    Args:
        retry: When the request raises, log in again via ``run_loign`` and
            try once more.  That second attempt is made with
            ``retry=False``, so a request that keeps failing re-raises the
            error instead of recursing without bound.
    """
    print('获取list','\n----------------------')
    cookie = http.cookiejar.MozillaCookieJar(cookie_filename)
    cookie.load(cookie_filename, ignore_discard=True, ignore_expires=True)
    handler = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(handler)
    # HTTP method names are case-sensitive and urllib sends the string
    # verbatim, so the method must be spelled 'GET', not 'get'.
    get_request = request.Request(r_list['list'],headers=xmlheader,method='GET')
    try:
        get_response = opener.open(get_request)
    except Exception as e:
        print(e)
        if not retry:
            # Already retried once after a fresh login; give up loudly.
            raise
        run_loign()
        get_list(retry=False)
    else:
        print('no error')
        resp = get_response.read().decode('utf-8')
        # startswith() is safe on an empty body, where resp[0] would raise
        # IndexError.
        if resp.startswith('<'):
            print('未登录')
        else:
            print('login')
            r_json = json.loads(resp)