# python爬取网页数据(模拟用户登录)
# 简介: python模拟用户登录, 获取网页数据。
# (Scrapes web page data by simulating a user login.)
# -*- coding: utf-8 -*-
import requests
import json
import http.cookiejar as cookielib
from bs4 import BeautifulSoup
from lxml import etree
import re
import xlwt
wangyuSession = requests.session()
wangyuSession.cookies = cookielib.LWPCookieJar(filename="wangyuCookies")
userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
headers = {
"Referer": "http://192.168.14.2:88/leadsec-cvs/cvs/checkItem/itemDetail?ci_ids=4659&alias=/server/Linux",
'User-Agent': userAgent,
}
def Login():
    """Log in to the internal CVS web app and scrape baseline check-item data.

    Iterates check-item ids 1..9999. For each id it posts the login form
    (the module-level session keeps the cookies), then parses the returned
    detail page: the hidden ``benchmark`` input value and the embedded
    ``baselineDataStr`` JSON blob. Each bean in the blob becomes one row in
    ``wangyu1.xls``.

    Side effects: network requests, cookie file ``wangyuCookies``, and the
    Excel file ``wangyu1.xls``. Returns None.
    """
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('sheet2', cell_overwrite_ok=True)
    row = 0
    postUrl = "http://192.168.14.2:88/leadsec-cvs/signin"
    for i in range(1, 10000):
        print("网页登录")  # "web login"
        postData = {
            "username": 'username',
            "password": 'password',
            # NOTE(review): this redirect value fuses two URLs together
            # (the item-detail path and a hao.360.com link) — confirm intended.
            "redirect": '/cvs/checkItem/itemDetail?ci_ids={}&alias=/server/Linuxhttps://hao.360.com/2020.html?src=lm&ls=n478bfd1a95'.format(i),
            'pwd-encrypted': 'True',
        }
        # POST via the shared session so cookies persist. The original passed
        # `json=True`, which is a misuse: `json=` expects a payload object and
        # the form body is already supplied via `data=` — so it is dropped.
        responseRes = wangyuSession.post(postUrl, data=postData, headers=headers)
        print(f"statusCode = {responseRes.status_code}")
        wangyuSession.cookies.save()
        mes = responseRes.content.decode(encoding='utf-8')
        html = etree.HTML(mes)
        # Hidden-input benchmark id; xpath returns a list of attribute values,
        # which xlwt cannot write, so take the first match (or '' if absent).
        id_values = html.xpath("/html/body/div[1]/form/div[2]/div[1]/ul/li[3]/div/input[@id='benchmark']/@value")
        linux_id = id_values[0] if id_values else ''
        # Extract the embedded JavaScript JSON blob: `baselineDataStr = {...}`.
        result = re.findall(r'baselineDataStr\s=\s({(?:.|\n)*})?\r\n\tvar', str(mes))
        str_result = ''.join(result)
        if not str_result:
            # Page carried no baseline data (bad id or login rejected):
            # skip this id instead of crashing the whole 10000-iteration run.
            continue
        try:
            json_result = json.loads(str_result)
        except json.JSONDecodeError:
            # Malformed/truncated blob — skip this id as well.
            continue
        ns = 0
        for r in json_result['beans']:
            row += 1
            ns += 1
            sheet.write(row, 0, row)       # running row counter
            sheet.write(row, 1, linux_id)  # benchmark id of this page
            sheet.write(row, 2, ns - 1)    # 0-based bean index within the page
            sheet.write(row, 3, r['info'])
            sheet.write(row, 4, r['opSign'])
            sheet.write(row, 5, r['valueItems'])
            sheet.write(row, 6, json_result['relation'])
        # Save every iteration so partial progress survives a mid-run failure.
        workbook.save('wangyu1.xls')
# Script entry point: run the scraper only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    Login()