python爬取网页数据(模拟用户名密码登录)

python爬取网页数据(模拟用户登录)
   简介:使用 Python 模拟用户名/密码登录,并抓取登录后页面中的数据。

# -*- coding: utf-8 -*-

import requests
import json
import http.cookiejar as cookielib
from bs4 import BeautifulSoup
from lxml import etree
import re
import xlwt

# Request headers reused by every POST: a desktop Chrome User-Agent plus a
# Referer that points at a check-item detail page on the target host.
userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
headers = {
    "Referer": "http://192.168.14.2:88/leadsec-cvs/cvs/checkItem/itemDetail?ci_ids=4659&alias=/server/Linux",
    'User-Agent': userAgent,
}

# Single shared HTTP session; its cookie jar is an LWPCookieJar bound to the
# file "wangyuCookies" so cookies can be written to disk with .save().
wangyuSession = requests.Session()
wangyuSession.cookies = cookielib.LWPCookieJar(filename="wangyuCookies")

def _extract_baseline(page_text):
    """Return the parsed ``baselineDataStr`` object found in *page_text*.

    The text is searched for ``baselineDataStr = {...}`` followed by
    ``\\r\\n\\tvar`` and the captured ``{...}`` part is parsed as JSON.

    Returns:
        The decoded JSON value, or ``None`` when the pattern does not match
        or the captured text is not valid JSON.
    """
    matches = re.findall(r'baselineDataStr\s=\s({(?:.|\n)*})?\r\n\tvar', page_text)
    raw = ''.join(matches)
    if not raw:
        # No match (or only the optional group was skipped): nothing to parse.
        return None
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return None


def Login():
    """Sign in once per check-item id and export the baseline data to Excel.

    For every id in ``range(1, 10000)`` the sign-in form is POSTed through the
    module-level ``wangyuSession`` with a ``redirect`` to that id's detail
    page.  The response body is searched for the ``benchmark`` input value and
    the embedded ``baselineDataStr`` JSON; one spreadsheet row is written per
    entry of its ``beans`` list, and ``relation`` is written on the last row
    of each id.  Ids whose page carries no parsable baseline data are skipped.

    Side effects:
        Saves the session cookies after every request and writes the workbook
        to ``wangyu1.xls`` when the scan finishes or is interrupted.
    """
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('sheet2', cell_overwrite_ok=True)
    post_url = "http://192.168.14.2:88/leadsec-cvs/signin"
    row = 0
    try:
        for i in range(1, 10000):
            print("网页登录")
            post_data = {
                "username": 'username',
                "password": 'password',
                # The original literal had an unrelated URL pasted onto the end
                # of the alias value; the form used here matches the Referer.
                "redirect": '/cvs/checkItem/itemDetail?ci_ids={}&alias=/server/Linux'.format(i),
                'pwd-encrypted': 'True',
            }
            # Form-encoded POST on the shared session.  The former json=True
            # argument was dropped: requests ignores json when data is given.
            response = wangyuSession.post(post_url, data=post_data, headers=headers)
            print(f"statusCode = {response.status_code}")
            wangyuSession.cookies.save()
            page_text = response.content.decode(encoding='utf-8')

            baseline = _extract_baseline(page_text)
            if baseline is None:
                # No baseline data for this id; skip it instead of aborting
                # the whole scan on a JSON decode error.
                continue

            html = etree.HTML(page_text)
            # xpath() yields a list of matching attribute values; it is handed
            # to xlwt unchanged, exactly as before.
            linux_id = html.xpath("/html/body/div[1]/form/div[2]/div[1]/ul/li[3]/div/input[@id='benchmark']/@value")

            beans = baseline['beans']
            for index, bean in enumerate(beans):
                row += 1
                sheet.write(row, 0, row)
                sheet.write(row, 1, linux_id)
                sheet.write(row, 2, index)
                sheet.write(row, 3, bean['info'])
                sheet.write(row, 4, bean['opSign'])
                sheet.write(row, 5, bean['valueItems'])
            if beans:
                # Only tag a row that belongs to this id; with no beans the
                # current row would still be the previous id's last row.
                sheet.write(row, 6, baseline['relation'])
    finally:
        # Save once (also on error or Ctrl-C) rather than re-serializing the
        # growing workbook after every id.
        workbook.save('wangyu1.xls')
# Script entry point: start the scrape only when this file is run directly.
if __name__ == "__main__":
    Login()


  • 0
    点赞
  • 26
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值