# python爬取网页数据(模拟用户登录)
# 简介: python模拟用户登录, 获取网页数据。
# (Scrapes web page data by simulating a user login.)
# -*- coding: utf-8 -*-
import requests
import json
import http.cookiejar as cookielib
from bs4 import BeautifulSoup
from lxml import etree
import re
import xlwt
wangyuSession = requests.session()
wangyuSession.cookies = cookielib.LWPCookieJar(filename="wangyuCookies")
userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
headers = {
"Referer": "http://192.168.14.2:88/leadsec-cvs/cvs/checkItem/itemDetail?ci_ids=4659&alias=/server/Linux",
'User-Agent': userAgent,
}
def Login():
    """Log in to the internal CVS web app and scrape baseline check-item data.

    Iterates check-item ids 1..9999. For each id it posts the login form
    (the module-level session keeps the cookies), then parses the returned
    detail page: the hidden ``benchmark`` input value and the embedded
    ``baselineDataStr`` JSON blob. Each bean in the blob becomes one row in
    ``wangyu1.xls``.

    Side effects: network requests, cookie file ``wangyuCookies``, and the
    Excel file ``wangyu1.xls``. Returns None.
    """
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('sheet2', cell_overwrite_ok=True)
    row = 0
    postUrl = "http://192.168.14.2:88/leadsec-cvs/signin"
    for i in range(1, 10000):
        print("网页登录")  # "web login"
        postData = {
            "username": 'username',
            "password": 'password',
            # NOTE(review): this redirect value fuses two URLs together
            # (the item-detail path and a hao.360.com link) — confirm intended.
            "redirect": '/cvs/checkItem/itemDetail?ci_ids={}&alias=/server/Linuxhttps://hao.360.com/2020.html?src=lm&ls=n478bfd1a95'.format(i),
            'pwd-encrypted': 'True',
        }
        # POST via the shared session so cookies persist. The original passed
        # `json=True`, which is a misuse: `json=` expects a payload object and
        # the form body is already supplied via `data=` — so it is dropped.
        responseRes = wangyuSession.post(postUrl, data=postData, headers=headers)
        print(f"statusCode = {responseRes.status_code}")
        wangyuSession.cookies.save()
        mes = responseRes.content.decode(encoding='utf-8')
        html = etree.HTML(mes)
        # Hidden-input benchmark id; xpath returns a list of attribute values,
        # which xlwt cannot write, so take the first match (or '' if absent).
        id_values = html.xpath("/html/body/div[1]/form/div[2]/div[1]/ul/li[3]/div/input[@id='benchmark']/@value")
        linux_id = id_values[0] if id_values else ''
        # Extract the embedded JavaScript JSON blob: `baselineDataStr = {...}`.
        result = re.findall(r'baselineDataStr\s=\s({(?:.|\n)*})?\r\n\tvar', str(mes))
        str_result = ''.join(result)
        if not str_result:
            # Page carried no baseline data (bad id or login rejected):
            # skip this id instead of crashing the whole 10000-iteration run.
            continue
        try:
            json_result = json.loads(str_result)
        except json.JSONDecodeError:
            # Malformed/truncated blob — skip this id as well.
            continue
        ns = 0
        for r in json_result['beans']:
            row += 1
            ns += 1
            sheet.write(row, 0, row)       # running row counter
            sheet.write(row, 1, linux_id)  # benchmark id of this page
            sheet.write(row, 2, ns - 1)    # 0-based bean index within the page
            sheet.write(row, 3, r['info'])
            sheet.write(row, 4, r['opSign'])
            sheet.write(row, 5, r['valueItems'])
            sheet.write(row, 6, json_result['relation'])
        # Save every iteration so partial progress survives a mid-run failure.
        workbook.save('wangyu1.xls')
# Script entry point: run the scraper only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    Login()