import asyncio
from pyppeteer import launch
import pandas as pd
import openpyxl
# Fixed page size (used for both the browser window and the viewport).
width = 1366
height = 768

# Dates: the script works with yesterday's date.
import datetime

today = datetime.date.today()  # today's date
_one_day = datetime.timedelta(days=1)
yes = (today - _one_day).strftime('%Y/%m/%d')  # yesterday, formatted YYYY/MM/DD
yesterday = (today - _one_day).isoformat()  # yesterday, formatted YYYY-MM-DD
strday = f'&sdate={yesterday}&edate={yesterday}'  # start/end date query parameters
def parse_page(html):
    """Extract the last two HTML tables of a page and join them side by side.

    Args:
        html: Page markup as a string.

    Returns:
        A DataFrame holding the columns of the second-to-last table followed
        by the columns of the last table, aligned on the row index.

    Raises:
        ValueError: If pandas finds no table in ``html``.
        IndexError: If ``html`` contains only a single table.
    """
    from io import StringIO  # local import keeps this function self-contained

    # Parse the markup once instead of once per table. Wrapping the text in
    # StringIO avoids pandas' deprecation of literal HTML strings.
    tables = pd.read_html(StringIO(html))
    tb1 = tables[-1]
    tb2 = tables[-2]
    return pd.concat([tb2, tb1], axis=1)
async def _scrape_table(page, url, trailing_rows, drop_empty=False):
    """Load ``url`` in ``page`` and return its parsed table with a date column.

    Args:
        page: An open pyppeteer page (already logged in).
        url: Address of the report page to load.
        trailing_rows: Number of rows cut from the end of the parsed table
            (presumably summary/footer rows -- confirm against the report).
        drop_empty: When true, rows whose cells are all NaN are removed.

    Returns:
        The trimmed DataFrame with ``yes`` inserted as its first column.
    """
    await page.goto(url)
    await asyncio.sleep(3)  # give the page time to render its tables
    table = parse_page(await page.content())
    table = table.iloc[0:-trailing_rows]
    if drop_empty:
        table = table.dropna(axis=0, how="all")
    # The date column is labelled 0; allow_duplicates covers the case where
    # the parsed table already has a column with that label.
    table.insert(loc=0, column=0, value=yes, allow_duplicates=True)
    return table


async def main():
    """Log in to the report site, scrape three report pages and save to Excel.

    The browser is opened with a visible window, the login form is filled in
    and submitted, each report page is parsed with ``parse_page``, and the
    results are written to a workbook named after yesterday's date.

    Raises:
        ValueError: If the report URLs below have not been filled in.
    """
    # NOTE(review): the three report URLs are absent from this file (the
    # original goto() calls carried no usable argument). Fill them in before
    # running. The module-level ``strday`` query suffix is presumably meant
    # to be appended to them -- confirm.
    dasou_url = ''
    xxl_url = ''
    dasou12_url = ''
    if not (dasou_url and xxl_url and dasou12_url):
        # Fail before a browser is launched rather than on the first goto().
        raise ValueError('report URLs are not configured in main()')

    # Open the login page.
    browser = await launch(headless=False, args=[f'--window-size={width},{height}', '--disable-infobars'])
    try:
        page = await browser.newPage()
        await page.setViewport({'width': width, 'height': height})
        await page.goto('http://172.30.1.216/report/user!login.action')
        await asyncio.sleep(2)
        # Make navigator.webdriver report false inside the page.
        await page.evaluate(
            '''() =>{ Object.defineProperties(navigator,{ webdriver:{ get: () => false } }) }''')

        # Fill in the login form.
        # NOTE(review): both credentials are blank in this file.
        await page.type('[class="user"]', "")
        await asyncio.sleep(2)
        await page.type('#pwd', '')
        await asyncio.sleep(2)

        # Submit the login form and wait for the site to respond.
        await page.click('[class="loginbtn"]')
        await asyncio.sleep(4)

        dasou1 = await _scrape_table(page, dasou_url, 8)
        xxl1 = await _scrape_table(page, xxl_url, 8)
        dasou12 = await _scrape_table(page, dasou12_url, 4, drop_empty=True)
    finally:
        # Always shut the browser down, even when a step above fails.
        await browser.close()

    # Merge the first two reports by stacking their rows.
    ss_xxl = pd.concat([dasou1, xxl1], axis=0)

    # The context manager saves and closes the workbook; ExcelWriter.save()
    # no longer exists in pandas 2.x.
    # NOTE(review): the original sheet names are blank in this file. Identical
    # names would put every frame on the same sheet, so distinct placeholder
    # names are used here -- rename as needed.
    with pd.ExcelWriter('C:\\Desktop\\' + yesterday + '.xlsx') as writer:
        ss_xxl.to_excel(writer, sheet_name='ss_xxl')
        dasou12.to_excel(writer, sheet_name='dasou12')
        # TODO(review): the original also wrote a frame called ``xxl12``, but
        # nothing in this file ever creates it (it raised NameError). Restore
        # the missing fetch and its to_excel() call once the source is known.
# Script entry: run main() to completion on the current asyncio event loop.
_loop = asyncio.get_event_loop()
_loop.run_until_complete(main())
# html operations
# (web page residue) Latest recommended article published on 2024-09-14 19:55:48