python开发报表系统_利用Python实现经济子系统数据自动检索报表,进行,分系统,取数...

# -*- coding:utf-8 -*- # 建议所有都加编码

from selenium import webdriver #导入webdriver包

from time import sleep #等待休眠相关

import datetime #获取当前时间相关

import pandas as pd

from openpyxl import load_workbook

from selenium.webdriver import Chrome

from selenium.webdriver.chrome.options import Options

OAusername = "*****"

OApassword = "*****"

#chrome设置(最小化运行 提高运行速度)

opt = Options()

opt.add_argument('--no-sandbox') # 解决DevToolsActivePort文件不存在的报错

opt.add_argument('window-size=1920x3000') # 设置浏览器分辨率

opt.add_argument('--disable-gpu') # 谷歌文档提到需要加上这个属性来规避bug

opt.add_argument('--hide-scrollbars') # 隐藏滚动条,应对一些特殊页面

opt.add_argument('blink-settings=imagesEnabled=false') # 不加载图片,提升运行速度

opt.add_argument('--headless') # 浏览器不提供可视化界面。Linux下如果系统不支持可视化不加这条会启动失败

driver = Chrome(options=opt)

#driver = webdriver.chrome() # 使用chrom浏览器

driver.get("http://sxoanew.bf.ctc.com/loginCenter/sms/login.jsp")

driver.find_element_by_id("jusername").send_keys(OAusername)

driver.find_element_by_id("jpassword").send_keys(OApassword)

driver.find_element_by_id("ibtn_Login").click()

driver.implicitly_wait(3) # 如果找到了就继续,否则2秒等待

sleep(1)

all_handles = driver.window_handles

driver.find_element_by_link_text("经分系统").click()

sleep(1)

all_handles2 = driver.window_handles

driver.close()

newhandle = [handle for handle in all_handles2 if handle not in all_handles]

driver.switch_to.window(newhandle[0])

#以下为测试:经分获取不到cookie,通过cookie模拟登录方法搁置

#经分获取数据的实际链接,参数直接体现在链接中(使用get方法),不用构造post请求,所以较为方便。

#考虑需要日期,根据实际情况格式化

#获取前一天的日期

def getYesterday():

yesterday = datetime.date.today() + datetime.timedelta(-1)

format_yesterday = yesterday.strftime('%Y%m%d')

return format_yesterday

#前天日期获取

def the_day_before_yesterday():

the_day_before_yesterday = datetime.date.today() + datetime.timedelta(-2)

format_day = the_day_before_yesterday.strftime('%Y%m%d')

return format_day

#获取页面source

def getdatafrombody(js):

driver.execute_script(js)

sleep(3)

windows = driver.window_handles

sleep(3)

driver.close()

driver.switch_to.window(windows[-1])

sleep(3)

source = driver.find_element_by_xpath('/html/body')

text = source.text

return text

##切换至新窗口并关闭旧窗口

#def switch_windows():

# windows = driver.window_handles

# driver.switch_to.window(windows[-1])

#不覆盖整个表格写入EXCEL文档

def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,

truncate_sheet=False,

**to_excel_kwargs):

"""

Append a DataFrame [df] to existing Excel file [filename]

into [sheet_name] Sheet.

If [filename] doesn't exist, then this function will create it.

Parameters:

filename : File path or existing ExcelWriter

(Example: '/path/to/file.xlsx')

df : dataframe to save to workbook

sheet_name : Name of sheet which will contain DataFrame.

(default: 'Sheet1')

startrow : upper left cell row to dump data frame.

Per default (startrow=None) calculate the last row

in the existing DF and write to the next row...

truncate_sheet : truncate (remove and recreate) [sheet_name]

before writing DataFrame to Excel file

to_excel_kwargs : arguments which will be passed to `DataFrame.to_excel()`

[can be dictionary]

Returns: None

"""

# from openpyxl import load_workbook

# import pandas as pd

# ignore [engine] parameter if it was passed

if 'engine' in to_excel_kwargs:

to_excel_kwargs.pop('engine')

writer = pd.ExcelWriter(filename, engine='openpyxl')

# Python 2.x: define [FileNotFoundError] exception if it doesn't exist

try:

FileNotFoundError

except NameError:

FileNotFoundError = IOError

try:

# try to open an existing workbook

writer.book = load_workbook(filename)

# get the last row in the existing Excel sheet

# if it was not specified explicitly

if startrow is None and sheet_name in writer.book.sheetnames:

startrow = writer.book[sheet_name].max_row

# truncate sheet

if truncate_sheet and sheet_name in writer.book.sheetnames:

# index of [sheet_name] sheet

idx = writer.book.sheetnames.index(sheet_name)

# remove [sheet_name]

writer.book.remove(writer.book.worksheets[idx])

# create an empty sheet [sheet_name] using old index

writer.book.create_sheet(sheet_name, idx)

# copy existing sheets

writer.sheets = {ws.title:ws for ws in writer.book.worksheets}

except FileNotFoundError:

# file does not exist yet, we will create it

pass

if startrow is None:

startrow = 0

# write out the new sheet

df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)

# save the workbook

writer.save()

#此部分开始pandas操作excel,后续需优

path ='F:/efficiency/Files/0929.xlsx'

url = pd.read_excel('F:/efficiency/Files/list报表.xlsx', sheet_name='url')

url_lists = url.values.tolist()

order = pd.read_excel('F:/efficiency/Files/list报表.xlsx', sheet_name='order')

order = order.fillna(value="")

order_lists = order.values.tolist()

print(order_lists)

for url_list in url_lists:

print(str(url_list[0]))

if str(url_list[0])=='迁转日报(前一天)':

js = 'window.open("' + str(url_list[1]).format(the_day_before_yesterday=the_day_before_yesterday()) + '")'

else:

js = 'window.open("' + str(url_list[1]).format(yesterday=getYesterday()) + '")'

print(js)

new = order[url_list[0]].values.tolist()

while '' in new:

new.remove('')

print(new)

# sleep(3)

text = getdatafrombody(js)

sleep(3)

df = pd.read_json(text, orient='records', encoding='utf-8', )

df = df[new]

append_df_to_excel(path, df, sheet_name=url_list[0], startcol=1, startrow=4, index=False)

sleep(1)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值