首先安装谷歌浏览器驱动(ChromeDriver)
爬取四个账号每日数据
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
# Module-wide WebDriver handle. It stays None until the first call to
# initialize_browser() and is then shared by every login.
driver = None


def initialize_browser():
    """Start Chrome once and store it in the module-level ``driver``.

    Calls made while ``driver`` already holds a value do nothing.
    """
    global driver
    if driver is not None:
        return
    # Default Chrome options are used. To keep the window open after the
    # script exits, or to change headless behaviour, build a
    # ``webdriver.ChromeOptions()`` object and pass it as ``options=``.
    driver = webdriver.Chrome()
def login_and_get_charge(username, password):
    """Log in with the given credentials and return the ``.money`` text.

    Opens the login page in the shared browser, submits the form, switches
    into the iframe located after login and reads the first element matching
    the CSS selector ``.money``. The username and that text are printed.

    Args:
        username: Value typed into the input named ``username``.
        password: Value typed into the input named ``password``.

    Returns:
        The text of the ``.money`` element as reported by Selenium.
    """
    initialize_browser()

    # Load the login page and pause a fixed two seconds before touching it.
    driver.get(r'http:******')
    time.sleep(2)

    # Locate both credential inputs first, then type into them.
    user_box, pass_box = (
        driver.find_element(By.NAME, field) for field in ("username", "password")
    )
    user_box.send_keys(username)
    pass_box.send_keys(password)

    # Submit the form and pause a fixed three seconds.
    driver.find_element(
        By.XPATH, "/html/body/div/div/form/div[4]/div/input[1]"
    ).click()
    time.sleep(3)

    # The element is looked up inside an iframe, so enter that frame first.
    driver.switch_to.frame(
        driver.find_element(By.XPATH, "/html/body/section/div[2]/div/iframe")
    )

    money = driver.find_element(By.CSS_SELECTOR, value='.money')
    print(username, money.text)
    return money.text
def parse_amount(text):
    """Convert a displayed amount such as ``"¥1,234.50"`` to a float.

    Surrounding whitespace, the yen sign (half-width U+00A5 or full-width
    U+FFE5) and thousands separators are removed before conversion; the
    result is rounded to two decimals.

    Args:
        text: Amount string as read from the page.

    Returns:
        The amount as a float rounded to two decimal places.

    Raises:
        ValueError: If what remains after cleaning is not a valid number.
    """
    cleaned = text.strip()
    for symbol in ("\u00a5", "\uffe5", ","):
        cleaned = cleaned.replace(symbol, "")
    return round(float(cleaned), 2)


def report_channel_totals():
    """Log in to each account and print the per-channel amounts.

    The shared browser is closed in a ``finally`` clause, so it does not stay
    open when one of the logins raises.
    """
    # NOTE(review): credentials are hard-coded in the source; consider moving
    # them to environment variables or a config file kept out of version control.
    try:
        # The "本部-腾讯" account is currently left out of this report.
        dy1 = login_and_get_charge("本部-抖音", "Agent.Supershorse")
        gz1 = login_and_get_charge("广州分部", "Agent.Supershorse")
        bb1 = login_and_get_charge("本部", "Agent.Supershorse")
        # This value is printed by the call itself and is not part of any
        # total below.
        # NOTE(review): the other sections of this file use "1234" as the
        # password for this account; confirm which one is correct.
        login_and_get_charge("本部-百度", "123")
    finally:
        if driver is not None:
            driver.quit()

    gz = parse_amount(gz1)
    bb = parse_amount(bb1)
    dy = parse_amount(dy1)

    # Sums are rounded again so binary floating-point noise
    # (e.g. 0.1 + 0.2 -> 0.30000000000000004) never reaches the output.
    weixin = round(gz + bb, 2)
    douyin = dy
    shenzhen = round(dy + bb, 2)
    guanzou = gz

    # time.strftime() without a time tuple formats the current local time.
    formatted_time = time.strftime("%Y-%m-%d %H:%M:%S")
    print("--------------------------------------------------------")
    print("当前日期和时间:", formatted_time,"\n抖音:",dy,"\n微信:",weixin,"\n广州:",guanzou,"\n深圳:",shenzhen)
    # Bare values, one per line, in the same order as before.
    print(douyin)
    print(weixin)
    print(guanzou)
    print(shenzhen)


if __name__ == "__main__":
    report_channel_totals()
爬取账户详细数据,可设置时间维度,输出为csv
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from datetime import datetime, timedelta
import pandas as pd
from bs4 import BeautifulSoup
# Module-wide WebDriver handle; created on first use by initialize_browser().
driver = None


def initialize_browser():
    """Start Chrome once, with the ``detach`` option, and keep it in ``driver``.

    Does nothing when ``driver`` is already set.
    """
    global driver
    if driver is not None:
        return
    options = webdriver.ChromeOptions()
    # "detach" lets the Chrome window outlive the Python process; it only
    # matters when the script ends without calling driver.quit().
    options.add_experimental_option("detach", True)
    # No headless switch is set: Chrome opens a visible window by default.
    # The former ``options.headless = False`` assignment was dropped because
    # that setter is deprecated and has been removed in recent Selenium 4
    # releases; add the "--headless=new" argument if headless mode is wanted.
    driver = webdriver.Chrome(options=options)
def clean_recharge_table(df, username):
    """Select the report columns, strip the yen sign and tag the account.

    Args:
        df: Parsed report table; must contain the columns ``时间``, ``投手``
            and ``充值金额`` (the latter holding strings).
        username: Account name written into the added ``账户`` column.

    Returns:
        A new DataFrame with ``时间``, ``投手``, ``充值金额`` (yen sign removed)
        and ``账户``. ``df`` itself is left unmodified.

    Raises:
        KeyError: If one of the three expected columns is missing.
    """
    # .copy() detaches the selection from ``df`` so the assignments below do
    # not raise pandas' SettingWithCopyWarning.
    selected = df[['时间', '投手', '充值金额']].copy()
    # Remove both the half-width (U+00A5) and full-width (U+FFE5) yen sign.
    selected['充值金额'] = selected['充值金额'].str.replace(
        '[\u00a5\uffe5]', '', regex=True
    )
    selected['账户'] = username
    return selected


def gettable(username, password):
    """Log in, query yesterday's report table and return it as a DataFrame.

    After logging in, the function clicks a sidebar link (labelled
    "投手每日收益" according to the original comment), switches into the
    report iframe, types yesterday's date into both date inputs, runs the
    query and parses the resulting ``layui-table``.

    Args:
        username: Value typed into the input named ``username``; also stored
            in the ``账户`` column of the result.
        password: Value typed into the input named ``password``.

    Returns:
        DataFrame with the columns ``时间``, ``投手``, ``充值金额`` and ``账户``.

    Raises:
        ValueError: If the page's table does not carry the ``layui-table`` class.
    """
    from io import StringIO  # local import: only needed for pd.read_html below

    initialize_browser()
    driver.get(r'http://*******.com/admin/Login/index')
    time.sleep(2)

    # Fill in the credentials and submit the login form.
    username_field = driver.find_element(By.NAME, "username")
    password_field = driver.find_element(By.NAME, "password")
    username_field.send_keys(username)
    password_field.send_keys(password)
    driver.find_element(
        By.XPATH, "/html/body/div/div/form/div[4]/div/input[1]"
    ).click()
    time.sleep(3)

    # Open the report page through the sidebar link.
    driver.find_element(By.XPATH, "/html/body/aside/div/dl[1]/dd/ul/li[3]/a").click()
    time.sleep(5)

    # The report form and table are looked up inside this iframe.
    iframe = driver.find_element(By.XPATH, "/html/body/section/div[2]/div[2]/iframe")
    driver.switch_to.frame(iframe)

    # Both date inputs receive yesterday's date, then the query is submitted.
    input_star = driver.find_element(By.XPATH, "/html/body/div/div/div/div/div/div/form/div/div[1]/div[1]/input")
    input_end = driver.find_element(By.XPATH, "/html/body/div/div/div/div/div/div/form/div/div[1]/div[3]/input")
    query_button = driver.find_element(By.XPATH, "/html/body/div/div/div/div/div/div/form/div/div[3]/input")
    formatted_yesterday = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
    input_star.send_keys(formatted_yesterday)
    input_end.send_keys(formatted_yesterday)
    query_button.click()
    # Give the query time to refresh the table; reading it right after the
    # click risks capturing the table as it was before the query ran.
    time.sleep(3)

    table_element = driver.find_element(By.XPATH, "/html/body/div/div/div/div/div/div/table")
    html_content = table_element.get_attribute('outerHTML')

    # Re-parse the markup and make sure it really is the layui table.
    soup = BeautifulSoup(html_content, 'lxml')
    table = soup.find('table', class_='layui-table')
    if table is None:
        raise ValueError("no <table class='layui-table'> found on the report page")

    # read_html expects a file-like object; passing literal HTML strings is
    # deprecated in recent pandas versions.
    df = pd.read_html(StringIO(str(table)))[0]
    return clean_recharge_table(df, username)
def yesterday_csv_filename(today=None):
    """Return ``"<YYYY-MM-DD>充值.csv"`` for the day before ``today``.

    Args:
        today: Reference ``datetime``; defaults to ``datetime.now()``.

    Returns:
        The CSV file name built from the previous day's date.
    """
    if today is None:
        today = datetime.now()
    return (today - timedelta(days=1)).strftime("%Y-%m-%d") + '充值.csv'


def export_yesterday_details():
    """Fetch each account's table, merge them, save the CSV and print it.

    The shared browser is closed in a ``finally`` clause, so it does not stay
    open when one of the accounts fails.
    """
    # NOTE(review): credentials are hard-coded in the source; consider moving
    # them to environment variables or a config file kept out of version control.
    try:
        # The "本部-腾讯" account is currently left out of the export.
        frames = [
            gettable("本部-抖音", "Agent.Supershorse"),
            gettable("广州分部", "Agent.Supershorse"),
            gettable("本部", "Agent.Supershorse"),
            gettable("本部-百度", "1234"),
        ]
    finally:
        if driver is not None:
            driver.quit()

    # Stack the per-account tables into one frame with a fresh index.
    result_df = pd.concat(frames, ignore_index=True)

    # The file is named after yesterday's date and written without header or index.
    result_df.to_csv(yesterday_csv_filename(), index=False, header=False)

    # Echo the merged rows, again without header or index.
    print(result_df.to_string(index=False, header=False))


if __name__ == "__main__":
    export_yesterday_details()
爬取昨日汇总数据
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
# Shared WebDriver instance for this section; None until a browser is started.
driver = None


def initialize_browser():
    """Create the shared Chrome driver on first use.

    When ``driver`` already holds a value the function returns immediately.
    """
    global driver
    if driver is not None:
        return
    # Chrome is launched with its default options. A ``webdriver.ChromeOptions()``
    # object can be passed as ``options=`` to change that (for instance to
    # detach the window from the script).
    driver = webdriver.Chrome()
def login_and_get_charge(username, password):
    """Log in with the given credentials and return one amount from the page.

    Opens the login page in the shared browser, submits the form, switches
    into the iframe located after login and reads the text of one ``<p>``
    element addressed by an absolute XPath. The username and that text are
    printed.

    Args:
        username: Value typed into the input named ``username``.
        password: Value typed into the input named ``password``.

    Returns:
        The text of the located element as reported by Selenium.
    """
    initialize_browser()

    # Load the login page and pause a fixed two seconds before touching it.
    driver.get(r'http://*******.com/admin/Login/index')
    time.sleep(2)

    # Locate both credential inputs first, then type into them.
    user_box, pass_box = (
        driver.find_element(By.NAME, field) for field in ("username", "password")
    )
    user_box.send_keys(username)
    pass_box.send_keys(password)

    # Submit the form and pause a fixed three seconds.
    driver.find_element(
        By.XPATH, "/html/body/div/div/form/div[4]/div/input[1]"
    ).click()
    time.sleep(3)

    # The element is looked up inside an iframe, so enter that frame first.
    driver.switch_to.frame(
        driver.find_element(By.XPATH, "/html/body/section/div[2]/div/iframe")
    )

    # This section reads a specific <p> by absolute XPath instead of the
    # ".money" CSS selector.
    amount = driver.find_element(
        By.XPATH,
        "/html/body/div[1]/div/div/div[1]/div[2]/div/div[2]/div[1]/div[1]/p[1]",
    )
    print(username, amount.text)
    return amount.text
def parse_amount(text):
    """Convert a displayed amount such as ``"¥1,234.50"`` to a float.

    Surrounding whitespace, the yen sign (half-width U+00A5 or full-width
    U+FFE5) and thousands separators are removed before conversion; the
    result is rounded to two decimals.

    Args:
        text: Amount string as read from the page.

    Returns:
        The amount as a float rounded to two decimal places.

    Raises:
        ValueError: If what remains after cleaning is not a valid number.
    """
    cleaned = text.strip()
    for symbol in ("\u00a5", "\uffe5", ","):
        cleaned = cleaned.replace(symbol, "")
    return round(float(cleaned), 2)


def report_account_totals():
    """Log in to each account and print every amount plus the grand total.

    The shared browser is closed in a ``finally`` clause, so it does not stay
    open when one of the logins raises.
    """
    # NOTE(review): credentials are hard-coded in the source; consider moving
    # them to environment variables or a config file kept out of version control.
    try:
        # The "本部-腾讯" account is currently left out of this report.
        dy_text = login_and_get_charge("本部-抖音", "Agent.Supershorse")
        gz_text = login_and_get_charge("广州分部", "Agent.Supershorse")
        bb_text = login_and_get_charge("本部", "Agent.Supershorse")
        bd_text = login_and_get_charge("本部-百度", "1234")
    finally:
        if driver is not None:
            driver.quit()

    gz = parse_amount(gz_text)
    bb = parse_amount(bb_text)
    dy = parse_amount(dy_text)
    bd = parse_amount(bd_text)

    # time.strftime() without a time tuple formats the current local time.
    formatted_time = time.strftime("%Y-%m-%d %H:%M:%S")
    print("--------------------------------------------------------")
    print("当前日期和时间:", formatted_time)
    print(gz)
    print(bb)
    print(dy)
    print(bd)
    # Rounded so binary floating-point noise never shows up in the total.
    print(round(gz + bb + dy + bd, 2))


if __name__ == "__main__":
    report_account_totals()