Selenium下载航运数据库克拉克森研究数据案例

from bs4 import BeautifulSoup as bs

from time import sleep

import re

import pandas as pd

from selenium.webdriver.common.by import By

import undetected_chromedriver.v2 as uc

# ---------------------------------------------------------------------------
# Log in to the Timeseries browse page
# ---------------------------------------------------------------------------
# Start Chrome through undetected_chromedriver with the driver binary that
# sits next to this script, then pause before navigating.
browser = uc.Chrome(executable_path='./chromedriver')
sleep(10)

browser.get('https://www.cl水电费是否arks大是大非ons.net/n/#/sin/tim水电费水电费eseries/bro水电费水电费wse')
sleep(20)  # fixed pause before the login dialog is touched

# Step 1: type the user name.
# NOTE(review): the literal sent here is a single space, presumably a
# placeholder for the real account name -- confirm before running.
username_box = browser.find_element(By.XPATH, '//*[@id="usernameText"]')
username_box.send_keys(' ')
sleep(5)

# Step 2: press the first button of the login modal ("continue").
continue_button = browser.find_element(
    By.XPATH,
    '/html/body/ngb-modal-window/div/div/modal/div[2]/modal-login-body/article/div[2]/button[1]',
)
continue_button.click()
sleep(6)

# Step 3: type the password.
# NOTE(review): an empty string is sent here, presumably a placeholder for
# the real password -- confirm before running.
password_box = browser.find_element(By.XPATH, '//*[@id="passwordTextFlagged"]')
password_box.send_keys('')
sleep(5)

# Step 4: press the second button of the login modal ("login").
login_button = browser.find_element(
    By.XPATH,
    '/html/body/ngb-modal-window/div/div/modal/div[2]/modal-login-body/article/div[2]/button[2]',
)
login_button.click()
sleep(5)

# The login form has now been submitted; the script assumes it succeeded and
# performs no explicit check.

# ---------------------------------------------------------------------------
# Data location -- customise this section for the series to be downloaded
# ---------------------------------------------------------------------------
# Tree nodes to click, outermost first. According to the original notes:
#   1. ul/li[8]                    -> "containership"
#   2. ul/li[8]/ul/li[8]           -> "earning and freight"
#   3. ul/li[8]/ul/li[8]/ul/li[2]  -> "spot freight rates"
tree_node_xpaths = (
    '//*[@id="ProductWrapper"]/main/div[2]/app-timeseries-base/section/div/div/app-browse/kendo-splitter/kendo-splitter-pane[1]/timeseries-browse-hierarchy/kendo-treeview/ul/li[8]/div/span[1]',
    '//*[@id="ProductWrapper"]/main/div[2]/app-timeseries-base/section/div/div/app-browse/kendo-splitter/kendo-splitter-pane[1]/timeseries-browse-hierarchy/kendo-treeview/ul/li[8]/ul/li[8]/div/span[1]',
    '//*[@id="ProductWrapper"]/main/div[2]/app-timeseries-base/section/div/div/app-browse/kendo-splitter/kendo-splitter-pane[1]/timeseries-browse-hierarchy/kendo-treeview/ul/li[8]/ul/li[8]/ul/li[2]/div/span[1]',
)
for node_xpath in tree_node_xpaths:
    # Click the node, then give the tree a fixed pause before the next step.
    browser.find_element(By.XPATH, node_xpath).click()
    sleep(5)

# Tick the checkbox in front of item 21 of the innermost list; the original
# notes name it "SCFI Shanghai-Europe (base port) Container Freight Rate".
# The click is dispatched through JavaScript rather than WebElement.click().
element = browser.find_element(
    By.XPATH,
    '//*[@id="ProductWrapper"]/main/div[2]/app-timeseries-base/section/div/div/app-browse/kendo-splitter/kendo-splitter-pane[1]/timeseries-browse-hierarchy/kendo-treeview/ul/li[8]/ul/li[8]/ul/li[2]/ul/li[21]/div/span/span/div/input',
)
browser.execute_script("arguments[0].click();", element)
sleep(5)

# ---- end of data location ----

# ---------------------------------------------------------------------------
# Download the selected series
# ---------------------------------------------------------------------------
# Click the Excel button in the view-mode panel to request the download.
excel_button = browser.find_element(
    By.XPATH,
    '//*[@id="ProductWrapper"]/main/div[2]/app-timeseries-base/section/div/div/app-browse/kendo-splitter/kendo-splitter-pane[2]/app-grid-chart-data/div/article/timeseries-viewmode-panel/div/div/crsl-button[5]/button/span',
)
excel_button.click()
sleep(5)

# Click the "accept" link in the download modal to confirm the download.
# NOTE(review): nothing waits after this click -- confirm the file finishes
# downloading before the script (and with it the browser session) ends.
accept_link = browser.find_element(
    By.XPATH,
    '/html/body/ngb-modal-window/div/div/modal/div[2]/app-download-modal/section/div[4]/a',
)
accept_link.click()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
要使用Selenium下载Excel文件并将其导入MySQL数据库,您需要使用Python编程语言并安装必要的库。以下是大致的步骤:

1. 安装Selenium库和Chrome浏览器驱动程序。

```python
pip install selenium
```

2. 下载Excel文件并保存到本地文件夹。

```python
from selenium import webdriver

url = 'http://example.com/file.xlsx'
driver = webdriver.Chrome()
driver.get(url)

# 等待文件下载完成
time.sleep(5)

# 保存文件到本地
driver.execute_script("window.open('');")
driver.switch_to.window(driver.window_handles[1])
driver.get('chrome://downloads')
elem = driver.find_element_by_xpath('//downloads-manager//iron-icon[@id="show"]')
elem.click()
time.sleep(5)
elem = driver.find_element_by_xpath('//downloads-manager//div[@class="download-container"]//a')
href = elem.get_attribute('href')
driver.get(href)
```

3. 使用pandas库读取Excel文件。

```python
import pandas as pd

df = pd.read_excel('file.xlsx')
```

4. 将数据插入MySQL数据库。

```python
import mysql.connector

# 连接MySQL数据库
cnx = mysql.connector.connect(user='your_username', password='your_password', host='127.0.0.1', database='your_database')
cursor = cnx.cursor()

# 插入数据
for index, row in df.iterrows():
    query = "INSERT INTO your_table (column1, column2) VALUES (%s, %s)"
    values = (row['column1'], row['column2'])
    cursor.execute(query, values)
cnx.commit()

# 关闭连接
cursor.close()
cnx.close()
```

以上是一个基本的框架,您可以根据自己的需要进行进一步的修改和调整。同时,为了确保程序的稳定性和可靠性,您还需要考虑异常处理、日志记录等方面的问题。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值