Python爬取动态网站-集信达案例

槿花荣

已于 2024-02-27 21:25:25 修改

阅读量2.5k

点赞数 6

分类专栏：作业　文章标签：chrome 前端 python

于 2023-11-03 15:59:23 首次发布

本文链接：https://blog.csdn.net/m0_63359528/article/details/134205262

版权

作业专栏收录该内容

6 篇文章

订阅专栏

本文介绍了如何使用Python的Selenium库自动化登录集信达（JiXinDa）网站，通过“短信服务”菜单进入查询页面，按签名名称“黑马头条”搜索并抓取结果数据，最后将抓取的数据以JSON格式保存到本地文件。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

from selenium import webdriver
import time
import json
from selenium.webdriver.support.wait import WebDriverWait
from  selenium.webdriver.support import  expected_conditions as EC
from  selenium.webdriver.common.by import By

class JiXinDa:
    """Selenium-driven scraper for the JiXinDa demo site.

    The workflow is: open the login page, click the login button, navigate
    through the side menu to a search form, search for the signature name
    "黑马头条", read three fields from the first result and append them to a
    local file as one JSON document per line.
    """

    # Absolute XPaths copied from the page; they are brittle by nature and
    # are kept in one place so that a layout change needs a single edit.
    _SMS_MENU_XPATH = '//*[@id="app"]/div/div[1]/ul/li[2]/div/div/span'
    # Fourth entry of the expanded sub-menu.
    # NOTE(review): presumably the signature-management page, since the form
    # that follows is searched by signature name -- confirm against the site.
    _SUB_MENU_XPATH = '/html/body/div[1]/div/div[1]/ul/li[2]/ul/li/ul/li[4]/span'
    _NAME_INPUT_XPATH = (
        '//*[@id="app"]/div/div[2]/div[1]/div/div/div/div/div[1]'
        '/form/div/div[1]/div/div/div/input'
    )
    _SEARCH_BUTTON_XPATH = (
        '//*[@id="app"]/div/div[2]/div[1]/div/div/div/div/div[1]'
        '/form/div/div[2]/div/div/div/button[1]'
    )
    _TITLE_XPATH = (
        '//*[@id="app"]/div/div[2]/div[1]/div/div/div/div/div[3]'
        '/div[1]/div/div/div[1]/span'
    )
    _IP_XPATH = (
        '//*[@id="app"]/div/div[2]/div[1]/div/div/div/div/div[3]'
        '/div[1]/div/div/div[2]/p[1]/span[2]'
    )
    _CREATE_TIME_XPATH = (
        '//*[@id="app"]/div/div[2]/div[1]/div/div/div/div/div[3]'
        '/div[1]/div/div/div[2]/p[4]/span[2]'
    )

    def __init__(self):
        """Store the login URL and start a Chrome WebDriver session."""
        self.url = 'http://jxd.itheima.net/#/login'
        self.driver = webdriver.Chrome()

    def _hover_and_click(self, xpath):
        """Locate the element at *xpath*, move the pointer onto it and click it."""
        element = self.driver.find_element(By.XPATH, xpath)
        webdriver.ActionChains(self.driver).move_to_element(element).click(element).perform()

    def login_to_find(self):
        """Log in, open the search page and submit a search for "黑马头条".

        Raises:
            selenium.common.exceptions.WebDriverException: if an element
                cannot be located or an explicit wait times out.
        """
        # Load the login page.
        self.driver.get(self.url)
        self.driver.implicitly_wait(10)
        self.driver.maximize_window()

        # Click the first element with class "el-button".
        # NOTE(review): presumably the login button of a form whose
        # credentials are pre-filled by the demo site -- confirm.
        self.driver.find_element(By.CLASS_NAME, 'el-button').click()
        # A longer implicit wait for the post-login pages; it also applies to
        # the find_element calls in get_data().
        self.driver.implicitly_wait(20)

        # Expand the SMS-service menu, then open the target sub-menu entry.
        self._hover_and_click(self._SMS_MENU_XPATH)
        self._hover_and_click(self._SUB_MENU_XPATH)

        # Wait for the signature-name <input>, then reuse the element the
        # wait returns instead of looking it up a second time.
        name_input = WebDriverWait(self.driver, 30).until(
            EC.presence_of_element_located((By.XPATH, self._NAME_INPUT_XPATH))
        )
        name_input.send_keys('黑马头条')

        # Wait until the search button can actually be clicked; presence in
        # the DOM alone does not guarantee it is visible and enabled.
        search_button = WebDriverWait(self.driver, 30).until(
            EC.element_to_be_clickable((By.XPATH, self._SEARCH_BUTTON_XPATH))
        )
        search_button.click()

    def get_data(self):
        """Read the title, IP and creation time of the first search result.

        Returns:
            list: ``[title, ip, create_time]`` as the text of the three
            located elements.

        Raises:
            selenium.common.exceptions.NoSuchElementException: if a field is
                not found within the implicit wait.
        """
        field_xpaths = (self._TITLE_XPATH, self._IP_XPATH, self._CREATE_TIME_XPATH)
        return [
            self.driver.find_element(By.XPATH, xpath).text
            for xpath in field_xpaths
        ]

    def save_data(self, data):
        """Append *data* to the file ``jixindata`` as one JSON line.

        Each call writes a single JSON document followed by a newline, so
        repeated runs yield a file that can be parsed line by line instead of
        back-to-back documents with no separator.

        Args:
            data: A JSON-serialisable object (``get_data`` returns a list of
                strings).

        Returns:
            bool: ``True`` if the record was written, ``False`` if
            serialisation or the file write failed (the error is printed).
        """
        try:
            record = json.dumps(data, ensure_ascii=False)
            with open('jixindata', mode='a+', encoding='utf-8') as file:
                file.write(record + '\n')
        except (OSError, TypeError, ValueError) as e:
            # Best effort: report the problem and let the caller continue.
            print(e)
            return False
        return True

    def close(self):
        """Quit the browser session if one was started."""
        driver = getattr(self, 'driver', None)
        if driver is not None:
            driver.quit()

    def run(self):
        """Run the whole workflow: log in, search, scrape, save and print.

        The browser is always closed afterwards, even if a step raises, so no
        Chrome / chromedriver process is left behind.
        """
        try:
            self.login_to_find()
            data = self.get_data()
            # Announce the save before it happens.
            print('正在保存数据')
            self.save_data(data)
            print('剪辑也算是计算机专业就业')
            print(data)
        finally:
            self.close()

# Script entry point: build the scraper and run the full workflow once.
if __name__ == '__main__':
    jixindata = JiXinDa()
    jixindata.run()