Python 爬虫练习:动态数据翻页爬取

案例需求:

  • 采集沪深京A股的所有数据,并存储到 MongoDB 中
  • 在这里插入图片描述

代码实现:

  • 导入需要用到的模块
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pymongo
  • 使用selenium爬取动态数据
# Quote-list page to scrape; the URL fragment selects the board shown.
url = 'http://quote.eastmoney.com/center/gridlist.html#hs_a_board'

# Start an Edge session, load the page, then make element lookups poll
# for up to 10 seconds before giving up.
driver = webdriver.Edge()
driver.get(url)
driver.implicitly_wait(time_to_wait=10)

# MongoDB on localhost:27017. Note that `db` is bound to the `china`
# collection of the `money` database, not to a database object.
client = pymongo.MongoClient(host='localhost', port=27017)
db = client['money']['china']

def find_elements(xpath):
    """Look up *xpath* on the page currently loaded in the module-level driver.

    Returns whatever ``driver.find_elements`` yields for an XPath lookup.
    """
    matches = driver.find_elements(by=By.XPATH, value=xpath)
    return matches

# XPath matching every data row of the quote table; the per-column suffixes
# in _COLUMNS are appended to it.
_ROW_XPATH = '//*[@id="table_wrapper-table"]/tbody/tr'
# XPath of the "next page" link in the pagination bar.
_NEXT_BUTTON_XPATH = '//*[@id="main-table_paginate"]/a[2]'

# One entry per stored field:
# (document key, XPath suffix relative to a row, attribute to read or None
# to read the element's visible text).
_COLUMNS = (
    ('序号', '/td[1]', None),
    ('代码', '/td[2]/a', None),
    ('名称', '/td[3]/a', None),
    ('股吧', '/td[4]/a[1]', 'href'),
    ('资金流', '/td[4]/a[2]', 'href'),
    ('数据', '/td[4]/a[3]', 'href'),
    ('最新价', '/td[5]/span', None),
    ('涨跌幅', '/td[6]/span', None),
    ('涨跌额', '/td[7]/span', None),
    ('成交量(手)', '/td[8]', None),
    ('成交额', '/td[9]', None),
    ('振幅', '/td[10]', None),
    ('最高', '/td[11]/span', None),
    ('最低', '/td[12]/span', None),
    ('今开', '/td[13]/span', None),
    ('昨收', '/td[14]', None),
    ('量比', '/td[15]', None),
    ('换手率', '/td[16]', None),
    ('市盈率(动态)', '/td[17]', None),
    ('市净率', '/td[18]', None),
)


def _scrape_page():
    """Return one dict per table row of the page currently displayed.

    Each column is fetched as a list of elements and the lists are then
    combined by row index.

    Raises:
        RuntimeError: if the column lists differ in length. Combining them
            by index would then attach values to the wrong row, so the page
            is rejected before anything is stored.
    """
    cells = {key: find_elements(_ROW_XPATH + suffix) for key, suffix, _ in _COLUMNS}

    row_count = len(cells[_COLUMNS[0][0]])
    mismatched = [key for key, found in cells.items() if len(found) != row_count]
    if mismatched:
        raise RuntimeError(f"表格列长度不一致 (期望 {row_count} 行): {mismatched}")

    rows = []
    for i in range(row_count):
        row = {}
        for key, _, attribute in _COLUMNS:
            element = cells[key][i]
            row[key] = element.get_attribute(attribute) if attribute else element.text
        rows.append(row)
    return rows


# A single explicit wait object is enough; it is reused for every page.
wait = WebDriverWait(driver, 10)

try:
    while True:
        # Store and echo every row of the current page.
        for data in _scrape_page():
            db.insert_one(data)
            print(data)

        # Move on to the next page, or stop when that is not possible.
        try:
            next_button = wait.until(EC.element_to_be_clickable((By.XPATH, _NEXT_BUTTON_XPATH)))
            # An <a> that is merely styled as disabled still counts as
            # clickable, so without this check the loop would re-scrape the
            # last page forever.
            # NOTE(review): assumes the site marks the last-page button with
            # a `disabled` CSS class -- confirm against the live page.
            css_classes = (next_button.get_attribute('class') or '').split()
            if 'disabled' in css_classes:
                print("已到达最后一页")
                break
            next_button.click()
            # Give the table time to re-render before it is read again.
            time.sleep(1)
        except Exception as e:
            print(f"翻页操作失败或已到达最后一页: {e}")
            break
except Exception as e:
    print(f"主循环异常: {e}")
finally:
    # quit() ends the whole WebDriver session; close() would only close the
    # current window and leave the driver process running.
    try:
        driver.quit()
    finally:
        client.close()

实现结果

在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值