案例需求:
采集沪深京A股的所有数据,并存储到MongoDB中
代码实现:
import time
from selenium import webdriver
from selenium. webdriver. common. by import By
from selenium. webdriver. support. ui import WebDriverWait
from selenium. webdriver. support import expected_conditions as EC
import pymongo
# Listing page to scrape; the "#hs_a_board" fragment presumably selects the
# A-share board on the site (not verifiable from this script alone).
url = 'http://quote.eastmoney.com/center/gridlist.html#hs_a_board'

# Start an Edge session, open the listing page, and let every element lookup
# poll for up to 10 seconds before giving up.
driver = webdriver.Edge()
driver.get(url)
driver.implicitly_wait(10)

# MongoDB on the local machine. NOTE: despite its name, `db` is the *collection*
# "china" inside the database "money", so `db.insert_one(...)` is valid.
client = pymongo.MongoClient(host='localhost', port=27017)
db = client['money']['china']
def find_elements(xpath):
    """Return every element on the current page that matches *xpath*.

    Thin wrapper around the module-level ``driver``; the result is an empty
    list when nothing matches.
    """
    matches = driver.find_elements(by=By.XPATH, value=xpath)
    return matches
# XPath of one table row; each column below is addressed relative to it.
_ROW_XPATH = '//*[@id="table_wrapper-table"]/tbody/tr'

# (document key, XPath suffix below a row, what to read from the element).
# 'text' reads the visible text, 'href' reads the link target.
_COLUMNS = (
    ('序号', '/td[1]', 'text'),
    ('代码', '/td[2]/a', 'text'),
    ('名称', '/td[3]/a', 'text'),
    ('股吧', '/td[4]/a[1]', 'href'),
    ('资金流', '/td[4]/a[2]', 'href'),
    ('数据', '/td[4]/a[3]', 'href'),
    ('最新价', '/td[5]/span', 'text'),
    ('涨跌幅', '/td[6]/span', 'text'),
    ('涨跌额', '/td[7]/span', 'text'),
    ('成交量(手)', '/td[8]', 'text'),
    ('成交额', '/td[9]', 'text'),
    ('振幅', '/td[10]', 'text'),
    ('最高', '/td[11]/span', 'text'),
    ('最低', '/td[12]/span', 'text'),
    ('今开', '/td[13]/span', 'text'),
    ('昨收', '/td[14]', 'text'),
    ('量比', '/td[15]', 'text'),
    ('换手率', '/td[16]', 'text'),
    ('市盈率(动态)', '/td[17]', 'text'),
    ('市净率', '/td[18]', 'text'),
)

_NEXT_BUTTON_XPATH = '//*[@id="main-table_paginate"]/a[2]'


def _read_cell(element, how):
    """Return the element's href when *how* is 'href', else its visible text."""
    if how == 'href':
        return element.get_attribute('href')
    return element.text


def _scrape_current_page():
    """Return one document (dict) per row of the table currently displayed.

    Every column is collected page-wide and then matched to rows by position,
    so a column with fewer cells than there are rows would shift values onto
    the wrong stock.

    Raises:
        RuntimeError: if any column has fewer cells than the row-number
            column, i.e. the columns cannot be aligned row by row.
    """
    columns = [find_elements(_ROW_XPATH + suffix) for _, suffix, _ in _COLUMNS]
    row_count = len(columns[0])
    short = {
        key: len(cells)
        for (key, _, _), cells in zip(_COLUMNS, columns)
        if len(cells) < row_count
    }
    if short:
        raise RuntimeError(
            f"expected {row_count} cells per column, but some columns are "
            f"shorter and cannot be aligned: {short}"
        )
    # zip() stops at the shortest column, which is row_count after the check.
    return [
        {key: _read_cell(cell, how) for (key, _, how), cell in zip(_COLUMNS, row)}
        for row in zip(*columns)
    ]


try:
    # The wait object is reusable, so build it once instead of once per page.
    wait = WebDriverWait(driver, 10)
    while True:
        for data in _scrape_current_page():
            db.insert_one(data)
            print(data)
        try:
            next_button = wait.until(
                EC.element_to_be_clickable((By.XPATH, _NEXT_BUTTON_XPATH))
            )
            # NOTE(review): assumes the site marks the "next" link with a
            # "disabled" class on the last page; such a link would still be
            # "clickable" for Selenium and the loop would re-scrape the last
            # page forever. Confirm against the live markup.
            css_classes = (next_button.get_attribute('class') or '').split()
            if 'disabled' in css_classes:
                print("已到达最后一页")
                break
            next_button.click()
            # Give the table a moment to load the next page's rows.
            time.sleep(1)
        except Exception as e:
            print(f"翻页操作失败或已到达最后一页: { e} ")
            break
except Exception as e:
    print(f"主循环异常: { e} ")
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # close() would only close the current window.
    try:
        driver.quit()
    finally:
        client.close()
实现结果