Python 获取东方财富网站的数据

Python 获取东方财富网站的数据

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import pandas as pd
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from datetime import datetime
import time


def get_data(url):
    """Open *url* in Chrome and scrape the enabled index category.

    The function waits for the ``sidemenu`` element, hovers over the
    ``menu-hsindex-wrapper`` entry so that its sub-menu is rendered, then
    walks the sub-menu items.  The item whose text contains "深证系列指数"
    is clicked and handed to ``get_page_num``; items containing "指数成份"
    or "上证系列指数" are recognised but deliberately skipped.

    Args:
        url: Address of the board-list page to load.

    Returns:
        None.  Results are written to disk by ``get_page_num``.
    """
    browser = webdriver.Chrome()
    try:
        browser.get(url)
        WebDriverWait(browser, 5, 0.5).until(
            EC.presence_of_element_located((By.ID, 'sidemenu')))
        # find_element(By..., ...) works on Selenium 3 and 4; the
        # find_element_by_* helpers were removed in Selenium 4.
        above = browser.find_element(
            By.CSS_SELECTOR,
            "#sidemenu > div > div.level-list > ul > li.sub-items.menu-hsindex-wrapper")
        # Hovering is what makes the sub-menu items appear.
        ActionChains(browser).move_to_element(above).perform()
        lis = browser.find_elements(
            By.XPATH, "//*[@id='sidemenu']/div/div[2]/ul/li[7]/div/ul//li")
        for li in lis:
            label = li.text
            # These two categories are currently disabled.  They are tested
            # first so they keep precedence over the enabled category.
            if "指数成份" in label or "上证系列指数" in label:
                continue
            if "深证系列指数" in label:
                li.click()
                get_page_num(browser, "深证系列指数")
    finally:
        # Always release the browser and its driver process, even when
        # scraping fails part-way through.
        browser.quit()


def get_page_num(browser, name):
    """Scrape every page of the table currently shown and save it as CSV.

    The page count is read from the last link inside
    ``main-table_paginate_page``.  For each page number the function types
    it into the ``paginate_input`` box, clicks the ``paginte_go`` button,
    parses the first HTML table of the page source with pandas, and drops
    the "序号" column.  All pages are concatenated, tagged with an "指数"
    column holding *name*, and written to ``<YYYYMMDD><name>.csv`` in the
    current working directory.

    Args:
        browser: Selenium WebDriver already showing the target table.
        name: Category label; used for the "指数" column and the file name.

    Returns:
        None.  If no page could be scraped, nothing is written.
    """
    # Local import keeps this block self-contained.  pandas deprecates
    # passing a literal HTML string to read_html, so it is wrapped below.
    from io import StringIO

    wait = WebDriverWait(browser, 10)
    try:
        wait.until(EC.presence_of_element_located((By.ID, 'main-table_next')))
        page = int(browser.find_element(
            By.XPATH, "//*[@id='main-table_paginate_page']/a[last()]").text)
    except Exception as e:
        # Best effort: a missing paginator or non-numeric label means there
        # is nothing to iterate over.
        print(e)
        page = 0

    frames = []
    for i in range(1, page + 1):
        input_num = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//*[@class="paginate_input"]')))
        input_num.click()
        input_num.clear()
        input_num.send_keys(str(i))
        submit = wait.until(EC.element_to_be_clickable(
            (By.XPATH, '//*[@class="paginte_go"]')))
        submit.click()
        # Pause before reading so the table has time to refresh; reading
        # immediately after the click risks parsing the previous page.
        time.sleep(2)
        data = pd.read_html(StringIO(browser.page_source),
                            converters={'代码': str})[0]
        data = data.drop(columns=['序号'])
        # astype returns a new Series, so the result must be assigned back.
        data['代码'] = data['代码'].astype(str)
        print("正在爬取第" + str(i) + "页")
        frames.append(data)

    if not frames:
        # Do not write an empty file or report success when nothing was read.
        print(name, "未抓取到数据")
        return

    # One concat at the end replaces DataFrame.append, which was removed in
    # pandas 2.0 and copied the whole frame on every iteration.
    df = pd.concat(frames, ignore_index=True)
    df['指数'] = name
    df.to_csv(datetime.now().strftime('%Y%m%d') + name + ".csv", index=False)
    print(name, "抓取数据成功")


if __name__ == '__main__':
    # Script entry point: scrape starting from the board-list page.
    start_url = 'http://quote.eastmoney.com/center/boardlist.html#boards-BK01501'
    get_data(start_url)
    # Earlier experiment, kept for reference: page through the table by
    # clicking the "next" button instead of typing page numbers.
    # search_next = browser.find_element_by_css_selector("#main-table_next")
    # while search_next.is_enabled():
    #     print("clicking next page")
    #     search_next.click()
    #     print(pd.read_html(browser.page_source)[0])

 完整代码下载:https://github.com/tanjunchen/SpiderProject/tree/master/EastWealthWebsite

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

远方的飞猪

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值