python自动筛选爬取晨星网基金

写在前面:本文只做技术分享和学习,并非以此牟利,如有侵犯,请联系删除。

因为需要模拟浏览器登录并执行 JavaScript,本文使用 Selenium 配合谷歌浏览器驱动(chromedriver);当然也可以换成火狐等其他浏览器及其对应的驱动。
谷歌驱动环境搭建(点击我)
好了,直接上代码。如果你的 Python 环境中缺少用到的库(例如 selenium、lxml),记得先用 pip install 安装。

import sys
from lxml import etree
import time
import requests
import csv
from selenium import webdriver   # 导入webdriver模块
from bs4 import BeautifulSoup
from time import sleep

class ChengxingspiderItem:
    """One scraped fund record, iterable as ``fund_code`` then ``fund_name``.

    Iterating an instance (e.g. ``list(item)``) yields exactly two values in
    that order, which is the column order of the CSV rows built from it.
    """

    def __init__(self):
        self.fund_code = ""
        self.fund_name = ""
        # Cursor used by __next__: 0 -> fund_code, 1 -> fund_name, 2 -> exhausted.
        self._i = 0

    def __iter__(self):
        """Rewind the cursor and return ``self`` as the iterator.

        Resetting here makes the item re-iterable; without it a second
        ``list(item)`` would return an empty list because the cursor stayed
        at the exhausted position.
        """
        self._i = 0
        return self

    def __next__(self):
        """Return the next field, raising ``StopIteration`` after both fields."""
        if self._i == 0:
            self._i += 1
            return self.fund_code
        elif self._i == 1:
            self._i += 1
            return self.fund_name
        else:
            raise StopIteration()
def FormatHtml(_html,rows):
    """Extract ``[fund_code, fund_name]`` pairs from a result page and append them to *rows*.

    Args:
        _html: HTML text of a page containing the table whose id is
            ``ctl00_cphMain_gridResult``.
        rows: List that receives one ``[fund_code, fund_name]`` list per
            parsed table row. It is mutated in place.

    Returns:
        The same *rows* list, for call-site convenience.
    """
    fund_xpath = etree.HTML(_html)
    trs = fund_xpath.xpath('//*[@id="ctl00_cphMain_gridResult"]/tbody/tr')
    # The first <tr> is skipped (presumably the header row). Each remaining
    # row is queried relative to itself instead of re-running a document-wide
    # XPath lookup for every row.
    for tr in trs[1:]:
        fund_code = tr.xpath('./td[3]/a/text()')
        fund_name = tr.xpath('./td[4]/a/text()')
        # A row without a link in either column has nothing to record;
        # skip it rather than fail with IndexError on an empty result list.
        if not fund_code or not fund_name:
            continue
        item = ChengxingspiderItem()
        # When the anchor holds several text nodes, the last one is used.
        item.fund_name = fund_name[-1]
        item.fund_code = fund_code[-1]
        rows.append(list(item))
    return rows
if __name__ == '__main__':
    # Script flow: log in, apply the ranking filters, walk the result pages,
    # then dump every collected [fund_code, fund_name] row to test.csv.
    options = webdriver.ChromeOptions()
    options.add_argument("--no-sandbox")
    # The chromedriver path below is machine-specific; change it to match your install.
    # NOTE(review): the positional driver path, `chrome_options=` and the
    # `find_element_by_id` calls are the older Selenium API style - confirm
    # they are supported by the installed Selenium version.
    driver = webdriver.Chrome('C:/Program Files/Google/Chrome/Application/chromedriver.exe', chrome_options=options)
    rows = []
    try:
        _url = "http://cn.morningstar.com/quickrank/default.aspx"
        driver.get(_url)
        username = driver.find_element_by_id('emailTxt')
        password = driver.find_element_by_id('pwdValue')
        # Account credentials (placeholders - replace with your own).
        username.send_keys('这是你的账号')
        password.send_keys('这是你的密码')
        submit = driver.find_element_by_id('loginGo')
        submit.click()
        # Fixed pause after login; tune the duration to your network speed.
        sleep(3)
        # Tick the filter checkboxes.
        driver.find_element_by_id('ctl00_cphMain_cblStarRating_0').click()
        driver.find_element_by_id('ctl00_cphMain_cblStarRating5_0').click()
        driver.find_element_by_id('ctl00_cphMain_cblGroup_0').click()
        driver.find_element_by_id('ctl00_cphMain_cblCategory_0').click()
        # Run the search.
        driver.find_element_by_id('ctl00_cphMain_btnGo').click()
        countText = driver.find_element_by_id('ctl00_cphMain_TotalResultLabel').text
        # NOTE(review): 25 is presumably the number of rows per result page.
        # `// 25 + 1` requests one extra page when the total is an exact
        # multiple of 25, and the pager argument below starts at 0 - confirm
        # both against the site's pager before changing either.
        count = int(countText)//25+1
        for i in range(count):
            js ="__doPostBack('ctl00$cphMain$AspNetPager1','"+str(i)+"')"
            driver.execute_script(js)
            # NOTE(review): the page source is read right after the postback
            # script returns, with no explicit wait - verify the new page has
            # finished loading at this point.
            _html = driver.page_source
            rows = FormatHtml(_html,rows)
            print(rows)
    finally:
        # Always shut the browser and the chromedriver process down, even when
        # a lookup or the scrape fails part-way through.
        driver.quit()
    headers = ['fund_code', 'fund_name']
    # test.csv is created (or overwritten) in the current working directory.
    # An explicit encoding avoids UnicodeEncodeError on platforms whose default
    # code page cannot represent the Chinese fund names; the BOM written by
    # utf-8-sig lets spreadsheet tools detect the encoding.
    with open('test.csv', 'w', newline='', encoding='utf-8-sig') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(headers)
        f_csv.writerows(rows)
    
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值