使用python爬取猫眼电影、房王、股吧论坛、百度翻译、有道翻译、高德天气、华夏基金、扇贝单词、糗事百科（华夏基金）

最新推荐文章于 2024-02-05 13:14:00 发布

物喜己悲

最新推荐文章于 2024-02-05 13:14:00 发布

阅读量360

点赞数 1

分类专栏：爬虫 | 文章标签：爬虫、华夏基金

本文链接：https://blog.csdn.net/yu1860110/article/details/90581758

版权

import requests,re

#获取整个网页
# with open('华夏基金.html','w',encoding='utf-8') as f:
#     f.write(html)

class Huaxia():

    # def __init__(self):
    #     self.base_html()

    def __call__(self, *args, **kwargs):
        self.base_html()

    def base_html(self):
        '''
        Download the Huaxia fund "all funds" page and pass it on for parsing.

        Fetches http://fund.chinaamc.com/portal/cn/include/newproducthome.jsp
        and forwards the decoded page text to ``all_table_data``.

        :return: None; the page text is forwarded, not returned
        :raises requests.exceptions.RequestException: if the request fails,
            times out, or the server answers with an HTTP error status
        '''
        base_url = 'http://fund.chinaamc.com/portal/cn/include/newproducthome.jsp'
        # Without an explicit timeout, requests waits indefinitely on a
        # stalled connection.
        response = requests.get(base_url, timeout=10)
        # Fail here on 4xx/5xx instead of running the table regex over an
        # error page further down.
        response.raise_for_status()
        self.all_table_data(response.text)

    def all_table_data(self,html):
        '''
        获取所有网站table内容
            规则：
                开头：<table width="100%" border="0" cellspacing="0" cellpadding="0" style=
                结尾：</table>
                贪婪模式
        :return:返回所有符合 首：table width="100%" border="0" cellspacing="0" cellpadding="0" style=  尾：</table> 的table
        '''
        #创建贪婪模式规则获取不同类型的基金
        pattern_big_table_rule = re.compile('<table width="100%" border="0" cellspacing="0" cellpadding="0" style=(.*?)</table>',re.S)
        big_tables = pattern_big_table_rule.findall(html)
        # print(type(big_tables))
        # print(len(big_tables))
        # count=1
        # for table in big_tables:
        #     big_table_name = '华夏基金第'+str(count)+'个table'+'.html'
        #     with open(big_table_name,'w',encoding='utf-8') as f:
        #         f.write(table)
        #         count+=1
        # print('all_table_data is run ')

        #匹配table类型：一共四种
        #   1、股票型、指数型、混合型、债券型、ETF
        #   2、货币型
        #   3、理财型
        #   4、封闭型

        #数据分发
        type1 = big_tables[0]
        # self.type_1(type1)
        type2 = big_tables[1]
        self.typer_2(type2)
        type3 = big_tables[2]
        # self.typer_3(type3)
        type4 = big_tables[3]
        # self.typer_4(type4)

    '''
#这是typr_1
    '''
    def type_1(self,type1):

        '''
        type1: 这是第一类基金类型:基金 股票型、指数型、混合型、债券型、ETF 类基金
        需要获取的内容：
                1.基金简称
                2.基金代码
                3.净值日期
                4.净值
                5.累计净值
                6.涨跌幅
                7.成立日期
                8.申购状态
                9.赎回状态
                10.定投状态
                11.网上交易
        :param
        :return:第一类 基金 股票型、指数型、混合型、债券型、ETF 类基金
                数据结构：字典
                字典结构：一只基金一个
        '''

#一、整理数据
        # 1、获取type1基金导航栏信息

        type1_nvg_titles_rule = re.compile('class="p16_libe">(.*?)</span>')
        # 基金导航栏字典
        # 只需要前9个
        # ['基金简称', '基金代码', '净值日期', '净值', '累计净值', '涨跌幅', '成立日期', '申购状态', '赎回状态', '定投状态', '网上交易', '添加自选']
        type1_nvg_tltles = type1_nvg_titles_rule.findall(type1)
        # print(type1_nvg_tltles)  # 基金导航栏列

        #2、获取type1中 每一只基金列表
        #基金列表  funds_list
        funds_rule = re.compile('position: relative(.*?)style="color:red',re.S)
        funds_list = funds_rule.findall(type1)
        # print(len(funds_list))#期望： 195  时间5019/5/25


#二、获取每一只基金的信息
        #1、获取单只基金信息
        count =1
        for fund in funds_list:
        #获取每一只基金的 详细信息

            #独立规则

最低0.47元/天解锁文章

物喜己悲

关注

1
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
使用python爬取猫眼电影、房王、股吧论坛、百度翻译、有道翻译、高德天气、华夏基金、扇贝单词、糗事百科（华夏基金）

import requests,re#获取整个网页# with open('华夏基金.html','w',encoding='utf-8') as f:# f.write(html)class Huaxia(): # def __init__(self): # self.base_html() def __call__(self, *args...
复制链接

扫一扫