Full code for HowBuy Fund (好买基金网):
import requests
from bs4 import BeautifulSoup
import json
import xlwt

# Portfolio page (requires a logged-in cookie)
url = 'https://i.howbuy.com/member/property/'
session = requests.session()
cookie = ''  # paste the Cookie value of a logged-in session here
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36',
    'Cookie': cookie
}
res = session.get(url=url, headers=headers)

# Save the raw page so the selectors can be checked offline
with open('rr.html', 'w', encoding='utf-8') as f:
    f.write(res.content.decode())

html = BeautifulSoup(res.text, 'html.parser')
# Each holding sits in a <div class="typBox mt20"> block
fund_boxes = html.find_all('div', class_='typBox mt20')
fund = []
for item in fund_boxes:
    fundName = item.find('p', class_='title').text.strip()
    # Amount and profit arrive as strings like "1,234.56"; drop the thousands separators
    fundSum = item.find('div', class_='lt w160').p.text.strip().replace(',', '')
    fundProfit = item.find('p', class_='cRed').text.strip().replace(',', '')
    print(fundName, fundSum, fundProfit, sep='/')
    fund.append({
        'fundName': fundName,
        'fundSum': float(fundSum),
        'fundProfit': float(fundProfit)
    })

with open('.fund.json', 'w', encoding='utf-8') as f:
    json.dump(fund, f, indent=1, ensure_ascii=False)
# Read the JSON back
with open('.fund.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Create a workbook with utf-8 encoding
workbook = xlwt.Workbook(encoding='utf-8')
# Create a worksheet
worksheet = workbook.add_sheet('Worksheet')
title = ["序号", "平台", "分类", "基金名称", "基金代码", "金额", "份额", "收益", "收益率", "投资日期", "记录"]
for i in range(len(title)):
    worksheet.write(0, i, title[i])

# worksheet.write(row, column, value)
for i in range(len(data)):
    worksheet.write(i + 1, 0, label=i + 1)                 # serial number
    worksheet.write(i + 1, 3, label=data[i]['fundName'])   # the label keyword is optional
    worksheet.write(i + 1, 5, label=data[i]['fundSum'])
    worksheet.write(i + 1, 7, label=data[i]['fundProfit'])
workbook.save('Excel.xls')
# Optional helper (currently unused): coerce a scraped string to float,
# falling back to 0.0 if it cannot be parsed
# def to_float(x):
#     try:
#         return float(x)
#     except ValueError:
#         return 0.0
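The parsing step above can also be wrapped in a small function so it is easy to re-run against the saved rr.html while adjusting selectors. A minimal sketch (the name extract_holdings is illustrative, not part of the original script):

from bs4 import BeautifulSoup

def extract_holdings(page_html):
    # Same selectors as the script above: one 'typBox mt20' div per holding
    soup = BeautifulSoup(page_html, 'html.parser')
    holdings = []
    for item in soup.find_all('div', class_='typBox mt20'):
        name = item.find('p', class_='title').text.strip()
        amount = item.find('div', class_='lt w160').p.text.strip().replace(',', '')
        profit = item.find('p', class_='cRed').text.strip().replace(',', '')
        holdings.append({'fundName': name, 'fundSum': float(amount), 'fundProfit': float(profit)})
    return holdings

# e.g. extract_holdings(open('rr.html', encoding='utf-8').read())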
Full code for iFund (同花顺旗下爱基金):
from bs4 import BeautifulSoup
import xlwt
import json

# Parse a saved copy of the holdings page
with open('Fund.html', 'r', encoding='utf-8') as f:
    html = BeautifulSoup(f, 'html.parser')
# Each holding sits in a <div class="box_cont_0 cb"> block
fund_list = html.find_all('div', class_='box_cont_0 cb')
fund = []
for item in fund_list:
    # The link text looks like "某某基金(001234)": the part before '(' is the
    # fund name, the part after it carries the fund code
    raw_name = item.find('strong', class_='fund_name fl').a.text.strip()
    fundName = raw_name.split('(')[0]
    fundName_a = raw_name.split('(')[1]                          # fund code (trailing ')' included)
    fundSum = item.find_all('tr')[1].contents[1].string          # second cell of the second <tr>
    fundProfit = item.find_all('tr')[1].contents[4].string[1:]   # drop the leading +/- sign
    fund.append({
        'fundName': fundName,
        'fundSum': float(fundSum),        # store as numbers
        'fundProfit': float(fundProfit)
    })

with open('.fund.json', 'w', encoding='utf-8') as f:
    json.dump(fund, f, indent=1, ensure_ascii=False)
# Read the JSON back
with open('.fund.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Create a workbook with utf-8 encoding
workbook = xlwt.Workbook(encoding='utf-8')
# Create a worksheet
worksheet = workbook.add_sheet('Worksheet')
title = ["序号", "平台", "分类", "基金名称", "基金代码", "金额", "份额", "收益", "收益率", "投资日期", "记录"]
for i in range(len(title)):
    worksheet.write(0, i, title[i])

# worksheet.write(row, column, value)
for i in range(len(data)):
    worksheet.write(i + 1, 0, label=i + 1)                 # serial number
    worksheet.write(i + 1, 3, label=data[i]['fundName'])   # the label keyword is optional
    worksheet.write(i + 1, 5, label=data[i]['fundSum'])
    worksheet.write(i + 1, 7, label=data[i]['fundProfit'])
workbook.save('Excel.xls')
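Here fundName_a holds the fund code with its trailing ')' still attached and is not written to the 基金代码 column. If the code were needed, a regex split would be slightly more robust than slicing on '('. A minimal sketch (split_name_and_code is illustrative; it assumes half-width parentheses and a 6-digit code):

import re

def split_name_and_code(raw):
    # '某某混合(001234)' -> ('某某混合', '001234'); fall back to the raw string
    m = re.match(r'^(.*?)\((\d{6})\)', raw.strip())
    if m:
        return m.group(1), m.group(2)
    return raw.strip(), ''

# e.g. name, code = split_name_and_code(item.find('strong', class_='fund_name fl').a.text)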
Full code for Danjuan Fund (雪球旗下蛋卷基金):
from bs4 import BeautifulSoup
import xlwt
import json

# Parse a saved copy of the holdings page
with open('fund.html', 'r', encoding='utf-8') as f:
    html = BeautifulSoup(f, 'html.parser')
# Each holding sits in a <div class="inner"> block
fund_list = html.find_all('div', class_='inner')
fund = []
for i, item in enumerate(fund_list):
    try:
        fundName = item.find('div', class_='p-name').text.strip()
        # Amount is the third child of the first 'small-box' div, e.g. "1,234.56"
        fundSum = item.find_all('div', class_='small-box')[0].contents[2].string.strip().replace(',', '')
        # Profit is the first child of the first 'col2' div; drop the leading +/- sign
        fundProfit = item.find_all('div', class_='col2')[0].contents[0].string.strip()[1:]
        print(fundName, fundSum, fundProfit, sep='/')
        fund.append({
            'fundName': fundName,
            'fundSum': float(fundSum),
            'fundProfit': float(fundProfit)
        })
    except Exception as e:
        # Not every 'inner' div is a holding; report the index and move on
        print(i, e)

with open('.fund.json', 'w', encoding='utf-8') as f:
    json.dump(fund, f, indent=1, ensure_ascii=False)
# Read the JSON back
with open('.fund.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# Create a workbook with utf-8 encoding
workbook = xlwt.Workbook(encoding='utf-8')
# Create a worksheet
worksheet = workbook.add_sheet('Worksheet')
title = ["序号", "平台", "分类", "基金名称", "基金代码", "金额", "份额", "收益", "收益率", "投资日期", "记录"]
for i in range(len(title)):
    worksheet.write(0, i, title[i])

# worksheet.write(row, column, value)
for i in range(len(data)):
    worksheet.write(i + 1, 0, label=i + 1)                 # serial number
    worksheet.write(i + 1, 3, label=data[i]['fundName'])   # the label keyword is optional
    worksheet.write(i + 1, 5, label=data[i]['fundSum'])
    worksheet.write(i + 1, 7, label=data[i]['fundProfit'])
workbook.save('Excel.xls')
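All three scripts end with the same JSON-to-.xls step, so it could live in one shared helper instead of being pasted three times. A minimal sketch (export_to_excel is illustrative; the column layout mirrors the scripts above):

import json
import xlwt

TITLE = ["序号", "平台", "分类", "基金名称", "基金代码", "金额", "份额", "收益", "收益率", "投资日期", "记录"]

def export_to_excel(json_path, xls_path):
    # Load the dumped fund list and write it to an .xls sheet
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    workbook = xlwt.Workbook(encoding='utf-8')
    worksheet = workbook.add_sheet('Worksheet')
    for col, head in enumerate(TITLE):
        worksheet.write(0, col, head)
    for row, rec in enumerate(data, start=1):
        worksheet.write(row, 0, row)                # serial number
        worksheet.write(row, 3, rec['fundName'])    # fund name
        worksheet.write(row, 5, rec['fundSum'])     # amount
        worksheet.write(row, 7, rec['fundProfit'])  # profit
    workbook.save(xls_path)

# e.g. export_to_excel('.fund.json', 'Excel.xls')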