Tushare获取数据并保存到MySQL数据库

import pandas as pd
import mysql.connector
from sqlalchemy import create_engine
from config import user, password, ts_token
import tushare as ts

def fetch_company_data(cnx, stockCode):
    cursor = cnx.cursor(dictionary=True)
    query = ("""
        select  ts_code,
                end_date	
        from fin_data
        where ts_code = %s
    """)
    cursor.execute(query, (stockCode,))
    result = cursor.fetchall()
    cursor.close()
    return result

# 定义数据库连接
def connect_to_db():
    config = {
        'user': user,
        'password': password,
        'host': 'localhost',
        'database': 'FinancialDataBase'
    }
    cnx = mysql.connector.connect(**config)
    return cnx

def tushare2SQL(company_code, start_year, end_year):
    '''
    输入股票代码,输入起止年份, 返回包含起止年份的区间财报数据

    Parameters
    ----------
    company_code : str
        公司股票代码
    start_year : TYPE
        年报起始年份
    end_year : TYPE
        年报终止年份

    Returns
    -------
    查到的表

    '''
    # 建立数据库
    conn = mysql.connector.connect( buffered = True,
                                    host="localhost",
                                    user = user,
                                    password = password# 这里改成你的信息
                                  )
    mycursor = conn.cursor()  # 游标,通过游标来操作数据库内容
    
    # 创建数据库
    try:
        mycursor.execute("CREATE DATABASE  IF NOT EXISTS FinancialDataBase")# 执行sql语句
        print('已建立数据库FinancialDataBase')
    except Exception as e:
        print(e)
    conn.close()
    
    # 生成年份列表
    year_list = list(range(start_year, end_year+1))
    year_list = [str(i) + '1231' for i in year_list]
    start_year = str(start_year) + '0101' # 年报公布日
    end_year = str(end_year + 1) + '1231'  
    
    # 链接tushare数据库
    ts.set_token(ts_token)
    pro = ts.pro_api()
    
    # 获取利润表数据
    # 注意这里的end_date是年报公布日, 需要在年报的年份上+1 如获取2023年报需要提到2024
    income_df = pro.income(ts_code = company_code, start_date = start_year, end_date = end_year, report_type='1')
    income_selected = income_df[['ts_code', 'ann_date', 'end_date', 'n_income', 'fin_exp_int_exp']]
    income_selected = income_selected[income_selected['end_date'].isin(year_list)]
    income_selected = income_selected.drop_duplicates()
    print('已经查询到' + company_code + '的利润表数据, 共' + str(len(income_selected)) + '条')
    
    # 获取资产负债表数据
    balancesheet_df = pro.balancesheet(ts_code=company_code, start_date = start_year, end_date= end_year, report_type='1')
    balancesheet_selected = balancesheet_df[['ts_code', 'ann_date', 'end_date', 'total_cur_assets', 'total_cur_liab']]
    balancesheet_selected= balancesheet_selected[balancesheet_selected['end_date'].isin(year_list)]
    balancesheet_selected=balancesheet_selected.drop_duplicates()
    print('已经查询到' + company_code + '的资产负债表数据, 共' + str(len(balancesheet_selected)) + '条')
    
    # 获取现金流量表数据
    cashflow_df = pro.cashflow(ts_code=company_code, start_date = start_year, end_date = end_year, report_type='1')
    cashflow_selected = cashflow_df[['ts_code', 'ann_date', 'end_date', 'depr_fa_coga_dpba', 'amort_intang_assets', 'lt_amort_deferred_exp' ,'c_pay_acq_const_fiolta']]
    cashflow_selected= cashflow_selected[cashflow_selected['end_date'].isin(year_list)]
    cashflow_selected=cashflow_selected.drop_duplicates()
    print('已经查询到' + company_code + '的现金流量表数据, 共' + str(len(cashflow_selected)) + '条')
    
    # 拼接三张报表
    merged_df = pd.merge(income_selected, balancesheet_selected, on=['ts_code', 'end_date', 'ann_date'], how='inner')
    merged_df = pd.merge(merged_df, cashflow_selected, on=['ts_code', 'end_date', 'ann_date'], how='inner')
    
    # 查询一下库里有哪些年份的数据了
    cnx = connect_to_db()
    company_data = fetch_company_data(cnx, company_code)
    # 关闭数据库连接
    cnx.close()
    # 转换数据
    df = pd.DataFrame(company_data)
    df = df.drop_duplicates()
    
    if len(df) == 0:
        # 如果库里没有就全部落库
        engine = create_engine('mysql+mysqlconnector://'+ user +':'+ password + '@localhost/FinancialDataBase')
        merged_df.to_sql('fin_data', con = engine, if_exists='append', index=False)
        print('已经落库新的' + company_code + '的数据, 共' + str(len(merged_df)) + '条')
        return merged_df
    
    # 如果库里有就只落库库里没有的
    need_append_year = set(df['end_date']) - set(merged_df['end_date'])
    merged_df_need = merged_df.loc[merged_df['end_date'].isin(need_append_year),:]
    # 使用create_engine创建MySQL连接
    engine = create_engine('mysql+mysqlconnector://'+ user +':'+ password + '@localhost/FinancialDataBase')
    
    # 将DataFrame存入MySQL数据库中的表中
    # 如果存在该数据表, 那么追加到上面
    merged_df_need.to_sql('fin_data', con = engine, if_exists='append', index=False)
    print('已经落库新的' + company_code + '的数据, 共' + str(len(merged_df_need)) + '条')
    return merged_df

if __name__ == '__main__':
    '''
        测试用例,用于测试能否正常获取数据
    '''
    # 传递参数
    company_code='000963.SZ' # 股票代码
    start_year = 2020        # 年报初始年份
    end_year = 2023          # 年报终止年份
    df = tushare2SQL(company_code, start_year, end_year)
    

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值