import pandas as pd import mysql.connector from sqlalchemy import create_engine from config import user, password, ts_token import tushare as ts def fetch_company_data(cnx, stockCode): cursor = cnx.cursor(dictionary=True) query = (""" select ts_code, end_date from fin_data where ts_code = %s """) cursor.execute(query, (stockCode,)) result = cursor.fetchall() cursor.close() return result # 定义数据库连接 def connect_to_db(): config = { 'user': user, 'password': password, 'host': 'localhost', 'database': 'FinancialDataBase' } cnx = mysql.connector.connect(**config) return cnx def tushare2SQL(company_code, start_year, end_year): ''' 输入股票代码,输入起止年份, 返回包含起止年份的区间财报数据 Parameters ---------- company_code : str 公司股票代码 start_year : TYPE 年报起始年份 end_year : TYPE 年报终止年份 Returns ------- 查到的表 ''' # 建立数据库 conn = mysql.connector.connect( buffered = True, host="localhost", user = user, password = password# 这里改成你的信息 ) mycursor = conn.cursor() # 游标,通过游标来操作数据库内容 # 创建数据库 try: mycursor.execute("CREATE DATABASE IF NOT EXISTS FinancialDataBase")# 执行sql语句 print('已建立数据库FinancialDataBase') except Exception as e: print(e) conn.close() # 生成年份列表 year_list = list(range(start_year, end_year+1)) year_list = [str(i) + '1231' for i in year_list] start_year = str(start_year) + '0101' # 年报公布日 end_year = str(end_year + 1) + '1231' # 链接tushare数据库 ts.set_token(ts_token) pro = ts.pro_api() # 获取利润表数据 # 注意这里的end_date是年报公布日, 需要在年报的年份上+1 如获取2023年报需要提到2024 income_df = pro.income(ts_code = company_code, start_date = start_year, end_date = end_year, report_type='1') income_selected = income_df[['ts_code', 'ann_date', 'end_date', 'n_income', 'fin_exp_int_exp']] income_selected = income_selected[income_selected['end_date'].isin(year_list)] income_selected = income_selected.drop_duplicates() print('已经查询到' + company_code + '的利润表数据, 共' + str(len(income_selected)) + '条') # 获取资产负债表数据 balancesheet_df = pro.balancesheet(ts_code=company_code, start_date = start_year, end_date= end_year, report_type='1') balancesheet_selected = balancesheet_df[['ts_code', 'ann_date', 'end_date', 'total_cur_assets', 'total_cur_liab']] balancesheet_selected= balancesheet_selected[balancesheet_selected['end_date'].isin(year_list)] balancesheet_selected=balancesheet_selected.drop_duplicates() print('已经查询到' + company_code + '的资产负债表数据, 共' + str(len(balancesheet_selected)) + '条') # 获取现金流量表数据 cashflow_df = pro.cashflow(ts_code=company_code, start_date = start_year, end_date = end_year, report_type='1') cashflow_selected = cashflow_df[['ts_code', 'ann_date', 'end_date', 'depr_fa_coga_dpba', 'amort_intang_assets', 'lt_amort_deferred_exp' ,'c_pay_acq_const_fiolta']] cashflow_selected= cashflow_selected[cashflow_selected['end_date'].isin(year_list)] cashflow_selected=cashflow_selected.drop_duplicates() print('已经查询到' + company_code + '的现金流量表数据, 共' + str(len(cashflow_selected)) + '条') # 拼接三张报表 merged_df = pd.merge(income_selected, balancesheet_selected, on=['ts_code', 'end_date', 'ann_date'], how='inner') merged_df = pd.merge(merged_df, cashflow_selected, on=['ts_code', 'end_date', 'ann_date'], how='inner') # 查询一下库里有哪些年份的数据了 cnx = connect_to_db() company_data = fetch_company_data(cnx, company_code) # 关闭数据库连接 cnx.close() # 转换数据 df = pd.DataFrame(company_data) df = df.drop_duplicates() if len(df) == 0: # 如果库里没有就全部落库 engine = create_engine('mysql+mysqlconnector://'+ user +':'+ password + '@localhost/FinancialDataBase') merged_df.to_sql('fin_data', con = engine, if_exists='append', index=False) print('已经落库新的' + company_code + '的数据, 共' + str(len(merged_df)) + '条') return merged_df # 如果库里有就只落库库里没有的 need_append_year = set(df['end_date']) - set(merged_df['end_date']) merged_df_need = merged_df.loc[merged_df['end_date'].isin(need_append_year),:] # 使用create_engine创建MySQL连接 engine = create_engine('mysql+mysqlconnector://'+ user +':'+ password + '@localhost/FinancialDataBase') # 将DataFrame存入MySQL数据库中的表中 # 如果存在该数据表, 那么追加到上面 merged_df_need.to_sql('fin_data', con = engine, if_exists='append', index=False) print('已经落库新的' + company_code + '的数据, 共' + str(len(merged_df_need)) + '条') return merged_df if __name__ == '__main__': ''' 测试用例,用于测试能否正常获取数据 ''' # 传递参数 company_code='000963.SZ' # 股票代码 start_year = 2020 # 年报初始年份 end_year = 2023 # 年报终止年份 df = tushare2SQL(company_code, start_year, end_year)
05-14
368
01-16
2584
08-15
3440