import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# ---------------------------------------------------------------------------
# Build a simulated daily stock table (open/close/high/low), inspect it,
# clean the missing first row, and attach a random volume column three
# equivalent ways (concat / merge / join).
# ---------------------------------------------------------------------------

# Generate data.
# Closing prices: 1000 draws from a normal distribution (mean 10, std 1).
close_data = np.random.normal(loc=10.0, scale=1.0, size=1000)
print(f'close_data:\n{close_data[:5]}')
# Each day's open is the previous day's close.  np.roll shifts the array by
# one position, so element 0 wraps around from the last close; that bogus
# value is blanked out below.
open_data = np.roll(close_data, 1)
print(f'open_data:\n{open_data[:5]}')
# High is the larger of open/close, low is the smaller.
high_data = np.where((open_data > close_data), open_data, close_data)
print(f'high_data:\n{high_data[:5]}')
low_data = np.where(open_data <= close_data, open_data, close_data)
print(f'low_data:\n{low_data[:5]}')
# The first day has no previous close, so mark its whole row as missing.
open_data[0], close_data[0], high_data[0], low_data[0] = np.nan, np.nan, np.nan, np.nan

# Build the index: 1000 consecutive calendar days.
dd = pd.date_range('2010-01-01', freq='D', periods=1000)
print(f'datelist=\n{dd}')
data_index = dd

# Create the DataFrame (column names come from the dict keys).
df_stock = pd.DataFrame(
    {'open': open_data, 'close': close_data,
     'high': high_data, 'low': low_data},
    index=data_index,
)
print(f'模拟股票数据:\n{df_stock[:5]}')

# Basic DataFrame inspection.
print(f'{df_stock.head()}\n{df_stock.tail()}\n{df_stock.index}\n{df_stock.columns}')
print(f'{df_stock.shape}\n{df_stock.describe()}')
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must be called on its own rather than embedded in an f-string / print().
df_stock.info()

# First visualisation (disabled):
# df_visual=df_stock.loc['2010-01-01':'2011-12-09',['high','low']].plot(linewidth=1,figsize=(8,6)).legend()
# plt.show()

# Missing-value handling.
print(f'{df_stock.isnull().head()}\n{df_stock.notnull().head()}')
# Select every row that contains at least one NaN: any(axis=1) reduces the
# boolean mask across the columns, giving one True/False per row.
print(df_stock[df_stock.isnull().any(axis=1)])

# Fill missing values: back-fill copies the next valid observation upwards.
# The result is a new frame and has to be kept; df_stock itself is left
# untouched so the dropna demonstration below still has a row to remove.
df_stock_filled = df_stock.bfill(axis=0)

# Drop missing values: remove, in place, every row that has any NaN.
df_stock.dropna(axis=0, how='any', inplace=True)

# Reduce precision to two decimals.
df_stock = df_stock.round(2)
df_stock.info()

# Generate the traded volume: random integers in [100000, 200000).
volume_data = np.random.randint(100000, 200000, 1000)
df_volume = pd.DataFrame({'volume': volume_data}, index=data_index)

# Combine the data three ways: concat, merge, join.  All use an inner join on
# the index, so the first date (dropped from df_stock above) is excluded.
df_stock_concat = pd.concat([df_stock, df_volume], axis=1, join='inner')
df_stock_merge = pd.merge(df_stock, df_volume, left_index=True, right_index=True, how='inner')
df_stock_join = df_stock.join(df_volume, how='inner')
print(df_stock_join)
#遍历
#for in遍历
def forin_loop(df):
    """Return a copy of ``df`` with a ``pct_change`` column, filled by index loop.

    For every row the new column holds ``(high - low) / open``.  The rows are
    visited by integer position; this is the plain ``for ... in`` variant of
    the computation.

    Args:
        df: DataFrame with ``high``, ``low`` and ``open`` columns.

    Returns:
        A new DataFrame (``assign`` copies, the argument is not modified)
        with the extra ``pct_change`` column.
    """
    # Initialise with a float, not the integer 0: the values written below
    # are floats, and writing floats into an integer column is an
    # incompatible-dtype assignment that recent pandas versions reject.
    df = df.assign(pct_change=0.0)
    # The column position does not change during the loop; look it up once.
    pct_col = df.columns.get_loc('pct_change')
    for i in range(df.shape[0]):
        row = df.iloc[i]
        df.iloc[i, pct_col] = (row['high'] - row['low']) / row['open']
    return df
# Run the positional-index loop over the merged table and show its first
# five rows.
forin_result = forin_loop(df_stock_concat)
print(forin_result[:5])
#iterrows生成器
def iterrows_loop(df):
    """Return a copy of ``df`` with a ``pct_change`` column, filled via iterrows.

    For every row the new column holds ``(high - low) / open``.  The rows are
    visited with the ``DataFrame.iterrows`` generator and written back by
    index label.

    Args:
        df: DataFrame with ``high``, ``low`` and ``open`` columns.

    Returns:
        A new DataFrame (``assign`` copies, the argument is not modified)
        with the extra ``pct_change`` column.
    """
    # Initialise with a float, not the integer 0: the values written below
    # are floats, and writing floats into an integer column is an
    # incompatible-dtype assignment that recent pandas versions reject.
    df = df.assign(pct_change=0.0)
    for index, row in df.iterrows():
        df.loc[index, 'pct_change'] = (row['high'] - row['low']) / row['open']
    return df
# Other ways to obtain the same column, kept disabled for comparison:
#   iterrows generator:
#       print(iterrows_loop(df_stock_concat[:5]))
#   DataFrame.apply, one call per row:
#       df_stock_concat['pct_change']=df_stock_concat.apply(lambda row:((row['high']-row['low'])/row['open']),axis=1)
#   vectorised on the pandas Series:
#       df_stock_concat['pct_change']=(df_stock_concat['high']-df_stock_concat['low'])/df_stock_concat['open']

# Vectorised on the underlying NumPy arrays: (high - low) / open for all rows
# in a single expression.
high_values = df_stock_concat['high'].values
low_values = df_stock_concat['low'].values
open_values = df_stock_concat['open'].values
df_stock_concat['pct_change'] = (high_values - low_values) / open_values

# Store the data.  The same file is read back below.
csv_path = 'C:\\Users\\England Mouse\\Desktop\\example.csv'
df_stock_concat.to_csv(
    csv_path,
    mode='w',
    encoding=None,
    sep=",",
    na_rep="NA",            # missing values are written as the text NA
    float_format='%.2f',    # floats are written with two decimals
    columns=df_stock_concat.columns,
    header=True,
    index=True,
)

# Load the data: the first column becomes the index and is parsed as dates,
# the first row supplies the column names.
df_read = pd.read_csv(csv_path, header=0, index_col=0, parse_dates=True)
print(df_read)
# DataFrame 的规整化
# 最新推荐文章于 2024-12-08 19:28:40 发布
# 该博客展示了如何使用 Python 处理金融数据,包括生成随机股票数据、处理缺失值、绘制图表以及合并数据。涉及 numpy、pandas 和 matplotlib 等库,还探讨了 DataFrame 的遍历、应用和缺失值处理方法。
219

被折叠的 条评论
为什么被折叠?



