'''
简单数据分析流程:
1、明确目的
2、理解数据
3、数据清洗
4、数据分析和可视化
5、结论和建议
分析方法:
1、常用的统计方法:常见统计量、构建相关指标
2、图表法
'''
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import os
import numpy
# --- Load and inspect the raw data ------------------------------------------
# trade_date is read as a string (object dtype); it is converted to datetime
# explicitly further down.
data = pd.read_csv("data.csv", dtype={'trade_date': 'object'}, encoding='utf-8')

# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would only add a stray "None" line.
data.info()

# Fraction of missing values per column (mean of the boolean null mask).
# Printed explicitly: a bare expression shows nothing when run as a script.
print(data.isnull().mean())

# --- Clean -------------------------------------------------------------------
# Keep only rows with no missing value; copy so later assignments do not
# touch `data`.
df = data.dropna(how='any').copy()

# Unparseable dates become NaT instead of raising.
df['trade_date'] = pd.to_datetime(df['trade_date'], errors='coerce')

# Distinct eps values among rows with a positive pe (inspection only).
print(df.loc[df['pe'] > 0, 'eps'].drop_duplicates())

# Remove rows whose holder_num is zero.
df.drop(index=df.index[df['holder_num'] == 0], inplace=True)

# Ratio of the 8th column to the 17th column (0-based positions 7 and 16).
# Done as one vectorized division with explicit positional access instead of
# a row-wise apply with integer keys, which relies on deprecated positional
# fallback for label-indexed rows. A zero denominator yields inf/NaN here
# rather than an exception.
# NOTE(review): which columns sit at positions 7 and 16 depends on the CSV
# layout -- confirm, and consider selecting them by name.
# NOTE(review): 'per_shrare' looks like a typo for 'per_share'; left as-is in
# case the column name is read elsewhere.
df['per_shrare'] = df.iloc[:, 7] / df.iloc[:, 16]
import plotly as py
import plotly.graph_objs as go

# Total profit_yoy per distinct per_undp value, ten largest totals first.
# The column is selected BEFORE aggregating: summing the whole frame would
# also try to sum non-numeric columns (e.g. the datetime trade_date), which
# raises TypeError on pandas >= 2.0 and wastes work on older versions.
corre = (
    df.groupby('per_undp')['profit_yoy']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

# Render plotly figures inline; `pyplot` is the shorthand used by the later
# chart sections as well.
# NOTE(review): init_notebook_mode/iplot target Jupyter -- confirm this is
# only ever run inside a notebook.
py.offline.init_notebook_mode()
pyplot = py.offline.iplot

# NOTE(review): the title says "环比" while the column is named profit_yoy --
# confirm which growth measure is intended.
trace_basic = [
    go.Bar(
        x=corre.index.tolist(),
        y=corre.values.tolist(),
        marker=dict(color='orange'),
        opacity=0.50,
    )
]
layout = go.Layout(title='每股未分配利润和利润环比增长的关系', xaxis=dict(title='每股未分配利润'))
figure = go.Figure(data=trace_basic, layout=layout)
pyplot(figure)
# ![在这里插入图片描述](https://img-blog.csdnimg.cn/e499219361a240afb154b353d6d03292.png?x-oss-process=image/watermark,type_ZHJvaWRzYW5zZmFsbGJhY2s,shadow_50,text_Q1NETiBAWEJfdG9udGljYw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
# Total holder_num per distinct per_undp value, ten largest totals first.
# Select the column before summing so non-numeric columns (e.g. the datetime
# trade_date) are never aggregated -- that raises TypeError on pandas >= 2.0.
corre = (
    df.groupby('per_undp')['holder_num']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)

# Bar chart of the ten totals, drawn with the plotly helpers (`go`, `pyplot`)
# set up earlier in the file.
trace_basic = [
    go.Bar(
        x=corre.index.tolist(),
        y=corre.values.tolist(),
        marker=dict(color='red'),
        opacity=0.50,
    )
]
layout = go.Layout(title='每股未分配利润和持股人数的关系--前十', xaxis=dict(title='每股未分配利润'))
figure = go.Figure(data=trace_basic, layout=layout)
pyplot(figure)
# ![在这里插入图片描述](https://img-blog.csdnimg.cn/c0da89db5ede4a85b077192985a325bd.png?x-oss-process=image/watermark,type_ZHJvaWRzYW5zZmFsbGJhY2s,shadow_50,text_Q1NETiBAWEJfdG9udGljYw==,size_20,color_FFFFFF,t_70,g_se,x_16#pic_center)
# Calendar month (1-12) of each trade date; unparseable dates give NaN.
df['month'] = pd.to_datetime(df['trade_date'], errors='coerce').dt.month

sns.set(style='darkgrid', context='notebook', font_scale=1.2)

# Total holder_num per month, largest first (at most ten bars).
# Select the column before summing so non-numeric columns (e.g. the datetime
# trade_date) are never aggregated -- that raises TypeError on pandas >= 2.0.
(
    df.groupby('month')['holder_num']
    .sum()
    .sort_values(ascending=False)
    .head(10)
    .plot(kind='bar')
)
# ![在这里插入图片描述](https://img-blog.csdnimg.cn/2c7755bbef3540df89b8e2245df5d3cd.png?x-oss-process=image/watermark,type_ZHJvaWRzYW5zZmFsbGJhY2s,shadow_50,text_Q1NETiBAWEJfdG9udGljYw==,size_10,color_FFFFFF,t_70,g_se,x_16)
# Per-month summary: number of distinct pe values, total holder_num and total
# per_undp.
# - Group the whole frame: a lone holder_num Series has no 'month' key to
#   group by and cannot take a per-column aggregation dict.
# - Aggregations are given by name, so no numpy alias is needed (the file
#   imports `numpy`, not `np`).
# - 'per_undp' is the column name used by the earlier groupbys.
#   NOTE(review): the original spelled it 'per_undo' -- confirm no such
#   column exists.
holder = df.groupby('month').agg(
    {'pe': 'nunique', 'holder_num': 'sum', 'per_undp': 'sum'}
)

# Descriptive statistics of the monthly summary; printed explicitly because a
# bare expression shows nothing when run as a script.
print(holder.describe())
# 最后:结论和建议……