# -*- coding: utf-8 -*-
"""
Created on Tue Jun 27 09:19:25 2017
@author: yunjinqi
E-mail:yunjinqi@qq.com
Differentiate yourself in the world from anyone else.
"""
import pandas as pd
import scipy.stats as sts
import numpy as np
def _summarize_stock(frame, label, positions=range(1, 4)):
    """Build a table of descriptive statistics for selected columns.

    For each positional column index in ``positions`` the column's
    ``describe()`` summary is taken, printed, and then extended with the
    skewness ('偏度') and kurtosis ('峰度') computed by scipy.

    Args:
        frame: DataFrame holding one stock's data.
        label: Prefix prepended to each column name to title the summary.
        positions: Positional column indices to summarise (default 1, 2, 3).

    Returns:
        DataFrame with one column per summarised input column and one row
        per statistic.
    """
    column_names = list(frame.columns)
    summaries = []
    for pos in positions:
        # ``.iloc`` selects by position; the ``.ix`` indexer used previously
        # was removed in pandas 1.0.
        column = frame.iloc[:, pos]
        values = np.array(column)
        summary = column.describe()
        summary.name = label + column_names[pos]
        # Printed before skewness/kurtosis are appended, as in the original.
        print(summary)
        summary['偏度'] = sts.skew(values)
        # NOTE: scipy's default is Fisher (excess) kurtosis, i.e. a normal
        # distribution gives 0 rather than 3.
        summary['峰度'] = sts.kurtosis(values)
        summaries.append(summary)
    # Each Series becomes a row; transpose so that statistics are the rows.
    return pd.DataFrame(summaries).T


# Gezhouba (600068)
df = pd.read_excel('C:/Users/HXWD/Desktop/600068.xlsx')
stock068 = _summarize_stock(df, '葛洲坝')
print(stock068)

# HLA / Heilan Home (600398)
df = pd.read_excel('C:/Users/HXWD/Desktop/600398.xlsx')
stock398 = _summarize_stock(df, '海澜之家')
print(stock398)

# Put both stocks side by side (joined on the statistic name) and save.
data = stock068.join(stock398)
print(data)
data.to_csv('统计.csv')
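# Computing common descriptive statistics
# 2018-01-16: On revisiting the kurtosis calculation, I found that the results
# of both sts.kurtosis() and df.kurt() disagree with the descriptive statistics
# reported by EViews, possibly because they use a different definition of
# kurtosis. A formula adapted from someone else's pure-Python implementation
# gives the same result as EViews; the code is as follows.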
import math
def calc(data):
    """Return ``[E(X), standard deviation, E(X^3)]`` for a sequence of numbers.

    The standard deviation is the population one (divides by ``n``).

    The variance is computed from squared deviations about the mean rather
    than as ``E(X^2) - E(X)^2``. The latter subtracts two nearly equal
    numbers and can come out slightly negative for constant or
    near-constant data, which would make ``math.sqrt`` raise ``ValueError``.

    Args:
        data: Sized iterable of numbers; it is iterated more than once.

    Returns:
        A list ``[mean, sigma, raw_third_moment]``.

    Raises:
        ZeroDivisionError: If ``data`` is empty.
    """
    n = len(data)
    mean = sum(data) / n                           # E(X)
    raw_third = sum(x ** 3 for x in data) / n      # E(X^3)
    # A sum of squares is never negative, so the square root is always safe.
    variance = sum((x - mean) ** 2 for x in data) / n
    sigma = math.sqrt(variance)
    return [mean, sigma, raw_third]
def calc_stat(data):
    """Return ``[mean, standard deviation, skewness, kurtosis]`` of ``data``.

    Skewness is ``m3 / sigma**3`` and kurtosis is ``m4 / sigma**4``, where
    ``m3`` and ``m4`` are the third and fourth moments about the mean and
    ``sigma`` is the population standard deviation returned by ``calc``.
    The kurtosis is not reduced by 3 (it is not "excess" kurtosis).

    Both central moments are accumulated directly from the deviations.
    Deriving the third one from raw moments
    (``E(X^3) - 3*mean*sigma**2 - mean**3``) subtracts large, nearly equal
    numbers and loses precision when the mean is large relative to the
    spread.

    Args:
        data: Sized iterable of numbers; it is iterated more than once.

    Returns:
        A list ``[mean, sigma, skew, kurt]``.

    Note:
        The result is undefined when the standard deviation is zero, since
        both ratios then divide by zero.
    """
    # calc() also returns E(X^3); it is not needed here.
    [niu, sigma, _] = calc(data)
    n = len(data)
    central3 = 0.0
    central4 = 0.0
    for a in data:
        dev = a - niu
        central3 += dev ** 3
        central4 += dev ** 4
    central3 /= n
    central4 /= n
    skew = central3 / (sigma ** 3)
    kurt = central4 / (sigma ** 4)
    return [niu, sigma, skew, kurt]
# Recommendation: use the code below when computing kurtosis.