应用多元统计分析（4）第三章多元正太分布

最新推荐文章于 2022-10-26 16:19:09 发布

HarryStudyPython_ing

最新推荐文章于 2022-10-26 16:19:09 发布

阅读量1.2k

点赞数 1

分类专栏： Python 应用多元统计分析（教材：王学明）

本文链接：https://blog.csdn.net/qq_47805483/article/details/109020986

版权

Python 应用多元统计分析（教材：王学明）专栏收录该内容

9 篇文章 12 订阅

订阅专栏

第三章多元正太分布

- 课堂代码及注释
- 第四次作业

课堂代码及注释

from scipy import stats
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import pandas as pd

##一元正太分布密度曲线
rv=stats.norm(2,3)
dir(rv)

rv.cdf(2)    ## （-inf,2）的概率密度
rv.pdf(2)    ## x=2的时候其密度函数值是多少
rv.ppf(0.975)    ## 概率密度为0.975的时候对应的x的值

x=np.linspace(-7,11,num=100)
rv.pdf(x)
plt.plot(x,rv.pdf(x))

####    二元正太分布密度曲面
def norm2(x1,x2,u1,u2,s1,s2,r):
    x1_std=(x1-u1)/s1
    x2_std=(x2-u2)/s2
    return (((2*np.pi*s1*s2*(1-r**2)**0.5))**(-1))*np.exp((-(2*(1-r**2))**(-1))*((x1_std**2-2*r*x1_std*x2_std+x2_std**2)))

#example
norm2(1,5,2,5,2,0.8,0.6)

##      画图
#   1表面图ax.plot_surface(Xa,Ya,Za)
mu=[2,5]
S=[2,0.8,0.6]

x1s=np.linspace(-4,8,101)
x2s=np.linspace(2.6,7.4,101)

np.meshgrid(x1s,x2s)##形成格子
Xa,Ya=np.meshgrid(x1s,x2s)
Za=np.array(norm2(np.ravel(Xa),np.ravel(Ya),mu[0],mu[1],S[0],S[1],S[2])).reshape(Xa.shape)

fig=plt.figure()##生成画板
ax=fig.gca(projection="3d")
ax.plot_surface(Xa,Ya,Za)
plt.show()

#   2等高线图plt.contour(Xa,Ya,Za,5)
plt.contour(Xa,Ya,Za,5)    ##level=5 指定画等分的五个等高线图
plt.contour(Xa,Ya,Za,[0.02,0.1])    ##指定画哪个地方的截面
plt.contourf(Xa,Ya,Za,[0.02,0.1])    ##f fill

ct1=plt.contour(Xa,Ya,Za,[0.02,0.1],colors=['red','blue'])
plt.clabel(ct1)     ##c label   contour label


###----------------------------------------example (可能存在问题)
###三元正态分布等高体###
def norm3e(x1,x2,x3,u1,u2,u3,s1,s2,s3,r12,r13,r23):
    xs=np.array([x1,x2,x3])
    us=np.array([u1,u2,u3])
    S=np.array([s1**2,s1*s2*r12,s1*s3*r13,s1*s2*r12,s2**2,s2*s3*r23,s1*s3*r13,s2*s3*r23,s3**2]).reshape(3,3)
    return 1/((2*np.pi)**(3/2)*np.linalg.det(S)**0.5)*np.exp(0.5*(xs-us).T.dot(np.linalg.inv(S)).dot(xs-us))

vnorm3=np.vectorize(norm3)

#example
norm3e(1,2,3,2,3,4,1,2,3,0.0,0.5,0.7)

#画图
x1s=np.linspace(-1,5,num=51)
x2s=np.linspace(-3,9,num=51)
x3s=np.linspace(-5,13,num=51)

X1,X2,X3=np.meshgrid(x1s,x2s,x3s)
Y=np.array(norm3e(np.ravel(X1),np.ravel(X2),np.ravel(X3),2,3,4,1,2,3,0.0,0.5,0.7)).reshape(X1.shape)
##-----------------------------------------

##-----------------------------------------(正解)
###     三元正态分布等高体
def norm3(xs,us,s1,s2,s3,r12,r13,r23):
    ##xs=np.array([x1,x2,x3])
    ##us=np.array([u1,u2,u3])
    S=np.array([s1**2,s1*s2*r12,s1*s3*r13,s1*s2*r12,s2**2,s2*s3*r23,s1*s3*r13,s2*s3*r23,s3**2]).reshape(3,3)
    return 1/((2*np.pi)**(3/2)*np.linalg.det(S)**0.5)*np.exp(0.5*(xs-us).T.dot(np.linalg.inv(S)).dot(xs-us))

#norm3(xs=np.array([1,2,3]), us=np.array([3,5,7]), s1=1,s2=2,s3=3, r12=0,r13=0.5,r23=0.7)

norm3(np.array([1,2,4]),np.array([3,5,7]),s1=1,s2=2,s3=3,r12=0.0,r13=0.5,r23=0.7)

def norm32(xs):
    return norm3(xs,us=np.array([3,5,7]),s1=1,s2=2,s3=3,r12=0.0,r13=0.5,r23=0.7)

norm32(np.array([1,2,3]))
norm32(np.array([4,5,6]))

xs=np.array([[1,2,4],[4,5,6]])
norm32(xs[0,])

np.apply_along_axis(norm32,1,xs)    ##np.apply_along_axis()函数 

np.apply_along_axis(print,1,xs)    ## 1 对行进行操作
np.apply_along_axis(print,0,xs)    ## 0 对列进行操作

np.apply_along_axis(sum,1,xs)    ## 参数说明 ： function ; col/icol ; matrix
np.apply_along_axis(sum,0,xs)


#画图
x1s=np.linspace(0,6,num=51)
x2s=np.linspace(-3,9,num=51)
x3s=np.linspace(-5,13,num=51)

X1,X2,X3=np.meshgrid(x1s,x2s,x3s)

np.stack((np.array([1,2,3]),np.array([3,6,9])))

xs=np.stack((np.ravel(X1),np.ravel(X2),np.ravel(X3))).T
Y=np.apply_along_axis(norm32,1,xs).reshape(X1.shape)

from mayavi.mlab import *
contour3d(Y,contours=4,transparent=True)
outline()
axes()
show()


x, y, z = np.ogrid[-5:5:64j, -5:5:64j, -5:5:64j]
scalars = x * x * 0.5 + y * y + z * z * 2.0
contour3d(scalars, contours=4, transparent=True)
show()

##    计算样本离差阵(1)
trees=pd.read_csv("F:\\基础数学课\\应用多元统计分析\\trees.csv")
type(trees)
trees.mean()
trees.cov()
trees.corr()
trees.var()
trees.std()

(31-1)*trees.cov()    ##计算样本离差阵

trees.values.T.dot(np.eye(31)-1/31*np.ones((31,1)).dot(np.ones((1,31)))).dot(trees.values)    ##对比得到方差  等于 (31-1)*trees.cov()   

1/31*trees.values.T.dot(np.ones((31,1)))    ##得到均值向量  等于  trees.mean()

##    计算样本离差阵(2)
trees=pd.read_csv("F:\\基础数学课\\应用多元统计分析\\trees.csv")
x_bar = trees.mean()
trees.cov()
trees.corr()

x1 = pd.DataFrame(trees.iloc[1,]-x_bar)
x1.dot(x1.T)

x2 = pd.DataFrame(trees.iloc[2,]-x_bar)
x2.dot(x2.T)

x30 = pd.DataFrame(trees.iloc[30,]-x_bar)
x30.dot(x30.T)

A = np.zeros((3,3))
for i in range(31):
    xi = pd.DataFrame(trees.iloc[i,]-x_bar)
    temp = xi.dot(xi.T)
    A = A+temp.values

B = np.eye(31)-(1/31)*np.ones((31,31))
S = trees.T.dot(B).dot(trees)

Sigma = 1/30 * S

Sigma = trees.cov()
S = Sigma * (n-1)


###课后思考
#定义三元正态分布的密度函数
def norm3(xs,us,S):
    return 1/((2*np.pi)**(3/2)*np.linalg.det(S)**0.5)*np.exp(-1/2*(xs-us).T.dot(np.linalg.inv(S)).dot(xs-us))

def norm3_trees(xs):
    return norm3(xs,us = us_hat.reshape(3,),S = S_hat)

第四次作业

1.写出二元正态分布密度函数数学公式并定义其函数，使用 $\mu_{1}$ 、 $\mu_{2}$ 、 $\sigma_{1}$ 、 $\sigma_{2}$ ， $\rho$ 五个参数。

2.作出 $\mu_{1}=1$ 、 $\mu_{2}=2$ 、 $\sigma_{1}=4$ 、 $\sigma_{2}=9$ ， $\rho=0.8$ 的曲面和等高线。

3.作出 $\mu_{1}=1$ 、 $\mu_{2}=2$ 、 $\sigma_{1}=4$ 、 $\sigma_{2}=9$ ， $\rho=0$ 的曲面和等高线。

import numpy as np
import matplotlib.pyplot as plt 
from mpl_toolkits.mplot3d import Axes3D

##      1. 写出二维正态分布密度函数数学公式并定义其函数，使用 $\mu_{1}$, $\mu_{2}$ , $\sigma_{1}$ , $\sigma_{2}$ , $\rho$ 五个参数。
#定义二元正态分布的密度函数
def norm2(x1,x2,u1,u2,s1,s2,p):
    x1_std = (x1-u1)/s1
    x2_std = (x2-u2)/s2
    return 1/(2*np.pi*s1*s2*(1-p**2)**(1/2))*np.exp(-1/(2*(1-p**2))*(x1_std**2+x2_std**2-2*p*x1_std*x2_std))
    
def norm2_1(x1,x2):
    return norm2(x1,x2,1,2,4,9,0.8)

x1s = np.linspace(-11,13,101)
x2s = np.linspace(-25,29,101)

X1,X2 = np.meshgrid(x1s,x2s)

Y1 = np.array(norm2_1(X1,X2))

##      2. 作出 $\mu_{1}=1$, $\mu_{2}=2$ , $\sigma_{1}=4$ , $\sigma_{2}=9$ , $\rho=0.8$ 的曲面和等高线。
#二元正态分布曲面
fig=plt.figure()
ax=fig.gca(projection="3d")
ax.plot_surface(X1,X2,Y1)
plt.show()

#二元正态分布等高线
plt.contour(X1,X2,Y1,10)
plt.contourf(X1,X2,Y1,10)

##      3. 作出 $\mu_{1}=1$, $\mu_{2}=2$ , $\sigma_{1}=4$ , $\sigma_{2}=9$ , $\rho=0$ 的曲面和等高线。
def norm2_2(x1,x2):
    return norm2(x1,x2,1,2,4,9,0)

Y2 = np.array(norm2_2(X1,X2))

#二元正态分布曲面
fig=plt.figure()
ax=fig.gca(projection="3d")
ax.plot_surface(X1,X2,Y2)
plt.show()

#二元正态分布等高线
plt.contour(X1,X2,Y2,10)
plt.contourf(X1,X2,Y2,10)

使用trees数据集，编写程序：

1.用样本均值向量估计总体均值向量；

2.用样本协方差阵估计总体协方差矩阵；

3.计算样本相关系数矩阵；

4.假设trees数据集服从三维正态分布，写出估计出的三维随机向量的密度函数形式（用向量矩阵函数）并画出其等高体。

##      4. 使用trees数据集，编写程序：
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
from mpl_toolkits.mplot3d import Axes3D
from mayavi.mlab import *

trees = pd.read_csv("F:\\基础数学课\\应用多元统计分析\\trees.csv")

##      (1) 用样本均值向量估计总体均值向量；
#样本均值、协方差阵、相关阵
trees.mean()
trees.cov()
trees.corr()

##      (2) 用样本协方差矩阵估计总体协方差矩阵；
##      (3) 计算样本相关系数矩阵。
mean_hat = np.array(trees.mean()).reshape(3,1) ;mean_hat
cov_hat = np.array(trees.cov()).reshape(3,3) ;cov_hat
corr_hat  = np.array(trees.corr()).reshape(3,3) ;corr_hat


##      (4) 假设trees数据集服从三维正态分布，写出估计出的三维随机向量的密度函数形式(用向量矩阵函数)并画出其等高体。
#定义三元正态分布的密度函数
def norm3(xs,us,S):
    return 1/((2*np.pi)**(3/2)*np.linalg.det(S)**0.5)*np.exp(-1/2*(xs-us).T.dot(np.linalg.inv(S)).dot(xs-us))

def norm3_trees(xs):
    return norm3(xs,us = mean_hat.reshape(3,),S = cov_hat)

##计算np.linspace()的前面两个参数
x1_min = mean_hat[0]-3*(cov_hat[0,0])**0.5
x1_max = mean_hat[0]+3*(cov_hat[0,0])**0.5
x2_min = mean_hat[1]-3*(cov_hat[1,1])**0.5
x2_max = mean_hat[1]+3*(cov_hat[1,1])**0.5
x3_min = mean_hat[2]-3*(cov_hat[2,2])**0.5
x3_max = mean_hat[2]+3*(cov_hat[2,2])**0.5

x1s=np.linspace(x1_min,x1_max,101)
x2s=np.linspace(x2_min,x2_max,101)
x3s=np.linspace(x3_min,x3_max,101)

X1,X2,X3 = np.meshgrid(x1s,x2s,x3s)

xs=np.stack((np.ravel(X1),np.ravel(X2),np.ravel(X3))).T

Y = np.apply_along_axis(norm3_trees,1,xs).reshape(X1.shape)

contour3d(Y,contours=4,transparent=True)
outline()
axes()
show()