正态分布的最大似然估计(下面的代码拟合的是正态分布,并非二项分布)
import numpy as np # 导入NumPy库,用于生成随机数据和数学操作
import matplotlib.pyplot as plt # 导入Matplotlib库,用于绘制图表
from scipy.stats import norm # 导入SciPy库中的正态分布模块
from scipy.optimize import minimize # 导入SciPy库中的参数优化函数
# Build a simulated dataset assumed to follow a normal distribution.
np.random.seed(40) # fix the RNG seed so the results are reproducible
data = np.random.normal(loc=0, scale=1, size=160) # draw 160 samples from N(0, 1) (original comment wrongly said 100)
def negative_log_likelihood(params, data):
    """Negative log-likelihood of `data` under a normal distribution.

    params: sequence (mu, sigma) -- mean and standard deviation.
    data:   array of observations.
    Returns the scalar negative log-likelihood, suitable for minimization.
    """
    mu, sigma = params
    return -np.sum(norm.logpdf(data, loc=mu, scale=sigma))
# Starting point for the optimizer: (mean, std dev).
initial_guess = [0, 1]

# Maximum-likelihood fit: minimize the negative log-likelihood over (mu, sigma).
result = minimize(negative_log_likelihood, initial_guess, args=(data,))
estimated_mu, estimated_sigma = result.x

# Report the fitted parameters.
print("估计的均值:", estimated_mu)
print("估计的标准差:", estimated_sigma)

# Histogram of the sample, normalized to a probability density.
plt.hist(data, bins=20, density=True, alpha=0.6, color='y', label='Histogram')

# Overlay the fitted normal density on a grid spanning the data range.
grid = np.linspace(min(data), max(data), 100)
fitted_pdf = norm.pdf(grid, loc=estimated_mu, scale=estimated_sigma)
plt.plot(grid, fitted_pdf, 'r--', label='Estimated Normal Distribution')

# Axis labels, legend, and title.
plt.xlabel('Value')
plt.ylabel('Probability')
plt.legend()
plt.title('Histogram and Estimated Normal Distribution')
plt.show()
SVD特征值与特征向量应用
from PIL import Image
import numpy as np
def get_approx_SVD1(data, percent):
    """Rank-k SVD approximation of a 2-D array, where k is the smallest
    count of leading singular values whose sum exceeds `percent` of the
    total singular-value mass.

    data:    2-D array (an image channel; values expected in [0, 255]).
    percent: float in (0, 1] -- fraction of singular-value mass to keep.
    Returns a uint8 array of the same shape as `data`.
    """
    U, s, VT = np.linalg.svd(data)
    # Expand the 1-D singular values into the full (m, n) Sigma matrix.
    Sigma = np.zeros(np.shape(data))
    Sigma[:len(s), :len(s)] = np.diag(s)
    # Accumulate singular values until their running sum exceeds the target.
    # Fixes vs. the original: (1) the value that crossed the threshold was
    # excluded from the reconstruction (off-by-one via U[:, :k]), which
    # could yield an all-zero image; (2) `int()` truncation of the target
    # plus `curSum <= count` could walk past the end of `s` (IndexError)
    # when percent is close to 1; k is now clamped to len(s).
    target = sum(s) * percent
    k = 0
    curSum = 0.0
    while curSum <= target and k < len(s):
        curSum += s[k]
        k += 1
    D = U[:, :k].dot(Sigma[:k, :k].dot(VT[:k, :]))
    # Clip to the valid 8-bit pixel range before rounding.
    D[D < 0] = 0
    D[D > 255] = 255
    return np.rint(D).astype("uint8")
def get_approx_SVD2(data, percent):
    """Rank-k SVD approximation keeping the top `percent` fraction of
    singular values by count (k = percent * len(s)).

    data:    2-D array (an image channel; values expected in [0, 255]).
    percent: float in (0, 1] -- fraction of singular values to keep.
    Returns a uint8 array of the same shape as `data`.
    """
    U, s, VT = np.linalg.svd(data)
    # Expand the 1-D singular values into the full (m, n) Sigma matrix.
    Sigma = np.zeros(np.shape(data))
    Sigma[:len(s), :len(s)] = np.diag(s)
    # Clamp k into [1, len(s)]: the original `int(len(s) * percent)` gives
    # k == 0 for a small percent (producing an all-zero image) and could
    # exceed len(s) for percent > 1.
    k = min(max(int(len(s) * percent), 1), len(s))
    D = U[:, :k].dot(Sigma[:k, :k].dot(VT[:k, :]))
    # Clip to the valid 8-bit pixel range before rounding.
    D[D < 0] = 0
    D[D > 255] = 255
    return np.rint(D).astype("uint8")
def rebuild_img(filename, p, get_approx_SVD):
    """Reconstruct an RGB image channel-by-channel with an SVD
    approximation, save it as "<p*100>.jpg", and display it.

    filename:        path of the source image.
    p:               compression parameter forwarded to `get_approx_SVD`.
    get_approx_SVD:  callable (channel, p) -> uint8 channel.
    """
    img = Image.open(filename, 'r')
    a = np.array(img)
    # NOTE(review): assumes the image has at least 3 channels; an alpha
    # channel (RGBA PNG) would be silently dropped -- confirm sources are RGB.
    R = get_approx_SVD(a[:, :, 0], p)
    G = get_approx_SVD(a[:, :, 1], p)
    B = get_approx_SVD(a[:, :, 2], p)
    I = np.stack((R, G, B), 2)
    # Format the percentage explicitly: the original str(p * 100) on
    # np.arange values produced names like "60.00000000000001.jpg".
    out_name = "%.1f.jpg" % (p * 100)
    Image.fromarray(I).save(out_name)
    # Re-open the saved JPEG and show it.
    Image.open(out_name, "r").show()
filename = "lena.png"
# Rebuild the image at ratios p = 0.2, 0.4, ..., 1.0, first with the
# mass-based method, then with the count-based method.
for approx in (get_approx_SVD1, get_approx_SVD2):
    for p in np.arange(0.2, 1.2, 0.2):
        rebuild_img(filename, p, approx)
lena全身照下次补发
又变成违规图片了,咋回事!照片在资源里想看的哥们别客气去资源里面瞅
SVD特征值与特征向量应用
from PIL import Image
import numpy as np
def get_approx_SVD1(data, percent):
    """Rank-k SVD approximation of a 2-D array, where k is the smallest
    count of leading singular values whose sum exceeds `percent` of the
    total singular-value mass.

    data:    2-D array (an image channel; values expected in [0, 255]).
    percent: float in (0, 1] -- fraction of singular-value mass to keep.
    Returns a uint8 array of the same shape as `data`.
    """
    U, s, VT = np.linalg.svd(data)
    # Expand the 1-D singular values into the full (m, n) Sigma matrix.
    Sigma = np.zeros(np.shape(data))
    Sigma[:len(s), :len(s)] = np.diag(s)
    # Accumulate singular values until their running sum exceeds the target.
    # Fixes vs. the original: (1) the value that crossed the threshold was
    # excluded from the reconstruction (off-by-one via U[:, :k]), which
    # could yield an all-zero image; (2) `int()` truncation of the target
    # plus `curSum <= count` could walk past the end of `s` (IndexError)
    # when percent is close to 1; k is now clamped to len(s).
    target = sum(s) * percent
    k = 0
    curSum = 0.0
    while curSum <= target and k < len(s):
        curSum += s[k]
        k += 1
    D = U[:, :k].dot(Sigma[:k, :k].dot(VT[:k, :]))
    # Clip to the valid 8-bit pixel range before rounding.
    D[D < 0] = 0
    D[D > 255] = 255
    return np.rint(D).astype("uint8")
def get_approx_SVD2(data, percent):
    """Rank-k SVD approximation keeping the top `percent` fraction of
    singular values by count (k = percent * len(s)).

    data:    2-D array (an image channel; values expected in [0, 255]).
    percent: float in (0, 1] -- fraction of singular values to keep.
    Returns a uint8 array of the same shape as `data`.
    """
    U, s, VT = np.linalg.svd(data)
    # Expand the 1-D singular values into the full (m, n) Sigma matrix.
    Sigma = np.zeros(np.shape(data))
    Sigma[:len(s), :len(s)] = np.diag(s)
    # Clamp k into [1, len(s)]: the original `int(len(s) * percent)` gives
    # k == 0 for a small percent (producing an all-zero image) and could
    # exceed len(s) for percent > 1.
    k = min(max(int(len(s) * percent), 1), len(s))
    D = U[:, :k].dot(Sigma[:k, :k].dot(VT[:k, :]))
    # Clip to the valid 8-bit pixel range before rounding.
    D[D < 0] = 0
    D[D > 255] = 255
    return np.rint(D).astype("uint8")
def rebuild_img(filename, p, get_approx_SVD):
    """Reconstruct an RGB image channel-by-channel with an SVD
    approximation, save it as "<p*100>.jpg", and display it.

    filename:        path of the source image.
    p:               compression parameter forwarded to `get_approx_SVD`.
    get_approx_SVD:  callable (channel, p) -> uint8 channel.
    """
    img = Image.open(filename, 'r')
    a = np.array(img)
    # NOTE(review): assumes the image has at least 3 channels; an alpha
    # channel (RGBA PNG) would be silently dropped -- confirm sources are RGB.
    R = get_approx_SVD(a[:, :, 0], p)
    G = get_approx_SVD(a[:, :, 1], p)
    B = get_approx_SVD(a[:, :, 2], p)
    I = np.stack((R, G, B), 2)
    # Format the percentage explicitly: the original str(p * 100) on
    # np.arange values produced names like "60.00000000000001.jpg".
    out_name = "%.1f.jpg" % (p * 100)
    Image.fromarray(I).save(out_name)
    # Re-open the saved JPEG and show it.
    Image.open(out_name, "r").show()
filename = "lena.png"
# Rebuild the image at ratios p = 0.2, 0.4, ..., 1.0, first with the
# mass-based method, then with the count-based method.
for approx in (get_approx_SVD1, get_approx_SVD2):
    for p in np.arange(0.2, 1.2, 0.2):
        rebuild_img(filename, p, approx)
上面的代码对每种方法各生成五张近似图像(p = 0.2, 0.4, …, 1.0,共十张),目的:图像压缩与降噪。
实验六:相关性计算分析
import numpy as np
import scipy.stats as stats

# Data source: two paired samples (e.g. heights of two related groups).
x = np.array([170, 150, 210, 180, 160])
y = np.array([188, 165, 190, 172, 168])
print(x)
print(y)
print()

# Pearson correlation: SciPy's value vs. the textbook formula.
correlation, pvalue = stats.pearsonr(x, y)
r = np.sum((x - np.mean(x)) * (y - np.mean(y))) \
    / np.sqrt(np.sum((x - np.mean(x)) ** 2)) \
    / np.sqrt(np.sum((y - np.mean(y)) ** 2))
print('皮尔逊函数系数', correlation)
# The original line used a full-width opening quote (print (‘P值',...)),
# which is a SyntaxError; fixed to a normal ASCII quote.
print('P值', pvalue)
print('数学的皮尔逊系数', r)
print()

# Spearman correlation: rank the data, then compare SciPy vs. the formula
# r = 1 - 6 * sum(d^2) / (n^3 - n). The original hard-coded sum(d^2) = 2
# and n = 5; compute both from the data instead.
x = stats.rankdata(x)
y = stats.rankdata(y)
print(x)
print(y)
print(x - y)
print()
correlation, pvalue = stats.spearmanr(x, y)
n = len(x)
r = 1 - 6 * np.sum((x - y) ** 2) / (n ** 3 - n)
print('斯皮尔曼函数系数', correlation)
print('P值', pvalue)
print('斯皮尔曼数学系数 =', r)
皮尔逊相关系数的范围在-1到1之间。 当皮尔逊相关系数接近1时,这意味着两组数据之间存在非常强的正线性关系。斯皮尔曼秩相关系数的范围在-1到1之间。当斯皮尔曼秩相关系数接近1时,这意味着两组数据之间存在非常强的正单调关系。
实验七:回归分析
import numpy as np
import pandas as pd
# Machine-learning toolkit imports (StandardScaler and cross_val_score are
# imported but not used in this script).
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score

# Load the power-plant dataset: features AT, V, AP, RH; target PE.
data = pd.read_csv("Folds5x2_pp1.csv")
x = data[['AT', 'V', 'AP', 'RH']]
y = data[['PE']]
# Split: 70% of the rows for training, 30% for testing (fixed seed).
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1, test_size=0.3)

from sklearn.linear_model import LinearRegression

# Create and fit an ordinary least-squares linear regression model.
model = LinearRegression()
model.fit(x_train, y_train)

# Fitted parameters.
a = model.intercept_  # intercept
b = model.coef_       # regression coefficients
print("最佳拟合线:截距", a, "\n回归系数:", b)
print(model.coef_[0])

# Predict on the held-out test set.
y_pred = model.predict(x_test)

# Evaluate the regression model with several standard metrics.
from sklearn.metrics import explained_variance_score, mean_absolute_error, \
    mean_squared_error, median_absolute_error, r2_score
print("电力预测线性模型的平均绝对误差为:", mean_absolute_error(y_test, y_pred))
print("电力预测线性回归模型的均方误差MSE为:", mean_squared_error(y_test, y_pred))
print("电力预测线性回归模型的中值绝对误差为:", median_absolute_error(y_test, y_pred))
print("电力预测线性回归模型的可解释方差值为:", explained_variance_score(y_test, y_pred))
print("电力预测线性回归模型的判定系数即R平方为:", r2_score(y_test, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y_test, y_pred)))
# Removed: plt.show() -- matplotlib was never imported (NameError at
# runtime) and no figure is ever created in this script.
作业九:聚类分析
使用鸢尾花(iris)数据集进行 K-means 应用
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn.datasets import load_iris

# K-means clustering of the iris data set into 2 clusters, plotted on the
# first two feature columns. (The original loaded iris and imported the
# same modules twice; deduplicated.)
iris = load_iris()
X = iris.data[:]
estimator = KMeans(n_clusters=2)  # build the clusterer: k = 2
estimator.fit(X)                  # run the clustering
label_pred = estimator.labels_    # per-sample cluster labels

# Scatter-plot each cluster in its own color/marker.
x0 = X[label_pred == 0]
x1 = X[label_pred == 1]
plt.scatter(x0[:, 0], x0[:, 1], c="red", marker='o', label='label0')
plt.scatter(x1[:, 0], x1[:, 1], c="green", marker='*', label='label1')
# Columns 0 and 1 of iris.data are sepal length/width; the original axis
# labels said "petal", which mislabeled the plot.
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend(loc=2)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn.datasets import load_iris

# K-means clustering of the iris data set into 3 clusters, plotted on the
# first two feature columns.
iris = load_iris()
X = iris.data[:]
estimator = KMeans(n_clusters=3)  # build the clusterer: k = 3 (original comment wrongly said 2)
estimator.fit(X)                  # run the clustering
label_pred = estimator.labels_    # per-sample cluster labels

# Scatter-plot each cluster in its own color/marker.
x0 = X[label_pred == 0]
x1 = X[label_pred == 1]
x2 = X[label_pred == 2]
plt.scatter(x0[:, 0], x0[:, 1], c="red", marker='o', label='label0')
plt.scatter(x1[:, 0], x1[:, 1], c="green", marker='*', label='label1')
plt.scatter(x2[:, 0], x2[:, 1], c="blue", marker='+', label='label2')
# Columns 0 and 1 of iris.data are sepal length/width; the original axis
# labels said "petal", which mislabeled the plot.
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend(loc=2)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
from sklearn import datasets
from sklearn.datasets import load_iris

# K-means clustering of the iris data set into 4 clusters, plotted on the
# first two feature columns.
iris = load_iris()
X = iris.data[:]
estimator = KMeans(n_clusters=4)  # build the clusterer: k = 4 (original comment wrongly said 2)
estimator.fit(X)                  # run the clustering
label_pred = estimator.labels_    # per-sample cluster labels

# Scatter-plot each cluster in its own color/marker (clusters 2 and 3
# share the '+' marker but differ in color, as in the original).
x0 = X[label_pred == 0]
x1 = X[label_pred == 1]
x2 = X[label_pred == 2]
x3 = X[label_pred == 3]
plt.scatter(x0[:, 0], x0[:, 1], c="red", marker='o', label='label0')
plt.scatter(x1[:, 0], x1[:, 1], c="green", marker='*', label='label1')
plt.scatter(x2[:, 0], x2[:, 1], c="blue", marker='+', label='label2')
plt.scatter(x3[:, 0], x3[:, 1], c="orange", marker='+', label='label3')
# Columns 0 and 1 of iris.data are sepal length/width; the original axis
# labels said "petal", which mislabeled the plot.
plt.xlabel('sepal length')
plt.ylabel('sepal width')
plt.legend(loc=2)
plt.show()
作业八:多因素方差分析
from scipy import stats  # (imported but unused in this script)
import pandas as pd
import numpy as np
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm

# Two-factor ANOVA: does score (S) depend on environment (E) and
# ingredients (I)? Levels 1..5 for each factor, fully crossed (5 x 5 design).
environmental = [5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1]
ingredients = [5, 4, 3, 2, 1, 5, 4, 3, 2, 1, 5, 4, 3, 2, 1, 5, 4, 3, 2, 1, 5, 4, 3, 2, 1]
score = [5, 5, 4, 3, 2, 5, 4, 4, 3, 2, 4, 4, 3, 3, 2, 4, 3, 2, 2, 2, 3, 3, 3, 2, 1]
data = {'E': environmental, 'I': ingredients, 'S': score}
df = pd.DataFrame(data)
print(df)

# Treat E and I as categorical factors via C(...). The original formula
# 'S~E+I' fitted them as numeric covariates (1 df each), which is a linear
# regression rather than the multi-factor ANOVA this exercise intends.
formula = 'S~C(E)+C(I)'
results = anova_lm(ols(formula, df).fit())
print(results)