第九章作业

最新推荐文章于 2025-06-10 08:29:53 发布

2402_83456563

最新推荐文章于 2025-06-10 08:29:53 发布

阅读量223

点赞数 1

文章标签： python 开发语言

本文链接：https://blog.csdn.net/2402_83456563/article/details/144213179

版权

习题9.2

import numpy as np
from scipy.stats import shapiro

# Observed sample: 50 measurements, listed ten per row.
data = np.array([
    15.0, 15.8, 15.2, 15.1, 15.9, 14.7, 14.8, 15.5, 15.6, 15.3,
    15.1, 15.3, 15.0, 15.6, 15.7, 14.8, 14.5, 14.2, 14.9, 14.9,
    15.2, 15.0, 15.3, 15.6, 15.1, 14.9, 14.2, 14.6, 15.8, 15.2,
    15.9, 15.2, 15.0, 14.9, 14.8, 14.5, 15.1, 15.1, 15.5, 15.5,
    15.1, 15.0, 15.3, 15.7, 14.5, 15.5, 15.0, 15.7, 14.6, 14.2,
])

# Sample mean and standard deviation, stored as fixed constants.
# They are not used by the test below.
# NOTE(review): the mean of `data` as listed is about 15.118, which differs
# from the constant stored here -- confirm against the exercise statement.
sample_mean, sample_std = 15.0780, 0.4325

# Shapiro-Wilk normality test on the raw observations.
statistic, p_value = shapiro(data)
print("Shapiro - Wilk statistic:", statistic)
print("p - value:", p_value)

# Decide at the 5% level: a p-value above 0.05 does not reject normality.
verdict = (
    "The data is normally distributed."
    if p_value > 0.05
    else "The data is not normally distributed."
)
print(verdict)

运行结果

习题9.3-1

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Measurements: each row is one repeated measurement and each column is one
# laboratory (values within a column cluster together, e.g. the fourth column
# stays near 3.9 while the first stays near 4.05).
data = np.array([
    [4.13, 3.86, 4.00, 3.88, 4.02, 4.00],
    [4.07, 3.85, 4.02, 3.88, 3.95, 3.86],
    [4.04, 4.08, 4.01, 3.91, 4.02, 3.96],
    [4.07, 4.11, 4.01, 3.95, 3.89, 3.97],
    [4.05, 4.08, 4.04, 3.92, 3.91, 4.00],
    [4.04, 4.01, 3.99, 3.97, 4.01, 3.82],
    [4.02, 4.02, 4.03, 3.92, 3.89, 3.98],
    [4.06, 4.04, 3.97, 3.90, 3.89, 3.99],
    [4.010, 3.97, 3.98, 3.97, 3.99, 4.02],
    [4.05, 3.95, 3.98, 3.90, 4.00, 3.93]
])

# The array is deliberately NOT transposed: the columns already correspond to
# laboratories. Transposing and then taking the first six columns would turn
# every DataFrame column into "measurement k across all labs" and silently
# drop four of the ten measurements.
selected_data = data[:, :6]

# One DataFrame column per laboratory, ten measurements each.
df = pd.DataFrame(selected_data, columns=['Lab1', 'Lab2', 'Lab3', 'Lab4', 'Lab5', 'Lab6'])

# Draw one box per laboratory.
df.plot.box()
plt.title('Boxplot of Measurement Results in Different Labs')
plt.ylabel('P尔敏有效含量 (mg)')
plt.show()

运行结果

习题9.3-2

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.stats import f_oneway

# Measurements: each row is one repeated measurement and each column is one
# laboratory. The array is kept un-transposed so that column slices below
# select whole laboratories.
data = np.array([
    [4.13, 3.86, 4.00, 3.88, 4.02, 4.00],
    [4.07, 3.85, 4.02, 3.88, 3.95, 3.86],
    [4.04, 4.08, 4.01, 3.91, 4.02, 3.96],
    [4.07, 4.11, 4.01, 3.95, 3.89, 3.97],
    [4.05, 4.08, 4.04, 3.92, 3.91, 4.00],
    [4.04, 4.01, 3.99, 3.97, 4.01, 3.82],
    [4.02, 4.02, 4.03, 3.92, 3.89, 3.98],
    [4.06, 4.04, 3.97, 3.90, 3.89, 3.99],
    [4.010, 3.97, 3.98, 3.97, 3.99, 4.02],
    [4.05, 3.95, 3.98, 3.90, 4.00, 3.93]
])

# One group per laboratory: all ten measurements of each column.
lab1 = data[:, 0]
lab2 = data[:, 1]
lab3 = data[:, 2]
lab4 = data[:, 3]
lab5 = data[:, 4]
lab6 = data[:, 5]
# NOTE(review): the array holds only six laboratory columns, so no seventh
# group exists. If the exercise covers seven laboratories, the missing
# column has to be added to `data` first.

# One-way ANOVA: do the laboratory means differ?
f_statistic, p_value = f_oneway(lab1, lab2, lab3, lab4, lab5, lab6)
print("F - statistic:", f_statistic)
print("p - value:", p_value)

# Decide at the 5% significance level.
if p_value < 0.05:
    print("There is a significant difference in the means.")
else:
    print("There is no significant difference in the means.")

运行结果

习题9.4

import numpy as np
from scipy.stats import f_oneway

# Yield data: 12 rows of 3 replicates, one row per (variety, fertilizer) cell.
# NOTE(review): rows are assumed to be ordered variety-major, i.e.
# A1B1, A1B2, A1B3, A1B4, A2B1, ..., A3B4 -- this is what the contiguous
# four-row slices for factor A below imply; confirm against the exercise table.
yield_data = np.array([
    [173, 172, 173],
    [174, 176, 178],
    [177, 179, 176],
    [172, 173, 174],
    [175, 173, 176],
    [178, 177, 179],
    [174, 175, 173],
    [174, 174, 175],
    [177, 175, 176],
    [174, 174, 175],
    [170, 171, 172],
    [169, 169, 170]
])

# Factor A (variety): each level occupies four consecutive rows.
A1_yield = yield_data[0:4].flatten()
A2_yield = yield_data[4:8].flatten()
A3_yield = yield_data[8:12].flatten()

# Factor B (fertilizer): with variety-major rows, level Bj sits in every
# fourth row starting at row j. Contiguous three-row slices would mix
# fertilizer levels and cannot be consistent with the factor A grouping.
B1_yield = yield_data[0::4]
B2_yield = yield_data[1::4]
B3_yield = yield_data[2::4]
B4_yield = yield_data[3::4]

# One-way ANOVA for factor A (wheat variety).
f_A, p_A = f_oneway(A1_yield, A2_yield, A3_yield)
print("For factor A (wheat variety):")
print("F - statistic:", f_A)
print("p - value:", p_A)

# One-way ANOVA for factor B (fertilizer type).
f_B, p_B = f_oneway(B1_yield.flatten(), B2_yield.flatten(), B3_yield.flatten(), B4_yield.flatten())
print("For factor B (fertilizer type):")
print("F - statistic:", f_B)
print("p - value:", p_B)

运行结果

习题9.5

import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols
import pandas as pd  # 添加这行代码，导入pandas库并使用pd作为别名

# Sales data: 12 rows of 4 observations.
# NOTE(review): each row is assumed to hold the four replicates of one
# (A, B, C) combination, in the same order as `factor_levels` below
# (12 rows <-> 12 combinations) -- confirm against the exercise table.
sales_data = np.array([
    [955, 967, 960, 950],
    [927, 949, 950, 940],
    [905, 930, 920, 910],
    [855, 860, 880, 875],
    [880, 890, 895, 900],
    [860, 840, 850, 830],
    [870, 865, 850, 860],
    [830, 850, 840, 830],
    [875, 888, 900, 892],
    [870, 850, 847, 965],
    [870, 863, 845, 855],
    [821, 842, 832, 848]
])

# Levels of factors A, B, C for each row of sales_data.
factor_levels = np.array([
    [1, 1, 1], [1, 1, 2], [1, 2, 1], [1, 2, 2],
    [2, 1, 1], [2, 1, 2], [2, 2, 1], [2, 2, 2],
    [3, 1, 1], [3, 1, 2], [3, 2, 1], [3, 2, 2]
])

# Flattening row by row places the replicates of one combination next to each
# other, so each combination's levels must be repeated consecutively.
# Cycling through the 12 combinations four times would attach the wrong
# factor levels to almost every observation.
sales_data_reshaped = sales_data.reshape(-1, 1)
factors = np.repeat(factor_levels, sales_data.shape[1], axis=0)
df = sm.add_constant(np.hstack((factors, sales_data_reshaped)))
df = pd.DataFrame(df, columns=['const', 'A', 'B', 'C', 'sales'])

# Mark the factor columns as categorical so the model treats them as factors.
df['A'] = df['A'].astype('category')
df['B'] = df['B'].astype('category')
df['C'] = df['C'].astype('category')

# Three-factor ANOVA with the A:B interaction. The columns are already
# categorical, so they are used directly; this also avoids wrapping terms in
# the formula helper C(...) while a data column is itself named "C".
model_with_one_interaction = ols('sales ~ A + B + C + A:B', data=df).fit()
anova_table_with_one_interaction = sm.stats.anova_lm(model_with_one_interaction, typ=2)
print(anova_table_with_one_interaction)