import numpy as np
from scipy.stats import norm
# Load the observations: one floating-point value per line.
file_path = '/mnt/data/data.txt'
with open(file_path, 'r', encoding='utf-8') as file:
    # Iterate the file lazily instead of materialising it with readlines(),
    # and skip blank lines (e.g. a trailing empty line), which would otherwise
    # make float('') raise ValueError. float() itself ignores surrounding
    # whitespace, so no explicit strip is needed for the conversion.
    data = np.array([float(line) for line in file if line.strip()])
# EM algorithm for a one-dimensional Gaussian mixture model
def em_algorithm_with_initialization(data, num_iterations, initial_means, initial_variances,
                                     initial_weights, min_variance=1e-6):
    """Fit a 1-D Gaussian mixture to ``data`` with the EM algorithm.

    Parameters
    ----------
    data : array_like
        Non-empty one-dimensional sequence of observations.
    num_iterations : int
        Number of EM iterations to run (no convergence test is performed).
    initial_means, initial_variances, initial_weights : array_like
        Starting parameters, one entry per mixture component. All three
        must be one-dimensional and have the same length.
    min_variance : float, optional
        Lower bound applied to every component variance. It prevents a
        component from collapsing onto a single point, which would give a
        zero standard deviation and NaN densities in the next E-step.

    Returns
    -------
    weights, means, variances : numpy.ndarray
        Estimated mixture parameters, one entry per component.

    Raises
    ------
    ValueError
        If ``data`` is empty or not one-dimensional, or if the initial
        parameter sequences are empty, not one-dimensional, or differ in
        length.
    """
    data = np.asarray(data, dtype=float)
    weights = np.array(initial_weights, dtype=float)
    means = np.array(initial_means, dtype=float)
    variances = np.array(initial_variances, dtype=float)

    if data.ndim != 1 or data.size == 0:
        raise ValueError("data must be a non-empty one-dimensional sequence")
    if weights.ndim != 1 or weights.size == 0:
        raise ValueError("initial_weights must be a non-empty one-dimensional sequence")
    if means.shape != weights.shape or variances.shape != weights.shape:
        raise ValueError(
            "initial_means, initial_variances and initial_weights must have the same length"
        )

    variances = np.maximum(variances, min_variance)

    for _ in range(num_iterations):
        # E-step: responsibility of each component (rows) for each data
        # point (columns). Working with log-densities and subtracting the
        # per-point maximum before exponentiating keeps the normalisation
        # finite even when every raw density would underflow to zero
        # (e.g. for an outlier far from all components).
        with np.errstate(divide="ignore"):
            # A component whose weight reached zero gets log-weight -inf,
            # i.e. zero responsibility; that is intended, not an error.
            log_weights = np.log(weights)
        log_joint = log_weights[:, None] + norm.logpdf(
            data[None, :], means[:, None], np.sqrt(variances)[:, None]
        )
        log_joint -= log_joint.max(axis=0, keepdims=True)
        responsibilities = np.exp(log_joint)
        responsibilities /= responsibilities.sum(axis=0, keepdims=True)

        # M-step: update weights, means and variances.
        component_mass = responsibilities.sum(axis=1)
        weights = component_mass / data.size

        # A component that received no responsibility at all keeps its
        # previous mean and variance instead of producing 0/0 = NaN.
        has_mass = component_mass > 0
        safe_mass = np.where(has_mass, component_mass, 1.0)

        new_means = (responsibilities * data).sum(axis=1) / safe_mass
        means = np.where(has_mass, new_means, means)

        squared_deviation = (data[None, :] - means[:, None]) ** 2
        new_variances = (responsibilities * squared_deviation).sum(axis=1) / safe_mass
        variances = np.where(has_mass, np.maximum(new_variances, min_variance), variances)

    return weights, means, variances
# Derive starting parameters for a three-component mixture from the sample
# mean and standard deviation of the loaded data.
data_mean = np.mean(data)
data_std = np.std(data)
data_variance = data_std**2

# Means are the sample mean scaled by 0.9 / 1.0 / 1.1; variances are the
# sample variance scaled by 0.5 / 1.0 / 1.5; weights are uniform.
# NOTE: when data_mean is 0 all three initial means coincide.
initial_means = [data_mean * scale for scale in (0.9, 1.0, 1.1)]
initial_variances = [data_variance * scale for scale in (0.5, 1.0, 1.5)]
initial_weights = [1 / 3] * 3

# Run 100 EM iterations from that starting point.
weights, means, variances = em_algorithm_with_initialization(
    data,
    num_iterations=100,
    initial_means=initial_means,
    initial_variances=initial_variances,
    initial_weights=initial_weights,
)

# Report the fitted parameters.
for label, values in (("Weights:", weights), ("Means:", means), ("Variances:", variances)):
    print(label, values)