python中位数为nan_Python Scipy stats.nanmedian()用法及代码示例

本文介绍了如何使用Python Scipy库的nanmedian()函数计算包含Nan值的数组的中位数。通过示例代码展示了在不同维度数据上的应用,以及与普通median()函数的区别。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

scipy.stats.nanmedian(array, axis=0)函数在沿数组的指定轴计算中位数时,会忽略其中的NaN(Not a Number,非数字)值。注意:该函数已在较新版本的SciPy中被移除,请改用功能等价的numpy.nanmedian()。

参数:

array:具有元素(包括Nan值)的输入数组或对象,以计算中位数。

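axis:计算中位数所沿的轴。scipy.stats.nanmedian的默认值为axis=0;下面示例代码实际调用的是NumPy的nanmedian(旧版SciPy根命名空间中的scipy.nanmedian只是它的别名),其默认值为axis=None,即先将数组展平再计算。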

返回:基于设置参数的数组元素的中位数(忽略Nan值)。

代码1:

# Median of a 1-D list, with and without NaN handling.
#
# scipy.median / scipy.nanmedian were re-exports of the NumPy functions in the
# SciPy root namespace (deprecated in SciPy 1.4 and gone from later releases),
# so the NumPy functions are called directly.

import numpy as np

arr1 = [1, 3, np.nan, 27, 2, 5]

# nanmedian ignores the NaN entry before taking the median.
print("median using nanmedian:", np.nanmedian(arr1))

# median propagates NaN, so the result is nan.
print("median without handling nan value:", np.median(arr1))

输出:

median using nanmedian: 3.0

median without handling nan value: nan

代码2:多维数据

# Median of a 2-D list: over the flattened data, along axis 0 and along axis 1.
#
# `from scipy import median` / `from scipy import nanmedian` relied on NumPy
# aliases in the SciPy root namespace (deprecated in SciPy 1.4 and gone from
# later releases); the same functions are imported from NumPy instead.

from numpy import median
from numpy import nanmedian
import numpy as np

arr1 = [
    [1, 3, 27],
    [3, np.nan, 6],
    [np.nan, 6, 3],
    [3, 6, np.nan],
]

# No axis given: NumPy's default is axis=None, i.e. the flattened data.
print("median is:", median(arr1))
print("median handling nan:", nanmedian(arr1))

# NOTE: the labels below keep the original wording. axis=0 and axis=1 are
# passed explicitly here; neither is NumPy's default.

# using axis = 0 (one result per column)
print("\nmedian is with default axis = 0:\n",
      median(arr1, axis=0))
print("\nmedian handling nan with default axis = 0:\n",
      nanmedian(arr1, axis=0))

# using axis = 1 (one result per row)
print("\nmedian is with default axis = 1:\n",
      median(arr1, axis=1))
print("\nmedian handling nan with default axis = 1:\n",
      nanmedian(arr1, axis=1))

输出:

median is: nan

median handling nan: 3.0

median is with default axis = 0:

[ nan nan nan]

median handling nan with default axis = 0:

[ 3. 6. 6.]

median is with default axis = 1:

[ 3. nan nan nan]

median handling nan with default axis = 1:

[ 3. 4.5 4.5 4.5]

import numpy as np
import pywt
import matplotlib.pyplot as plt
import scipy.io as scio
from sklearn.cluster import DBSCAN
from sklearn.manifold import TSNE
from sklearn.preprocessing import StandardScaler
from scipy.stats import entropy
from sklearn.svm import OneClassSVM
import joblib


def sliding_window(sequence, window_size, step_size):
    """Split a one-dimensional sequence into fixed-size windows.

    Args:
        sequence (array-like): One-dimensional input sequence.
        window_size (int): Number of samples in each window.
        step_size (int): Offset between the starts of consecutive windows.

    Returns:
        np.ndarray: Array with one window per row. A trailing remainder
        shorter than ``window_size`` is not included.
    """
    sequence = np.asarray(sequence)
    n = len(sequence)
    windows = [
        sequence[i:i + window_size]
        for i in range(0, n - window_size + 1, step_size)
    ]
    return np.array(windows)


if __name__ == "__main__":
    # 1. Load the data
    sliding_length, sliding_step = 16384, 16384
    dataFile = r'.\subei GGZQ-DIS-G11-001-05.mat'
    dataTemp = scio.loadmat(dataFile)
    dataSet = np.array(dataTemp['mm'])
    dataSet = np.squeeze(dataSet)
    print(dataSet.shape)
    signals = sliding_window(dataSet, sliding_length, sliding_step)
    print(signals.shape)

    # 2. Build one feature vector per window
    feature_matrix = []
    for window in signals:
        # Wavelet features: variance of every coefficient band except the first.
        # NOTE(review): presumably band 0 is the approximation and the rest are
        # the detail bands (trim_approx=True) -- verify against the pywt.swt docs.
        coeffs = pywt.swt(window, wavelet='db1', level=4,
                          trim_approx=True, norm=True)
        wavelet_features = np.var(np.array(coeffs)[1:, :], axis=1)

        # Four additional features.
        # Variance of the first-order differences.
        diff = window[:-1] - window[1:]
        diff_var = np.var(diff)
        # Peak-to-peak range.
        extremes = np.max(window) - np.min(window)
        # Mean value.
        mean_value = np.mean(window)
        # Entropy of the normalised 10-bin histogram. The original comment
        # called this "cross-entropy"; scipy.stats.entropy with a single
        # distribution computes its Shannon entropy.
        hist, _ = np.histogram(window, bins=10)
        cross_entropy = entropy(hist / hist.sum())

        new_features = [diff_var, extremes, mean_value, cross_entropy]
        combined_features = np.concatenate((wavelet_features, new_features))
        feature_matrix.append(combined_features)
    X = feature_matrix = np.vstack(feature_matrix)

    # Standardise the features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # 3. Cluster with DBSCAN
    dbscan = DBSCAN(eps=2, min_samples=5)
    labels = dbscan.fit_predict(X_scaled)

    # 4. Locate the distorted (anomalous) windows.
    # np.flatnonzero always yields a 1-D index array. The previous
    # np.squeeze(np.array(np.nonzero(labels))) collapsed to a 0-d array when
    # exactly one window was flagged, so `.shape[0]` raised IndexError.
    # NOTE(review): every label other than 0 counts as anomalous here (noise -1
    # and any further clusters 1, 2, ...), while step 6 only excludes label -1
    # from the "normal" training set -- confirm this is intended.
    anomaly_indices = np.flatnonzero(labels)
    anomaly_indices_start = anomaly_indices * sliding_step
    anomaly_indices_end = anomaly_indices_start + sliding_length

    # 5. Reduce to two dimensions with t-SNE.
    # Keep perplexity below the number of samples.
    n_samples = X_scaled.shape[0]
    perplexity = min(30, n_samples - 1)
    tsne = TSNE(n_components=2, perplexity=perplexity)
    X_tsne = tsne.fit_transform(X_scaled)

    # 6. Train the one-class SVM on the samples DBSCAN did not mark as noise.
    normal_indices = np.where(labels != -1)[0]
    X_normal = X_scaled[normal_indices]
    oc_svm = OneClassSVM(nu=0.05, kernel="rbf", gamma=0.1)
    oc_svm.fit(X_normal)

    # 7. Persist the OC-SVM model and the scaler
    joblib.dump(oc_svm, 'oc_svm_model2.pkl')
    joblib.dump(scaler, 'scaler.pkl')

    # 8. Plot the t-SNE embedding coloured by cluster, then the raw signal
    #    with the flagged windows shaded.
    plt.figure(figsize=(8, 6))
    plt.scatter(X_tsne[:, 0], X_tsne[:, 1], c=labels, cmap='viridis', s=50)
    plt.title("t-SNE Visualization based on Clustering Results")
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.colorbar(label="Cluster Label")
    plt.show()

    plt.figure(11)
    plt.plot(dataSet)
    print(anomaly_indices.shape[0])
    for span_start, span_end in zip(anomaly_indices_start, anomaly_indices_end):
        plt.axvspan(span_start, span_end, facecolor='red', alpha=0.3)
    plt.show()

# Author's question that accompanied this script (translated):
# In my code I extract an 8-dimensional feature vector from every window of the
# input data and train an OC-SVM on it, which gives a decision boundary saved
# as oc_svm_model2.pkl. However, that boundary was learned in the 8-dimensional
# feature space. I now want to use t-SNE to reduce it to 2 dimensions so the
# decision boundary can be visualised. Please organise the approach and give me
# working code; the code you output should first be run until it no longer
# raises errors.
最新发布
03-23
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值