解决seaborn报错ValueError: zero-size array to reduction

先说结论:问题出在matplotlib的版本上,把matplotlib从3.3.1降级到3.2.2即可解决。

在使用seaborn完成机器学习绘图时,遇到了一个很奇怪的报错,我使用的代码如下:

import seaborn as sns

sns.scatterplot(
    x='var1',
    y='var2',
    hue='var3',
    data=dataset,
)

执行上述代码时,即使已经确认数据集和数据逻辑都没有问题,仍然会出现如下报错:

ValueError: zero-size array to reduction operation minimum which has no identity

尝试了很多网上的办法,都没有效果;更换了多个seaborn版本,也不行。最后偶然发现,只要注释掉hue这个参数,代码就可以正常运行。因此猜测,问题很可能既不在我的数据集或代码逻辑,也不在seaborn本身,而是seaborn与它所依赖的其他绘图库之间存在版本冲突。

sns.scatterplot(
    x='var1',
    y='var2',
#    hue='var3',
    data=dataset,
)

果然,当我把matplotlib从3.3.1降级到3.2.2(例如执行 pip install matplotlib==3.2.2)之后,带hue参数的那段代码也不再报错了。由于seaborn是基于matplotlib构建的,推测是matplotlib 3.3.1这个较新的版本存在bug,或者与我使用的seaborn版本不兼容。

# 7. 可视化结果 fig2 = plt.figure(figsize=(12, 8)) labelFont = {"size": 8} fig2.suptitle(errResult, fontdict={"color": "red", "family": "serif"}) gs = gridspec.GridSpec(5, 2) # 7.1. 定位点的平均值与实际点的对应关系 ax21 = fig2.add_subplot(gs[0: 3, 0], projection="3d") ax21.set_title("Localization Array & RMS Distribution") ax21.set_xlabel("X[mm]", labelFont) ax21.set_ylabel("Y[mm]", labelFont) ax21.set_zlabel("Z[mm]", labelFont) ax21.set_proj_type('ortho') zmin = min(np.min(pointsRT[:, 2]), np.min(staPosAvg[:, 2])) zmax = max(np.max(pointsRT[:, 2]), np.max(staPosAvg[:, 2])) ax21.set_zlim(zmin - 10, zmax + 10) ax21.tick_params(labelsize=8) # 颜色编码:误差≤1mm为蓝色,>1mm为红色 colors = ['b' if errRMS <= 1.0 else 'r' for errRMS in errRMSs] # 绘制参考点和测量点 ax21.scatter(pointsRT[:, 0], pointsRT[:, 1], pointsRT[:, 2], c='k', marker='.') ax21.scatter(staPosAvg[:, 0], staPosAvg[:, 1], staPosAvg[:, 2], marker='o', facecolors='none', edgecolors=colors, s=50 * errRMSs) # 添加RMS误差标签 for i in range(n): rms = np.round(errRMSs[i], 1) ax21.text(pointsRT[i, 0] + 15, pointsRT[i, 1] + 15, pointsRT[i, 2], rms, size=8, color="k") # 7.2. 误差分布图:重复度和RMS大小 ax22 = fig2.add_subplot(gs[0: 3, 1], projection="3d") ax22.set_title("3d error (size = Rep, color = Bias)") ax22.set_xlabel("X[mm]", labelFont) ax22.set_ylabel("Y[mm]", labelFont) ax22.set_zlabel("Z[mm]", labelFont) ax22.tick_params(labelsize=8) ax22.set_zlim(zmin - 10, zmax + 10) cmap2 = plt.colormaps["seismic"] asc2 = ax22.scatter(pointsRT[:, 0], pointsRT[:, 1], pointsRT[:, 2], s=errReps * 100, c=absBiasArr, cmap=cmap2) cb2 = fig2.colorbar(asc2, ax=ax22, shrink=0.8) cb2.ax.tick_params(labelsize=8) # 7.3. 
绝对误差与定位距离的箱线图 distances = np.linalg.norm(pointsRT, axis=1) disGap = 50 disRange = np.ceil(distances / disGap) * disGap # 将绝对距离向下取整 ax23 = fig2.add_subplot(gs[3: 5, 0]) ax23.set_title("absolute error vs absolute distance") ax23.set_xlabel("absolute distance [mm]") ax23.set_ylabel("absolute error [mm]") # seaborn.boxplot(x=disRange, y=errRMSs, ax=ax23, # boxprops={"color": "blue", "facecolor": "white"}, # medianprops={'linestyle': '--', 'color': 'red'}, # flierprops={'marker': '+', 'markeredgecolor': 'red'}) # 用于绘制小提琴图 disBox = np.arange(min(disRange), max(disRange + disGap), disGap) disBoxInd = [np.where(disRange == box)[0] for box in disBox] errRmsBox = [errRMSs[ind] for ind in disBoxInd] ax23.violinplot(errRmsBox, positions=disBox, showmedians=True, widths=20) ax23.plot([min(disRange), max(disRange)], [1, 1], 'g--', linewidth=2) ax23.grid() # 7.4. 绝对误差分布直方图 ax24 = fig2.add_subplot(gs[3: 5, 1]) ax24.set_title("absolute error distribution") ax24.set_xlabel("absolute error[mm]") ax24.set_ylabel("density[100%]") ax24.hist(errRmsAll, bins=20, linewidth=0.8, edgecolor="black", density=True) plt.show() 上一句对话没有发完代码,这才是完整的代码,刚才发送的部分并不包括最重要的errRmsBox 参数生成
07-30
import SimpleITK as sitk from radiomics import featureextractor, setVerbosity, getFeatureClasses import matplotlib.pyplot as plt import seaborn as sns import numpy as np import os import pandas as pd import logging import matplotlib.colors as mcolors from matplotlib.colors import LinearSegmentedColormap from skimage import measure from mpl_toolkits.mplot3d.art3d import Poly3DCollection # 添加缺失的导入 def set_chinese_font(): from matplotlib import font_manager font_path = "C:/Windows/WinSxS/amd64_microsoft-windows-font-truetype-simhei_31bf3856ad364e35_10.0.26100.1_none_f11b5ebedca17dd9/simhei.ttf" # 或 msyh.ttc、STSong.ttf 等 font_prop = font_manager.FontProperties(fname=font_path) plt.rcParams['font.family'] = font_prop.get_name() plt.rcParams['axes.unicode_minus'] = False # 设置日志详细程度 setVerbosity(logging.INFO) # 设置路径 base_path = r"C:\Users\53145\OneDrive\Desktop\1\radiomics\radiomic\p" image_dir = os.path.join(base_path, "image") mask_dir = os.path.join(base_path, "mask") results_dir = os.path.join(base_path, "results") os.makedirs(results_dir, exist_ok=True) # 设定设置 settings = { "binWidth": 25, "resampledPixelSpacing": [1, 1, 1], "interpolator": sitk.sitkBSpline, "normalize": True, "voxelBased": True # 启用基于体素的特征提取 } # 初始化特征提取器 extractor = featureextractor.RadiomicsFeatureExtractor(**settings) extractor.enableImageTypes(Original={}, LoG={}, Wavelet={}) # 打印启用的特征 print("启用的图像类型:", extractor.enabledImagetypes) print("启用的特征类:", getFeatureClasses().keys()) # ************ 数据准备 ************ img_file = "3.nii.gz" # 这里替换成你要处理的图像文件名 base_name = img_file.replace(".nii.gz", "") img_path = os.path.join(image_dir, img_file) mask_file = f"{base_name}mask.nii.gz" mask_path = os.path.join(mask_dir, mask_file) try: if not os.path.exists(mask_path): raise FileNotFoundError(f"掩膜文件 {mask_file} 不存在") # 读取图像和掩膜 image = sitk.ReadImage(img_path) mask = sitk.ReadImage(mask_path) # 打印图像信息 print(f"图像尺寸: {image.GetSize()}") print(f"图像间距: {image.GetSpacing()}") print(f"掩膜尺寸: {mask.GetSize()}") 
print(f"掩膜间距: {mask.GetSpacing()}") # 强制空间对齐 resampler = sitk.ResampleImageFilter() resampler.SetReferenceImage(image) resampler.SetInterpolator(sitk.sitkNearestNeighbor) aligned_mask = resampler.Execute(mask) # 检查掩膜有效性 mask_array = sitk.GetArrayFromImage(aligned_mask) if np.sum(mask_array) < 10: raise ValueError("掩膜区域太小,无法提取特征") # ************ 特征提取 ************ print("正在提取特征图...") # 提取特征 feature_vector = extractor.execute(image, aligned_mask, voxelBased=True) # 检查特征提取结果 if not feature_vector: raise ValueError("未提取到任何特征") # 查找我们需要的特征图 target_feature_name = "wavelet-LHH_glszm_ZoneEntropy" feature_map = None # 尝试找到匹配的特征图 for feature_name, feature_value in feature_vector.items(): if target_feature_name in feature_name: if isinstance(feature_value, sitk.Image): feature_map = feature_value target_feature_name = feature_name # 使用完整的特征名 break if feature_map is None: # 如果找不到特定特征,使用第一个特征图 for feature_name, feature_value in feature_vector.items(): if isinstance(feature_value, sitk.Image): feature_map = feature_value target_feature_name = feature_name print(f"未找到 '{target_feature_name}',使用第一个特征图: {feature_name}") break if feature_map is None: raise ValueError("未提取到任何特征图") print(f"使用特征图: {target_feature_name}") print(f"特征图尺寸: {feature_map.GetSize()}") # 转换为NumPy数组 feature_array = sitk.GetArrayFromImage(feature_map) # ************ 特征图可视化 ************ print("可视化特征图...") # 创建自定义颜色映射 colors = ["darkblue", "blue", "cyan", "green", "yellow", "orange", "red"] cmap = LinearSegmentedColormap.from_list("custom_cmap", colors) # 确定要可视化的切片范围 z_start = max(0, feature_array.shape[0] // 3) z_end = min(feature_array.shape[0], 2 * feature_array.shape[0] // 3) # 创建多切片可视化 fig, axes = plt.subplots(3, 3, figsize=(15, 12)) fig.suptitle(f"特征图可视化: {target_feature_name}\n{base_name}", fontsize=16) # 选择9个切片均匀分布在ROI范围内 slice_indices = np.linspace(z_start, z_end - 1, 9, dtype=int) # 计算全局最小值和最大值用于颜色标准化 # 注意:背景为0,我们只考虑非零区域 non_zero_mask = (feature_array != 0) if non_zero_mask.any(): vmin = 
np.percentile(feature_array[non_zero_mask], 5) vmax = np.percentile(feature_array[non_zero_mask], 95) else: vmin = 0 vmax = 1 for i, slice_idx in enumerate(slice_indices): ax = axes[i // 3, i % 3] slice_data = feature_array[slice_idx] # 显示热图 sns.heatmap(slice_data, cmap=cmap, cbar=i == 8, # 只在最后一个子图显示颜色条 ax=ax, vmin=vmin, vmax=vmax, xticklabels=False, yticklabels=False) ax.set_title(f"切片 {slice_idx}") set_chinese_font() plt.tight_layout(rect=[0, 0, 1, 0.96]) output_path = os.path.join(results_dir, f"{base_name}_{target_feature_name.replace(':', '_')}_feature_map.png") plt.savefig(output_path, dpi=300) plt.close() # 关闭图形以释放内存 print(f"特征图已保存为: {output_path}") except Exception as e: print(f"处理失败:{img_file} | 错误:{str(e)}") import traceback traceback.print_exc()
06-22
import pandas as pd import matplotlib.pyplot as plt import seaborn as sns import numpy as np # 设置中文字体和图形样式 plt.rcParams['font.sans-serif'] = ['SimHei', 'Arial Unicode MS', 'DejaVu Sans'] plt.rcParams['axes.unicode_minus'] = False plt.rcParams['figure.dpi'] = 100 # 读取处理后的数据 df = pd.read_excel('嵌套数据处理.xlsx') print(f"数据形状: {df.shape}") print("\n数据基本信息:") print(df.info()) # 使用十分位数将BMI平均分为10组(0%, 10%, 20%, ..., 100%) bmi_quantiles = df['孕妇BMI'].quantile([i / 10 for i in range(11)]) print(f"\nBMI十分位数分组边界: {bmi_quantiles.values}") # 创建BMI分组(使用十分位数) bmi_bins = bmi_quantiles.values bmi_labels = [] for i in range(len(bmi_bins) - 1): bmi_labels.append(f'BMI组{i + 1}: {bmi_bins[i]:.1f}-{bmi_bins[i + 1]:.1f}') df['BMI分组'] = pd.cut(df['孕妇BMI'], bins=bmi_bins, labels=bmi_labels, include_lowest=True) # 使用十分位数将检测孕周平均分为10组 week_quantiles = df['检测孕周'].quantile([i / 10 for i in range(11)]) print(f"检测孕周十分位数分组边界: {week_quantiles.values}") # 创建检测孕周分组(使用十分位数) week_bins = week_quantiles.values week_labels = [] for i in range(len(week_bins) - 1): week_labels.append(f'孕周组{i + 1}: {week_bins[i]:.1f}-{week_bins[i + 1]:.1f}周') df['孕周分组'] = pd.cut(df['检测孕周'], bins=week_bins, labels=week_labels, include_lowest=True) print("\nBMI分组统计:") bmi_counts = df['BMI分组'].value_counts().sort_index() print(bmi_counts) print("\n孕周分组统计:") week_counts = df['孕周分组'].value_counts().sort_index() print(week_counts) # 创建第一个图形:不同BMI分组下 Y染色体浓度与检测孕周的散点图 fig1, axes1 = plt.subplots(2, 5, figsize=(20, 10)) # 减小画布(原25x15 → 20x10) axes1 = axes1.flatten() # 确保分组按组号排序 bmi_groups = df['BMI分组'].dropna().unique() bmi_groups_sorted = sorted(bmi_groups, key=lambda x: int(x.split('组')[1].split(':')[0])) bmi_groups = bmi_groups_sorted for i, (ax, bmi_group) in enumerate(zip(axes1, bmi_groups)): group_data = df[df['BMI分组'] == bmi_group] if len(group_data) > 0: colors = plt.cm.Spectral_r(np.linspace(0, 1, len(bmi_groups))) ax.scatter(group_data['检测孕周'], group_data['Y染色体浓度'], alpha=0.7, s=25, c=[colors[i]], edgecolors='none') # 减小点大小 if 
len(group_data) > 1: valid_data = group_data[['检测孕周', 'Y染色体浓度']].dropna() if len(valid_data) > 1: z = np.polyfit(valid_data['检测孕周'], valid_data['Y染色体浓度'], 1) p = np.poly1d(z) x_range = np.linspace(valid_data['检测孕周'].min(), valid_data['检测孕周'].max(), 100) ax.plot(x_range, p(x_range), "red", linewidth=1.5, alpha=0.8) correlation = valid_data['检测孕周'].corr(valid_data['Y染色体浓度']) ax.text(0.05, 0.95, f'r = {correlation:.3f}\nn = {len(group_data)}', transform=ax.transAxes, fontsize=10, bbox=dict(boxstyle="round,pad=0.4", facecolor="white", alpha=0.9, edgecolor='lightgray')) ax.set_xlabel('检测孕周', fontsize=11) ax.set_ylabel('Y染色体浓度', fontsize=11) ax.set_title(f'{bmi_group}', fontsize=12, pad=15) ax.grid(True, alpha=0.3, linestyle='-', linewidth=0.5) ax.tick_params(axis='both', which='major', labelsize=9) # 缩小刻度字体 ax.set_xlim(df['检测孕周'].min() - 0.5, df['检测孕周'].max() + 0.5) ax.set_ylim(df['Y染色体浓度'].min() - 0.005, df['Y染色体浓度'].max() + 0.005) # 隐藏多余子图 for i in range(len(bmi_groups), len(axes1)): axes1[i].set_visible(False) plt.suptitle('不同BMI分组下 Y染色体浓度与检测孕周的关系', fontsize=14, fontweight='bold', y=0.96) plt.subplots_adjust( left=0.06, right=0.96, bottom=0.12, # 留出足够下边距 top=0.90, # suptitle 留空间 hspace=0.5, # 垂直间距(因紧凑而略增) wspace=0.35 # 水平间距 ) plt.savefig('不同BMI十分位数分组_孕周与Y染色体浓度.png', dpi=300, bbox_inches='tight', facecolor='white') plt.show() # 计算各分组的相关系数矩阵 correlation_results = [] # BMI分组下的相关系数(孕周 vs Y染色体浓度) for bmi_group in bmi_groups: group_data = df[df['BMI分组'] == bmi_group] if len(group_data) > 1: valid_data = group_data[['检测孕周', 'Y染色体浓度']].dropna() if len(valid_data) > 1: correlation = valid_data['检测孕周'].corr(valid_data['Y染色体浓度']) correlation_results.append({ '分组类型': 'BMI分组', '分组名称': bmi_group, '分组编号': f'组{bmi_groups.index(bmi_group) + 1}', '样本数量': len(group_data), '相关系数': correlation, '变量关系': '孕周 vs Y染色体浓度', '分组下界': bmi_bins[bmi_groups.index(bmi_group)], '分组上界': bmi_bins[bmi_groups.index(bmi_group) + 1] }) # 孕周分组下的相关系数(BMI vs Y染色体浓度) for week_group in week_groups: group_data = 
df[df['孕周分组'] == week_group] if len(group_data) > 1: valid_data = group_data[['孕妇BMI', 'Y染色体浓度']].dropna() if len(valid_data) > 1: correlation = valid_data['孕妇BMI'].corr(valid_data['Y染色体浓度']) correlation_results.append({ '分组类型': '孕周分组', '分组名称': week_group, '分组编号': f'组{week_groups.index(week_group) + 1}', '样本数量': len(group_data), '相关系数': correlation, '变量关系': 'BMI vs Y染色体浓度', '分组下界': week_bins[week_groups.index(week_group)], '分组上界': week_bins[week_groups.index(week_group) + 1] }) # 转换为DataFrame并保存 correlation_df = pd.DataFrame(correlation_results) print("\n=== 各分组相关系数 ===") print(correlation_df.round(3)) # 保存结果 correlation_df.to_excel('十分位数分组相关性分析结果.xlsx', index=False) # 创建相关系数热力图 plt.figure(figsize=(15, 8)) # BMI分组的相关系数 bmi_corr = correlation_df[correlation_df['分组类型'] == 'BMI分组'] plt.subplot(1, 2, 1) bars = plt.bar(range(len(bmi_corr)), bmi_corr['相关系数'], color=plt.cm.Spectral_r(np.linspace(0, 1, len(bmi_corr))), alpha=0.8, edgecolor='gray', linewidth=0.5) plt.xlabel('BMI分组', fontsize=12) plt.ylabel('相关系数', fontsize=12) plt.title('BMI分组: 孕周 vs Y染色体浓度', fontsize=14) plt.xticks(range(len(bmi_corr)), [f'组{i + 1}' for i in range(len(bmi_corr))], rotation=45, fontsize=11) plt.grid(True, alpha=0.3, axis='y') # 在柱子上添加数值标签 for bar, corr in zip(bars, bmi_corr['相关系数']): height = bar.get_height() plt.text(bar.get_x() + bar.get_width() / 2, height + 0.01 * (1 if height >= 0 else -1), f'{corr:.3f}', ha='center', va='bottom' if height >= 0 else 'top', fontsize=10, fontweight='bold') # 孕周分组的相关系数 week_corr = correlation_df[correlation_df['分组类型'] == '孕周分组'] plt.subplot(1, 2, 2) bars = plt.bar(range(len(week_corr)), week_corr['相关系数'], color=plt.cm.magma(np.linspace(0, 1, len(week_corr))), alpha=0.8, edgecolor='gray', linewidth=0.5) plt.xlabel('孕周分组', fontsize=12) plt.ylabel('相关系数', fontsize=12) plt.title('孕周分组: BMI vs Y染色体浓度', fontsize=14) plt.xticks(range(len(week_corr)), [f'组{i + 1}' for i in range(len(week_corr))], rotation=45, fontsize=11) plt.grid(True, alpha=0.3, axis='y') # 
在柱子上添加数值标签 for bar, corr in zip(bars, week_corr['相关系数']): height = bar.get_height() plt.text(bar.get_x() + bar.get_width() / 2, height + 0.01 * (1 if height >= 0 else -1), f'{corr:.3f}', ha='center', va='bottom' if height >= 0 else 'top', fontsize=10, fontweight='bold') plt.tight_layout() plt.savefig('十分位数分组相关系数可视化.png', dpi=300, bbox_inches='tight') plt.show() print("\n分析完成!所有图形和结果已保存。") print(f"总样本量: {len(df)}") print(f"BMI分组数量: {len(bmi_groups)}") print(f"孕周分组数量: {len(week_groups)}") 报错了,请修改
09-06
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值