R2. Matplotlib: Creating Subplots

This post shows how to use matplotlib's subplot() function to create multiple subplots and draw a sine function, a cosine function, and a linear function in separate panels. Three subplots are created with subplot(221), subplot(222), and subplot(212), and each curve is drawn with plot(). Finally, the figure is saved as 'image.png' at 100 dpi and displayed.

1. The subplot() function

In matplotlib, a figure can be divided into several plotting regions, and a different image can be drawn in each region; this is called creating subplots. It is done with the subplot() function.

Format: subplot(numRows, numCols, plotNum)

Parameters:
numRows: the number of rows the whole plotting area is divided into
numCols: the number of columns the whole plotting area is divided into
plotNum: the index of the region currently selected for drawing

subplot() divides the whole plotting area into numRows (rows) x numCols (columns) subregions, then numbers the subregions from left to right and top to bottom, with the top-left region numbered 1. If numRows, numCols, and plotNum are all less than 10, they can be abbreviated to a single integer; for example, subplot(223) and subplot(2, 2, 3) are equivalent. subplot() creates an axes in the region specified by plotNum. Classically, if the newly created axes overlaps a previously created one, the earlier axes is deleted; recent matplotlib releases have deprecated this silent removal, so do not rely on it.
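The overlap rule can be seen directly. A minimal sketch, assuming the classic behavior described above (recent matplotlib releases may warn or keep both axes instead):

import matplotlib.pyplot as plt

plt.subplot(221)    # top-left cell of a 2x2 grid
plt.plot([0, 1], [0, 1])
plt.subplot(211)    # top half of a 2x1 grid: overlaps the axes above,
                    # so the first axes is removed (classic behavior)
plt.plot([0, 1], [1, 0])
plt.show()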

1.1 Example

Create three subplots and plot a sine function, a cosine function, and a linear function.

import numpy as np
import matplotlib.pyplot as plt
x = np.linspace(0, 10, 1000)

Usage: np.linspace(start, stop, N)
Purpose: generates N evenly spaced points over the closed interval from start to stop, where start, stop, and N are the start value, the end value, and the number of points. If N is omitted, NumPy defaults to 50 points (the MATLAB command of the same name defaults to 100). Run help(np.linspace) for the full documentation.
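For example, with a small N the endpoints and spacing are easy to verify:

>>> import numpy as np
>>> np.linspace(0, 10, 5)
array([ 0. ,  2.5,  5. ,  7.5, 10. ])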

y = np.sin(x)
z = np.cos(x)
k = x
# first row, left panel: subplot(221) == subplot(2, 2, 1)
plt.subplot(221)
plt.plot(x, y, label="$sin(x)$", color="red", linewidth=2)
plt.title("sin(x)")
plt.legend()
# first row, right panel
plt.subplot(222)
plt.plot(x, z, "b--", label="$cos(x)$")
plt.title("cos(x)")
plt.legend()
# entire second row
plt.subplot(212)
plt.plot(x, k, "g--", label="$x$")
plt.title("y=x")
plt.legend()
# dpi sets the resolution of the saved image; the default depends on the
# matplotlib version (typically 100 in current releases)
plt.savefig('image.png', dpi=100)
plt.show()
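The same three-panel layout can also be built with matplotlib's object-oriented interface. A minimal sketch using fig.add_subplot, which takes the same row/column/index arguments as plt.subplot:

import numpy as np
import matplotlib.pyplot as plt

x = np.linspace(0, 10, 1000)
fig = plt.figure()
ax1 = fig.add_subplot(2, 2, 1)          # top-left
ax1.plot(x, np.sin(x), "r", label="$sin(x)$")
ax1.set_title("sin(x)")
ax1.legend()
ax2 = fig.add_subplot(2, 2, 2)          # top-right
ax2.plot(x, np.cos(x), "b--", label="$cos(x)$")
ax2.set_title("cos(x)")
ax2.legend()
ax3 = fig.add_subplot(2, 1, 2)          # whole second row
ax3.plot(x, x, "g--", label="$x$")
ax3.set_title("y=x")
ax3.legend()
fig.tight_layout()
plt.show()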


[Figure: image.png, the saved output: sin(x) top-left, cos(x) top-right, y=x across the bottom row]
