我在本地训练一个深度学习模型时出现了这个错误,而同样的代码在 Colab 上可以正常运行。
具体代码如下:
def run_model():
    """Train the model for the current fold, then save its checkpoint and loss curves.

    Takes no arguments and returns nothing. It reads the following
    module-level names, which must be set before the call: ``strategy``,
    ``get_compiled_model``, ``train_generator``, ``validation_generator``,
    ``batch_size``, ``num_epoches``, ``args`` (``structure``, ``model_name``,
    ``lr``), ``database``, ``image_size`` and the fold index ``i``.

    Side effects:
        * creates ``./models/`` and ``./loss`` if they are missing;
        * writes the checkpoint with the lowest ``val_loss`` to
          ``models/<run>.h5``;
        * writes per-epoch AUC/loss values to ``loss/<run>.csv``.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    import math

    # Everything that creates variables should be under the strategy scope.
    # In general this is only model construction & `compile()`.
    with strategy.scope():
        model = get_compiled_model()

    # Step counts must be whole numbers; rounding up keeps the final,
    # partially filled batch instead of passing a float to `fit`.
    train_steps = math.ceil(len(train_generator.labels) / batch_size)
    val_steps = math.ceil(len(validation_generator.labels) / batch_size)

    # One name shared by the checkpoint and the CSV so the two always match.
    run_name = (
        f"breast-{args.structure}-fold{i + 1}-{database}-{args.model_name}"
        f"-{image_size}-{batch_size}-{args.lr}"
    )

    # Keep only the model with the lowest validation loss seen during training.
    save_model_dir = './models/'
    os.makedirs(save_model_dir, exist_ok=True)
    filepath = "models/" + run_name + ".h5"
    checkpoint = ModelCheckpoint(
        filepath,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        mode='min',
    )

    history = model.fit(
        train_generator,
        epochs=num_epoches,
        steps_per_epoch=train_steps,
        validation_data=validation_generator,
        validation_steps=val_steps,
        use_multiprocessing=False,
        workers=10,
        callbacks=[checkpoint],
    )

    # Persist the training curves.
    # NOTE(review): the 'auc' / 'val_auc' keys presumably come from a metric
    # named "auc" set up in get_compiled_model() -- confirm there.
    d_loss = pd.DataFrame({
        'train_auc': history.history['auc'],
        'val_auc': history.history['val_auc'],
        'train_loss': history.history['loss'],
        'val_loss': history.history['val_loss'],
    })
    save_loss_dir = './loss'
    os.makedirs(save_loss_dir, exist_ok=True)
    d_loss.to_csv("loss/" + run_name + ".csv", index=False)
# In[17]:
# Run the five folds one after another. The names bound in the loop body
# (i, train_generator, validation_generator, num_classes, ...) are
# module-level and are read by run_model().
for i in range(5):
    # An earlier layout read "dataframe/breast_train_fold<i+1>.csv" and
    # "dataframe/breast_val_fold<i+1>.csv"; the per-fold files below are the
    # ones currently used.
    df_train = pd.read_csv(f"fold{i}_train.csv")
    df_val = pd.read_csv(f"fold{i}_test.csv")

    # NOTE(review): both generators rescale by 1/255 and also apply
    # preprocess_input -- confirm that preprocess_input expects [0, 1] input.
    augmentation = {
        'rotation_range': 10,
        'width_shift_range': 0.1,
        'height_shift_range': 0.1,
        'shear_range': 0.1,
        'zoom_range': 0.1,
        'horizontal_flip': True,
        'fill_mode': 'nearest',
    }
    # Training images get random augmentation on top of the normalisation.
    train_data_generator = ImageDataGenerator(
        rescale=1./255,
        preprocessing_function=preprocess_input,
        **augmentation,
    )
    # Validation images are only normalised.
    data_generator = ImageDataGenerator(
        rescale=1./255,
        preprocessing_function=preprocess_input,
    )

    # Both flows read the same columns with identical settings.
    flow_options = {
        'x_col': 'filename',
        'y_col': 'label',
        'target_size': (image_size, image_size),
        'batch_size': batch_size,
        'shuffle': True,
        'seed': 726,
        'class_mode': 'categorical',
    }
    train_generator = train_data_generator.flow_from_dataframe(
        dataframe=df_train, **flow_options
    )
    validation_generator = data_generator.flow_from_dataframe(
        dataframe=df_val, **flow_options
    )

    num_classes = len(train_generator.class_indices)
    run_model()
当时我以为是我的 TensorFlow 版本不对,但后来发现 Colab 上的版本也只是 2.12。
于是我谷歌了一下,发现有人提到要把 use_multiprocessing 改为 False。起初我以为是要在 train_data_generator.flow_from_dataframe 里添加这个参数,却没注意到:
# Excerpt of the training call: `use_multiprocessing` is a keyword argument
# of `model.fit` itself, and it is passed as False here.
history = model.fit(
train_generator,
epochs=num_epoches,
steps_per_epoch=train_steps,
validation_data=validation_generator,
validation_steps=val_steps,
use_multiprocessing=False,  # the setting this post is about
workers=10,
callbacks=[checkpoint])
model.fit 本身就有 use_multiprocessing 这个参数,上面的 False 是我修正之后的值,所以目前代码已经可以在笔记本上运行了。
顺带说一下,M2 芯片还是比较强的,不过长时间工作时风扇转速很高,感觉 CPU 都要烧了;训练个小项目还是比较方便的。