def fit_evaluate(X_train, X_test, y_train, y_test, pipeline, n_min=10000):
    """Fit *pipeline* on the training split, evaluate it, and save
    residual and learning-curve plots under ``output/``.

    Parameters
    ----------
    X_train, X_test : pandas.DataFrame
        Feature matrices (must support ``.sample`` and ``.index``).
    y_train, y_test : pandas.Series
        Targets index-aligned with the corresponding X.
    pipeline : estimator
        sklearn-style object with a ``.fit`` method; also passed through
        to the ``utils`` plotting/evaluation helpers.
    n_min : int, default 10000
        Upper bound applied when growing the plot sample size
        (capped at the actual number of rows available).
    """
    pipeline_nm = utils.pipeline_name(pipeline)
    print(pipeline_nm)

    # Fit model
    start_time = time.perf_counter()
    pipeline.fit(X_train, y_train)
    end_time = time.perf_counter()
    print('Time elapsed to fit: {:.1f}s'.format(end_time - start_time))

    # Evaluate model
    start_time = time.perf_counter()
    utils.evaluate(X_train, X_test, y_train, y_test, pipeline)
    end_time = time.perf_counter()
    print('Time elapsed to evaluate: {:.1f}s'.format(end_time - start_time))

    # Down-sample both splits for plotting. The train size was previously
    # hard-coded to 10000, which ignored n_min and made DataFrame.sample
    # raise ValueError whenever the training set had fewer than 10000 rows.
    X_sample_train, y_sample_train = _plot_sample(X_train, y_train, n_min)
    X_sample_test, y_sample_test = _plot_sample(X_test, y_test, n_min)

    # Visually inspect residuals for goodness of fitness
    res_fig = utils.plot_residuals(X_sample_train,
                                   X_sample_test,
                                   y_sample_train,
                                   y_sample_test,
                                   pipeline)
    res_fmt = 'output/residual_{}.png'
    res_fig.savefig(res_fmt.format(pipeline_nm), dpi=200)

    # Learning curve
    start_time = time.perf_counter()
    learn_fig = utils.plot_learning_curve([pipeline], X_sample_train, y_sample_train)
    lc_fmt = 'output/learning_curve_{}.png'
    learn_fig.savefig(lc_fmt.format(pipeline_nm), dpi=200)
    end_time = time.perf_counter()
    print('Time elapsed for learning curves: {:.1f}s'.format(end_time - start_time))

    # NOTE(review): a validation-curve section (utils.plot_validation_curve,
    # saved to output/validation_curve_{name}.png) used to live here but was
    # commented out -- presumably for runtime cost; restore it if
    # per-hyperparameter curves are needed.


def _plot_sample(X, y, n_min):
    """Return an index-aligned (X, y) random sample sized for plotting.

    The base size is 10**(floor(log10(len(X))) - 2) (roughly 1% of the
    data), floored at 100, then raised toward ``n_min`` — but always
    capped at ``len(X)`` so ``DataFrame.sample`` cannot be asked for
    more rows than exist (sampling is without replacement).
    """
    exponent = int(math.log10(len(X)))
    sample_n = int(math.pow(10, max(exponent - 2, 2)))
    sample_n = max(sample_n, min(n_min, len(X)))
    # Guard: never request more rows than the frame holds.
    sample_n = min(sample_n, len(X))
    X_sample = X.sample(n=sample_n)
    return X_sample, y.reindex(X_sample.index)