工具包导入:
# Pandas and numpy for data manipulation
import pandas as pd
import numpy as np
# No warnings about setting value on copy of slice
pd.options.mode.chained_assignment = None
# Display up to 60 columns when printing a dataframe
pd.set_option('display.max_columns', 60)
# Matplotlib for visualization
import matplotlib.pyplot as plt
# IPython/Jupyter magic (not plain Python syntax): this cell must run inside a notebook kernel
%matplotlib inline
# Set default font size
plt.rcParams['font.size'] = 24
from IPython.core.pylabtools import figsize
# Seaborn for visualization
import seaborn as sns
sns.set(font_scale = 2)
# Imputing missing values
# NOTE(review): sklearn.preprocessing.Imputer was removed in scikit-learn 0.22
# (its replacement is sklearn.impute.SimpleImputer) — confirm the installed
# version still provides it before running this cell.
from sklearn.preprocessing import Imputer, MinMaxScaler
# Machine Learning Models
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn import tree
# LIME only explains predictions one sample at a time
import lime # requires installation: pip install -i https://pypi.tuna.tsinghua.edu.cn/simple lime
import lime.lime_tabular
import warnings
# Silence every warning for the rest of the session
warnings.filterwarnings("ignore")
读取数据:
先拿到我们上个模型的结果
# Load the data that was saved during the previous step.
# These datasets were presumably produced by an earlier train/test split
# (the original note says "test_train_test") — confirm against that step.
# Read the four CSV files into dataframes, in the same order as before:
# training features, testing features, training labels, testing labels.
train_features, test_features, train_labels, test_labels = map(
    pd.read_csv,
    (
        'C:/Users/lb/Desktop/data/training_features.csv',
        'C:/Users/lb/Desktop/data/testing_features.csv',
        'C:/Users/lb/Desktop/data/training_labels.csv',
        'C:/Users/lb/Desktop/data/testing_labels.csv',
    ),
)
缺失值处理:
# 用中值代替缺失值
imputer = Imputer(strategy='medi