import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
# Plot/display configuration applied once at import time.
# - 'font.sans-serif' is pointed at SimHei (a CJK-capable font) so Chinese
#   labels can be rendered in figures.
# - 'axes.unicode_minus' is turned off so the minus sign is drawn with the
#   ASCII hyphen, which such fonts are able to render.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
# Do not truncate the column list when a DataFrame is printed.
pd.options.display.max_columns = None
# Split a DataFrame into feature columns and a target column.
def split_column(df, y="y"):
    """Separate the target column ``y`` from the remaining columns of ``df``.

    Args:
        df: DataFrame to split.
        y: Label of the target column. Defaults to ``"y"``.

    Returns:
        tuple: ``(X, y)`` where ``X`` is ``df`` with the target column
        dropped and ``y`` is a single-column DataFrame holding the target
        values under the same column label.

    Raises:
        KeyError: If ``y`` is not a column of ``df``. The message (in
            Chinese) asks the caller to pick a field that exists in the data.
    """
    try:
        X = df.drop(y, axis=1)
    except KeyError as err:
        # Swap pandas' generic message for a user-facing hint, keeping the
        # original lookup failure attached as the explicit cause.
        raise KeyError("请在拆分列的参数中选择数据中有的字段") from err
    # Wrap the target Series in a DataFrame so both results share one type.
    target = pd.DataFrame(df[y], columns=[y])
    return X, target
# Helper: report the share of missing values in each column.
def check_nan(df_var):
    """Print and return the percentage of missing values per column.

    A header line with the column and row counts is printed first, followed
    by one ``"<column>: <percent>%"`` line per column.

    Args:
        df_var: DataFrame to inspect.

    Returns:
        dict: Maps each column label to the percentage (0-100, rounded to two
        decimals) of its values that are null. For a frame with zero rows
        every column is reported as ``0.0``.
    """
    n_rows, n_cols = df_var.shape
    # The header labels read "column count, row count", whereas ``shape`` is
    # (rows, columns) -- pass the values in label order, not shape order.
    print("列数:{},行数:{}".format(n_cols, n_rows))

    # Null count per column, keyed by column label.
    null_counts = dict(zip(df_var.columns.values, df_var.isnull().sum(axis=0)))

    nan_dict = {}
    for column, count in null_counts.items():
        # An empty frame has nothing missing; guard the division by zero.
        percent = round((count / n_rows) * 100, 2) if n_rows else 0.0
        print("{}: {}%".format(column, percent))
        nan_dict[column] = percent
    return nan_dict