# 读取数据
import pandas as pd

# Load the full dataset. The encoding is passed explicitly as GBK; the
# original author added it to work around a decoding problem with this file.
data_all = pd.read_csv(
    'data_all.csv',
    encoding='gbk',
)
# 划分数据集
# Split the data into training and test sets.
from sklearn.model_selection import train_test_split

# Every column except the 'status' label is used as a model input.
features = [col for col in data_all.columns if col != 'status']
X, y = data_all[features], data_all['status']

# Hold out 30% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2018,
)
# 构建模型
# Logistic regression model.
from sklearn.linear_model import LogisticRegression

# Construct the classifier with a fixed seed and train it on the training
# split. `fit` returns the estimator itself, so `lr` is the fitted model.
lr = LogisticRegression(random_state=2018).fit(X_train, y_train)
# 模型评分
# Score the fitted classifier on the test split. For scikit-learn
# classifiers `score` reports mean accuracy. The expression is left as a
# bare trailing statement so a notebook cell displays its value.
lr.score(X=X_test, y=y_test)
# Value recorded by the original author: 0.7484232655921513