# Code:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
def train():
# 1)读取数据:
df1=pd.read_csv('horseColicTraining.txt',delimiter='\t',header=None)
df2=pd.read_csv('horseColicTest.txt',delimiter='\t',header=None)
last_column = df1.iloc[:, -1] # 获取最后一列数据
x_train1 = df1.iloc[:, :-1] # 第一个DataFrame包含除最后一列以外的所有列
y_train1 = pd.DataFrame(last_column) # 第二个DataFrame只包含最后一列
last_column1 = df2.iloc[:, -1] # 获取最后一列数据
x_test1 = df2.iloc[:, :-1] # 第一个DataFrame包含除最后一列以外的所有列
y_test1 = pd.DataFrame(last_column1) # 第二个DataFrame只包含最后一列
# 2)缺失值处理:
#3)划分数据集:
# 筛选特征值和目标值
# 4)特征工程标准化
transfer=StandardScaler()
x_train=transfer.fit_transform(x_train1)
# print(x_train)
x_tes