def load_data(path=r'D:\data\lgdata.csv') -> pd.DataFrame:
    """Load the raw data set and return it as a DataFrame.

    The file is read as a header-less CSV. The first field of every row is
    labelled ``target`` and the remaining 22 fields are labelled ``x1``
    through ``x22``.

    Args:
        path: Location of the CSV file, or any other input accepted by
            ``pandas.read_csv`` (e.g. an open file-like object). Defaults
            to the original hard-coded Windows path.

    Returns:
        A DataFrame with the 23 columns ``target, x1, ..., x22`` and a
        default integer index.
    """
    # The default path is a raw string: in a normal literal ``\d`` and ``\l``
    # are invalid escape sequences that only work by accident and trigger a
    # SyntaxWarning on recent Python versions.
    column_names = ['target'] + [f'x{i}' for i in range(1, 23)]
    # index_col=False keeps the first field as data instead of promoting it
    # to the index; header=None because the file has no header row.
    return pd.read_csv(
        path,
        index_col=False,
        header=None,
        names=column_names,
    )
梯度下降法和随机梯度下降法
最新推荐文章于 2024-05-21 22:51:01 发布