最近做了一个二分类任务:训练数据共8000条样本,每条样本包含1000个特征和一个label。下面记录一下使用Python实现逻辑回归(logistic regression)二分类的代码。
import os
import time
import numpy as np
# Build the path of a data file located in the current working directory.
def get_directory(file_name: str) -> str:
    """Return ``file_name`` joined onto the current working directory.

    Args:
        file_name: Name (or relative path) of the file to locate.

    Returns:
        The result of ``os.path.join(os.getcwd(), file_name)``.
    """
    return os.path.join(os.getcwd(), file_name)
# Relatively slow way to read the data (original author's note).
def get_train_data(directory: str) -> np.ndarray:
    """Load a comma-separated numeric file and report its shape.

    Args:
        directory: Path of the comma-delimited text file to read.

    Returns:
        The array produced by ``np.loadtxt``. Earlier versions printed the
        shape and discarded the data; returning it keeps old callers working
        while making the loaded data usable.
    """
    data = np.loadtxt(directory, delimiter=',')
    # Keep the shape printout that the original code emitted.
    print(data.shape)
    return data
# 读取速度相对更快
def loadDataSet(file_name, label_existed_flag):
feats = []
labels = []
fr = open(file_name)
lines = fr.readlines()
for line in lines:
temp = []
allInfo = line.strip().split(',')
dims = len(allInfo)
if label_existed_flag == 1:
for index in range(dims - 1):
temp.append(float(allInfo[index]))
feats.append(temp)