# coding=utf-8
from pandas import DataFrame
import datetime
import numpy as np
import pandas as pd
# The error metric: RMSE on the log of the sale prices.
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import KFold
import xgboost as xgb
from sklearn.linear_model import Lasso, Ridge, ElasticNet
from sklearn.kernel_ridge import KernelRidge
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.preprocessing import LabelEncoder
from scipy.stats import skew
import csv
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import RandomForestClassifier
from pandas import DataFrame
# Load the training table from CSV; pd.read_csv returns a DataFrame.
data = pd.read_csv('hoursetrain1.csv')
# Features are every column except the 'Survived' label column.
x1 = data.drop(['Survived'], axis=1)
y1 = data['Survived']
x = np.array(x1)
y = np.array(y1)
# Hold out half of the rows for testing; random_state fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.5, random_state=1)
# Logistic regression with an L2 penalty.
lr = LogisticRegression(penalty='l2')
lr.fit(x_train, y_train)
y_hat = lr.predict(x_test)  # predicted class for each row of x_test
b = np.ones(50)  # not used by any statement in this block
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print(len(x))
print(len(y_hat))
# y_hat holds one prediction per row of x_test, so it is appended as an extra
# column to x_test. Pairing it with x_train would mismatch rows and raises
# ValueError whenever the two halves of the split differ in length.
print(np.c_[x_test, y_hat])
# Same result built with column_stack instead of the np.c_ index trick.
print(np.column_stack((x_test, y_hat)))
# Example from the web: start with an m×n NumPy matrix and append one extra
# column to it, so that it becomes an m×(n+1) matrix.
import numpy as np
# a: the 3x3 starting matrix.
a = np.array([[1,2,3],[4,5,6],[7,8,9]])
# b: a vector of three ones, the column to be appended.
b = np.ones(3)
# c: the desired 3x4 result, written out by hand for illustration.
c = np.array([[1,2,3,1],[4,5,6,1],[7,8,9,1]])
print(a)  # was "PRint(a)", which raises NameError
print(b)
print(c)
# Printed output of the three print calls above:
# [[1 2 3]
#  [4 5 6]
#  [7 8 9]]
# [ 1. 1. 1.]
# [[1 2 3 1]
#  [4 5 6 1]
#  [7 8 9 1]]
# The goal is to combine a and b so that the result is c.