# 机器学习备忘-维度辨析axis-矩阵和向量混合运算-.py文件中调用.mat文件-欠拟合过拟合解决方案

1.可以通过np.matrix()函数将一个变量转换为numpy型矩阵
2.在Python的NumPy库中，二维数组（矩阵）与一维数组（向量）用 @ 或 np.dot 相乘时，NumPy会自动把一维数组当作维度匹配的向量参与运算：向量在右侧时按列向量处理，在左侧时按行向量处理，结果仍是一维数组。
3.关于维度的一些辨析

# Find, for every sample, the position of the largest predicted probability.
h_argmax = np.argmax(h, axis=1)
# axis=1 reduces across the columns, so each row yields the column index of its maximum.


2.二维数组或者矩阵中，axis=1表示沿列的方向（横向，跨列）进行操作，即对每一行分别计算；axis=0则表示沿行的方向（纵向，跨行）进行操作，即对每一列分别计算。
3.a.reshape(-1)表示把数组展平为一维数组（形状为(n,)）；如果想得到只有一行的二维数组，应使用a.reshape(1, -1)。
4.x_categ.reshape(shape + (n_out,))是通过reshape确保x_categ的形状为shape + (n_out,)。这里的shape + (n_out,)实现的是元组的拼接。例如shape = (14,)，n_out = 10时，拼接结果为(14, 10)，即x_categ变成一个14行、10列的矩阵。
5.标量和矢量相减，会自动扩展到矢量的尺寸再运算
# The scalar 1 is broadcast to the shape of the row slices y[i,:] and h[i,:]
# before the element-wise product is taken.
second_term = np.multiply((1 - y[i,:]), np.log(1 - h[i,:]))
6.获取.mat文件里面的每一个属性的数据有2种方式：

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# NOTE(review): `data` is not bound anywhere in this excerpt; presumably it
# holds the contents of the loaded .mat file — confirm against the original.
data
# Pull out the entries stored under the 'X' and 'y' keys.
X = data['X']
y = data['y']

X.shape, y.shape# inspect the dimensions


import numpy as np
import scipy.io as sio
import scipy.optimize as opt
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

"""for ex5
d['X'] shape = (12, 1)
pandas has trouble taking this 2d ndarray to construct a dataframe, so I ravel
the results
"""
# NOTE(review): the string above and the `return` below appear to be the body
# of `load_data` (called further down); its `def` header and the statement
# that binds `d` are missing from this excerpt.
#     return d
# Flatten each of the six arrays with np.ravel; `map` hands them back lazily.
return map(np.ravel, [d['X'], d['y'], d['Xval'], d['yval'], d['Xtest'], d['ytest']])

# Unpack the six flattened arrays in the order they were listed.
X, y, Xval, yval, Xtest, ytest = load_data()


7.矩阵和向量或者数组相乘的时候，会自动把数组或向量调整为矩阵运算所需要的维度：

import numpy as np
import scipy.io as sio
import scipy.optimize as opt
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Inspect the loaded object, its 'X' entry, and that entry's shape and type.
# NOTE(review): `data` is not bound anywhere in this excerpt.
data
data['X']
data['X'].shape
type(data['X'])

# NOTE(review): the string and `return` below appear to be the body of
# `load_data` (called further down); its `def` header and the statement that
# binds `d` are missing from this excerpt.
"""for ex5
d['X'] shape = (12, 1)
pandas has trouble taking this 2d ndarray to construct a dataframe, so I ravel
the results
"""
#     return d
# Flatten each of the six arrays with np.ravel; `map` hands them back lazily.
return map(np.ravel, [d['X'], d['y'], d['Xval'], d['yval'], d['Xtest'], d['ytest']])

# Unpack the six flattened arrays in the order they were listed.
X, y, Xval, yval, Xtest, ytest = load_data()
X, X.shape

# Reshape each feature vector to a single column, then insert a column of ones
# at index 0 so every design matrix starts with the bias term.
X, Xval, Xtest = [np.insert(x.reshape(x.shape[0], 1), 0, np.ones(x.shape[0]), axis=1) for x in (X, Xval, Xtest)]

def cost(theta, X, y):
    """Return the unregularised squared-error cost of a linear model.

    Evaluates ``(X @ theta.T - y).T @ (X @ theta.T - y) / (2 * m)``, where
    ``m`` is the number of rows of ``X``.

    Args:
        theta: Parameter values, one per column of ``X``.
        X: Input data; ``X.shape[0]`` is taken as the number of samples.
        y: Labels, one per sample.

    Returns:
        The value of the cost function at ``theta``.
    """
    # Step 1: number of samples.
    m = X.shape[0]

    # Step 2: residuals. For a 1-D ``theta`` the transpose is a no-op, so
    # plain ``X @ theta`` gives the same result; ``.T`` is kept so that a
    # (1, n) row vector is accepted as well.
    inner = X @ theta.T - y

    # ``@`` is ordinary matrix multiplication, so this is the sum of the
    # squared residuals.
    square_sum = inner.T @ inner

    return square_sum / (2 * m)

# Start from an all-ones parameter vector with one entry per column of X.
theta = np.ones(X.shape[1])
# Evaluate the cost at that starting point.
cost(theta, X, y)


8.实际工作中，如果欠拟合，又没有那么多数据的话，会考虑用多项式特征进行维度拓展。这个时候如果过拟合的话，就要加正则项进行控制。一般在训练集上，用一组lambda（λ）分别进行正则化训练，然后在验证集上选择代价最小的那一个lambda作为最终的选择。

def prepare_poly_data(*args, power):
    """Build normalised polynomial design matrices for each input.

    Args:
        *args: Feature arrays to transform (e.g. X, Xval, Xtest); the
            results are returned in the same order.
        power: Highest polynomial degree, forwarded to ``poly_features``.

    Returns:
        A list with one ndarray per input, each starting with a column of
        ones (the bias term).
    """
    def prepare(x):
        # Feature mapping: expand x into its polynomial terms.
        df = poly_features(x, power=power)

        # Normalise, then convert to an ndarray so the bias column can be
        # inserted with np.insert. DataFrame.as_matrix() was removed in
        # pandas 1.0; to_numpy() is its replacement.
        # NOTE(review): each input is normalised independently of the others.
        ndarr = normalize_feature(df).to_numpy()

        # Prepend the bias column of ones.
        return np.insert(ndarr, 0, np.ones(ndarr.shape[0]), axis=1)

    return [prepare(x) for x in args]

def poly_features(x, power, as_ndarray=False):
    """Map ``x`` to its polynomial features ``x**1 ... x**power``.

    Args:
        x: Values to raise to each power (passed to ``np.power``).
        power: Highest exponent; the columns are named ``f1`` .. ``f{power}``.
        as_ndarray: When true, return a NumPy array instead of a DataFrame.

    Returns:
        A DataFrame with one column per power, or the equivalent ndarray
        when ``as_ndarray`` is true.
    """
    data = {'f{}'.format(i): np.power(x, i) for i in range(1, power + 1)}
    df = pd.DataFrame(data)

    # DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() replaces it.
    return df.to_numpy() if as_ndarray else df

def normalize_feature(df):
    """Standardise every column of ``df``.

    Each column has its own mean subtracted and is divided by its own
    standard deviation (``Series.std``, which defaults to ``ddof=1``).

    Args:
        df: DataFrame whose columns are to be standardised.

    Returns:
        A new DataFrame with the same columns, standardised column by column.
    """
    # DataFrame.apply works column-wise by default (axis=0).
    return df.apply(lambda column: (column - column.mean()) / column.std())


©️2019 CSDN 皮肤主题: 游动-白 设计师: 上身试试