方案1:
data_path = ['1.jpeg', '2.jpg', '3.jpg', '4.jpg', '5.jpg']
# Collect the pixel rows of every image into one flat Python list.
data_all = []
for image_path in data_path:
    data = read_image(image_path)
    H, W, C = np.shape(data)  # unpacking requires a 3-D array
    # Merge the first two axes so every entry appended is one row of C values.
    data_all.extend(np.reshape(data, (H * W, C)))
方案2:
以追加模式(从文件末尾)写入txt,之后再读入txt
# Open in append mode so each write adds rows after the existing contents.
with open('data.txt', 'ab') as f:
    # NOTE: pass the open handle `f`, not the filename 'data.txt'. Given a
    # filename, np.savetxt opens the file itself and overwrites it, so only
    # the most recent write survives and everything saved earlier is lost.
    np.savetxt(f, data, delimiter=" ")
读取的时候:
# Load the whole text file back into a single array.
with open('data.txt', 'r') as f:
    data = np.loadtxt(f)
应用:
将数据集做PCA:
# NOTE(review): the original note elided the loop header and the read step
# ("for..." / "data read-in"). They are reconstructed here from the names used
# by the list-based snippet at the top of this note (data_path, read_image);
# adapt them to the real data source.
for image_path in data_path:
    # Data read-in
    data_for_pca = read_image(image_path)
    H, W, C = np.shape(data_for_pca)  # unpacking requires a 3-D array
    # The target shape must be ONE tuple; the original passed C as a separate
    # third argument, which raises TypeError.
    data = np.reshape(data_for_pca, (H * W, C))
    data = data[::10, :]  # keep every 10th row so the dataset stays manageable
    # Append (not overwrite) so rows from every iteration accumulate on disk.
    with open('data.txt', 'ab') as f:
        np.savetxt(f, data)
# PCA over the whole dataset
with open('data.txt', 'r') as f:
    # Bind to the name the following lines actually use (the original loaded
    # into `data` and then read the undefined `pca_data`).
    pca_data = np.loadtxt(f)
X_norm, means, stds = featureNormalize(pca_data)
U, S, V = pca(X_norm)
# projectData is a plain function in this note, so it is called without
# `self`. Project the normalized data: U was computed from X_norm, so the raw
# data would be projected in the wrong coordinates.
Z = projectData(X_norm, U, 4)
def featureNormalize(X):
    """Standardize every column of X to zero mean and unit sample deviation.

    Args:
        X: NumPy array; statistics are taken along axis 0, so each column is
            normalized independently.

    Returns:
        A tuple ``(X_norm, means, stds)``: the normalized array, the
        per-column means, and the per-column sample standard deviations.
        A constant column has zero deviation and therefore yields NaN.
    """
    # ndarray has `mean`, not `means`; the original raised AttributeError.
    means = X.mean(axis=0)
    # ddof=1 selects the sample (n - 1) standard deviation.
    stds = X.std(axis=0, ddof=1)
    X_norm = (X - means) / stds
    return X_norm, means, stds
def projectData(X, U, K):
    """Project X onto the first K columns of U.

    Args:
        X: array whose last dimension matches the number of rows of U.
        U: 2-D array; only its first K columns are used.
        K: number of leading columns of U to keep.

    Returns:
        The matrix product of X with the first K columns of U.
    """
    basis = U[:, :K]
    return X @ basis
def pca(X):
    """Run an SVD on the matrix ``X.T @ X / len(X)``.

    Args:
        X: 2-D NumPy array with samples along axis 0.
            NOTE(review): presumably already mean-normalized, so that the
            matrix above is a covariance matrix -- confirm with the caller.

    Returns:
        The tuple ``(U, S, V)`` as produced by ``np.linalg.svd``; the third
        element is the factor NumPy documents as ``Vh``.
    """
    sigma = (X.T @ X) / len(X)
    U, S, V = np.linalg.svd(sigma)
    return U, S, V