Coursera NG Machine Learning, Week 7: K-Means, PCA and Image Compression in Python

KMeans

ex7.py 

import numpy as np
import matplotlib.pyplot as plt
import time
from scipy.io import loadmat
from sklearn.cluster import KMeans
from ex7modules import *

#Part 1:Check MyKMeans
X=loadmat('ex7data2.mat')['X']
K=3
max_iters=10
init_centroids=np.array([[3,3],[6,2],[8,5]])
centroids,idx=MyKMeans(X,init_centroids,max_iters,True)

#Part 2:Image Compression
fig=loadmat('bird_small')['A']/255   #Normalize
fig_size=fig.shape[0]
plt.imshow(fig)
plt.show()

fig=fig.reshape(-1,3)  #flatten (128,128,3) into (128*128,3): one RGB row per pixel, the shape the KMeans functions expect

K=16  #reduce the number of colors to 16
max_iters=10

time_start=time.time()
init_centroids=InitCentroids(fig,K)

fig_centroids,_=MyKMeans(fig,init_centroids,max_iters,False) # find the K dominant colors

fig_idx=findClosestCentroids(fig,fig_centroids)  # find every pixel's closest color
time_end=time.time()

print("Using my Kmeans costs time: ",time_end-time_start)

fig_recovered=fig_centroids[fig_idx-1,:]  #assign every pixel the color of its closest centroid (idx is 1-based)

fig_recovered=fig_recovered.reshape((fig_size,fig_size,3))  #back to (128,128,3) for display
plt.imshow(fig_recovered)
plt.show()

#Part 3:Using SKlearn
time_start=time.time()
clf=KMeans(n_clusters=16,init='random',max_iter=50)
clf.fit(fig)
time_end=time.time()

print("Using SKlearn Kmeans costs time: ",time_end-time_start)

cluster_centers=clf.cluster_centers_
labels=clf.labels_

fig_recovered=cluster_centers[labels,:]   #sklearn labels are already 0-based

fig_recovered=fig_recovered.reshape((fig_size,fig_size,3))

plt.imshow(fig_recovered)
plt.show()
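
For reference, this is where the compression comes from: with only 16 colors, each pixel can be stored as a 4-bit index into a 16-entry RGB palette instead of 24 bits of raw RGB, roughly a six-fold size reduction. A quick back-of-the-envelope check:

original_bits=128*128*24            #24-bit RGB for every pixel
compressed_bits=16*24+128*128*4     #16-color palette plus a 4-bit index per pixel
print(original_bits/compressed_bits)   #about 6x smaller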

 

From left to right: the original image, my KMeans, and scikit-learn's KMeans. Speed-wise, mine is still far slower~~~

Somehow the compressed image looks a bit mangled w(゜Д゜)w ... it doesn't look as good as the result in the lecture slides.

Result from the lecture slides:
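
The speed gap is mostly down to the pure-Python double loop in findClosestCentroids, which computes one distance at a time. A minimal vectorized sketch (my own variant, not the assignment's reference code) that builds the whole m x K distance matrix with broadcasting:

import numpy as np

def findClosestCentroidsVec(X, centroids):
    #(m,1,n) minus (1,K,n) broadcasts to (m,K,n); the norm over the last
    #axis yields the m x K matrix of point-to-centroid distances.
    dist = np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=2)
    return np.argmin(dist, axis=1) + 1   #keep the 1-based convention used above

Swapping this in for findClosestCentroids leaves the rest of MyKMeans unchanged and removes the Python-level inner loops.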

                                                                             


PCA

ex7pca.py

from scipy.io import loadmat
from ex7modules import *
#part 1
X=loadmat('ex7data1.mat')['X']

X_norm,mu,sigma=featureNormalize(X)
U,S=PCA(X_norm)
visualizeEigVector(mu,U,S,X)

print('Top eigenvector: ')
print(U[0,0],U[1,0])   #the top eigenvector is the first column of U
print()

K=1
Z=projectData(X_norm,U,K)
X_rec=recoverData(Z,U,K)
visualizePCA(X_norm,X_rec)

#Part 2
X=loadmat('ex7faces.mat')['X']
displayData(X,10)

X_norm,_,_=featureNormalize(X)
U,S=PCA(X_norm)
displayData(U.T,6)

K=100
Z=projectData(X_norm,U,K)
print('The projected data Z has a size of: ',Z.shape)
X_rec=recoverData(Z,U,K)
displayData(X_rec,10)

Cpcompare(X,X_rec,10)
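
A quick sanity check (not part of the original script): the eigenvalues in S measure how much variance each principal component captures, so the fraction retained by the first K components is sum(S[:K]) / sum(S). A minimal sketch, assuming the U, S returned by PCA above:

import numpy as np

K=100
retained=np.sum(S[:K])/np.sum(S)   #fraction of total variance kept by the first K components
print('Variance retained by the first %d components: %.2f%%' % (K, 100*retained))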

                                           

The first 100 face images

                                   

The first 36 extracted eigenfaces (spooky w(゜Д゜)w)

                                  

Faces recovered after compression

                                 

Comparison before and after compression

                             

 


ex7modules.py 

import numpy as np
import matplotlib.pyplot as plt

def findClosestCentroids(X,centroids):
    K=centroids.shape[0]
    dist=np.zeros((X.shape[0],K))
    for i in range(X.shape[0]):
        for j in range(K):
            dist[i, j]=np.linalg.norm(X[i,:]-centroids[j,:])
    idx=np.argmin(dist,axis=1)
    return idx+1   #1-based cluster indices, matching the course convention

def computeCentroids(X,idx,K):
    centroids=np.zeros((K,X.shape[1]))
    for i in range(1,K+1):
        members=X[idx==i]                      #points currently assigned to centroid i (idx is 1-based)
        centroids[i-1,:]=members.mean(axis=0)  #new centroid is the mean of its members
    return centroids

def MyKMeans(X,initial_centroids,max_iters,plot):
    K=initial_centroids.shape[0]
    for i in range(max_iters):
        idx=findClosestCentroids(X,initial_centroids)
        initial_centroids=computeCentroids(X,idx,K)
        if plot:   #the plotting below assumes K==3, as in Part 1
            idx1 = np.where(idx == 1)[0]
            idx2 = np.where(idx == 2)[0]
            idx3 = np.where(idx == 3)[0]
            plt.scatter(X[idx1, 0], X[idx1, 1], c='w',edgecolors='r',s=10)
            plt.scatter(X[idx2, 0], X[idx2, 1], c='w',edgecolors='b',s=10)
            plt.scatter(X[idx3, 0], X[idx3, 1], c='w',edgecolors='k',s=10)
            plt.scatter(initial_centroids[0, 0], initial_centroids[0, 1], marker='x', c='r')
            plt.scatter(initial_centroids[1, 0], initial_centroids[1, 1], marker='x', c='b')
            plt.scatter(initial_centroids[2, 0], initial_centroids[2, 1], marker='x', c='k')
            plt.show()
    return initial_centroids,idx

def InitCentroids(X,K):
    randidx=np.random.permutation(X.shape[0])
    centroids=X[randidx[0:K],:]
    return centroids

def featureNormalize(X):
    mu=np.mean(X,axis=0)
    sigma=np.std(X,axis=0,ddof=1)
    X=(X-mu)/sigma
    return X,mu,sigma

def PCA(X):
    U,S,_=np.linalg.svd(X.T.dot(X)/X.shape[0])
    return U,S

def projectData(X,U,K):
    return X.dot(U[:,:K])

def recoverData(Z,U,K):
    return Z.dot(U[:,:K].T)
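
#Note: the columns of U returned by PCA are the principal directions (eigenvectors
#of the covariance matrix), ordered by the variances in S. projectData keeps only
#the first K of them, and recoverData maps the K-dimensional projection back to the
#original space, so X_rec is only an approximation of X (exact when K equals the
#number of features).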

def displayData(X,num):
    fig, ax = plt.subplots(num, num)
    for i in range(num):
        for j in range(num):
            ax[i, j].imshow(X[i * num + j, :].reshape((32, 32)).T, cmap='gray')
            ax[i, j].set_xticks([])
            ax[i, j].set_yticks([])
    fig.tight_layout()
    plt.subplots_adjust(wspace=0, hspace=0)
    plt.show()

def Cpcompare(X,X_rec,num):
    fig, ax = plt.subplots(num, num*2)
    for i in range(num):
        for j in range(0,num*2,2):
            k = i * num + j // 2   #show original image k and its reconstruction side by side
            ax[i, j].imshow(X[k, :].reshape((32, 32)).T, cmap='gray')
            ax[i, j].set_xticks([])
            ax[i, j].set_yticks([])
            ax[i, j+1].imshow(X_rec[k, :].reshape((32, 32)).T, cmap='gray')
            ax[i, j+1].set_xticks([])
            ax[i, j+1].set_yticks([])
    fig.tight_layout()
    plt.subplots_adjust(wspace=0, hspace=0)
    plt.show()


def visualizeEigVector(mu,U,S,X):
    mu = mu.reshape((1, 2))
    point1 = mu + 1.5 * S[0] * (U[:, 0].reshape((1, 2)))
    point2 = mu + 1.5 * S[1] * (U[:, 1].reshape((1, 2)))
    x1 = mu[:, 0]
    y1 = mu[:, 1]
    x2 = point1[:, 0]
    y2 = point1[:, 1]
    x3 = point2[:, 0]
    y3 = point2[:, 1]
    ax1 = np.array([x1, x2])
    ax2 = np.array([x1, x3])
    ay1 = np.array([y1, y2])
    ay2 = np.array([y1, y3])
    plt.scatter(X[:, 0], X[:, 1], marker='o', c='w', edgecolors='b')
    plt.plot(ax1, ay1, c='k')
    plt.plot(ax2, ay2, c='k')
    plt.show()

def visualizePCA(X_norm,X_rec):
    plt.xlim((-3, 3))
    plt.ylim((-3, 3))
    plt.scatter(X_norm[:, 0], X_norm[:, 1], marker='o', c='w', edgecolors='b')
    plt.scatter(X_rec[:, 0], X_rec[:, 1], marker='o', c='w', edgecolors='r')
    for i in range(X_norm.shape[0]):
        x = np.array([X_norm[i, 0], X_rec[i, 0]])
        y = np.array([X_norm[i, 1], X_rec[i, 1]])
        plt.plot(x, y, 'k--')
    plt.show()

 
