吴恩达机器学习python实现7 K-means及PCA

K-means

1、找到所属簇

def find_close_centroids(X, centroids):
    res = np.zeros((1,))
    for x in X:
        res = np.append(res, np.argmin(np.sqrt(np.sum((centroids-x)**2, axis=1))))
    return res[1:]

2、计算新的簇中心

def compute_centroids(X, idx):
    K = int(np.max(idx))+1
    m = X.shape[0]
    n = X.shape[-1]
    centroids = np.zeros((K, n))
    counts = np.zeros((K, n))
    for i in range(m):
        centroids[int(idx[i])] += X[i]
        counts[int(idx[i])] += 1
    centroids = centroids / counts
    return centroids

3、测试一下

data = sio.loadmat(文件路径)
    X = data["X"]  # (300,2)
    init_centroids = np.array([[3, 3], [6, 2], [8, 5]])
    idx = find_close_centroids(X, init_centroids)  # (300,)
    print(compute_centroids(X, idx))

4、随机选取K组数据为簇中心

def random_initialization(X, K):
    res = np.zeros((1, X.shape[-1]))
    m = X.shape[0]
    rl = []
    while True:
        index = random.randint(0, m)
        if index not in rl:
            rl.append(index)
        if len(rl) >= K:
            break
    for index in rl:
        res = np.concatenate((res, X[index].reshape(1, -1)), axis=0)
    return res[1:]

5、计算代价函数

def cost(X, idx, centroids):
    c = 0
    for i in range(len(X)):
        c += np.sum((X[i] - centroids[int(idx[i])]) ** 2)
    c /= len(X)
    return c

6、k-means聚类算法

def k_means(X, K):
    centroids = random_initialization(X, K)
    centroids_all = [centroids]
    idx = np.zeros((1,))
    last_c = -1
    now_c = -2
    # 收敛时结束算法
    while now_c != last_c:
        idx = find_close_centroids(X, centroids)
        last_c = now_c
        now_c = cost(X, idx, centroids)
        centroids = compute_centroids(X, idx)
        centroids_all.append(centroids)
    return idx, centroids_all

7、可视化聚类结果、簇中心移动过程

def visualizing(X, idx, centroids_all):
    plt.scatter(X[..., 0], X[..., 1], c=idx)
    xx = []
    yy = []
    for c in centroids_all:
        xx.append(c[..., 0])
        yy.append(c[..., 1])
    plt.plot(xx, yy, 'rx--')
    plt.show()

8、尝试一下

    idx, centroids_all = k_means(X, 3)
    visualizing(X, idx, centroids_all)

9、压缩图片

def compress(image, colors_num):
    d1, d2, _ = image.shape
    raw_image = image.reshape(d1*d2, -1)
    idx, centroids_all = k_means(raw_image, colors_num)
    colors = centroids_all[-1]
    compress_image = np.zeros((1, 1))
    for i in range(d1*d2):
        compress_image = np.concatenate((compress_image, idx[i].reshape(1, -1)), axis=0)
    compressed_image = compress_image[1:].reshape(d1, d2, -1)
    return compressed_image, colors

 10、将压缩后图像转换为可以正常显示的图片格式

def compressed_format_to_normal_format(compressed_image, colors):
    d1, d2, _ = compressed_image.shape
    normal_format_image = np.zeros((1, len(colors[0])))
    compressed_image = compressed_image.reshape(d1*d2, -1)
    for i in range(d1*d2):
        normal_format_image = np.concatenate((normal_format_image, colors[int(compressed_image[i][0])].reshape(1, -1)), axis=0)
    normal_format_image = normal_format_image[1:].reshape(d1, d2, -1)
    return normal_format_image

11、测试一下

    image = plt.imread(文件路径)
    print(image.shape)  # (128,128,3)
    plt.subplot(1, 2, 1)
    plt.imshow(image)
    plt.axis("off")
    plt.title("raw image")
    plt.subplot(1, 2, 2)
    compressed_image, colors = compress(image, 16)
    print(compressed_image.shape, colors.shape)  # (128, 128, 1)  (16, 3)
    plt.imshow(compressed_format_to_normal_format(compressed_image, colors))
    plt.axis("off")
    plt.title("compressed image")
    plt.show()

降维

1、数据归一化

def data_process(X):
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0, ddof=1)
    return (X-mean)/std, mean, std

 2、PCA

def pca(X):
    sigma = X.T.dot(X)/len(X)  # (n,m)*(m,n) (n,n)
    u, s, v = np.linalg.svd(sigma)  # u(n,n) s(n, ) v(n,n)
    return u, s, v

3、降维 X(m,n) U(n,n) X原始数据 U奇异值分解后的U K目标维度

def project_data(X, U, K):
    return X.dot(U[..., :K])

4、数据升维 Z降维后数据 U奇异值分解后 K降维的维度

def reconstruct_data(Z, U, K):
    return Z.dot(U[..., :K].T)

5、测试

data = sio.loadmat(文件路径)
    X = data["X"]  # (50,2)
    normalized_X, _, _ = data_process(X)
    u, _, _ = pca(normalized_X)  # (2,2)
    Z = project_data(normalized_X, u, 1)
    print(Z.shape)  # (50,1)
    print(Z[0])  # [1.48127391]
    rec_X = reconstruct_data(Z, u, 1)
    print(rec_X.shape)  # (50,2)
    print(rec_X[0])  # [-1.04741883 -1.04741883]
    plt.scatter(normalized_X[..., 0], normalized_X[..., 1], marker="x", c="b", label="normalize x")
    plt.scatter(rec_X[..., 0], rec_X[..., 1], marker="x", c="r", label="reconstructed x")
    plt.title("Visualizing the projections")
    for i in range(len(normalized_X)):
        plt.plot([normalized_X[i][0], rec_X[i][0]], [normalized_X[i][1], rec_X[i][1]], 'k--')
    plt.legend()
    plt.show()

 6、脸部图像处理

# 可视化图片  d 一行展示的图片张数
def visualizing_images(X, d):
    m = X.shape[0]
    n = X.shape[-1]
    s = int(np.sqrt(n))
    for i in range(1, m+1):
        plt.subplot(int(m/d), d, i)
        plt.axis("off")
        plt.imshow(X[i-1].reshape(s, s).T, cmap="Greys_r")
    plt.show()

原始图

 data = sio.loadmat(学习路径)
    X = data["X"]  # (5000,1024)
    visualizing_images(X[:25], 5)

处理后的图

nor_X, _, _ = data_process(X)
    u, _, _ = pca(nor_X)
    Z = project_data(nor_X, u, 36)
    rec_X = reconstruct_data(Z, u, 36)
    visualizing_images(rec_X[:25], 5)

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值