"""PyTorch implementation of a Gaussian Mixture Model (GMM).

Uses the GPU to accelerate GMM fitting when the dataset is large.
"""
import torch
import numpy as np
from sklearn. covariance import LedoitWolf
def euclidean_metric_np(X, centroids):
    """Pairwise squared Euclidean distances between samples and centroids (NumPy).

    Args:
        X: array of shape (N, D), one sample per row.
        centroids: array of shape (K, D), one centroid per row.

    Returns:
        Array of shape (N, K) where entry (i, k) is ||X[i] - centroids[k]||^2.
    """
    # Broadcast (N, 1, D) against (1, K, D) and reduce over the feature axis.
    diff = X[:, np.newaxis] - centroids[np.newaxis]
    return (diff ** 2).sum(axis=2)
def euclidean_metric_gpu(X, centers):
    """Pairwise squared Euclidean distances between samples and centers (torch).

    Args:
        X: tensor of shape (N, D).
        centers: tensor of shape (K, D), on the same device as X.

    Returns:
        Tensor of shape (N, K) of squared distances.
    """
    # Same broadcasting trick as the NumPy version: (N, 1, D) - (1, K, D).
    delta = X[:, None] - centers[None]
    return (delta ** 2).sum(dim=-1)
def kmeans_fun_gpu ( X, K= 10 , max_iter= 1000 , batch_size= 8096 , tol= 1e - 40 ) :
N = X. shape[ 0 ]
indices = torch. randperm( N) [ : K]
init_centers = X[ indices]
batchs = N // batch_size
last = 1 if N % batch_size != 0 else 0
choice_cluster = torch. zeros( [ N] ) . cuda( )
for _ in range ( max_iter) :
for bn in range ( batchs + last) :
if bn == batchs and last == 1 :
_end = - 1
else :
_end = ( bn + 1 ) * batch_size
X_batch = X[ bn * batch_size: _end]
dis_batch = euclidean_metric_gpu( X_batch, init_centers)
choice_cluster[ bn * batch_size: _end] = torch. argmin( dis_batch, dim= 1 )
init_centers_pre = init_centers. clone( )
for index in range ( K) :
selected = torch. nonzero( choice_cluster == index) . squeeze( ) . cuda( )
selected = torch. index_select( X, 0 , selected)
init_centers[ index] = selected. mean( dim= 0 )
center_shift = torch. sum (
torch. sqrt(
torch. sum ( ( init_centers - init_centers_pre) ** 2 , dim= 1 )
) )
if center_shift < tol:
break
k_mean = init_centers. detach( ) . cpu( ) . numpy( )
choice_cluster = choice_cluster. detach( ) . cpu( ) . numpy( )
return k_mean, choice_cluster
def _cal_var(X, centers=None, choice_cluster=None, K=10):
    """Estimate a per-cluster covariance matrix with Ledoit-Wolf shrinkage.

    Args:
        X: array of shape (N, D).
        centers: optional (K, D) array; if given, cluster labels are
            recomputed as the nearest center for each sample.
        choice_cluster: (N,) integer labels, used only when centers is None.
        K: number of clusters.

    Returns:
        float32 array of shape (K, D, D) of covariance matrices.
    """
    D = X.shape[1]
    k_var = np.zeros((K, D, D))
    # Tiny diagonal added to each covariance — presumably to keep it
    # invertible for later Mahalanobis distances (TODO confirm with caller).
    eps = np.eye(D) * 1e-10
    if centers is not None:
        _dist = euclidean_metric_np(X, centers)
        choice_cluster = np.argmin(_dist, axis=1)
    for k in range(K):
        samples = X[choice_cluster == k]
        # Removed dead local: the original computed np.mean(samples, axis=0)
        # and never used it.
        k_var[k] = LedoitWolf().fit(samples).covariance_ + eps
    return k_var.astype(np.float32)
def mahalanobias_metric_gpu ( X, mean, var) :
torch. cuda. empty_cache( )
dis = torch. zeros( [ X. shape[ 0 ] , mean. shape[ 0 ] ] )
for k in range ( mean. shape[ 0 ] ) :
_m = mean[ k]