from numpy import array, append, vstack, transpose, reshape, \ dot, true_divide, mean, exp, sqrt, log, \ loadtxt, savetxt, zeros, frombuffer from numpy.linalg import norm, lstsq from multiprocessing import Process, Array from random import sample from time import time from sys import stdout from ctypes import c_double from h5py import File
def _calculate_sigmas(self):
    """Compute one width (sigma) per neuron from the centres in ``self.mu``.

    For neuron ``i`` the width is twice the mean of
    ``metrics(mu[i], mu[j])`` taken over every ``j``, where the ``j == i``
    slot contributes ``0`` and is still counted in the mean.

    ``metrics`` is defined elsewhere in the file (presumably a distance
    function -- confirm there).

    Returns:
        Array of shape ``(self.neurons,)`` holding the widths.
    """
    count = self.neurons
    centers = self.mu

    widths = zeros((count, ))
    for row in xrange(count):
        # The self slot is a literal 0, exactly as in the pre-filled list
        # the loop version used, so the mean's denominator is `count`.
        gaps = [
            0 if col == row else metrics(centers[row], centers[col])
            for col in xrange(count)
        ]
        # An alternative width, max(dists) / sqrt(neurons * 2), was left
        # commented out by the original author and is not used.
        widths[row] = mean(gaps) * 2
    return widths
def _calculate_phi(self, x):
    """Build the matrix ``phi`` for the rows of ``x`` using worker processes.

    ``phi[i, j]`` is ``multiQuadric(x[i, :], mu[j], sigmas[j])`` where
    ``multiQuadric`` is defined elsewhere in the file.  The rows are split
    into ``self.workers`` contiguous chunks, one process per chunk; the
    last chunk also takes the remainder rows.

    Args:
        x: 2-D indexable input (it is read as ``x[i, :]``).

    Returns:
        A ``(self.n, self.neurons)`` float array backed by a shared
        ``multiprocessing.Array`` of ``c_double``.

    Side effects:
        Sets ``self.phi`` to ``None`` (kept from the original) and prints
        progress lines from every worker.
    """
    worker_count = self.workers
    neurons = self.neurons
    mu = self.mu
    sigmas = self.sigmas
    n = self.n
    # NOTE(review): the row count comes from self.n, not from x itself;
    # confirm callers keep self.n in sync with the data they pass in.

    # Kept from the original: the attribute is cleared before the new
    # buffer is allocated.
    self.phi = None

    def heavy_lifting(c, phi, start, stop):
        """Fill rows ``start`` (inclusive) to ``stop`` (exclusive) of phi."""
        span = stop - start
        for k, i in enumerate(range(start, stop)):
            for j in range(neurons):
                # Other kernels (metrics(...)**3, plateSpine,
                # invMultiQuadric, gaussian) were left commented out in
                # the original; multiQuadric is the active one.
                phi[i, j] = multiQuadric(x[i, :], mu[j], sigmas[j])
            if k % 1000 == 0:
                percent = true_divide(k, span) * 100
                print(c, ': {:2.2f}%'.format(percent))
        print(c, ': Done')

    # Shared buffer so the rows written by the child processes are visible
    # to the parent once they finish.
    # NOTE(review): a nested target function appears to rely on the "fork"
    # start method (it cannot be pickled for "spawn") -- confirm platform.
    shared_array = Array(c_double, n * neurons)
    phi = frombuffer(shared_array.get_obj()).reshape((n, neurons))

    # divmod keeps the chunk size an integer on both Python 2 and 3.
    chunk, remainder = divmod(n, worker_count)

    workers = []
    for c in range(worker_count):
        start = c * chunk
        stop = start + chunk
        if c == worker_count - 1:
            # The last worker also takes the rows left over by the split.
            stop += remainder
        worker = Process(target=heavy_lifting, args=(c, phi, start, stop))
        workers.append(worker)
        worker.start()

    for worker in workers:
        worker.join()

    return phi
def _do_algebra(self, y):
    """Fit the output weights by least squares and compute the fitted outputs.

    Args:
        y: Target values, passed as the right-hand side to ``lstsq``
            together with ``self.phi``.

    Returns:
        Tuple ``(w, outputs)`` where ``w`` is the least-squares solution
        returned by ``lstsq(self.phi, y)`` and ``outputs`` is
        ``dot(w, transpose(self.phi))``.
    """
    phi = self.phi

    w = lstsq(phi, y)[0]
    outputs = dot(w, transpose(phi))
    # The original had an HDF5 write after this return; it could never run
    # and referenced a handle not defined in this method, so it is removed.
    return w, outputs
## Mu generation mu = self.mu = self._generate_mu(x) self.neurons = mu.shape[0] print('({} neurons)'.format(self.neurons)) # Save to HDF5 mu_h5 = mu_handle.create_dataset('mu', data = mu)
## Sigma calculation print('Calculating Sigma...') sigmas = self.sigmas = self._calculate_sigmas() # Save to HDF5 sigmas_h5 = sigma_handle.create_dataset('sigmas', data = sigmas) print('Done')
## Phi calculation print('Calculating Phi...') phi = self.phi = self._calculate_phi(x) print('Done') # Saving to HDF5 print('Serializing...') phi_h5 = phi_handle.create_dataset('phi', data = phi) del phi self.phi = phi_h5 print('Done')
## Algebra print('Doing final algebra...') w, os = self.w, _ = self._do_algebra(y) # Saving to HDF5 w_h5 = w_handle.create_dataset('w', data = w) os_h5 = os_handle.create_dataset('os', data = os)
def predict(self, test_data, output_path='iok3834.txt'):
    """Compute the network output for ``test_data`` and optionally save it.

    ``self.mu``, ``self.sigmas`` and ``self.w`` are replaced by their
    ``.value`` when they expose one; objects without ``.value`` are used
    as they are.  This makes the method safe to call more than once: the
    original unconditionally read ``.value`` and therefore failed on the
    second call, after the attributes had already been unwrapped.

    Args:
        test_data: Input passed unchanged to ``self._calculate_phi``.
        output_path: File the outputs are written to with ``savetxt``
            (one value per line).  Defaults to the file name the original
            code used; pass ``None`` to skip writing.

    Returns:
        ``dot(w, transpose(phi))`` for the phi computed from ``test_data``.
    """
    self.mu = getattr(self.mu, 'value', self.mu)
    self.sigmas = getattr(self.sigmas, 'value', self.sigmas)
    w = self.w = getattr(self.w, 'value', self.w)

    print('Calculating phi for test data...')
    phi = self._calculate_phi(test_data)
    predictions = dot(w, transpose(phi))
    if output_path is not None:
        savetxt(output_path, predictions, delimiter='\n')
    return predictions