Example 1
def KMO(data):
cor_ = pd.DataFrame.corr(data)
invCor = np.linalg.inv(cor_)
rows = cor_.shape[0]
cols = cor_.shape[1]
A = np.ones((rows, cols))
for i in range(rows):
for j in range(i, cols):
A[i, j] = - (invCor[i, j]) / (np.sqrt(invCor[i, i] * invCor[j, j]))
A[j, i] = A[i, j]
num = np.sum(np.sum((cor_)**2)) - np.sum(np.sum(np.diag(cor_**2)))
den = num + (np.sum(np.sum(A**2)) - np.sum(np.sum(np.diag(A**2))))
kmo = num / den
return kmo
Example 2
def remove_artifacts(self, image):
"""
Remove the connected components that are not within the parameters
Operates in place
:param image: sudoku's thresholded image w/o grid
:return: None
"""
labeled, features = label(image, structure=CROSS)
lbls = np.arange(1, features + 1)
areas = extract_feature(image, labeled, lbls, np.sum,
np.uint32, 0)
sides = extract_feature(image, labeled, lbls, min_side,
np.float32, 0, True)
diags = extract_feature(image, labeled, lbls, diagonal,
np.float32, 0, True)
for index in lbls:
area = areas[index - 1] / 255
side = sides[index - 1]
diag = diags[index - 1]
if side < 5 or side > 20 \
or diag < 15 or diag > 25 \
or area < 40:
image[labeled == index] = 0
return None
Example 3
def remove_artifacts(self, image):
"""
Remove the connected components that are not within the parameters
Operates in place
:param image: sudoku's thresholded image w/o grid
:return: None
"""
labeled, features = label(image, structure=CROSS)
lbls = np.arange(1, features + 1)
areas = extract_feature(image, labeled, lbls, np.sum,
np.uint32, 0)
sides = extract_feature(image, labeled, lbls, min_side,
np.float32, 0, True)
diags = extract_feature(image, labeled, lbls, diagonal,
np.float32, 0, True)
for index in lbls:
area = areas[index - 1] / 255
side = sides[index - 1]
diag = diags[index - 1]
if side < 5 or side > 20 \
or diag < 15 or diag > 25 \
or area < 40:
image[labeled == index] = 0
return None
Example 4
def evaluate(self, dataset):
predictions = self.predict(dataset[:,0])
confusion_matrix = sklearn_confusion_matrix(dataset[:,1], predictions, labels=self.__classes)
precisions = []
recalls = []
accuracies = []
for gender in self.__classes:
idx = self.__classes_indexes[gender]
precision = 1
recall = 1
if np.sum(confusion_matrix[idx,:]) > 0:
precision = confusion_matrix[idx][idx]/np.sum(confusion_matrix[idx,:])
if np.sum(confusion_matrix[:, idx]) > 0:
recall = confusion_matrix[idx][idx]/np.sum(confusion_matrix[:, idx])
precisions.append(precision)
recalls.append(recall)
precision = np.mean(precisions)
recall = np.mean(recalls)
f1 = (2*(precision*recall))/float(precision+recall)
accuracy = np.sum(confusion_matrix.diagonal())/float(np.sum(confusion_matrix))
return precision, recall, accuracy, f1
Example 5
def do_work_pso(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
output = pd.DataFrame(population[item].position)
output.columns = ['Split']
dataSplit = pd.concat([data, output], axis=1)
f1 = []
results = []
for i in range(nclusters):
dataSplited = (dataSplit.loc[dataSplit['Split']
== i]).drop('Split', axis=1)
dataSplited.index = range(len(dataSplited))
try:
results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme,
reg, 0, 50, HOC='true'))
resid = results[i].residuals()[3]
f1.append(resid)
except:
f1.append(10000)
# print((1 / np.sum(f1)))
return (1 / np.sum(f1))
Example 6
def do_work_ga(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
output = pd.DataFrame(population[item].genes)
output.columns = ['Split']
dataSplit = pd.concat([data, output], axis=1)
f1 = []
results = []
for i in range(nclusters):
dataSplited = (dataSplit.loc[dataSplit['Split']
== i]).drop('Split', axis=1)
dataSplited.index = range(len(dataSplited))
try:
results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme,
reg, 0, 50, HOC='true'))
resid = results[i].residuals()[3]
f1.append(resid)
except:
f1.append(10000)
return (1 / np.sum(f1))
# Main
Example 7
def xloads(self):
# Xloadings
A = self.data_.transpose().values
B = self.fscores.transpose().values
A_mA = A - A.mean(1)[:, None]
B_mB = B - B.mean(1)[:, None]
ssA = (A_mA**2).sum(1)
ssB = (B_mB**2).sum(1)
xloads_ = (np.dot(A_mA, B_mB.T) /
np.sqrt(np.dot(ssA[:, None], ssB[None])))
xloads = pd.DataFrame(
xloads_, index=self.manifests, columns=self.latent)
return xloads
Example 8
def do_work_pso(data, LVcsv, Mcsv, scheme, reg, h, maximo):
output = pd.DataFrame(population[item].position)
output.columns = ['Split']
dataSplit = pd.concat([data, output], axis=1)
f1 = []
results = []
for i in range(nclusters):
dataSplited = (dataSplit.loc[dataSplit['Split']
== i]).drop('Split', axis=1)
dataSplited.index = range(len(dataSplited))
try:
results.append(PyLSpm(dataSplited, LVcsv, Mcsv, scheme,
reg, 0, 50, HOC='true'))
resid = results[i].residuals()[3]
f1.append(resid)
except:
f1.append(10000)
print((1 / np.sum(f1)))
return (1 / np.sum(f1))
Example 9
def do_work_pso(self, item):
output = pd.DataFrame(self.population[item].position)
output.columns = ['Split']
dataSplit = pd.concat([self.data, output], axis=1)
f1 = []
results = []
for i in range(self.nclusters):
dataSplited = (dataSplit.loc[dataSplit['Split']
== i]).drop('Split', axis=1)
dataSplited.index = range(len(dataSplited))
try:
results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
self.reg, 0, 50, HOC='true'))
resid = results[i].residuals()[3]
f1.append(resid)
except:
f1.append(10000)
print((1 / np.sum(f1)))
return (1 / np.sum(f1))
Example 10
def do_work_tabu(self, item):
output = pd.DataFrame(self.population[item])
output.columns = ['Split']
dataSplit = pd.concat([self.data, output], axis=1)
f1 = []
results = []
for i in range(self.nclusters):
dataSplited = (dataSplit.loc[dataSplit['Split']
== i]).drop('Split', axis=1)
dataSplited.index = range(len(dataSplited))
try:
results.append(PyLSpm(dataSplited, self.LVcsv, self.Mcsv, self.scheme,
self.reg, 0, 50, HOC='true'))
resid = results[i].residuals()[3]
f1.append(resid)
except:
f1.append(10000)
cost = (np.sum(f1))
print(1 / cost)
return [self.population[item], cost]
Example 11
def forward(self, x):
x = F.relu(F.max_pool2d(self.conv1(x), 2))
# A different (control flow based) way to control dropout
if self.training:
x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
else:
x = F.relu(F.max_pool2d(self.conv2(x), 2))
x = x.view(-1, 320)
x = F.relu(self.fc1(x))
if self.training:
x = F.dropout(x, training=True)
x = self.fc2(x)
# Check for NaNs and infinites
nans = np.sum(np.isnan(x.data.numpy()))
infs = np.sum(np.isinf(x.data.numpy()))
if nans > 0:
print("There is {} NaN at the output layer".format(nans))
if infs > 0:
print("There is {} infinite values at the output layer".format(infs))
return F.log_softmax(x)
Example 12
def test():
model.eval()
test_loss = 0
correct = 0
for data, target in test_loader:
if args.cuda:
data, target = data.cuda(), target.cuda()
data, target = Variable(data, volatile=True), Variable(target)
output = model(data)
test_loss += F.nll_loss(output, target, size_average=False).data[0] # sum up batch loss
pred = output.data.max(1)[1] # get the index of the max log-probability
correct += pred.eq(target.data.view_as(pred)).cpu().sum()
test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
test_loss, correct, len(test_loader.dataset),
100. * correct / len(test_loader.dataset)))
Example 13
def score_samples(self, X):
"""Return the log-likelihood of each sample
See. "Pattern Recognition and Machine Learning"
by C. Bishop, 12.2.1 p. 574
or http://www.miketipping.com/papers/met-mppca.pdf
Parameters
----------
X: array, shape(n_samples, n_features)
The data.
Returns
-------
ll: array, shape (n_samples,)
Log-likelihood of each sample under the current model
"""
check_is_fitted(self, 'mean_')
X = check_array(X)
Xr = X - self.mean_
n_features = X.shape[1]
log_like = np.zeros(X.shape[0])
precision = self.get_precision()
log_like = -.5 * (Xr * (np.dot(Xr, precision))).sum(axis=1)
log_like -= .5 * (n_features * log(2. * np.pi)
- fast_logdet(precision))
return log_like
Example 14
def main():
files = tf.gfile.Glob(flags.FLAGS.src_path_1)
labels_uni = np.zeros([4716,1])
labels_matrix = np.zeros([4716,4716])
for file in files:
labels_all = get_video_input_feature(file)
print(len(labels_all[0][2]),len(labels_all[0][3]),len(labels_all[0][4]),len(labels_all[0][5]))
"""
for labels in labels_all:
for i in range(len(labels)):
labels_uni[labels[i]] += 1
for j in range(len(labels)):
labels_matrix[labels[i],labels[j]] += 1
labels_matrix = labels_matrix/labels_uni
labels_matrix = labels_matrix/(np.sum(labels_matrix,axis=0)-1.0)
for i in range(4716):
labels_matrix[i,i] = 1.0
np.savetxt('labels_uni.out', labels_uni, delimiter=',')
np.savetxt('labels_matrix.out', labels_matrix, delimiter=',')"""
Example 15
def calculate_gap(predictions, actuals, top_k=20):
"""Performs a local (numpy) calculation of the global average precision.
Only the top_k predictions are taken for each of the videos.
Args:
predictions: Matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
top_k: How many predictions to use per video.
Returns:
float: The global average precision.
"""
gap_calculator = ap_calculator.AveragePrecisionCalculator()
sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, actuals, top_k)
gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
return gap_calculator.peek_ap_at_n()
Example 16
def calculate_gap(predictions, actuals, top_k=20):
"""Performs a local (numpy) calculation of the global average precision.
Only the top_k predictions are taken for each of the videos.
Args:
predictions: Matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
top_k: How many predictions to use per video.
Returns:
float: The global average precision.
"""
gap_calculator = ap_calculator.AveragePrecisionCalculator()
sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, actuals, top_k)
gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
return gap_calculator.peek_ap_at_n()
Example 17
def format_lines(video_ids, predictions, labels, top_k):
batch_size = len(video_ids)
for video_index in range(batch_size):
n_recall = max(int(numpy.sum(labels[video_index])), 1)
# labels
label_indices = numpy.argpartition(labels[video_index], -n_recall)[-n_recall:]
label_predictions = [(class_index, predictions[video_index][class_index])
for class_index in label_indices]
label_predictions = sorted(label_predictions, key=lambda p: -p[1])
label_str = "\t".join(["%d\t%f"%(x,y) for x,y in label_predictions])
# predictions
top_k_indices = numpy.argpartition(predictions[video_index], -top_k)[-top_k:]
top_k_predictions = [(class_index, predictions[video_index][class_index])
for class_index in top_k_indices]
top_k_predictions = sorted(top_k_predictions, key=lambda p: -p[1])
top_k_str = "\t".join(["%d\t%f"%(x,y) for x,y in top_k_predictions])
# compute PERR
top_n_indices = numpy.argpartition(predictions[video_index], -n_recall)[-n_recall:]
positives = [labels[video_index][class_index]
for class_index in top_n_indices]
perr = sum(positives) / float(n_recall)
# URL
url = "https://www.youtube.com/watch?v=" + video_ids[video_index].decode('utf-8')
yield url + "\t" + str(1-perr) + "\t" + top_k_str + "\t" + label_str + "\n"
Example 18
def calculate_gap(predictions, actuals, top_k=20):
"""Performs a local (numpy) calculation of the global average precision.
Only the top_k predictions are taken for each of the videos.
Args:
predictions: Matrix containing the outputs of the model.
Dimensions are 'batch' x 'num_classes'.
actuals: Matrix containing the ground truth labels.
Dimensions are 'batch' x 'num_classes'.
top_k: How many predictions to use per video.
Returns:
float: The global average precision.
"""
gap_calculator = ap_calculator.AveragePrecisionCalculator()
sparse_predictions, sparse_labels, num_positives = top_k_by_class(predictions, actuals, top_k)
gap_calculator.accumulate(flatten(sparse_predictions), flatten(sparse_labels), sum(num_positives))
return gap_calculator.peek_ap_at_n()
Example 19
def getTrainKernel(self, params):
self.checkParams(params)
if (self.sameParams(params)): return self.cache['getTrainKernel']
ell = np.exp(params[0])
if (self.K_sq is None): K = sq_dist(self.X_scaled.T / ell)#precompute squared distances
else: K = self.K_sq / ell**2
self.cache['K_sq_scaled'] = K
# # # #manual computation (just for sanity checks)
# # # K1 = np.exp(-K / 2.0)
# # # K2 = np.zeros((self.X_scaled.shape[0], self.X_scaled.shape[0]))
# # # for i1 in xrange(self.X_scaled.shape[0]):
# # # for i2 in xrange(i1, self.X_scaled.shape[0]):
# # # diff = self.X_scaled[i1,:] - self.X_scaled[i2,:]
# # # K2[i1, i2] = np.exp(-np.sum(diff**2) / (2*ell))
# # # K2[i2, i1] = K2[i1, i2]
# # # print np.max((K1-K2)**2)
# # # sys.exit(0)
K_exp = np.exp(-K / 2.0)
self.cache['getTrainKernel'] = K_exp
self.saveParams(params)
return K_exp
Example 20
def getTrainTestKernel(self, params, Xtest):
self.checkParams(params)
ell2 = np.exp(2*params[0])
z = Xtest / np.sqrt(Xtest.shape[1])
S = 1 + self.X_scaled.dot(z.T)
sz = 1 + np.sum(z**2, axis=1)
sqrtEll2Psx = np.sqrt(ell2+self.sx)
sqrtEll2Psz = np.sqrt(ell2+sz)
K = S / np.outer(sqrtEll2Psx, sqrtEll2Psz)
return np.arcsin(K)
Example 21
def match_matrix(event: Event):
"""Returns a numpy participation matrix for the qualification matches in this event, used for calculating OPR.
Each row in the matrix corresponds to a single alliance in a match, meaning that there will be two rows (one for
red, one for blue) per match. Each column represents a single team, ordered by team number. If a team participated
on a certain alliance, the value at that row and column would be 1, otherwise, it would be 0. For example, an
event with teams 1-7 that featured a match that pitted teams 1, 3, and 5 against 2, 4, and 6 would have a match
matrix that looks like this (sans labels):
#1 #2 #3 #4 #5 #6 #7
qm1_red 1 0 1 0 1 0 0
qm1_blue 0 1 0 1 0 1 0
"""
match_list = []
for match in filter(lambda match: match['comp_level'] == 'qm', event.matches):
matchRow = []
for team in event.teams:
matchRow.append(1 if team['key'] in match['alliances']['red']['teams'] else 0)
match_list.append(matchRow)
matchRow = []
for team in event.teams:
matchRow.append(1 if team['key'] in match['alliances']['blue']['teams'] else 0)
match_list.append(matchRow)
mat = numpy.array(match_list)
sum_matches = numpy.sum(mat, axis=0)
avg_team_matches = sum(sum_matches) / float(len(sum_matches))
return mat[:, numpy.apply_along_axis(numpy.count_nonzero, 0, mat) > avg_team_matches - 2]
Example 22
def compute_angle(pt0, pt1, pt2):
"""
Given 3 points, compute the cosine of the angle from pt0
:type pt0: numpy.array
:type pt1: numpy.array
:type pt2: numpy.array
:return: cosine of angle
"""
a = pt0 - pt1
b = pt0 - pt2
return (np.sum(a * b)) / (np.linalg.norm(a) * np.linalg.norm(b))
Example 23
def _zoning(image):
"""
It works better with DSIZE = 28
~0.9967 precision and recall
:param image:
:return: #pixels/area ratio of each zone (7x7) as feature vector
"""
zones = []
for i in range(0, 28, 7):
for j in range(0, 28, 7):
roi = image[i:i+7, j:j+7]
val = (np.sum(roi)/255) / 49.
zones.append(val)
return np.array(zones, np.float32)
Example 24
def getTypeProblem (self, solution_filename):
''' Get the type of problem directly from the solution file (in case we do not have an info file)'''
if 'task' not in self.info.keys():
solution = np.array(data_converter.file_to_array(solution_filename))
target_num = solution.shape[1]
self.info['target_num']=target_num
if target_num == 1: # if we have only one column
solution = np.ravel(solution) # flatten
nbr_unique_values = len(np.unique(solution))
if nbr_unique_values < len(solution)/8:
# Classification
self.info['label_num'] = nbr_unique_values
if nbr_unique_values == 2:
self.info['task'] = 'binary.classification'
self.info['target_type'] = 'Binary'
else:
self.info['task'] = 'multiclass.classification'
self.info['target_type'] = 'Categorical'
else:
# Regression
self.info['label_num'] = 0
self.info['task'] = 'regression'
self.info['target_type'] = 'Numerical'
else:
# Multilabel or multiclass
self.info['label_num'] = target_num
self.info['target_type'] = 'Binary'
if any(item > 1 for item in map(np.sum,solution.astype(int))):
self.info['task'] = 'multilabel.classification'
else:
self.info['task'] = 'multiclass.classification'
return self.info['task']
Example 25
def binarize_predictions(array, task='binary.classification'):
''' Turn predictions into decisions {0,1} by selecting the class with largest
score for multiclass problems and thresholding at 0.5 for other cases.'''
# add a very small random value as tie breaker (a bit bad because this changes the score every time)
# so to make sure we get the same result every time, we seed it
#eps = 1e-15
#np.random.seed(sum(array.shape))
#array = array + eps*np.random.rand(array.shape[0],array.shape[1])
bin_array = np.zeros(array.shape)
if (task != 'multiclass.classification') or (array.shape[1]==1):
bin_array[array>=0.5] = 1
else:
sample_num=array.shape[0]
for i in range(sample_num):
j = np.argmax(array[i,:])
bin_array[i,j] = 1
return bin_array
Example 26
def acc_stat (solution, prediction):
''' Return accuracy statistics TN, FP, TP, FN
Assumes that solution and prediction are binary 0/1 vectors.'''
# This uses floats so the results are floats
TN = sum(np.multiply((1-solution), (1-prediction)))
FN = sum(np.multiply(solution, (1-prediction)))
TP = sum(np.multiply(solution, prediction))
FP = sum(np.multiply((1-solution), prediction))
#print "TN =",TN
#print "FP =",FP
#print "TP =",TP
#print "FN =",FN
return (TN, FP, TP, FN)
Example 27
def pac_metric (solution, prediction, task='binary.classification'):
''' Probabilistic Accuracy based on log_loss metric.
We assume the solution is in {0, 1} and prediction in [0, 1].
Otherwise, run normalize_array.'''
debug_flag=False
[sample_num, label_num] = solution.shape
if label_num==1: task='binary.classification'
eps = 1e-15
the_log_loss = log_loss(solution, prediction, task)
# Compute the base log loss (using the prior probabilities)
pos_num = 1.* sum(solution) # float conversion!
frac_pos = pos_num / sample_num # prior proba of positive class
the_base_log_loss = prior_log_loss(frac_pos, task)
# Alternative computation of the same thing (slower)
# Should always return the same thing except in the multi-label case
# For which the analytic solution makes more sense
if debug_flag:
base_prediction = np.empty(prediction.shape)
for k in range(sample_num): base_prediction[k,:] = frac_pos
base_log_loss = log_loss(solution, base_prediction, task)
diff = np.array(abs(the_base_log_loss-base_log_loss))
if len(diff.shape)>0: diff=max(diff)
if(diff)>1e-10:
print('Arrggh {} != {}'.format(the_base_log_loss,base_log_loss))
# Exponentiate to turn into an accuracy-like score.
# In the multi-label case, we need to average AFTER taking the exp
# because it is an NL operation
pac = mvmean(np.exp(-the_log_loss))
base_pac = mvmean(np.exp(-the_base_log_loss))
# Normalize: 0 for random, 1 for perfect
score = (pac - base_pac) / sp.maximum(eps, (1 - base_pac))
return score
Example 28
def auc_metric(solution, prediction, task='binary.classification'):
''' Normarlized Area under ROC curve (AUC).
Return Gini index = 2*AUC-1 for binary classification problems.
Should work for a vector of binary 0/1 (or -1/1)"solution" and any discriminant values
for the predictions. If solution and prediction are not vectors, the AUC
of the columns of the matrices are computed and averaged (with no weight).
The same for all classification problems (in fact it treats well only the
binary and multilabel classification problems).'''
#auc = metrics.roc_auc_score(solution, prediction, average=None)
# There is a bug in metrics.roc_auc_score: auc([1,0,0],[1e-10,0,0]) incorrect
label_num=solution.shape[1]
auc=np.empty(label_num)
for k in range(label_num):
r_ = tiedrank(prediction[:,k])
s_ = solution[:,k]
if sum(s_)==0: print('WARNING: no positive class example in class {}'.format(k+1))
npos = sum(s_==1)
nneg = sum(s_<1)
auc[k] = (sum(r_[s_==1]) - npos*(npos+1)/2) / (nneg*npos)
return 2*mvmean(auc)-1
### END CLASSIFICATION METRICS
# ======= Specialized scores ========
# We run all of them for all tasks even though they don't make sense for some tasks
Example 29
def prior_log_loss(frac_pos, task = 'binary.classification'):
''' Baseline log loss. For multiplr classes ot labels return the volues for each column'''
eps = 1e-15
frac_pos_ = sp.maximum (eps, frac_pos)
if (task != 'multiclass.classification'): # binary case
frac_neg = 1-frac_pos
frac_neg_ = sp.maximum (eps, frac_neg)
pos_class_log_loss_ = - frac_pos * np.log(frac_pos_)
neg_class_log_loss_ = - frac_neg * np.log(frac_neg_)
base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
# base_log_loss = mvmean(base_log_loss)
# print('binary {}'.format(base_log_loss))
# In the multilabel case, the right thing i to AVERAGE not sum
# We return all the scores so we can normalize correctly later on
else: # multiclass case
fp = frac_pos_ / sum(frac_pos_) # Need to renormalize the lines in multiclass case
# Only ONE label is 1 in the multiclass case active for each line
pos_class_log_loss_ = - frac_pos * np.log(fp)
base_log_loss = np.sum(pos_class_log_loss_)
return base_log_loss
# sklearn implementations for comparison
Example 30
def num_lines (filename):
''' Count the number of lines of file'''
return sum(1 for line in open(filename))
Example 31
def tp_filter(X, Y, feat_num=1000, verbose=True):
''' TP feature selection in the spirit of the winners of the KDD cup 2001
Only for binary classification and sparse matrices'''
if issparse(X) and len(Y.shape)==1 and len(set(Y))==2 and (sum(Y)/Y.shape[0])<0.1:
if verbose: print("========= Filtering features...")
Posidx=Y>0
#npos = sum(Posidx)
#Negidx=Y<=0
#nneg = sum(Negidx)
nz=X.nonzero()
mx=X[nz].max()
if X[nz].min()==mx: # sparse binary
if mx!=1: X[nz]=1
tp=csr_matrix.sum(X[Posidx,:], axis=0)
#fn=npos-tp
#fp=csr_matrix.sum(X[Negidx,:], axis=0)
#tn=nneg-fp
else:
tp=np.sum(X[Posidx,:]>0, axis=0)
#tn=np.sum(X[Negidx,:]<=0, axis=0)
#fn=np.sum(X[Posidx,:]<=0, axis=0)
#fp=np.sum(X[Negidx,:]>0, axis=0)
tp=np.ravel(tp)
idx=sorted(range(len(tp)), key=tp.__getitem__, reverse=True)
return idx[0:feat_num]
else:
feat_num = X.shape[1]
return range(feat_num)
Example 32
def predict(self, X):
prediction = self.predict_method(X)
# Calibrate proba
if self.task != 'regression' and self.postprocessor!=None:
prediction = self.postprocessor.predict_proba(prediction)
# Keep only 2nd column because the second one is 1-first
if self.target_num==1 and len(prediction.shape)>1 and prediction.shape[1]>1:
prediction = prediction[:,1]
# Make sure the normalization is correct
if self.task=='multiclass.classification':
eps = 1e-15
norma = np.sum(prediction, axis=1)
for k in range(prediction.shape[0]):
prediction[k,:] /= sp.maximum(norma[k], eps)
return prediction
Example 33
def fit(self, X, Y):
if len(Y.shape)==1:
Y = np.array([Y]).transpose() # Transform vector into column matrix
# This is NOT what we want: Y = Y.reshape( -1, 1 ), because Y.shape[1] out of range
self.n_target = Y.shape[1] # Num target values = num col of Y
self.n_label = len(set(Y.ravel())) # Num labels = num classes (categories of categorical var if n_target=1 or n_target if labels are binary )
# Create the right number of copies of the predictor instance
if len(self.predictors)!=self.n_target:
predictorInstance = self.predictors[0]
self.predictors = [predictorInstance]
for i in range(1,self.n_target):
self.predictors.append(copy.copy(predictorInstance))
# Fit all predictors
for i in range(self.n_target):
# Update the number of desired prodictos
if hasattr(self.predictors[i], 'n_estimators'):
self.predictors[i].n_estimators=self.n_estimators
# Subsample if desired
if self.balance:
pos = Y[:,i]>0
neg = Y[:,i]<=0
if sum(pos)
chosen = pos
not_chosen = neg
else:
chosen = neg
not_chosen = pos
num = sum(chosen)
idx=filter(lambda(x): x[1]==True, enumerate(not_chosen))
idx=np.array(zip(*idx)[0])
np.random.shuffle(idx)
chosen[idx[0:min(num, len(idx))]]=True
# Train with chosen samples
self.predictors[i].fit(X[chosen,:],Y[chosen,i])
else:
self.predictors[i].fit(X,Y[:,i])
return
Example 34
def get_batch_loss(self, input_batch, output_batch):
dynet.renew_cg()
# Dimension: maxSentLength * minibatch_size
wids = []
wids_reversed = []
# List of lists to store whether an input is
# present(1)/absent(0) for an example at a time step
# masks = [] # Dimension: maxSentLength * minibatch_size
# tot_words = 0
maxSentLength = max([len(sent) for sent in input_batch])
for j in range(maxSentLength):
wids.append([(self.src_vocab[sent[j]].i if len(sent)>j else self.src_vocab.END_TOK.i) for sent in input_batch])
wids_reversed.append([(self.src_vocab[sent[len(sent)- j-1]].i if len(sent)>j else self.src_vocab.END_TOK.i) for sent in input_batch])
# mask = [(1 if len(sent)>j else 0) for sent in input_batch]
# masks.append(mask)
#tot_words += sum(mask)
embedded_batch = self.embed_batch_seq(wids)
embedded_batch_reverse = self.embed_batch_seq(wids_reversed)
encoded_batch = self.encode_batch_seq(embedded_batch, embedded_batch_reverse)
# pass last hidden state of encoder to decoder
return self.decode_batch(encoded_batch, output_batch)
Example 35
def plotFields(layer,fieldShape=None,channel=None,figOffset=1,cmap=None,padding=0.01):
# Receptive Fields Summary
try:
W = layer.W
except:
W = layer
wp = W.eval().transpose();
if len(np.shape(wp)) < 4:# Fully connected layer, has no shape
fields = np.reshape(wp,list(wp.shape[0:-1])+fieldShape)
else:# Convolutional layer already has shape
features, channels, iy, ix = np.shape(wp)
if channel is not None:
fields = wp[:,channel,:,:]
else:
fields = np.reshape(wp,[features*channels,iy,ix])
perRow = int(math.floor(math.sqrt(fields.shape[0])))
perColumn = int(math.ceil(fields.shape[0]/float(perRow)))
fig = mpl.figure(figOffset); mpl.clf()
# Using image grid
from mpl_toolkits.axes_grid1 import ImageGrid
grid = ImageGrid(fig,111,nrows_ncols=(perRow,perColumn),axes_pad=padding,cbar_mode='single')
for i in range(0,np.shape(fields)[0]):
im = grid[i].imshow(fields[i],cmap=cmap);
grid.cbar_axes[0].colorbar(im)
mpl.title('%s Receptive Fields' % layer.name)
# old way
# fields2 = np.vstack([fields,np.zeros([perRow*perColumn-fields.shape[0]] + list(fields.shape[1:]))])
# tiled = []
# for i in range(0,perColumn*perRow,perColumn):
# tiled.append(np.hstack(fields2[i:i+perColumn]))
#
# tiled = np.vstack(tiled)
# mpl.figure(figOffset); mpl.clf(); mpl.imshow(tiled,cmap=cmap); mpl.title('%s Receptive Fields' % layer.name); mpl.colorbar();
mpl.figure(figOffset+1); mpl.clf(); mpl.imshow(np.sum(np.abs(fields),0),cmap=cmap); mpl.title('%s Total Absolute Input Dependency' % layer.name); mpl.colorbar()
Example 36
def plotFields(layer,fieldShape=None,channel=None,maxFields=25,figName='ReceptiveFields',cmap=None,padding=0.01):
# Receptive Fields Summary
W = layer.W
wp = W.eval().transpose();
if len(np.shape(wp)) < 4:# Fully connected layer, has no shape
fields = np.reshape(wp,list(wp.shape[0:-1])+fieldShape)
else:# Convolutional layer already has shape
features, channels, iy, ix = np.shape(wp)
if channel is not None:
fields = wp[:,channel,:,:]
else:
fields = np.reshape(wp,[features*channels,iy,ix])
fieldsN = min(fields.shape[0],maxFields)
perRow = int(math.floor(math.sqrt(fieldsN)))
perColumn = int(math.ceil(fieldsN/float(perRow)))
fig = mpl.figure(figName); mpl.clf()
# Using image grid
from mpl_toolkits.axes_grid1 import ImageGrid
grid = ImageGrid(fig,111,nrows_ncols=(perRow,perColumn),axes_pad=padding,cbar_mode='single')
for i in range(0,fieldsN):
im = grid[i].imshow(fields[i],cmap=cmap);
grid.cbar_axes[0].colorbar(im)
mpl.title('%s Receptive Fields' % layer.name)
# old way
# fields2 = np.vstack([fields,np.zeros([perRow*perColumn-fields.shape[0]] + list(fields.shape[1:]))])
# tiled = []
# for i in range(0,perColumn*perRow,perColumn):
# tiled.append(np.hstack(fields2[i:i+perColumn]))
#
# tiled = np.vstack(tiled)
# mpl.figure(figOffset); mpl.clf(); mpl.imshow(tiled,cmap=cmap); mpl.title('%s Receptive Fields' % layer.name); mpl.colorbar();
mpl.figure(figName+' Total'); mpl.clf(); mpl.imshow(np.sum(np.abs(fields),0),cmap=cmap); mpl.title('%s Total Absolute Input Dependency' % layer.name); mpl.colorbar()
Example 37
def analytic_convolution_gaussian(mu1,covar1,mu2,covar2):
"""
The analytic vconvolution of two Gaussians is simply the sum of the two mean vectors
and the two convariance matrixes
--- INPUT ---
mu1 The mean of the first gaussian
covar1 The covariance matrix of of the first gaussian
mu2 The mean of the second gaussian
covar2 The covariance matrix of of the second gaussian
"""
muconv = mu1+mu2
covarconv = covar1+covar2
return muconv, covarconv
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
Example 38
def reshape_array(array, newsize, pixcombine='sum'):
"""
Reshape an array to a give size using either the sum, mean or median of the pixels binned
Note that the old array dimensions have to be multiples of the new array dimensions
--- INPUT ---
array Array to reshape (combine pixels)
newsize New size of array
pixcombine The method to combine the pixels with. Choices are sum, mean and median
"""
sh = newsize[0],array.shape[0]//newsize[0],newsize[1],array.shape[1]//newsize[1]
pdb.set_trace()
if pixcombine == 'sum':
reshapedarray = array.reshape(sh).sum(-1).sum(1)
elif pixcombine == 'mean':
reshapedarray = array.reshape(sh).mean(-1).mean(1)
elif pixcombine == 'median':
reshapedarray = array.reshape(sh).median(-1).median(1)
return reshapedarray
# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
Example 39
def inner_product_to_infty(self,gf1,gf2):
"Inner product on non-compact domain"
factors = [s.get_scale_factor() for s in self.stencils]
factor = np.prod(factors)
integrand = (factor*gf1*self.weights2D*gf2*self.dRdX)
integrand[-1] = 0
integral = np.sum(integrand)
return integral
Example 40
def get_integration_weights(order,nodes=None):
"""
Returns the integration weights for Gauss-Lobatto quadrature
as a function of the order of the polynomial we want to
represent.
See: https://en.wikipedia.org/wiki/Gaussian_quadrature
See: arXive:gr-qc/0609020v1
"""
if np.all(nodes == False):
nodes=get_quadrature_points(order)
if poly == polynomial.chebyshev.Chebyshev:
weights = np.empty((order+1))
weights[1:-1] = np.pi/order
weights[0] = np.pi/(2*order)
weights[-1] = weights[0]
return weights
elif poly == polynomial.legendre.Legendre:
interior_weights = 2/((order+1)*order*poly.basis(order)(nodes[1:-1])**2)
boundary_weights = np.array([1-0.5*np.sum(interior_weights)])
weights = np.concatenate((boundary_weights,
interior_weights,
boundary_weights))
return weights
else:
raise ValueError("Not a known polynomial type.")
return False
Example 41
def inner_product(self,gf1,gf2):
"""Calculates the 2D inner product between grid functions
gf1 and gf2 using the appropriate quadrature rule
"""
factors = [s.get_scale_factor() for s in self.stencils]
factor = np.prod(factors)
integrand = gf1*self.weights2D*gf2
integral_unit_cell = np.sum(integrand)
integral_physical = integral_unit_cell*factor
return integral_physical
Example 42
def compute_rhs(rhs):
U_dealiased = work[((3,) + FFT.work_shape(dealias), float, 0)]
curl_dealiased = work[((3,) + FFT.work_shape(dealias), float, 1)]
for i in range(3):
U_dealiased[i] = FFT.ifftn(U_hat[i], U_dealiased[i], dealias)
curl_dealiased = curl(U_hat, curl_dealiased)
rhs = cross(U_dealiased, curl_dealiased, rhs)
P_hat[:] = sum(rhs*K_over_K2, 0, out=P_hat)
rhs -= P_hat*K
rhs -= nu*K2*U_hat
return rhs
# Initialize a Taylor Green vortex
Example 43
def gof(self):
r2mean = np.mean(self.r2.T[self.endoexo()[0]].values)
AVEmean = self.AVE().copy()
totalblock = 0
for i in range(self.lenlatent):
block = self.data_[self.Variables['measurement']
[self.Variables['latent'] == self.latent[i]]]
block = len(block.columns.values)
totalblock += block
AVEmean[self.latent[i]] = AVEmean[self.latent[i]] * block
AVEmean = np.sum(AVEmean) / totalblock
return np.sqrt(AVEmean * r2mean)
Example 44
def cr(self):
# Composite Reliability
composite = pd.DataFrame(0, index=np.arange(1), columns=self.latent)
for i in range(self.lenlatent):
block = self.data_[self.Variables['measurement']
[self.Variables['latent'] == self.latent[i]]]
p = len(block.columns)
if(p != 1):
cor_mat = np.cov(block.T)
evals, evecs = np.linalg.eig(cor_mat)
U, S, V = np.linalg.svd(cor_mat, full_matrices=False)
indices = np.argsort(evals)
indices = indices[::-1]
evecs = evecs[:, indices]
evals = evals[indices]
loadings = V[0, :] * np.sqrt(evals[0])
numerador = np.sum(abs(loadings))**2
denominador = numerador + (p - np.sum(loadings ** 2))
cr = numerador / denominador
composite[self.latent[i]] = cr
else:
composite[self.latent[i]] = 1
composite = composite.T
return(composite)
Example 45
def r2adjusted(self):
n = len(self.data_)
r2 = self.r2.values
r2adjusted = pd.DataFrame(0, index=np.arange(1), columns=self.latent)
for i in range(self.lenlatent):
p = sum(self.LVariables['target'] == self.latent[i])
r2adjusted[self.latent[i]] = r2[i] - \
(p * (1 - r2[i])) / (n - p - 1)
return r2adjusted.T
Example 46
def AVE(self):
# AVE
return self.comunalidades().apply(lambda column: column.sum() / (column != 0).sum())
Example 47
def fornell(self):
cor_ = pd.DataFrame.corr(self.fscores)**2
AVE = self.comunalidades().apply(lambda column: column.sum() / (column != 0).sum())
for i in range(len(cor_)):
cor_.ix[i, i] = AVE[i]
return(cor_)
Example 48
def fitness(self, data_, n_clusters, lvmodel, mvmodel, scheme, regression):
output = pd.DataFrame(self.genes)
output.columns = ['Split']
dataSplit = pd.concat([data_, output], axis=1)
f1 = []
results = []
for i in range(n_clusters):
dataSplited = (dataSplit.loc[dataSplit['Split']
== i]).drop('Split', axis=1)
dataSplited.index = range(len(dataSplited))
try:
results.append(PyLSpm(dataSplited, lvmodel, mvmodel, scheme,
regression, 0, 50, HOC='true'))
sumOuterResid = pd.DataFrame.sum(
pd.DataFrame.sum(results[i].residuals()[1]**2))
sumInnerResid = pd.DataFrame.sum(
pd.DataFrame.sum(results[i].residuals()[2]**2))
f1.append(sumOuterResid + sumInnerResid)
except:
f1.append(10000)
print((1 / np.sum(f1)))
return (1 / np.sum(f1))
Example 49
def roulettewheel(pop, fit):
fit = fit - min(fit)
sumf = sum(fit)
if(sumf == 0):
return pop[0]
prob = [(item + sum(fit[:index])) / sumf for index, item in enumerate(fit)]
prob_ = uniform(0, 1)
# print(prob)
individuo = (int(BinSearch(prob, prob_, 0, len(prob) - 1)))
return pop[individuo]
Example 50
def xavier_initializer(shape):
dim_sum = np.sum(shape)
if len(shape) == 1:
dim_sum += 1
bound = np.sqrt(2.0 / dim_sum)
return tf.random_uniform(shape, minval=-bound, maxval=bound)
# Assigning network variables to target network variables