pythonspiit函数_Python numpy.sum() 使用实例

最新推荐文章于 2024-02-16 13:03:31 发布

weixin_39996478

最新推荐文章于 2024-02-16 13:03:31 发布

阅读量787

点赞数

文章标签： pythonspiit函数

Example 1

def KMO(data):
    """Return the Kaiser-Meyer-Olkin (KMO) statistic of ``data``.

    With ``r`` the correlation matrix of the columns and ``inv`` its inverse,
    the matrix ``a_ij = -inv_ij / sqrt(inv_ii * inv_jj)`` is built and the
    result is ``sum(r_ij^2) / (sum(r_ij^2) + sum(a_ij^2))`` where both sums
    run over the off-diagonal entries only.

    :param data: ``pandas.DataFrame`` with one variable per column.
    :return: the KMO value (a float).
    :raises numpy.linalg.LinAlgError: if the correlation matrix is singular.
    """
    corr = data.corr().values
    inv_corr = np.linalg.inv(corr)
    inv_diag = np.diag(inv_corr)
    # Whole-matrix form of the original element-by-element double loop.
    partial = -inv_corr / np.sqrt(np.outer(inv_diag, inv_diag))

    def off_diagonal_square_sum(matrix):
        # Sum of squared entries, excluding the main diagonal.
        squared = matrix ** 2
        return squared.sum() - np.trace(squared)

    num = off_diagonal_square_sum(corr)
    den = num + off_diagonal_square_sum(partial)
    return num / den

Example 2

def remove_artifacts(self, image):
    """
    Remove the connected components that are not within the parameters.

    A labelled component is erased (its pixels set to 0) when its minimum
    side is below 5 or above 20, its diagonal is below 15 or above 25, or
    its area is below 40. Operates in place.

    :param image: sudoku's thresholded image w/o grid
    :return: None
    """
    labeled, features = label(image, structure=CROSS)
    lbls = np.arange(1, features + 1)
    areas = extract_feature(image, labeled, lbls, np.sum, np.uint32, 0)
    sides = extract_feature(image, labeled, lbls, min_side, np.float32, 0, True)
    diags = extract_feature(image, labeled, lbls, diagonal, np.float32, 0, True)

    for position, index in enumerate(lbls):
        # The summed intensity is divided by 255 to obtain a pixel count.
        # NOTE(review): assumes foreground pixels are stored as 255 - confirm.
        area = areas[position] / 255
        side = sides[position]
        diag = diags[position]
        out_of_range = (
            side < 5 or side > 20
            or diag < 15 or diag > 25
            or area < 40
        )
        if out_of_range:
            image[labeled == index] = 0
    return None

Example 3

def remove_artifacts(self, image):
    """
    Remove the connected components that are not within the parameters.

    A labelled component is erased (its pixels set to 0) when its minimum
    side is below 5 or above 20, its diagonal is below 15 or above 25, or
    its area is below 40. Operates in place.

    :param image: sudoku's thresholded image w/o grid
    :return: None
    """
    labeled, features = label(image, structure=CROSS)
    lbls = np.arange(1, features + 1)
    areas = extract_feature(image, labeled, lbls, np.sum, np.uint32, 0)
    sides = extract_feature(image, labeled, lbls, min_side, np.float32, 0, True)
    diags = extract_feature(image, labeled, lbls, diagonal, np.float32, 0, True)

    for position, index in enumerate(lbls):
        # The summed intensity is divided by 255 to obtain a pixel count.
        # NOTE(review): assumes foreground pixels are stored as 255 - confirm.
        area = areas[position] / 255
        side = sides[position]
        diag = diags[position]
        out_of_range = (
            side < 5 or side > 20
            or diag < 15 or diag > 25
            or area < 40
        )
        if out_of_range:
            image[labeled == index] = 0
    return None

Example 4

def evaluate(self, dataset):
    """Score the classifier on ``dataset`` with macro-averaged metrics.

    Column 0 of ``dataset`` is passed to ``self.predict`` and column 1 is
    used as the ground truth.

    NOTE(review): this assumes ``sklearn_confusion_matrix`` is
    ``sklearn.metrics.confusion_matrix``, whose rows are true labels and
    whose columns are predicted labels. Under that layout the original code
    had precision and recall swapped (it divided by the row total for
    precision and by the column total for recall); that is corrected here.

    :param dataset: 2-D array indexed as ``dataset[:, 0]`` / ``dataset[:, 1]``.
    :return: tuple ``(precision, recall, accuracy, f1)`` where precision and
        recall are unweighted means over ``self.__classes``.
    """
    predictions = self.predict(dataset[:, 0])
    confusion = sklearn_confusion_matrix(dataset[:, 1], predictions, labels=self.__classes)

    precisions = []
    recalls = []
    for gender in self.__classes:
        idx = self.__classes_indexes[gender]
        hits = confusion[idx][idx]
        predicted_total = np.sum(confusion[:, idx])  # column: predicted as this class
        actual_total = np.sum(confusion[idx, :])     # row: truly in this class
        # A class with no predictions / no samples keeps the original default of 1.
        precisions.append(hits / float(predicted_total) if predicted_total > 0 else 1)
        recalls.append(hits / float(actual_total) if actual_total > 0 else 1)

    precision = np.mean(precisions)
    recall = np.mean(recalls)
    # Guard the harmonic mean against 0/0 when both averages are zero.
    if precision + recall > 0:
        f1 = (2 * (precision * recall)) / float(precision + recall)
    else:
        f1 = 0.0
    accuracy = np.sum(confusion.diagonal()) / float(np.sum(confusion))
    return precision, recall, accuracy, f1

Example 5

def do_work_pso(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    """Return the fitness of particle ``item`` for a cluster assignment.

    ``population[item].position`` gives one cluster id per row of ``data``.
    A ``PyLSpm`` model is fitted on each cluster's rows and element 3 of its
    ``residuals()`` is taken as that cluster's cost; a cluster whose model
    cannot be fitted costs 10000. The fitness is ``1 / sum(costs)``.

    ``h`` and ``maximo`` are accepted but not used: the model is always built
    with the literal arguments ``0`` and ``50``.
    NOTE(review): presumably these were meant to be ``h``/``maximo`` - confirm.

    The original read the model back with ``results[i]``; after one failed
    cluster that index no longer matched, so every later cluster was
    penalised as well. The freshly built model is used directly instead.
    """
    split = pd.DataFrame(population[item].position)
    split.columns = ['Split']
    labelled = pd.concat([data, split], axis=1)

    f1 = []
    for cluster in range(nclusters):
        subset = labelled.loc[labelled['Split'] == cluster].drop('Split', axis=1)
        subset.index = range(len(subset))
        try:
            model = PyLSpm(subset, LVcsv, Mcsv, scheme, reg, 0, 50, HOC='true')
            f1.append(model.residuals()[3])
        except Exception:
            # Best effort: an unfittable cluster gets a large fixed penalty.
            f1.append(10000)
    return 1 / np.sum(f1)

Example 6

def do_work_ga(item, nclusters, data, LVcsv, Mcsv, scheme, reg, h, maximo, population):
    """Return the fitness of individual ``item`` for a cluster assignment.

    ``population[item].genes`` gives one cluster id per row of ``data``.
    A ``PyLSpm`` model is fitted on each cluster's rows and element 3 of its
    ``residuals()`` is taken as that cluster's cost; a cluster whose model
    cannot be fitted costs 10000. The fitness is ``1 / sum(costs)``.

    ``h`` and ``maximo`` are accepted but not used: the model is always built
    with the literal arguments ``0`` and ``50``.
    NOTE(review): presumably these were meant to be ``h``/``maximo`` - confirm.

    The original read the model back with ``results[i]``; after one failed
    cluster that index no longer matched, so every later cluster was
    penalised as well. The freshly built model is used directly instead.
    """
    split = pd.DataFrame(population[item].genes)
    split.columns = ['Split']
    labelled = pd.concat([data, split], axis=1)

    f1 = []
    for cluster in range(nclusters):
        subset = labelled.loc[labelled['Split'] == cluster].drop('Split', axis=1)
        subset.index = range(len(subset))
        try:
            model = PyLSpm(subset, LVcsv, Mcsv, scheme, reg, 0, 50, HOC='true')
            f1.append(model.residuals()[3])
        except Exception:
            # Best effort: an unfittable cluster gets a large fixed penalty.
            f1.append(10000)
    return 1 / np.sum(f1)

# Main

Example 7

def xloads(self):
    """Return the cross-loadings table.

    Each entry is the Pearson correlation between one column of
    ``self.data_`` and one column of ``self.fscores``.

    :return: ``pandas.DataFrame`` indexed by ``self.manifests`` with columns
        ``self.latent``.
    """
    # One variable per row, so that the statistics run along axis 1.
    manifest_rows = self.data_.transpose().values
    score_rows = self.fscores.transpose().values

    manifest_dev = manifest_rows - manifest_rows.mean(axis=1, keepdims=True)
    score_dev = score_rows - score_rows.mean(axis=1, keepdims=True)

    manifest_ss = (manifest_dev ** 2).sum(axis=1)
    score_ss = (score_dev ** 2).sum(axis=1)

    # covariance-like cross products over the product of the two norms
    loadings = np.dot(manifest_dev, score_dev.T) / np.sqrt(np.outer(manifest_ss, score_ss))
    return pd.DataFrame(loadings, index=self.manifests, columns=self.latent)

Example 8

def do_work_pso(data, LVcsv, Mcsv, scheme, reg, h, maximo):
    """Print and return the fitness of the current particle.

    This variant reads ``population``, ``item`` and ``nclusters`` from the
    enclosing/module scope rather than from its parameters.
    ``population[item].position`` gives one cluster id per row of ``data``.
    A ``PyLSpm`` model is fitted on each cluster's rows and element 3 of its
    ``residuals()`` is taken as that cluster's cost; a cluster whose model
    cannot be fitted costs 10000. The fitness is ``1 / sum(costs)``.

    ``h`` and ``maximo`` are accepted but not used: the model is always built
    with the literal arguments ``0`` and ``50``.
    NOTE(review): presumably these were meant to be ``h``/``maximo`` - confirm.

    The original read the model back with ``results[i]``; after one failed
    cluster that index no longer matched, so every later cluster was
    penalised as well. The freshly built model is used directly instead.
    """
    split = pd.DataFrame(population[item].position)
    split.columns = ['Split']
    labelled = pd.concat([data, split], axis=1)

    f1 = []
    for cluster in range(nclusters):
        subset = labelled.loc[labelled['Split'] == cluster].drop('Split', axis=1)
        subset.index = range(len(subset))
        try:
            model = PyLSpm(subset, LVcsv, Mcsv, scheme, reg, 0, 50, HOC='true')
            f1.append(model.residuals()[3])
        except Exception:
            # Best effort: an unfittable cluster gets a large fixed penalty.
            f1.append(10000)

    fitness = 1 / np.sum(f1)
    print(fitness)
    return fitness

Example 9

def do_work_pso(self, item):
    """Print and return the fitness of particle ``item``.

    ``self.population[item].position`` gives one cluster id per row of
    ``self.data``. A ``PyLSpm`` model is fitted on each cluster's rows and
    element 3 of its ``residuals()`` is taken as that cluster's cost; a
    cluster whose model cannot be fitted costs 10000. The fitness is
    ``1 / sum(costs)``.

    The original read the model back with ``results[i]``; after one failed
    cluster that index no longer matched, so every later cluster was
    penalised as well. The freshly built model is used directly instead.
    """
    split = pd.DataFrame(self.population[item].position)
    split.columns = ['Split']
    labelled = pd.concat([self.data, split], axis=1)

    f1 = []
    for cluster in range(self.nclusters):
        subset = labelled.loc[labelled['Split'] == cluster].drop('Split', axis=1)
        subset.index = range(len(subset))
        try:
            model = PyLSpm(subset, self.LVcsv, self.Mcsv, self.scheme,
                           self.reg, 0, 50, HOC='true')
            f1.append(model.residuals()[3])
        except Exception:
            # Best effort: an unfittable cluster gets a large fixed penalty.
            f1.append(10000)

    fitness = 1 / np.sum(f1)
    print(fitness)
    return fitness

Example 10

def do_work_tabu(self, item):
    """Evaluate candidate ``item`` and return it together with its cost.

    ``self.population[item]`` gives one cluster id per row of ``self.data``.
    A ``PyLSpm`` model is fitted on each cluster's rows and element 3 of its
    ``residuals()`` is taken as that cluster's cost; a cluster whose model
    cannot be fitted costs 10000. ``1 / cost`` is printed.

    The original read the model back with ``results[i]``; after one failed
    cluster that index no longer matched, so every later cluster was
    penalised as well. The freshly built model is used directly instead.

    :return: ``[self.population[item], cost]`` where ``cost`` is the summed
        per-cluster cost (lower is better).
    """
    split = pd.DataFrame(self.population[item])
    split.columns = ['Split']
    labelled = pd.concat([self.data, split], axis=1)

    f1 = []
    for cluster in range(self.nclusters):
        subset = labelled.loc[labelled['Split'] == cluster].drop('Split', axis=1)
        subset.index = range(len(subset))
        try:
            model = PyLSpm(subset, self.LVcsv, self.Mcsv, self.scheme,
                           self.reg, 0, 50, HOC='true')
            f1.append(model.residuals()[3])
        except Exception:
            # Best effort: an unfittable cluster gets a large fixed penalty.
            f1.append(10000)

    cost = np.sum(f1)
    print(1 / cost)
    return [self.population[item], cost]

Example 11

def forward(self, x):
    """Run the network on ``x`` and return per-class log-probabilities.

    Dropout (both ``self.conv2_drop`` and the functional dropout after
    ``fc1``) is applied only while ``self.training`` is true. Before the
    final log-softmax the raw outputs are scanned and a message is printed
    if any NaN or infinite value is present.

    :param x: input batch; after the two conv/pool stages it must flatten
        to 320 features per sample (see the ``view(-1, 320)`` call).
    :return: ``log_softmax`` of the ``fc2`` output over dimension 1.
    """
    x = F.relu(F.max_pool2d(self.conv1(x), 2))

    # A different (control flow based) way to control dropout
    conv2_out = self.conv2(x)
    if self.training:
        conv2_out = self.conv2_drop(conv2_out)
    x = F.relu(F.max_pool2d(conv2_out, 2))

    x = x.view(-1, 320)
    x = F.relu(self.fc1(x))
    if self.training:
        x = F.dropout(x, training=True)
    x = self.fc2(x)

    # Check for NaNs and infinites. The tensor is moved to the CPU first so
    # the check also works when the model runs on a GPU.
    raw = x.detach().cpu().numpy()
    nans = np.sum(np.isnan(raw))
    infs = np.sum(np.isinf(raw))
    if nans > 0:
        print("There is {} NaN at the output layer".format(nans))
    if infs > 0:
        print("There is {} infinite values at the output layer".format(infs))

    # x is 2-D after the view above, so dim=1 is what the implicit-dim
    # (deprecated) call resolved to.
    return F.log_softmax(x, dim=1)

Example 12

def test():
    """Evaluate the module-level ``model`` on ``test_loader`` and print a summary.

    Uses the module-level names ``model``, ``test_loader``, ``args`` and
    ``F``. Prints the average negative log-likelihood per sample and the
    number / percentage of correct predictions.

    Updated from the pre-0.4 PyTorch idioms, which no longer work:
    ``Variable(..., volatile=True)`` is replaced by ``torch.no_grad()``,
    ``size_average=False`` by ``reduction='sum'`` and ``.data[0]`` by
    ``.item()``.
    """
    import torch  # local import: only ``F`` is known to be in module scope

    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            output = model(data)
            # sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # get the index of the max log-probability
            pred = output.max(1)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()

    sample_count = len(test_loader.dataset)
    test_loss /= sample_count
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, sample_count,
        100. * correct / sample_count))

Example 13

def score_samples(self, X):
    """Return the log-likelihood of each sample.

    See. "Pattern Recognition and Machine Learning"
    by C. Bishop, 12.2.1 p. 574
    or http://www.miketipping.com/papers/met-mppca.pdf

    Parameters
    ----------
    X : array, shape (n_samples, n_features)
        The data.

    Returns
    -------
    ll : array, shape (n_samples,)
        Log-likelihood of each sample under the current model
    """
    check_is_fitted(self, 'mean_')

    X = check_array(X)
    centered = X - self.mean_
    n_features = X.shape[1]
    precision = self.get_precision()

    # Quadratic form x^T P x for every row. The original pre-allocated a
    # zero array here that was immediately overwritten; that dead
    # allocation is removed.
    log_like = -.5 * (centered * (np.dot(centered, precision))).sum(axis=1)
    # Normalisation constant shared by all samples.
    log_like -= .5 * (n_features * log(2. * np.pi)
                      - fast_logdet(precision))
    return log_like

Example 14

def main():
    """Print the lengths of fields 2-5 of the first record of every input file.

    Input files are those matching the glob pattern held in
    ``flags.FLAGS.src_path_1``; each one is parsed with
    ``get_video_input_feature``.
    """
    for path in tf.gfile.Glob(flags.FLAGS.src_path_1):
        labels_all = get_video_input_feature(path)
        first = labels_all[0]
        print(len(first[2]), len(first[3]), len(first[4]), len(first[5]))

    # Disabled label co-occurrence statistics. In the original this code sat
    # inside an inert string literal while its two work arrays (one of them
    # 4716 x 4716) were still allocated on every run; the allocations are
    # now parked here together with the code that needs them.
    #
    # labels_uni = np.zeros([4716, 1])
    # labels_matrix = np.zeros([4716, 4716])
    # for labels in labels_all:
    #     for i in range(len(labels)):
    #         labels_uni[labels[i]] += 1
    #         for j in range(len(labels)):
    #             labels_matrix[labels[i], labels[j]] += 1
    # labels_matrix = labels_matrix / labels_uni
    # labels_matrix = labels_matrix / (np.sum(labels_matrix, axis=0) - 1.0)
    # for i in range(4716):
    #     labels_matrix[i, i] = 1.0
    # np.savetxt('labels_uni.out', labels_uni, delimiter=',')
    # np.savetxt('labels_matrix.out', labels_matrix, delimiter=',')

Example 15

def calculate_gap(predictions, actuals, top_k=20):
    """Performs a local (numpy) calculation of the global average precision.

    Only the top_k predictions are taken for each of the videos.

    Args:
      predictions: Matrix containing the outputs of the model.
        Dimensions are 'batch' x 'num_classes'.
      actuals: Matrix containing the ground truth labels.
        Dimensions are 'batch' x 'num_classes'.
      top_k: How many predictions to use per video.

    Returns:
      float: The global average precision.
    """
    gap_calculator = ap_calculator.AveragePrecisionCalculator()
    sparse_predictions, sparse_labels, num_positives = top_k_by_class(
        predictions, actuals, top_k)
    # Everything is fed to the calculator in a single flattened batch.
    gap_calculator.accumulate(
        flatten(sparse_predictions),
        flatten(sparse_labels),
        sum(num_positives))
    return gap_calculator.peek_ap_at_n()

Example 16

def calculate_gap(predictions, actuals, top_k=20):
    """Performs a local (numpy) calculation of the global average precision.

    Only the top_k predictions are taken for each of the videos.

    Args:
      predictions: Matrix containing the outputs of the model.
        Dimensions are 'batch' x 'num_classes'.
      actuals: Matrix containing the ground truth labels.
        Dimensions are 'batch' x 'num_classes'.
      top_k: How many predictions to use per video.

    Returns:
      float: The global average precision.
    """
    gap_calculator = ap_calculator.AveragePrecisionCalculator()
    sparse_predictions, sparse_labels, num_positives = top_k_by_class(
        predictions, actuals, top_k)
    # Everything is fed to the calculator in a single flattened batch.
    gap_calculator.accumulate(
        flatten(sparse_predictions),
        flatten(sparse_labels),
        sum(num_positives))
    return gap_calculator.peek_ap_at_n()

Example 17

def format_lines(video_ids, predictions, labels, top_k):
    """Yield one tab-separated report line per video.

    Each line has the form
    ``url <TAB> 1-PERR <TAB> top-k cells <TAB> label cells <NEWLINE>``
    where a "cell" is ``class_index <TAB> score`` and cells are ordered by
    decreasing score. PERR is the fraction of the ``n_recall`` highest
    scored classes that are labelled positive, with ``n_recall`` the number
    of positive labels of the video (at least 1).

    :param video_ids: sequence of byte strings (decoded as UTF-8).
    :param predictions: per-video sequence of class scores.
    :param labels: per-video sequence of 0/1 ground-truth labels.
    :param top_k: number of highest scored classes to report; values larger
        than the number of classes are clamped instead of raising.
    """
    def scored_cells(scores, class_indices):
        # "index<TAB>score" cells, highest score first.
        pairs = sorted(((c, scores[c]) for c in class_indices),
                       key=lambda p: -p[1])
        return "\t".join("%d\t%f" % (x, y) for x, y in pairs)

    for video_index, video_id in enumerate(video_ids):
        scores = predictions[video_index]
        truth = labels[video_index]
        n_recall = max(int(numpy.sum(truth)), 1)

        # labels
        label_indices = numpy.argpartition(truth, -n_recall)[-n_recall:]
        label_str = scored_cells(scores, label_indices)

        # predictions
        k = min(top_k, len(scores))
        top_k_indices = numpy.argpartition(scores, -k)[-k:]
        top_k_str = scored_cells(scores, top_k_indices)

        # compute PERR
        top_n_indices = numpy.argpartition(scores, -n_recall)[-n_recall:]
        perr = sum(truth[c] for c in top_n_indices) / float(n_recall)

        # URL
        url = "https://www.youtube.com/watch?v=" + video_id.decode('utf-8')
        yield url + "\t" + str(1 - perr) + "\t" + top_k_str + "\t" + label_str + "\n"

Example 18

def calculate_gap(predictions, actuals, top_k=20):
    """Performs a local (numpy) calculation of the global average precision.

    Only the top_k predictions are taken for each of the videos.

    Args:
      predictions: Matrix containing the outputs of the model.
        Dimensions are 'batch' x 'num_classes'.
      actuals: Matrix containing the ground truth labels.
        Dimensions are 'batch' x 'num_classes'.
      top_k: How many predictions to use per video.

    Returns:
      float: The global average precision.
    """
    gap_calculator = ap_calculator.AveragePrecisionCalculator()
    sparse_predictions, sparse_labels, num_positives = top_k_by_class(
        predictions, actuals, top_k)
    # Everything is fed to the calculator in a single flattened batch.
    gap_calculator.accumulate(
        flatten(sparse_predictions),
        flatten(sparse_labels),
        sum(num_positives))
    return gap_calculator.peek_ap_at_n()

Example 19

def getTrainKernel(self, params):
    """Return the training kernel ``exp(-d^2 / 2)`` for the given parameters.

    ``params[0]`` is the log of the length scale ``ell``. The scaled squared
    distances ``d^2`` are taken from ``self.K_sq / ell**2`` when
    ``self.K_sq`` is available, otherwise computed with ``sq_dist`` on
    ``self.X_scaled.T / ell``.

    Results are cached: when ``self.sameParams(params)`` is true the matrix
    stored under ``self.cache['getTrainKernel']`` is returned as is.
    Otherwise both ``self.cache['K_sq_scaled']`` and
    ``self.cache['getTrainKernel']`` are refreshed and the parameters are
    recorded with ``self.saveParams``.
    """
    self.checkParams(params)
    if self.sameParams(params):
        return self.cache['getTrainKernel']

    ell = np.exp(params[0])
    if self.K_sq is None:
        K = sq_dist(self.X_scaled.T / ell)  # precompute squared distances
    else:
        K = self.K_sq / ell ** 2
    self.cache['K_sq_scaled'] = K

    K_exp = np.exp(-K / 2.0)
    self.cache['getTrainKernel'] = K_exp
    self.saveParams(params)
    return K_exp

Example 20

def getTrainTestKernel(self, params, Xtest):
    """Return the train-by-test kernel matrix.

    With ``z = Xtest / sqrt(n_features)`` and ``ell2 = exp(2 * params[0])``
    the result is
    ``arcsin((1 + X_scaled . z^T) / sqrt((ell2 + sx) (ell2 + sz)))``
    where ``sz = 1 + |z|^2`` per test row and ``self.sx`` is the matching
    per-row quantity for the training data.
    NOTE(review): ``self.sx`` is assumed to be ``1 + |x|^2`` per training
    row, mirroring ``sz`` - confirm where it is set.

    :param params: sequence whose first element is the log length scale.
    :param Xtest: 2-D array of test inputs (rows are samples).
    :return: array with one row per training sample and one column per
        test sample.
    """
    self.checkParams(params)
    ell2 = np.exp(2 * params[0])

    z = Xtest / np.sqrt(Xtest.shape[1])
    inner = 1 + self.X_scaled.dot(z.T)
    sz = 1 + np.sum(z ** 2, axis=1)

    train_norm = np.sqrt(ell2 + self.sx)
    test_norm = np.sqrt(ell2 + sz)
    return np.arcsin(inner / np.outer(train_norm, test_norm))

Example 21

def match_matrix(event: "Event"):
    """Returns a numpy participation matrix for the qualification matches in this event, used for calculating OPR.

    Each row in the matrix corresponds to a single alliance in a match, meaning that there will be two rows (one for
    red, one for blue) per match. Each column represents a single team, in the order of ``event.teams``. If a team
    participated on a certain alliance, the value at that row and column would be 1, otherwise, it would be 0. For
    example, an event with teams 1-7 that featured a match that pitted teams 1, 3, and 5 against 2, 4, and 6 would
    have a match matrix that looks like this (sans labels):

                 #1  #2  #3  #4  #5  #6  #7
        qm1_red   1   0   1   0   1   0   0
        qm1_blue  0   1   0   1   0   1   0

    Columns of teams that played at least two matches fewer than the average team are dropped from the result.
    An event without qualification matches yields an empty ``(0, n_teams)`` matrix instead of raising.
    """
    team_keys = [team['key'] for team in event.teams]

    rows = []
    for match in event.matches:
        if match['comp_level'] != 'qm':
            continue
        for color in ('red', 'blue'):
            alliance = match['alliances'][color]['teams']
            rows.append([1 if key in alliance else 0 for key in team_keys])

    # The explicit reshape keeps the matrix 2-D even when there are no rows.
    mat = numpy.array(rows, dtype=int).reshape(len(rows), len(team_keys))
    played = mat.sum(axis=0)
    if played.size == 0:
        return mat

    avg_team_matches = played.sum() / float(played.size)
    return mat[:, played > avg_team_matches - 2]

Example 22

def compute_angle(pt0, pt1, pt2):
    """
    Given 3 points, compute the cosine of the angle at pt0, i.e. between
    the vectors ``pt0 - pt1`` and ``pt0 - pt2``.

    :type pt0: numpy.array
    :type pt1: numpy.array
    :type pt2: numpy.array
    :return: cosine of angle (NaN if pt1 or pt2 coincides with pt0)
    """
    to_first = pt0 - pt1
    to_second = pt0 - pt2
    dot = np.sum(to_first * to_second)
    return dot / (np.linalg.norm(to_first) * np.linalg.norm(to_second))

Example 23

def _zoning(image):

"""

It works better with DSIZE = 28

~0.9967 precision and recall

:param image:

:return: #pixels/area ratio of each zone (7x7) as feature vector

"""

zones = []

for i in range(0, 28, 7):

for j in range(0, 28, 7):

roi = image[i:i+7, j:j+7]

val = (np.sum(roi)/255) / 49.

zones.append(val)

return np.array(zones, np.float32)

Example 24

def getTypeProblem(self, solution_filename):
    ''' Get the type of problem directly from the solution file (in case we do not have an info file).

    If ``self.info`` already has a ``'task'`` entry it is returned untouched.
    Otherwise the solution file is loaded and ``self.info`` is filled in with
    ``target_num``, ``label_num``, ``target_type`` and ``task``:

    - one column with fewer than ``len/8`` distinct values: classification
      (binary when there are exactly 2 values, multiclass otherwise);
    - one column with more distinct values: regression;
    - several columns: multilabel when some row sums to more than 1,
      multiclass otherwise.
    '''
    if 'task' in self.info:
        return self.info['task']

    solution = np.array(data_converter.file_to_array(solution_filename))
    target_num = solution.shape[1]
    self.info['target_num'] = target_num

    if target_num == 1:  # if we have only one column
        solution = np.ravel(solution)  # flatten
        nbr_unique_values = len(np.unique(solution))
        if nbr_unique_values < len(solution) / 8:
            # Classification
            self.info['label_num'] = nbr_unique_values
            if nbr_unique_values == 2:
                self.info['task'] = 'binary.classification'
                self.info['target_type'] = 'Binary'
            else:
                self.info['task'] = 'multiclass.classification'
                self.info['target_type'] = 'Categorical'
        else:
            # Regression
            self.info['label_num'] = 0
            self.info['task'] = 'regression'
            self.info['target_type'] = 'Numerical'
    else:
        # Multilabel or multiclass
        self.info['label_num'] = target_num
        self.info['target_type'] = 'Binary'
        # More than one active label on any row means multilabel.
        if (solution.astype(int).sum(axis=1) > 1).any():
            self.info['task'] = 'multilabel.classification'
        else:
            self.info['task'] = 'multiclass.classification'
    return self.info['task']

Example 25

def binarize_predictions(array, task='binary.classification'):
    ''' Turn predictions into decisions {0,1} by selecting the class with largest
    score for multiclass problems and thresholding at 0.5 for other cases.

    A multiclass problem with a single column is thresholded as well. Ties
    in the multiclass case go to the first (lowest index) class.

    :param array: array of scores; 2-D for the multiclass case.
    :return: float array of zeros and ones with the shape of ``array``.
    '''
    bin_array = np.zeros(array.shape)
    if (task != 'multiclass.classification') or (array.shape[1] == 1):
        bin_array[array >= 0.5] = 1
    else:
        # One "1" per row, at the position of the row maximum.
        winners = np.argmax(array, axis=1)
        bin_array[np.arange(array.shape[0]), winners] = 1
    return bin_array

Example 26

def acc_stat(solution, prediction):
    ''' Return accuracy statistics TN, FP, TP, FN.

    Assumes that solution and prediction are binary 0/1 vectors. For 2-D
    inputs the counts are taken per column (sum over the first axis).

    :return: tuple ``(TN, FP, TP, FN)``.
    '''
    not_solution = 1 - solution
    not_prediction = 1 - prediction
    # axis=0 reproduces what the builtin sum() did on arrays, in C.
    TN = np.sum(np.multiply(not_solution, not_prediction), axis=0)
    FN = np.sum(np.multiply(solution, not_prediction), axis=0)
    TP = np.sum(np.multiply(solution, prediction), axis=0)
    FP = np.sum(np.multiply(not_solution, prediction), axis=0)
    return (TN, FP, TP, FN)

Example 27

def pac_metric(solution, prediction, task='binary.classification'):
    ''' Probabilistic Accuracy based on log_loss metric.

    We assume the solution is in {0, 1} and prediction in [0, 1].
    Otherwise, run normalize_array.

    The log loss of the predictions and the log loss of the class priors are
    both turned into accuracy-like values with ``exp(-loss)``; the score is
    ``(pac - base_pac) / (1 - base_pac)``: 0 for random, 1 for perfect.

    :param solution: 2-D array, samples by labels.
    :param prediction: array of predicted probabilities, same layout.
    :param task: task name; forced to ``'binary.classification'`` when
        ``solution`` has a single column.
    '''
    debug_flag = False
    [sample_num, label_num] = solution.shape
    if label_num == 1:
        task = 'binary.classification'
    eps = 1e-15
    the_log_loss = log_loss(solution, prediction, task)

    # Compute the base log loss (using the prior probabilities)
    pos_num = 1. * np.sum(solution, axis=0)  # float conversion!
    frac_pos = pos_num / sample_num  # prior proba of positive class
    the_base_log_loss = prior_log_loss(frac_pos, task)

    # Alternative computation of the same thing (slower)
    # Should always return the same thing except in the multi-label case
    # For which the analytic solution makes more sense
    if debug_flag:
        base_prediction = np.empty(prediction.shape)
        for k in range(sample_num):
            base_prediction[k, :] = frac_pos
        base_log_loss = log_loss(solution, base_prediction, task)
        diff = np.array(abs(the_base_log_loss - base_log_loss))
        if len(diff.shape) > 0:
            diff = max(diff)
        if diff > 1e-10:
            print('Arrggh {} != {}'.format(the_base_log_loss, base_log_loss))

    # Exponentiate to turn into an accuracy-like score.
    # In the multi-label case, we need to average AFTER taking the exp
    # because it is an NL operation
    pac = mvmean(np.exp(-the_log_loss))
    base_pac = mvmean(np.exp(-the_base_log_loss))

    # Normalize: 0 for random, 1 for perfect.
    # np.maximum replaces sp.maximum, a NumPy alias that current SciPy
    # releases no longer provide.
    score = (pac - base_pac) / np.maximum(eps, (1 - base_pac))
    return score

Example 28

def auc_metric(solution, prediction, task='binary.classification'):
    ''' Normalized Area under ROC curve (AUC).

    Return Gini index = 2*AUC-1 for binary classification problems.
    Should work for a vector of binary 0/1 (or -1/1) "solution" and any discriminant values
    for the predictions. If solution and prediction are not vectors, the AUC
    of the columns of the matrices are computed and averaged (with no weight).
    The same for all classification problems (in fact it treats well only the
    binary and multilabel classification problems).

    Both inputs are indexed as 2-D arrays (``[:, k]``). A column without
    positive (or without negative) examples yields a non-finite AUC for that
    column; a warning is printed for the no-positive case.
    '''
    # auc = metrics.roc_auc_score(solution, prediction, average=None)
    # There is a bug in metrics.roc_auc_score: auc([1,0,0],[1e-10,0,0]) incorrect
    label_num = solution.shape[1]
    auc = np.empty(label_num)
    for k in range(label_num):
        r_ = tiedrank(prediction[:, k])
        s_ = solution[:, k]
        if np.sum(s_) == 0:
            print('WARNING: no positive class example in class {}'.format(k + 1))
        positives = (s_ == 1)
        npos = np.sum(positives)
        nneg = np.sum(s_ < 1)
        # Rank-sum (Mann-Whitney) form of the AUC.
        auc[k] = (np.sum(r_[positives]) - npos * (npos + 1) / 2) / (nneg * npos)
    return 2 * mvmean(auc) - 1

### END CLASSIFICATION METRICS

# ======= Specialized scores ========

# We run all of them for all tasks even though they don't make sense for some tasks

Example 29

def prior_log_loss(frac_pos, task='binary.classification'):
    ''' Baseline log loss. For multiple classes or labels return the values for each column.

    :param frac_pos: prior probability of the positive class; a scalar or an
        array with one entry per column.
    :param task: for ``'multiclass.classification'`` the priors are
        renormalized to sum to one and a single summed loss is returned; for
        every other task the binary entropy is returned per entry.
    '''
    eps = 1e-15
    # np.maximum replaces sp.maximum, a NumPy alias that current SciPy
    # releases no longer provide.
    frac_pos_ = np.maximum(eps, frac_pos)
    if task != 'multiclass.classification':  # binary case
        frac_neg = 1 - frac_pos
        frac_neg_ = np.maximum(eps, frac_neg)
        pos_class_log_loss_ = - frac_pos * np.log(frac_pos_)
        neg_class_log_loss_ = - frac_neg * np.log(frac_neg_)
        # In the multilabel case, the right thing is to AVERAGE not sum
        # We return all the scores so we can normalize correctly later on
        base_log_loss = pos_class_log_loss_ + neg_class_log_loss_
    else:  # multiclass case
        # Need to renormalize the lines in multiclass case
        fp = frac_pos_ / np.sum(frac_pos_, axis=0)
        # Only ONE label is 1 in the multiclass case active for each line
        pos_class_log_loss_ = - frac_pos * np.log(fp)
        base_log_loss = np.sum(pos_class_log_loss_)
    return base_log_loss

# sklearn implementations for comparison

Example 30

def num_lines(filename):
    ''' Count the number of lines of file.

    The file is opened in text mode and closed again before returning (the
    original left the handle open).
    '''
    with open(filename) as handle:
        return sum(1 for _ in handle)

Example 31

def tp_filter(X, Y, feat_num=1000, verbose=True):
    ''' TP feature selection in the spirit of the winners of the KDD cup 2001.

    Only for binary classification and sparse matrices: features are ranked
    by how many positive examples (``Y > 0``) have a non-zero value for them.
    The filter is applied only when ``X`` is sparse, ``Y`` is a 1-D vector
    with exactly two distinct values and fewer than 10% of it is positive;
    otherwise all feature indices are returned in their original order.

    NOTE: when the non-zero entries of ``X`` all share one value other
    than 1, ``X`` is modified in place (those entries are set to 1).

    :return: list of at most ``feat_num`` column indices, best first, or
        ``range(n_features)`` when the filter does not apply.
    '''
    applies = (issparse(X) and len(Y.shape) == 1 and len(set(Y)) == 2
               and (np.sum(Y) / Y.shape[0]) < 0.1)
    if not applies:
        return range(X.shape[1])

    if verbose:
        print("========= Filtering features...")
    Posidx = Y > 0
    nz = X.nonzero()
    mx = X[nz].max()
    if X[nz].min() == mx:  # sparse binary
        if mx != 1:
            X[nz] = 1
        tp = csr_matrix.sum(X[Posidx, :], axis=0)
    else:
        tp = np.sum(X[Posidx, :] > 0, axis=0)

    tp = np.ravel(tp)
    # Stable sort: features with equal counts keep their original order.
    idx = sorted(range(len(tp)), key=tp.__getitem__, reverse=True)
    return idx[0:feat_num]

Example 32

def predict(self, X):
    """Return post-processed predictions for ``X``.

    Steps, in order:

    1. ``self.predict_method(X)`` produces the raw predictions.
    2. For non-regression tasks with a post-processor, the predictions are
       calibrated with ``self.postprocessor.predict_proba``.
    3. For single-target problems with several columns only column 1 is
       kept.
    4. For multiclass tasks every row is divided (in place) by its sum so
       that it adds up to one; rows summing to less than 1e-15 are divided
       by 1e-15 instead.
    """
    prediction = self.predict_method(X)

    # Calibrate proba
    if self.task != 'regression' and self.postprocessor is not None:
        prediction = self.postprocessor.predict_proba(prediction)

    # Keep only 2nd column because the first one is 1 - second
    if self.target_num == 1 and len(prediction.shape) > 1 and prediction.shape[1] > 1:
        prediction = prediction[:, 1]

    # Make sure the normalization is correct
    if self.task == 'multiclass.classification':
        eps = 1e-15
        row_sums = np.sum(prediction, axis=1)
        # np.maximum replaces sp.maximum, a NumPy alias that current SciPy
        # releases no longer provide; one broadcast division replaces the
        # per-row loop.
        prediction /= np.maximum(row_sums, eps)[:, None]
    return prediction

Example 33

def fit(self, X, Y):
    """Fit one predictor per target column of ``Y``.

    Sets ``self.n_target`` (number of columns of ``Y``) and ``self.n_label``
    (number of distinct values in ``Y``). If the number of predictors does
    not match the number of targets, ``self.predictors`` is rebuilt from
    shallow copies of its first element. Predictors exposing
    ``n_estimators`` get ``self.n_estimators`` assigned before fitting.

    When ``self.balance`` is true each predictor is trained on all samples
    of the smaller class (positive means ``Y > 0``) plus an equally sized
    random subset of the other class, drawn with ``np.random.shuffle``.

    NOTE(review): the class-size comparison was truncated in the source
    (``if sum(pos)`` with nothing after it); ``sum(pos) < sum(neg)`` is
    reconstructed from the two branches that follow - confirm.

    :param X: 2-D array of samples.
    :param Y: 1-D vector or 2-D array of targets, one column per target.
    :return: None
    """
    if len(Y.shape) == 1:
        Y = np.array([Y]).transpose()  # Transform vector into column matrix
        # This is NOT what we want: Y = Y.reshape( -1, 1 ), because Y.shape[1] out of range
    self.n_target = Y.shape[1]  # Num target values = num col of Y
    # Num labels = num classes (categories of categorical var if n_target=1
    # or n_target if labels are binary)
    self.n_label = len(set(Y.ravel()))

    # Create the right number of copies of the predictor instance
    if len(self.predictors) != self.n_target:
        predictorInstance = self.predictors[0]
        self.predictors = [predictorInstance]
        for i in range(1, self.n_target):
            self.predictors.append(copy.copy(predictorInstance))

    # Fit all predictors
    for i in range(self.n_target):
        predictor = self.predictors[i]
        # Update the number of desired predictors
        if hasattr(predictor, 'n_estimators'):
            predictor.n_estimators = self.n_estimators

        if not self.balance:
            predictor.fit(X, Y[:, i])
            continue

        # Subsample: keep the minority class, top up from the majority.
        pos = Y[:, i] > 0
        neg = Y[:, i] <= 0
        if np.sum(pos) < np.sum(neg):
            chosen, not_chosen = pos, neg
        else:
            chosen, not_chosen = neg, pos
        num = np.sum(chosen)
        idx = np.flatnonzero(not_chosen)
        np.random.shuffle(idx)
        chosen[idx[0:min(num, len(idx))]] = True
        # Train with chosen samples
        predictor.fit(X[chosen, :], Y[chosen, i])
    return

Example 34

def get_batch_loss(self, input_batch, output_batch):
    """Encode ``input_batch`` in both directions and return the decoder loss.

    The batch is laid out time-major: position ``j`` holds the vocabulary
    id of the ``j``-th token of every sentence. Sentences shorter than the
    longest one are padded with ``self.src_vocab.END_TOK``. The reversed
    sequence reads every sentence from its own last token backwards, with
    the same padding at the end.

    :param input_batch: list of source sentences (sequences of tokens).
    :param output_batch: target batch, passed through to ``decode_batch``.
    :return: whatever ``self.decode_batch`` returns for the encoded batch.
    """
    dynet.renew_cg()

    def token_id(sent, position, reverse=False):
        # Vocabulary id at ``position``, or the END id past the sentence end.
        if len(sent) <= position:
            return self.src_vocab.END_TOK.i
        index = len(sent) - position - 1 if reverse else position
        return self.src_vocab[sent[index]].i

    # Dimension: maxSentLength * minibatch_size
    wids = []
    wids_reversed = []
    maxSentLength = max([len(sent) for sent in input_batch])
    for j in range(maxSentLength):
        wids.append([token_id(sent, j) for sent in input_batch])
        wids_reversed.append([token_id(sent, j, reverse=True) for sent in input_batch])

    embedded_batch = self.embed_batch_seq(wids)
    embedded_batch_reverse = self.embed_batch_seq(wids_reversed)
    encoded_batch = self.encode_batch_seq(embedded_batch, embedded_batch_reverse)

    # pass last hidden state of encoder to decoder
    return self.decode_batch(encoded_batch, output_batch)

Example 35

def plotFields(layer, fieldShape=None, channel=None, figOffset=1, cmap=None, padding=0.01):
    """Plot the receptive fields of a layer and their summed magnitude.

    Figure ``figOffset`` shows every field in an image grid with a shared
    colour bar; figure ``figOffset + 1`` shows the sum of the absolute
    values of all fields.

    :param layer: object with a ``W`` attribute, or the weight variable
        itself; ``W.eval()`` must return an array and ``layer.name`` is used
        in the titles.
    :param fieldShape: shape of one field as a list or tuple; required when
        the transposed weights have fewer than 4 dimensions (fully
        connected layer).
    :param channel: for convolutional weights, plot only this input
        channel; by default all feature/channel pairs are plotted.
    :param figOffset: number of the first figure.
    :param cmap: colour map passed to ``imshow``.
    :param padding: padding between grid axes.
    :raises TypeError: if ``fieldShape`` is needed but missing.
    """
    # Receptive Fields Summary
    W = getattr(layer, 'W', layer)
    wp = W.eval().transpose()

    if len(np.shape(wp)) < 4:  # Fully connected layer, has no shape
        if fieldShape is None:
            raise TypeError('fieldShape is required for fully connected layers')
        fields = np.reshape(wp, list(wp.shape[0:-1]) + list(fieldShape))
    else:  # Convolutional layer already has shape
        features, channels, iy, ix = np.shape(wp)
        if channel is not None:
            fields = wp[:, channel, :, :]
        else:
            fields = np.reshape(wp, [features * channels, iy, ix])

    field_count = fields.shape[0]
    perRow = int(math.floor(math.sqrt(field_count)))
    perColumn = int(math.ceil(field_count / float(perRow)))

    fig = mpl.figure(figOffset)
    mpl.clf()

    # Using image grid
    from mpl_toolkits.axes_grid1 import ImageGrid
    grid = ImageGrid(fig, 111, nrows_ncols=(perRow, perColumn),
                     axes_pad=padding, cbar_mode='single')
    for i in range(0, field_count):
        im = grid[i].imshow(fields[i], cmap=cmap)
    # The shared colour bar is scaled by the last image drawn.
    grid.cbar_axes[0].colorbar(im)
    mpl.title('%s Receptive Fields' % layer.name)

    mpl.figure(figOffset + 1)
    mpl.clf()
    mpl.imshow(np.sum(np.abs(fields), 0), cmap=cmap)
    mpl.title('%s Total Absolute Input Dependency' % layer.name)
    mpl.colorbar()

Example 36

def plotFields(layer, fieldShape=None, channel=None, maxFields=25, figName='ReceptiveFields', cmap=None, padding=0.01):
    """Plot up to ``maxFields`` receptive fields of a layer and their summed magnitude.

    Figure ``figName`` shows the first fields in an image grid with a
    shared colour bar; figure ``figName + ' Total'`` shows the sum of the
    absolute values of all fields (not only the plotted ones).

    :param layer: object with a ``W`` attribute whose ``eval()`` returns an
        array; ``layer.name`` is used in the titles.
    :param fieldShape: shape of one field as a list or tuple; required when
        the transposed weights have fewer than 4 dimensions (fully
        connected layer).
    :param channel: for convolutional weights, plot only this input
        channel; by default all feature/channel pairs are plotted.
    :param maxFields: upper bound on the number of fields drawn in the grid.
    :param figName: name of the first figure.
    :param cmap: colour map passed to ``imshow``.
    :param padding: padding between grid axes.
    :raises TypeError: if ``fieldShape`` is needed but missing.
    """
    # Receptive Fields Summary
    W = layer.W
    wp = W.eval().transpose()

    if len(np.shape(wp)) < 4:  # Fully connected layer, has no shape
        if fieldShape is None:
            raise TypeError('fieldShape is required for fully connected layers')
        fields = np.reshape(wp, list(wp.shape[0:-1]) + list(fieldShape))
    else:  # Convolutional layer already has shape
        features, channels, iy, ix = np.shape(wp)
        if channel is not None:
            fields = wp[:, channel, :, :]
        else:
            fields = np.reshape(wp, [features * channels, iy, ix])

    fieldsN = min(fields.shape[0], maxFields)
    perRow = int(math.floor(math.sqrt(fieldsN)))
    perColumn = int(math.ceil(fieldsN / float(perRow)))

    fig = mpl.figure(figName)
    mpl.clf()

    # Using image grid
    from mpl_toolkits.axes_grid1 import ImageGrid
    grid = ImageGrid(fig, 111, nrows_ncols=(perRow, perColumn),
                     axes_pad=padding, cbar_mode='single')
    for i in range(0, fieldsN):
        im = grid[i].imshow(fields[i], cmap=cmap)
    # The shared colour bar is scaled by the last image drawn.
    grid.cbar_axes[0].colorbar(im)
    mpl.title('%s Receptive Fields' % layer.name)

    mpl.figure(figName + ' Total')
    mpl.clf()
    mpl.imshow(np.sum(np.abs(fields), 0), cmap=cmap)
    mpl.title('%s Total Absolute Input Dependency' % layer.name)
    mpl.colorbar()

Example 37

def analytic_convolution_gaussian(mu1, covar1, mu2, covar2):
    """
    The analytic convolution of two Gaussians is simply the sum of the two mean vectors
    and the two covariance matrices

    --- INPUT ---
    mu1         The mean of the first gaussian
    covar1      The covariance matrix of the first gaussian
    mu2         The mean of the second gaussian
    covar2      The covariance matrix of the second gaussian

    --- OUTPUT ---
    muconv      mu1 + mu2
    covarconv   covar1 + covar2
    """
    return mu1 + mu2, covar1 + covar2

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

Example 38

def reshape_array(array, newsize, pixcombine='sum'):
    """
    Reshape an array to a given size using either the sum, mean or median of the pixels binned

    Note that the old array dimensions have to be multiples of the new array dimensions

    --- INPUT ---
    array       Array to reshape (combine pixels)
    newsize     New size of array
    pixcombine  The method to combine the pixels with. Choices are sum, mean and median

    Raises ValueError for any other value of pixcombine.
    """
    # View the array as (new_rows, row_bin, new_cols, col_bin) so that every
    # output pixel owns one (row_bin, col_bin) block.
    sh = newsize[0], array.shape[0] // newsize[0], newsize[1], array.shape[1] // newsize[1]
    binned = array.reshape(sh)

    if pixcombine == 'sum':
        return binned.sum(-1).sum(1)
    if pixcombine == 'mean':
        return binned.mean(-1).mean(1)
    if pixcombine == 'median':
        # ndarrays have no .median() method; take the median of all pixels
        # in each bin in one step.
        return np.median(binned, axis=(1, 3))
    raise ValueError("pixcombine must be 'sum', 'mean' or 'median', got %r" % (pixcombine,))

# = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =

Example 39

def inner_product_to_infty(self, gf1, gf2):
    """Inner product on non-compact domain.

    Sums ``factor * gf1 * weights2D * gf2 * dRdX`` over the grid, where
    ``factor`` is the product of the scale factors of ``self.stencils``.
    The last entry along the first axis of the integrand is zeroed before
    summing.
    NOTE(review): presumably that entry is the point at infinity, where
    ``dRdX`` diverges - confirm.
    """
    factor = np.prod([s.get_scale_factor() for s in self.stencils])
    integrand = factor * gf1 * self.weights2D * gf2 * self.dRdX
    integrand[-1] = 0
    return np.sum(integrand)

Example 40

def get_integration_weights(order, nodes=None):
    """
    Returns the integration weights for Gauss-Lobatto quadrature
    as a function of the order of the polynomial we want to
    represent.

    The weights depend on the module-level polynomial family ``poly``,
    which must be the numpy Chebyshev or Legendre class.

    See: https://en.wikipedia.org/wiki/Gaussian_quadrature
    See: arXive:gr-qc/0609020v1

    :param order: polynomial order; ``order + 1`` weights are returned.
    :param nodes: quadrature points. When omitted (``None``, or the legacy
        ``False``) they are obtained from ``get_quadrature_points(order)``.
        The original test ``np.all(nodes == False)`` was false for the
        default ``None``, so the default never worked.
    :raises ValueError: if ``poly`` is neither Chebyshev nor Legendre.
    """
    if nodes is None or np.all(nodes == False):  # noqa: E712 - legacy sentinel
        nodes = get_quadrature_points(order)

    if poly == polynomial.chebyshev.Chebyshev:
        weights = np.empty((order + 1))
        weights[1:-1] = np.pi / order
        weights[0] = np.pi / (2 * order)
        weights[-1] = weights[0]
        return weights

    if poly == polynomial.legendre.Legendre:
        interior_weights = 2 / ((order + 1) * order * poly.basis(order)(nodes[1:-1]) ** 2)
        # The two end points share whatever is left of the total weight 2.
        boundary_weights = np.array([1 - 0.5 * np.sum(interior_weights)])
        return np.concatenate((boundary_weights,
                               interior_weights,
                               boundary_weights))

    raise ValueError("Not a known polynomial type.")

Example 41

def inner_product(self, gf1, gf2):
    """Calculates the 2D inner product between grid functions
    gf1 and gf2 using the appropriate quadrature rule.

    The pointwise product ``gf1 * self.weights2D * gf2`` is summed and
    the total is then multiplied by the product of every stencil's
    scale factor.

    :param gf1: first grid function
    :param gf2: second grid function
    :return: the scaled sum
    """
    scale = np.prod([stencil.get_scale_factor() for stencil in self.stencils])
    unit_cell_total = np.sum(gf1 * self.weights2D * gf2)
    return unit_cell_total * scale

Example 42

def compute_rhs(rhs):
    """Fill and return the right-hand side array.

    Works entirely on module-level state (``work``, ``FFT``, ``dealias``,
    ``U_hat``, ``P_hat``, ``K``, ``K2``, ``K_over_K2``, ``nu``) and the
    helpers ``curl`` and ``cross``.  Steps, in order:

    1. fetch two 3-component work buffers,
    2. inverse-transform each component of ``U_hat`` into the first one,
    3. evaluate ``curl`` into the second one,
    4. let ``cross`` combine both into ``rhs``,
    5. store ``sum(rhs * K_over_K2, 0)`` in ``P_hat`` and subtract
       ``P_hat * K``,
    6. subtract ``nu * K2 * U_hat``.

    NOTE(review): this looks like the rotational form of a spectral
    Navier-Stokes solver (pressure projection followed by the viscous
    term) -- confirm against the caller.

    :param rhs: array handed to ``cross`` as its output argument
    :return: the array returned by ``cross`` after the in-place updates
    """
    velocity_key = ((3,) + FFT.work_shape(dealias), float, 0)
    U_dealiased = work[velocity_key]
    vorticity_key = ((3,) + FFT.work_shape(dealias), float, 1)
    curl_dealiased = work[vorticity_key]

    for axis in range(3):
        U_dealiased[axis] = FFT.ifftn(U_hat[axis], U_dealiased[axis], dealias)

    curl_dealiased = curl(U_hat, curl_dealiased)
    rhs = cross(U_dealiased, curl_dealiased, rhs)

    # ``sum`` takes an ``out=`` keyword, so it is presumably numpy's sum
    # rather than the builtin -- TODO confirm the import.
    projected = sum(rhs * K_over_K2, 0, out=P_hat)
    P_hat[:] = projected

    rhs -= P_hat * K
    rhs -= nu * K2 * U_hat
    return rhs

# Initialize a Taylor Green vortex

Example 43

def gof(self):
    """Return ``sqrt(pooled AVE * mean R^2)``.

    The mean R^2 is taken over the columns of ``self.r2.T`` selected by
    the first element of ``self.endoexo()``.  The pooled AVE is the
    average of ``self.AVE()`` weighted by the number of measurement
    columns attached to each latent variable.
    """
    selected = self.endoexo()[0]
    r2mean = np.mean(self.r2.T[selected].values)

    weighted_ave = self.AVE().copy()
    indicator_total = 0
    for position in range(self.lenlatent):
        name = self.latent[position]
        belongs = self.Variables['latent'] == name
        indicator_names = self.Variables['measurement'][belongs]
        indicator_count = len(self.data_[indicator_names].columns.values)
        indicator_total += indicator_count
        # Weight this latent variable's AVE by its block size.
        weighted_ave[name] = weighted_ave[name] * indicator_count

    pooled_ave = np.sum(weighted_ave) / indicator_total
    return np.sqrt(pooled_ave * r2mean)

Example 44

def cr(self):
    """Composite reliability of every latent variable.

    For each latent variable the covariance matrix of its measurement
    columns is decomposed, loadings are formed from the leading
    singular vector scaled by the square root of the leading singular
    value, and the reliability is::

        (sum |loading|)^2 / ((sum |loading|)^2 + (p - sum loading^2))

    where ``p`` is the number of measurement columns.  A latent
    variable with a single measurement column gets a reliability of 1.

    :return: DataFrame indexed by latent-variable name with one column
        (label ``0``) holding the reliabilities
    """
    composite = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

    for i in range(self.lenlatent):
        name = self.latent[i]
        block = self.data_[self.Variables['measurement']
                           [self.Variables['latent'] == name]]
        p = len(block.columns)

        if p == 1:
            composite[name] = 1
            continue

        cov_mat = np.cov(block.T)
        # A covariance matrix is symmetric positive semi-definite, so
        # its singular values are its eigenvalues, already sorted in
        # descending order.  One SVD therefore replaces the former
        # eig + argsort + SVD sequence (whose eigenvectors and U were
        # never used).
        _, singular_values, vt = np.linalg.svd(cov_mat, full_matrices=False)
        loadings = vt[0, :] * np.sqrt(singular_values[0])

        numerator = np.sum(np.abs(loadings)) ** 2
        denominator = numerator + (p - np.sum(loadings ** 2))
        composite[name] = numerator / denominator

    return composite.T

Example 45

def r2adjusted(self):
    """Adjusted R^2 for every latent variable.

    For each latent variable, ``p`` is the number of rows of
    ``self.LVariables`` whose ``'target'`` equals that variable and
    ``n`` is ``len(self.data_)``; the adjusted value is
    ``r2 - p * (1 - r2) / (n - p - 1)``.

    :return: DataFrame indexed by latent-variable name with a single
        column (label ``0``)
    """
    n_obs = len(self.data_)
    r2_values = self.r2.values
    adjusted = pd.DataFrame(0, index=np.arange(1), columns=self.latent)

    for position in range(self.lenlatent):
        name = self.latent[position]
        n_predictors = sum(self.LVariables['target'] == name)
        shrinkage = (n_predictors * (1 - r2_values[position])) / (n_obs - n_predictors - 1)
        adjusted[name] = r2_values[position] - shrinkage

    return adjusted.T

Example 46

def AVE(self):
    """Average variance extracted (AVE).

    For every column of ``self.comunalidades()`` returns the column sum
    divided by the number of non-zero entries in that column.
    """
    def mean_of_nonzero(column):
        nonzero_count = (column != 0).sum()
        return column.sum() / nonzero_count

    return self.comunalidades().apply(mean_of_nonzero)

Example 47

def fornell(self):
    """Fornell-Larcker matrix.

    Returns the squared correlation matrix of ``self.fscores`` with its
    diagonal replaced by the AVE of each latent variable (column sum of
    ``self.comunalidades()`` divided by its count of non-zero entries).
    The AVE values are matched to the diagonal by position, so the
    columns of ``self.comunalidades()`` are assumed to be in the same
    order as those of ``self.fscores``.

    :return: square DataFrame labelled like ``self.fscores.corr()``
    """
    cor_ = self.fscores.corr() ** 2
    AVE = self.comunalidades().apply(
        lambda column: column.sum() / (column != 0).sum())

    # ``.ix`` was removed from pandas, and integer keys on a
    # label-indexed Series are deprecated; the access here is
    # positional, so use ``.iloc`` on both sides.
    for i in range(len(cor_)):
        cor_.iloc[i, i] = AVE.iloc[i]

    return cor_

Example 48

def fitness(self, data_, n_clusters, lvmodel, mvmodel, scheme, regression):
    """Fitness of this individual's cluster assignment.

    ``self.genes`` is attached to ``data_`` as a ``'Split'`` column.
    For every cluster id in ``range(n_clusters)`` a ``PyLSpm`` model is
    estimated on that cluster's rows, and the sum of its squared outer
    (``residuals()[1]``) and inner (``residuals()[2]``) residuals is
    recorded.  A cluster whose model cannot be estimated contributes a
    fixed penalty of 10000.

    :return: ``1 / (sum of the per-cluster values)``; the value is also
        printed
    """
    output = pd.DataFrame(self.genes)
    output.columns = ['Split']
    dataSplit = pd.concat([data_, output], axis=1)

    f1 = []
    for cluster in range(n_clusters):
        subset = dataSplit.loc[dataSplit['Split'] == cluster].drop('Split', axis=1)
        subset.index = range(len(subset))

        try:
            # Keep the model in a local instead of indexing a results
            # list by cluster id: once an earlier cluster had failed,
            # that list was shorter than the id and every later,
            # healthy cluster was penalised by the resulting IndexError.
            model = PyLSpm(subset, lvmodel, mvmodel, scheme,
                           regression, 0, 50, HOC='true')
            residuals = model.residuals()
            sumOuterResid = (residuals[1] ** 2).sum().sum()
            sumInnerResid = (residuals[2] ** 2).sum().sum()
            f1.append(sumOuterResid + sumInnerResid)
        except Exception:
            # Best-effort: an unestimable cluster is penalised, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            f1.append(10000)

    score = 1 / np.sum(f1)
    print(score)
    return score

Example 49

def roulettewheel(pop, fit):
    """Pick one individual from ``pop`` by roulette-wheel selection.

    The fitness values are shifted so that the smallest becomes zero,
    turned into cumulative probabilities, and the individual whose
    interval contains a draw from ``uniform(0, 1)`` is located with
    ``BinSearch``.

    :param pop: indexable population
    :param fit: fitness values, one per individual; must support
        ``fit - scalar`` (e.g. a numpy array)
    :return: the selected element of ``pop``; ``pop[0]`` when all the
        shifted fitness values sum to zero
    """
    from itertools import accumulate

    fit = fit - min(fit)
    sumf = sum(fit)
    if sumf == 0:
        # Every individual is equally (un)fit: nothing to weight by.
        return pop[0]

    # Running totals in one pass; the previous ``sum(fit[:index])`` per
    # element recomputed every prefix and was quadratic.
    prob = [running / sumf for running in accumulate(fit)]
    prob_ = uniform(0, 1)
    individuo = int(BinSearch(prob, prob_, 0, len(prob) - 1))
    return pop[individuo]

Example 50

def xavier_initializer(shape):
    """Return a uniformly initialised tensor of the given shape.

    Values are drawn from ``[-bound, bound)`` with
    ``bound = sqrt(2 / sum(shape))``; for a one-dimensional shape the
    sum is increased by one before the bound is computed.

    :param shape: sequence of dimension sizes
    :return: result of ``tf.random_uniform(shape, minval=-bound, maxval=bound)``
    """
    extra = 1 if len(shape) == 1 else 0
    fan_total = np.sum(shape) + extra
    limit = np.sqrt(2.0 / fan_total)
    return tf.random_uniform(shape, minval=-limit, maxval=limit)

# Assigning network variables to target network variables

weixin_39996478

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫