Python numpy.array_split() usage examples

This page shows how to use numpy.array_split() in Python to split data, and how it appears in different settings such as image processing, parallel computing, and general data handling. The examples range from optical-flow computation to data generation, chunking, and batching.

The following are code examples showing how to use numpy.array_split(). They are extracted from open source Python projects. You can vote up the examples you like or vote down the examples you don't like. You can also save this page to your account.
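Before the examples, here is a minimal sketch (with a made-up array size) of the behaviour they all rely on: unlike numpy.split(), numpy.array_split() accepts a section count that does not evenly divide the axis, giving the leading chunks the extra elements.

import numpy as np

a = np.arange(10)

# array_split tolerates uneven divisions: 10 elements into 3 sections gives
# one chunk of size 4 followed by two chunks of size 3.
chunks = np.array_split(a, 3)
# -> [array([0, 1, 2, 3]), array([4, 5, 6]), array([7, 8, 9])]

# np.split with an integer section count requires an exact division and
# would raise ValueError here:
# np.split(a, 3)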

Example 1

def extract_optical_flow(fn, n_frames=34):
    img = dd.image.load(fn)
    if img.shape != (128*34, 128, 3):
        return []
    frames = np.array_split(img, 34, axis=0)
    grayscale_frames = [fr.mean(-1) for fr in frames]
    mags = []
    skip_frames = np.random.randint(34 - n_frames + 1)
    middle_frame = frames[np.random.randint(skip_frames, skip_frames+n_frames)]
    im0 = grayscale_frames[skip_frames]
    for f in range(1+skip_frames, 1+skip_frames+n_frames-1):
        im1 = grayscale_frames[f]
        flow = cv2.calcOpticalFlowFarneback(im0, im1,
                                            None,  # flow
                                            0.5,   # pyr_scale
                                            3,     # levels
                                            np.random.randint(3, 20),  # winsize
                                            3,     # iterations
                                            5,     # poly_n
                                            1.2,   # poly_sigma
                                            0      # flags
                                            )
        mag, ang = cv2.cartToPolar(flow[..., 0], flow[..., 1])
        mags.append(mag)
        im0 = im1
    mag = np.sum(mags, 0)
    mag = mag.clip(min=0)
    #norm_mag = np.tanh(mag * 10000)
    norm_mag = (mag - mag.min()) / (mag.max() - mag.min() + 1e-5)
    outputs = []
    outputs.append((middle_frame, norm_mag))
    return outputs

Example 2

def create_agents(self, generator):
    """
    Given information on a set of countries and a generator function,
    generate the agents and assign the results to ``self.agents``.

    :type generator: DataFrame, str, int
    :param generator: A function which generates the agents.
    """
    self.generator = generator
    country_array = pd.concat([pd.Series([c] * k["Population"]) for c, k in self.df.iterrows()])
    country_array.index = range(len(country_array))
    # Garbage collect before creating new processes.
    gc.collect()
    self.agents = pd.concat(
        self.pool.imap(self._gen_agents,
                       np.array_split(country_array, self.processes * self.splits))
    )
    self.agents.index = range(len(self.agents))

Example 3

def create_agents(self, generator):
    """
    Given information on a set of countries and a generator function,
    generate the agents and assign the results to ``self.agents``.

    :type generator: DataFrame, str, int
    :param generator: A function which generates the agents.
    """
    self.generator = generator
    country_array = pd.concat([pd.Series([c] * k["Population"]) for c, k in self.df.iterrows()])
    country_array.index = range(len(country_array))
    # Garbage collect before creating new processes.
    gc.collect()
    self.agents = pd.concat(
        self.pool.imap(self._gen_agents,
                       np.array_split(country_array, self.processes * self.splits))
    )
    self.agents.index = range(len(self.agents))

Example 4

def test_latlon2pix_internals(pix_size_single, origin_point, is_flipped,
                              num_chunks, chunk_position):
    img = make_image(pix_size_single, origin_point, is_flipped,
                     num_chunks, chunk_position)
    chunk_idx = img.chunk_idx
    res_x = img._full_res[0]
    res_y = img._full_res[1]
    pix_size = (img.pixsize_x, img.pixsize_y)
    origin = (img._start_lon, img._start_lat)

    # +0.5 for centre of pixels
    lons = (np.arange(res_x) + 0.5) * pix_size[0] + origin[0]
    all_lats = (np.arange(res_y) + 0.5) * pix_size[1] + origin[1]
    lats = np.array_split(all_lats, num_chunks)[chunk_idx]
    pix_x = np.arange(res_x)
    pix_y = np.arange(lats.shape[0])

    d = np.array([[a, b] for a in lons for b in lats])
    xy = img.lonlat2pix(d)
    true_xy = np.array([[a, b] for a in pix_x for b in pix_y])
    assert np.all(xy == true_xy)

Example 5

def test_pix2latlong(pix_size_single, origin_point, is_flipped,
                     num_chunks, chunk_position):
    img = make_image(pix_size_single, origin_point, is_flipped,
                     num_chunks, chunk_position)
    chunk_idx = img.chunk_idx
    res_x = img._full_res[0]
    res_y = img._full_res[1]
    pix_size = (img.pixsize_x, img.pixsize_y)
    origin = (img._start_lon, img._start_lat)

    true_lons = np.arange(res_x) * pix_size[0] + origin[0]
    all_lats = np.arange(res_y) * pix_size[1] + origin[1]
    true_lats = np.array_split(all_lats, num_chunks)[chunk_idx]
    true_d = np.array([[a, b] for a in true_lons for b in true_lats])
    pix_x = np.arange(res_x)
    pix_y = np.arange(img.resolution[1])  # chunk resolution

    xy = np.array([[a, b] for a in pix_x for b in pix_y])
    lonlats = img.pix2lonlat(xy)
    assert np.all(lonlats == true_d)

Example 6

def transform(self, X):
    if self.tagger is None:
        raise ValueError("Must find_motifs before you can tag anything")
    logging.info("Tagging %s data with motifs using %d workers..." % (
        str(X.shape), self.n_jobs))
    if self.n_jobs > 1:
        pool = mp.ProcessingPool(self.n_jobs)
        splits = np.array_split(X, self.n_jobs)
        tag_lists = pool.map(self._tag_motifs, splits)
        tags = list(itertools.chain.from_iterable(tag_lists))
    else:
        tags = self._tag_motifs(X)
    logging.info("All motifs have been tagged")
    return self._sparsify_tags(tags)

Example 7

def subset_iterator(X, m, repeats=1):
    '''
    Iterates over array X in chunks of m, repeat number of times.
    Each time the order of the repeat is randomly generated.
    '''
    N, dim = X.shape
    progress = tqdm(total=repeats * int(N / m))

    for i in range(repeats):
        indices = np.random.permutation(N)
        for idx in np.array_split(indices, N // m):
            yield X[idx][:]
            progress.update()

    progress.close()

Example 8

def _split_into_groups(y, num_groups):
    groups = [[] for _ in range(num_groups)]
    group_index = 0
    for cls in set(y):
        this_cls_indices = np.where(y == cls)[0]
        num_cls_samples = len(this_cls_indices)
        num_cls_split_groups = ceil(num_cls_samples / 500)
        split = np.array_split(this_cls_indices, num_cls_split_groups)
        for cls_group in split:
            groups[group_index] = np.hstack((groups[group_index], cls_group))
            group_index = (group_index + 1) % num_groups
    return groups

Example 9

def get_embedding_X(img):
    '''
    Args : Numpy Images vector
    Returns : Embedded Matrix of length Samples, 4096
    '''
    img = img.reshape((img.shape[0], img.shape[1], img.shape[2], 1))
    sess = tf.Session()
    imgs = tf.placeholder(tf.float32, [None, None, None, None])
    vgg = vgg16(imgs, '/tmp/vgg16_weights.npz', sess)
    embs = []
    cnt = 0
    for img_batch in np.array_split(img, img.shape[0] / 1000):
        emb = sess.run(vgg.emb, feed_dict={vgg.imgs: img_batch})
        embs.extend(emb)
        cnt += 1
        progress = round(100 * (cnt * 1000 / img.shape[0]), 2)
        if(progress % 10 == 0):
            print progress
    embs = np.array(embs)
    print embs.shape
    embs = np.reshape(embs, (embs.shape[0], embs.shape[1] * embs.shape[2] * embs.shape[3]))
    return embs

Example 10

def __init__(self, pobj, just_list=False, attr='_grids',
             round_robin=False):
    ObjectIterator.__init__(self, pobj, just_list, attr=attr)
    # pobj has to be a ParallelAnalysisInterface, so it must have a .comm
    # object.
    self._offset = pobj.comm.rank
    self._skip = pobj.comm.size
    # Note that we're doing this in advance, and with a simple means
    # of choosing them; more advanced methods will be explored later.
    if self._use_all:
        self.my_obj_ids = np.arange(len(self._objs))
    else:
        if not round_robin:
            self.my_obj_ids = np.array_split(
                np.arange(len(self._objs)), self._skip)[self._offset]
        else:
            self.my_obj_ids = np.arange(len(self._objs))[self._offset::self._skip]

Example 11

def iter_combinatorial_pairs(queue, num_examples, batch_size, interval,
                             num_classes, augment_positive=False):
    num_examples_per_class = num_examples // num_classes
    pairs = np.array(list(itertools.combinations(range(num_examples), 2)))

    if augment_positive:
        additional_positive_pairs = make_positive_pairs(
            num_classes, num_examples_per_class, num_classes - 1)
        pairs = np.concatenate((pairs, additional_positive_pairs))

    num_pairs = len(pairs)
    num_batches = num_pairs // batch_size
    perm = np.random.permutation(num_pairs)
    for i, batch_indexes in enumerate(np.array_split(perm, num_batches)):
        if i % interval == 0:
            x, c = queue.get()
            x = x.astype(np.float32) / 255.0
            c = c.ravel()
        indexes0, indexes1 = pairs[batch_indexes].T
        x0, x1, c0, c1 = x[indexes0], x[indexes1], c[indexes0], c[indexes1]
        t = np.int32(c0 == c1)  # 1 if x0 and x1 are same class, 0 otherwise
        yield x0, x1, t

Example 12

def get_epoch_indexes(self):
    B = self.batch_size
    K = self.num_classes
    M = self.num_per_class
    N = K * M  # number of total examples
    num_batches = M * int(K // B)  # number of batches per epoch

    indexes = np.arange(N, dtype=np.int32).reshape(K, M)
    epoch_indexes = []
    for m in range(M):
        perm = np.random.permutation(K)
        c_batches = np.array_split(perm, num_batches // M)
        for c_batch in c_batches:
            b = len(c_batch)  # actual number of examples of this batch
            indexes_anchor = M * c_batch + m
            positive_candidates = np.delete(indexes[c_batch], m, axis=1)
            indexes_positive = positive_candidates[
                range(b), np.random.choice(M - 1, size=b)]
            epoch_indexes.append((indexes_anchor, indexes_positive))
    return epoch_indexes

Example 13

def pre_processing(self):
    """Provide same API as Model, we split data to K folds here.
    """
    if self.random:
        mask = np.random.permutation(self.train_x.shape[0])
        train_x = self.train_x[mask]
        train_y = self.train_y[mask]
    else:
        train_x = self.train_x[:]
        train_y = self.train_y[:]

    if self.select_train_method == 'step':
        self.x_folds = [train_x[i::self.k_folds] for i in range(0, self.k_folds)]
        self.y_folds = [train_y[i::self.k_folds] for i in range(0, self.k_folds)]
    else:
        self.x_folds = np.array_split(train_x, self.k_folds)
        self.y_folds = np.array_split(train_y, self.k_folds)

    # for i in range(self.k_folds):
    #     self.x_folds[i] = self.train_x[0] + self.x_folds[i] + self.train_x[-1]
    #     self.y_folds[i] = self.train_y[0] + self.y_folds[i] + self.train_y[-1]

Example 14

def Train(self, C, A, Y, SF):
    '''
    Train the classifier using the sample matrix A and target matrix Y
    '''
    C.fit(A, Y)
    YH = np.zeros(Y.shape, dtype=np.object)
    for i in np.array_split(np.arange(A.shape[0]), 32):  # Split up verification into chunks to prevent out of memory
        YH[i] = C.predict(A[i])
    s1 = SF(Y, YH)
    print('All:{:8.6f}'.format(s1))
    '''
    ss = ShuffleSplit(random_state = 1151)  # Use fixed state for so training can be repeated later
    trn, tst = next(ss.split(A, Y))         # Make train/test split
    mi = [8] * 1                            # Maximum number of iterations at each iter
    YH = np.zeros((A.shape[0]), dtype = np.object)
    for mic in mi:                          # Chunk size to split dataset for CV results
        #C.SetMaxIter(mic)                  # Set the maximum number of iterations to run
        #C.fit(A[trn], Y[trn])              # Perform training iterations
    '''

Example 15

def add_point(self, t, alt, az):
    self.window.append((t, alt, az))
    if self._current_window_size() < self.window_duration:
        return
    points = np.array(self.window)
    steady, current = np.array_split(points, 2)
    _, steady_cube = self.create_cube(steady)
    timestamps, current_cube = self.create_cube(current)
    t = self.denoise_and_compare_cubes(steady_cube, current_cube)
    self.trigger_criterion.append(list(t))
    self.trigger_criterion_timestamps.append(list(timestamps))
    has_triggered = self.check_trigger(t)
    new_duration = self.window_duration - self.step
    self._reduce_to_duration(new_duration)

Example 16

def predict(self):
    if os.path.exists(DATA_QUERIES_VECTOR_NPZ) and not FORCE_LOAD:
        print('{}: loading precomputed data'.format(self.__class__.__name__))
        self.load_precomputed_data()
    else:
        self.precomputed_similarity()

    batch_size = 100
    batch_elements = math.ceil(self.queries_vector.shape[0] / batch_size)
    batch_queue = np.array_split(self.queries_vector.A, batch_elements)
    print("starting batch computation of Similarity and KNN calculation")
    # # multiple versions of calculating the prediction, some faster, some use more mem
    # prediction = self.multiprocessor_batch_calc(batch_queue)
    prediction = self.batch_calculation(batch_queue)
    # prediction = self.individual_calculation()
    # prediction = self.cosine_knn_calc()
    # prediction = self.custom_knn_calculation(prediction)

    train_avg_salary = sum(self.y_train) / len(self.y_train)
    cleaned_predictions = [x if str(x) != 'nan' else train_avg_salary for x in prediction]
    return self.y_train, cleaned_predictions

Example 17

def load_test_data(self):
    # Remove non-mat files, and perform ascending sort
    allfiles = os.listdir(self.data_dir)
    npzfiles = []
    for idx, f in enumerate(allfiles):
        if ".npz" in f:
            npzfiles.append(os.path.join(self.data_dir, f))
    npzfiles.sort()

    # Files for validation sets
    val_files = np.array_split(npzfiles, self.n_folds)
    val_files = val_files[self.fold_idx]

    print "\n========== [Fold-{}] ==========\n".format(self.fold_idx)

    print "Load validation set:"
    data_val, label_val = self._load_npz_list_files(val_files)

    return data_val, label_val

Example 18

def __init__(self, X, kern, Xm):
    super(PITC, self).__init__("PITC")
    M = np.shape(Xm)[0]
    self.M = M
    start = time.time()
    X_split = np.array_split(X, M)
    self.kern = kern
    kern_blocks = np.zeros((M), dtype=object)

    for t in xrange(M):
        nyst = Nystrom(X_split[t], kern, Xm, False)
        size = np.shape(X_split[t])[0]
        kern_blocks[t] = kern.K(X_split[t], X_split[t]) - nyst.precon + (kern.noise)*np.identity(size)

    self.blocks = kern_blocks
    blocked = block_diag(*kern_blocks)
    self.nyst = Nystrom(X, kern, Xm, False)
    self.precon = self.nyst.precon + blocked
    self.duration = time.time() - start

Example 19

def __init__(self, X, kern, Xm):
    super(PITC, self).__init__("PITC")
    M = np.shape(Xm)[0]
    self.M = M
    start = time.time()
    X_split = np.array_split(X, M)
    self.kern = kern
    kern_blocks = np.zeros((M), dtype=object)

    for t in xrange(M):
        nyst = Nystrom(X_split[t], kern, Xm, False)
        size = np.shape(X_split[t])[0]
        kern_blocks[t] = kern.K(X_split[t], X_split[t]) - nyst.precon + (kern.noise)*np.identity(size)

    self.blocks = kern_blocks
    blocked = block_diag(*kern_blocks)
    self.nyst = Nystrom(X, kern, Xm, False)
    self.precon = self.nyst.precon + blocked
    self.duration = time.time() - start

Example 20

def _read_image_as_array(path, dtype, load_size, crop_size, flip):
    f = Image.open(path)
    A, B = numpy.array_split(numpy.asarray(f), 2, axis=1)
    if hasattr(f, 'close'):
        f.close()

    A = _resize(A, load_size, Image.BILINEAR, dtype)
    B = _resize(B, load_size, Image.NEAREST, dtype)

    sx, sy = numpy.random.randint(0, load_size-crop_size, 2)
    A = _crop(A, sx, sy, crop_size)
    B = _crop(B, sx, sy, crop_size)

    if flip and numpy.random.rand() > 0.5:
        A = numpy.fliplr(A)
        B = numpy.fliplr(B)

    return A.transpose(2, 0, 1), B.transpose(2, 0, 1)

Example 21

def setup_figure():
    f = plt.figure(figsize=(7, 5))

    mat_grid = plt.GridSpec(2, 6, .07, .52, .98, .95, .15, .20)
    mat_axes = [f.add_subplot(spec) for spec in mat_grid]
    sticks_axes, rest_axes = np.array_split(mat_axes, 2)

    scatter_grid = plt.GridSpec(1, 6, .07, .30, .98, .49, .15, .05)
    scatter_axes = [f.add_subplot(spec) for spec in scatter_grid]

    kde_grid = plt.GridSpec(1, 6, .07, .07, .98, .21, .15, .05)
    kde_axes = [f.add_subplot(spec) for spec in kde_grid]

    cbar_ax = f.add_axes([.04, .62, .015, .26])

    return f, sticks_axes, rest_axes, scatter_axes, kde_axes, cbar_ax

Example 22

def partitions(min_val, max_val, n):
    """
    Get start/stop boundaries for N partitions.

    Args:
        min_val (int): The starting value.
        max_val (int): The last value.
        n (int): The number of partitions.
    """
    pts = np.array_split(np.arange(min_val, max_val+1), n)
    bounds = []
    for pt in pts:
        bounds.append((int(pt[0]), int(pt[-1])))
    return bounds

Example 23

def fit(self, X, y):
    """Fit a series of independent estimators to the dataset.

    Parameters
    ----------
    X : array, shape (n_samples, n_features, n_estimators)
        The training input samples. For each data slice, a clone estimator
        is fitted independently.
    y : array, shape (n_samples,)
        The target values.

    Returns
    -------
    self : object
        Return self.
    """
    self._check_Xy(X, y)
    self.estimators_ = list()
    # For fitting, the parallelization is across estimators.
    parallel, p_func, n_jobs = parallel_func(_sl_fit, self.n_jobs)
    estimators = parallel(
        p_func(self.base_estimator, split, y)
        for split in np.array_split(X, n_jobs, axis=-1))
    self.estimators_ = np.concatenate(estimators, 0)
    return self

Example 24

def _transform(self, X, method):
    """Aux. function to make parallel predictions/transformation."""
    self._check_Xy(X)
    method = _check_method(self.base_estimator, method)
    if X.shape[-1] != len(self.estimators_):
        raise ValueError('The number of estimators does not match '
                         'X.shape[2]')
    # For predictions/transforms the parallelization is across the data and
    # not across the estimators to avoid memory load.
    parallel, p_func, n_jobs = parallel_func(_sl_transform, self.n_jobs)
    X_splits = np.array_split(X, n_jobs, axis=-1)
    est_splits = np.array_split(self.estimators_, n_jobs)
    y_pred = parallel(p_func(est, x, method)
                      for (est, x) in zip(est_splits, X_splits))

    if n_jobs > 1:
        y_pred = np.concatenate(y_pred, axis=1)
    else:
        y_pred = y_pred[0]
    return y_pred

Example 25

def _yield_minibatches_idx(self, n_batches, data_ary, shuffle=True):
    indices = np.arange(data_ary.shape[0])

    if shuffle:
        indices = np.random.permutation(indices)

    if n_batches > 1:
        remainder = data_ary.shape[0] % n_batches

        if remainder:
            minis = np.array_split(indices[:-remainder], n_batches)
            minis[-1] = np.concatenate((minis[-1],
                                        indices[-remainder:]),
                                       axis=0)
        else:
            minis = np.array_split(indices, n_batches)

    else:
        minis = (indices,)

    for idx_batch in minis:
        yield idx_batch

Example 26

def test_mini_batch_k_means_random_init_partial_fit():
    km = MiniBatchKMeans(n_clusters=n_clusters, init="random", random_state=42)

    # use the partial_fit API for online learning
    for X_minibatch in np.array_split(X, 10):
        km.partial_fit(X_minibatch)

    # compute the labeling on the complete dataset
    labels = km.predict(X)
    assert_equal(v_measure_score(true_labels, labels), 1.0)

Example 27

def binned_batch_stream(target_statistics, batch_size, n_batches, n_bins=64):
    hist, bins = np.histogram(target_statistics, bins=n_bins)
    indx = np.argsort(target_statistics)
    indicies_categories = np.array_split(indx, np.cumsum(hist)[:-1])

    per_category = batch_size / n_bins

    weight_correction = (np.float64(hist) / per_category).astype('float32')
    wc = np.repeat(weight_correction, per_category)

    for i in xrange(n_batches):
        sample = [
            np.random.choice(ind, size=per_category, replace=True)
            for ind in indicies_categories
        ]

        yield np.hstack(sample), wc

Example 28

def binned_batch_stream(target_statistics, batch_size, n_batches, n_bins=64):
    hist, bins = np.histogram(target_statistics, bins=n_bins)
    indx = np.argsort(target_statistics)
    indicies_categories = np.array_split(indx, np.cumsum(hist)[:-1])

    n_samples = target_statistics.shape[0]
    per_category = batch_size / n_bins

    weight_correction = (n_bins * np.float64(hist) / n_samples).astype('float32')
    wc = np.repeat(weight_correction, per_category)

    for i in xrange(n_batches):
        sample = [
            np.random.choice(ind, size=per_category, replace=True)
            for ind in indicies_categories
        ]

        yield np.hstack(sample), wc

Example 29

def test_shape_factors(self):
    """
    Tests for :func:`array_split.split.shape_factors`.
    """
    f = shape_factors(4, 2)
    self.assertTrue(_np.all(f == 2))

    f = shape_factors(4, 1)
    self.assertTrue(_np.all(f == 4))

    f = shape_factors(5, 2)
    self.assertTrue(_np.all(f == [1, 5]))

    f = shape_factors(6, 2)
    self.assertTrue(_np.all(f == [2, 3]))

    f = shape_factors(6, 3)
    self.assertTrue(_np.all(f == [1, 2, 3]))

Example 30

def scale(boxlist, y_scale, x_scale):
    """Scale box coordinates in x and y dimensions.

    Args:
        boxlist: BoxList holding N boxes
        y_scale: float
        x_scale: float

    Returns:
        boxlist: BoxList holding N boxes
    """
    y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
    y_min = y_scale * y_min
    y_max = y_scale * y_max
    x_min = x_scale * x_min
    x_max = x_scale * x_max
    scaled_boxlist = np_box_list.BoxList(np.hstack([y_min, x_min, y_max, x_max]))

    fields = boxlist.get_extra_fields()
    for field in fields:
        extra_field_data = boxlist.get_field(field)
        scaled_boxlist.add_field(field, extra_field_data)

    return scaled_boxlist

Example 31

def iterbatches(arrays, num_batches=None, batch_size=None, shuffle=True, include_final_partial_batch=True):
    assert (num_batches is None) != (batch_size is None), 'Provide num_batches or batch_size, but not both'
    arrays = tuple(map(np.asarray, arrays))
    n = arrays[0].shape[0]
    assert all(a.shape[0] == n for a in arrays[1:])
    inds = np.arange(n)
    if shuffle: np.random.shuffle(inds)
    sections = np.arange(0, n, batch_size)[1:] if num_batches is None else num_batches
    for batch_inds in np.array_split(inds, sections):
        if include_final_partial_batch or len(batch_inds) == batch_size:
            yield tuple(a[batch_inds] for a in arrays)

Example 32

def _gen_init_n_blocks(na, nb, ka, kb):
    num_nodes_a = np.arange(na)
    n_blocks_a = map(len, np.array_split(num_nodes_a, ka))
    num_nodes_b = np.arange(nb)
    n_blocks_b = map(len, np.array_split(num_nodes_b, kb))

    n_blocks_ = " ".join(map(str, n_blocks_a)) + " " + " ".join(map(str, n_blocks_b))

    return n_blocks_

Example 33

def gen_equal_partition(n, total):
    all_nodes = np.arange(total)
    n_blocks = list(map(len, np.array_split(all_nodes, n)))

    return n_blocks

Example 34

def run_par(self, function, **kwargs):
    """
    Run a function on the agents in parallel.
    """
    columns = kwargs["columns"] if "columns" in kwargs else self.agents.columns
    # Garbage collect before creating new processes.
    gc.collect()
    return pd.concat(self.pool.imap(partial(function, **kwargs),
                                    np.array_split(self.agents[columns],
                                                   self.processes * self.splits)))

Example 35

def run_par(self, function, **kwargs):
    """
    Run a function on the agents in parallel.
    """
    columns = kwargs["columns"] if "columns" in kwargs else self.agents.columns
    # Garbage collect before creating new processes.
    gc.collect()
    return pd.concat(self.pool.imap(partial(function, **kwargs),
                                    np.array_split(self.agents[columns],
                                                   self.processes * self.splits)))

Example 36

def split_in_chunks(minibatch, num_splits, flatten_keys=['labels']):
    '''Return the splits per device

    Return a list of dictionaries, one per device. Each dictionary
    contains, for each key, the values that should be allocated on its
    device.
    '''
    # Split the value of each key into chunks
    for k, v in minibatch.iteritems():
        minibatch[k] = np.array_split(v, num_splits)
        if any(k == v for v in flatten_keys):
            minibatch[k] = [el.flatten() for el in minibatch[k]]
    return map(dict, zip(*[[(k, v) for v in value]
                           for k, value in minibatch.items()]))

Example 37

def chunk_iterator(dataset, chunk_size=1000):
    chunk_indices = np.array_split(np.arange(len(dataset)),
                                   len(dataset)/chunk_size)
    for chunk_ixs in chunk_indices:
        chunk = dataset[chunk_ixs]
        yield (chunk_ixs, chunk)

    raise StopIteration

Example 38

def array_split(ary, indices_or_sections, axis=0):
    """Splits an array into multiple sub arrays along a given axis.

    This function is almost equivalent to :func:`cupy.split`. The only
    difference is that this function allows an integer sections that does not
    evenly divide the axis.

    .. seealso:: :func:`cupy.split` for more detail, :func:`numpy.array_split`

    """
    return core.array_split(ary, indices_or_sections, axis)

Example 39

def split(ary, indices_or_sections, axis=0):
    """Splits an array into multiple sub arrays along a given axis.

    Args:
        ary (cupy.ndarray): Array to split.
        indices_or_sections (int or sequence of ints): A value indicating how
            to divide the axis. If it is an integer, then is treated as the
            number of sections, and the axis is evenly divided. Otherwise,
            the integers indicate indices to split at. Note that the sequence
            on the device memory is not allowed.
        axis (int): Axis along which the array is split.

    Returns:
        A list of sub arrays. Each array is a view of the corresponding input
        array.

    .. seealso:: :func:`numpy.split`

    """
    if ary.ndim <= axis:
        raise IndexError('Axis exceeds ndim')
    size = ary.shape[axis]

    if numpy.isscalar(indices_or_sections):
        if size % indices_or_sections != 0:
            raise ValueError(
                'indices_or_sections must divide the size along the axes.\n'
                'If you want to split the array into non-equally-sized '
                'arrays, use array_split instead.')
    return array_split(ary, indices_or_sections, axis)

Example 40

def iterbatches(arrays, *, num_batches=None, batch_size=None, shuffle=True, include_final_partial_batch=True):
    assert (num_batches is None) != (batch_size is None), 'Provide num_batches or batch_size, but not both'
    arrays = tuple(map(np.asarray, arrays))
    n = arrays[0].shape[0]
    assert all(a.shape[0] == n for a in arrays[1:])
    inds = np.arange(n)
    if shuffle: np.random.shuffle(inds)
    sections = np.arange(0, n, batch_size)[1:] if num_batches is None else num_batches
    for batch_inds in np.array_split(inds, sections):
        if include_final_partial_batch or len(batch_inds) == batch_size:
            yield tuple(a[batch_inds] for a in arrays)

Example 41

def trim_data(data, resolution):
    r = []
    for i in numpy.array_split(data, resolution):
        if len(i) > 0:
            r.append(numpy.average(i))
    return r

Example 42

def test_latlon2pix_edges(pix_size_single, origin_point, is_flipped,
                          num_chunks, chunk_position):
    img = make_image(pix_size_single, origin_point, is_flipped,
                     num_chunks, chunk_position)
    chunk_idx = img.chunk_idx
    res_x = img._full_res[0]
    res_y = img._full_res[1]
    pix_size = (img.pixsize_x, img.pixsize_y)
    origin = (img._start_lon, img._start_lat)

    # compute chunks
    lons = np.arange(res_x + 1) * pix_size[0] + origin[0]  # right edge +1
    all_lats = np.arange(res_y) * pix_size[1] + origin[1]
    lats_chunks = np.array_split(all_lats, num_chunks)[chunk_idx]
    pix_x = np.concatenate((np.arange(res_x), [res_x - 1]))
    pix_y_chunks = range(lats_chunks.shape[0])

    if chunk_position == 'end':
        pix_y = np.concatenate((pix_y_chunks, [pix_y_chunks[-1]]))
        lats = np.concatenate((lats_chunks, [res_y * pix_size[1] + origin[1]]))
    else:
        pix_y = pix_y_chunks
        lats = lats_chunks

    d = np.array([[a, b] for a in lons for b in lats])
    xy = img.lonlat2pix(d)
    true_xy = np.array([[a, b] for a in pix_x for b in pix_y])
    assert np.all(xy == true_xy)

Example 43

def split_cfold(nsamples, k=5, seed=None):
    """
    Function that returns indices for splitting data into random folds.

    Parameters
    ----------
    nsamples: int
        the number of samples in the dataset
    k: int, optional
        the number of folds
    seed: int, optional
        random seed to provide to numpy

    Returns
    -------
    cvinds: list
        list of arrays of length k, each with approximate shape (nsamples /
        k,) of indices. These indices are randomly permuted (without
        replacement) of assignments to each fold.
    cvassigns: ndarray
        array of shape (nsamples,) with each element in [0, k), that can be
        used to assign data to a fold. This corresponds to the indices of
        cvinds.
    """
    np.random.seed(seed)
    pindeces = np.random.permutation(nsamples)
    cvinds = np.array_split(pindeces, k)
    cvassigns = np.zeros(nsamples, dtype=int)
    for n, inds in enumerate(cvinds):
        cvassigns[inds] = n
    return cvinds, cvassigns

Example 44

def fit(self, x, y, *args, **kwargs):
    # set a different random seed for each thread
    np.random.seed(self.random_state + mpiops.chunk_index)

    if self.parallel:
        process_rfs = np.array_split(range(self.forests),
                                     mpiops.chunks)[mpiops.chunk_index]
    else:
        process_rfs = range(self.forests)

    for t in process_rfs:
        print('training forest {} using '
              'process {}'.format(t, mpiops.chunk_index))

        # change random state in each forest
        self.kwargs['random_state'] = np.random.randint(0, 10000)
        rf = RandomForestTransformed(
            target_transform=self.target_transform,
            n_estimators=self.n_estimators,
            **self.kwargs
        )
        rf.fit(x, y)
        if self.parallel:  # used in training
            pk_f = join(self.temp_dir,
                        'rf_model_{}.pk'.format(t))
        else:  # used when parallel is false, i.e., during x-val
            pk_f = join(self.temp_dir,
                        'rf_model_{}_{}.pk'.format(t, mpiops.chunk_index))
        with open(pk_f, 'wb') as fp:
            pickle.dump(rf, fp)

    if self.parallel:
        mpiops.comm.barrier()

    # Mark that we are now trained
    self._trained = True

Example 45

def kmean_distance2(x, C):
    """Compute squared euclidian distance to the nearest cluster centre

    Parameters
    ----------
    x : ndarray
        (n, d) array of n d-dimensional points
    C : ndarray
        (k, d) array of k cluster centres

    Returns
    -------
    d2_x : ndarray
        (n,) length array of distances from each x to the nearest centre
    """
    # To save memory we partition the computation
    nsplits = max(1, int(x.shape[0]/distance_partition_size))
    splits = np.array_split(x, nsplits)
    d2_x = np.empty(x.shape[0])
    idx = 0
    for x_i in splits:
        n_i = x_i.shape[0]
        D2_x = scipy.spatial.distance.cdist(x_i, C, metric='sqeuclidean')
        d2_x[idx:idx + n_i] = np.amin(D2_x, axis=1)
        idx += n_i
    return d2_x

Example 46

def compute_weights(x, C):
    """Number of points in x assigned to each centre c in C

    Parameters
    ----------
    x : ndarray
        (n, d) array of n d-dimensional points
    C : ndarray
        (k, d) array of k cluster centres

    Returns
    -------
    weights : ndarray
        (k,) length array giving number of x closest to each c in C
    """
    nsplits = max(1, int(x.shape[0]/distance_partition_size))
    splits = np.array_split(x, nsplits)
    closests = np.empty(x.shape[0], dtype=int)
    idx = 0
    for x_i in splits:
        n_i = x_i.shape[0]
        D2_x = scipy.spatial.distance.cdist(x_i, C, metric='sqeuclidean')
        closests[idx: idx+n_i] = np.argmin(D2_x, axis=1)
        idx += n_i
    weights = np.bincount(closests, minlength=C.shape[0])
    return weights

Example 47

def reseed_point(X, C, index):
    """ Re-initialise the centre of a class if it loses all its members

    This should almost never happen. If it does, find the point furthest
    from all the other cluster centres and use that. Maybe a bad idea but
    a decent first pass

    Parameters
    ----------
    X : ndarray
        (n, d) array of points
    C : ndarray
        (k, d) array of cluster centres
    index : int >= 0
        index between 0..k-1 of the cluster that has lost it's points

    Returns
    -------
    new_point : ndarray
        d-dimensional point for replacing the empty cluster centre.
    """
    log.info("Reseeding class with no members")
    nsplits = max(1, int(X.shape[0]/distance_partition_size))
    splits = np.array_split(X, nsplits)
    empty_index = np.ones(C.shape[0], dtype=bool)
    empty_index[index] = False
    local_candidate = None
    local_cost = 1e23

    for x_i in splits:
        D2_x = scipy.spatial.distance.cdist(x_i, C, metric='sqeuclidean')
        costs = np.sum(D2_x[:, empty_index], axis=1)
        potential_idx = np.argmax(costs)
        potential_cost = costs[potential_idx]
        if potential_cost < local_cost:
            local_candidate = x_i[potential_idx]
            local_cost = potential_cost

    best_pernode = mpiops.comm.allgather(local_cost)
    best_node = np.argmax(best_pernode)
    new_point = mpiops.comm.bcast(local_candidate, root=best_node)
    return new_point

Example 48

def __init__(self, shape, bbox, crs, name, n_subchunks, outputdir,
             band_tags=None):
    # affine
    self.A, _, _ = image.bbox2affine(bbox[1, 0], bbox[0, 0],
                                     bbox[0, 1], bbox[1, 1],
                                     shape[0], shape[1])
    self.shape = shape
    self.outbands = len(band_tags)
    self.bbox = bbox
    self.name = name
    self.outputdir = outputdir
    self.n_subchunks = n_subchunks
    self.sub_starts = [k[0] for k in np.array_split(
        np.arange(self.shape[1]),
        mpiops.chunks * self.n_subchunks)]

    # file tags don't have spaces
    if band_tags:
        file_tags = ["_".join(k.lower().split()) for k in band_tags]
    else:
        file_tags = [str(k) for k in range(self.outbands)]
        band_tags = file_tags

    if mpiops.chunk_index == 0:
        # create a file for each band
        self.files = []
        for band in range(self.outbands):
            output_filename = os.path.join(outputdir, name + "_" +
                                           file_tags[band] + ".tif")
            f = rasterio.open(output_filename, 'w', driver='GTiff',
                              width=self.shape[0], height=self.shape[1],
                              dtype=np.float32, count=1,
                              crs=crs,
                              transform=self.A,
                              nodata=self.nodata_value)
            f.update_tags(1, image_type=band_tags[band])
            self.files.append(f)

Example 49

def gdalaverage(input_dir, out_dir, size):
    """
    average data using gdal's averaging method.

    Parameters
    ----------
    input_dir: str
        input dir name of the tifs that needs to be averaged
    out_dir: str
        output dir name
    size: int, optional
        size of kernel

    Returns
    -------
    """
    input_dir = abspath(input_dir)
    log.info('Reading tifs from {}'.format(input_dir))
    tifs = glob.glob(join(input_dir, '*.tif'))

    process_tifs = np.array_split(tifs, mpiops.chunks)[mpiops.chunk_index]

    for tif in process_tifs:
        data_set = gdal.Open(tif, gdal.GA_ReadOnly)
        # band = data_set.GetRasterBand(1)
        # data_type = gdal.GetDataTypeName(band.DataType)
        # data = band.ReadAsArray()
        # no_data_val = band.GetNoDataValue()
        # averaged_data = filter_data(data, size, no_data_val)
        log.info('Calculated average for {}'.format(basename(tif)))

        output_file = join(out_dir, 'average_' + basename(tif))
        src_gt = data_set.GetGeoTransform()
        tmp_file = '/tmp/tmp_{}.tif'.format(mpiops.chunk_index)
        resample_cmd = [TRANSLATE] + [tif, tmp_file] + \
            ['-tr', str(src_gt[1]*size), str(src_gt[1]*size)] + \
            ['-r', 'bilinear']
        check_call(resample_cmd)
        rollback_cmd = [TRANSLATE] + [tmp_file, output_file] + \
            ['-tr', str(src_gt[1]), str(src_gt[1])]
        check_call(rollback_cmd)
        log.info('Finished converting {}'.format(basename(tif)))

Example 50

def mean(input_dir, out_dir, size, func, partitions, mask):
    input_dir = abspath(input_dir)
    if isdir(input_dir):
        log.info('Reading tifs from {}'.format(input_dir))
        tifs = glob.glob(join(input_dir, '*.tif'))
    else:
        assert isfile(input_dir)
        tifs = [input_dir]

    process_tifs = np.array_split(tifs, mpiops.chunks)[mpiops.chunk_index]

    for tif in process_tifs:
        log.info('Starting to average {}'.format(basename(tif)))
        treat_file(tif, out_dir, size, func, partitions, mask)
        log.info('Finished averaging {}'.format(basename(tif)))
