按固定条数拆分
谷歌的Lists#partition方法
方法引用地址:com.google.common.collect.Lists#partition
源码解读
Lists.java
package com.google.common.collect;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkElementIndex;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkPositionIndex;
import static com.google.common.base.Preconditions.checkPositionIndexes;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.CollectPreconditions.checkNonnegative;
import static com.google.common.collect.CollectPreconditions.checkRemove;
import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtCompatible;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.math.IntMath;
import com.google.common.primitives.Ints;
import java.io.Serializable;
import java.math.RoundingMode;
import java.util.AbstractList;
import java.util.AbstractSequentialList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;
import java.util.RandomAccess;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.function.Predicate;
import javax.annotation.CheckForNull;
import org.checkerframework.checker.nullness.qual.Nullable;
/**
 * Static utility methods pertaining to {@link List} instances. Also see this class's counterparts
 * {@link Sets}, {@link Maps} and {@link Queues}.
 *
 * <p>See the Guava User Guide article on <a href=
 * "https://github.com/google/guava/wiki/CollectionUtilitiesExplained#lists">{@code Lists}</a>.
 *
 * @author Kevin Bourrillion
 * @author Mike Bostock
 * @author Louis Wasserman
 * @since 2.0
 */
@GwtCompatible(emulated = true)
@ElementTypesAreNonnullByDefault
public final class Lists {
  private Lists() {}

  /**
   * Returns consecutive {@linkplain List#subList(int, int) sublists} of a list, each of the same
   * size (the final list may be smaller). For example, partitioning a list containing {@code [a, b,
   * c, d, e]} with a partition size of 3 yields {@code [[a, b, c], [d, e]]} -- an outer list
   * containing two inner lists of three and two elements, all in the original order.
   *
   * <p>The outer list is unmodifiable, but reflects the latest state of the source list. The inner
   * lists are sublist views of the original list, produced on demand using {@link List#subList(int,
   * int)}, and are subject to all the usual caveats about modification as explained in that API.
   *
   * @param list the list to return consecutive sublists of
   * @param size the desired size of each sublist (the last may be smaller)
   * @return a list of consecutive sublists
   * @throws NullPointerException if {@code list} is null
   * @throws IllegalArgumentException if {@code size} is nonpositive
   */
  public static <T extends @Nullable Object> List<List<T>> partition(List<T> list, int size) {
    checkNotNull(list);
    checkArgument(size > 0);
    // Propagate the RandomAccess marker of the source list to the returned view.
    return (list instanceof RandomAccess)
        ? new RandomAccessPartition<>(list, size)
        : new Partition<>(list, size);
  }

  /**
   * Lazy view of a list as consecutive fixed-size sublists. No sublist is materialized up front;
   * each {@link #get(int)} call asks the backing list for the matching {@code subList}.
   */
  private static class Partition<T extends @Nullable Object> extends AbstractList<List<T>> {
    final List<T> list;
    final int size;

    Partition(List<T> list, int size) {
      this.list = list;
      this.size = size;
    }

    /**
     * Returns the {@code index}-th partition as a sublist view of the backing list.
     *
     * @throws IndexOutOfBoundsException if {@code index} is negative or not less than {@link
     *     #size()}
     */
    @Override
    public List<T> get(int index) {
      checkElementIndex(index, size());
      // index < ceil(n / size) implies start < n, so this product cannot overflow.
      int start = index * size;
      // Clamp the partition length rather than the end index: "start + size" may exceed
      // Integer.MAX_VALUE for very large lists, whereas "n - start" never does.
      int end = start + Math.min(size, list.size() - start);
      return list.subList(start, end);
    }

    /** Returns the number of partitions, i.e. the backing list's size divided by {@code size}, rounded up. */
    @Override
    public int size() {
      return IntMath.divide(list.size(), size, RoundingMode.CEILING);
    }

    /** Returns {@code true} when the backing list is empty, without computing the partition count. */
    @Override
    public boolean isEmpty() {
      return list.isEmpty();
    }
  }

  /** {@link Partition} variant that additionally carries the {@link RandomAccess} marker. */
  private static class RandomAccessPartition<T extends @Nullable Object> extends Partition<T>
      implements RandomAccess {
    RandomAccessPartition(List<T> list, int size) {
      super(list, size);
    }
  }
}
RandomAccess.java
package java.util;
/**
 * Marker interface used by {@code List} implementations to indicate that
 * they support fast (generally constant time) random access. The primary
 * purpose of this interface is to allow generic algorithms to alter their
 * behavior to provide good performance when applied to either random or
 * sequential access lists.
 *
 * <p>The best algorithms for manipulating random access lists (such as
 * {@code ArrayList}) can produce quadratic behavior when applied to
 * sequential access lists (such as {@code LinkedList}). Generic list
 * algorithms are encouraged to check whether the given list is an
 * {@code instanceof} this interface before applying an algorithm that would
 * provide poor performance if it were applied to a sequential access list,
 * and to alter their behavior if necessary to guarantee acceptable
 * performance.
 *
 * <p>It is recognized that the distinction between random and sequential
 * access is often fuzzy. For example, some {@code List} implementations
 * provide asymptotically linear access times if they get huge, but constant
 * access times in practice. Such a {@code List} implementation
 * should generally implement this interface. As a rule of thumb, a
 * {@code List} implementation should implement this interface if,
 * for typical instances of the class, this loop:
 * <pre>
 *     for (int i=0, n=list.size(); i &lt; n; i++)
 *         list.get(i);
 * </pre>
 * runs faster than this loop:
 * <pre>
 *     for (Iterator i=list.iterator(); i.hasNext(); )
 *         i.next();
 * </pre>
 *
 * <p>This interface is a member of the
 * <a href="{@docRoot}/../technotes/guides/collections/index.html">
 * Java Collections Framework</a>.
 *
 * @since 1.4
 */
public interface RandomAccess {
}
说明:
- Lists#partition方法并不是直接返回一个简单的List<List>,而是返回了一个 Partition 对象,这个对象是一个实现了List接口的自定义类。当调用 Lists.partition 方法时,得到的是一个“虚拟”的列表,其内部的get方法会在请求特定索引的元素时动态地返回原始列表的一个子列表。
  这里的关键是Partition类继承自AbstractList并重写了get方法。当使用for-each循环或其他方式迭代这个返回的列表时,实际上是在迭代Partition对象。每次迭代访问到某个元素时,Partition#get方法会被调用来提供对应索引的子列表。
  这种方式的优点是延迟计算(惰性求值):只有实际需要某个分区时,它才会被创建。这可以提高效率,尤其是只需要列表中某些分区或者只需要迭代一次时。
- RandomAccessPartition实现了RandomAccess接口,用于表示支持快速(通常是恒定时间)随机访问。
使用示例
// Build a list containing 1..400. The array is allocated once at its final size
// instead of being re-copied on every iteration.
Long[] idArray = new Long[400];
for (int i = 0; i < idArray.length; i++) {
    idArray[i] = i + 1L;
}
List<Long> allIdList = Arrays.asList(idArray);
// Split into partitions of 100 elements each
List<List<Long>> partitionIdList = Lists.partition(allIdList, 100);
注意事项
- **不可修改的外层列表:**分区列表是不可修改的,这意味着不能直接添加或删除分区,但可以修改分区内部的元素,因为它们是原始列表的视图。
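- **子列表的修改问题:**虽然分区列表本身不可修改,但每个分区都是原始列表的视图,因此对分区的修改会反映到原始列表中。这一点在注释中(The inner lists are sublist views of the original list, produced on demand using List#subList(int, int), and are subject to all the usual caveats about modification as explained in that API.)也有提到,需要注意。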
- **性能和内存效率:**由于使用了subList,分区的性能和内存效率与原始列表紧密相关。如果原始列表很大,创建太多的分区可能会导致性能问题。
阿帕奇的ListUtils#partition方法
方法引用地址:org.apache.commons.collections4.ListUtils#partition
源码解读
ListUtils.java
package org.apache.commons.collections4;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.collections4.bag.HashBag;
import org.apache.commons.collections4.functors.DefaultEquator;
import org.apache.commons.collections4.list.FixedSizeList;
import org.apache.commons.collections4.list.LazyList;
import org.apache.commons.collections4.list.PredicatedList;
import org.apache.commons.collections4.list.TransformedList;
import org.apache.commons.collections4.list.UnmodifiableList;
import org.apache.commons.collections4.sequence.CommandVisitor;
import org.apache.commons.collections4.sequence.EditScript;
import org.apache.commons.collections4.sequence.SequencesComparator;
/**
 * Provides utility methods and decorators for {@link List} instances.
 *
 * @since 1.0
 */
public class ListUtils {

    /**
     * {@code ListUtils} should not normally be instantiated.
     */
    private ListUtils() {
    }

    /**
     * Returns consecutive {@link List#subList(int, int) sublists} of a
     * list, each of the same size (the final list may be smaller). For example,
     * partitioning a list containing {@code [a, b, c, d, e]} with a partition
     * size of 3 yields {@code [[a, b, c], [d, e]]} -- an outer list containing
     * two inner lists of three and two elements, all in the original order.
     * <p>
     * The outer list is unmodifiable, but reflects the latest state of the
     * source list. The inner lists are sublist views of the original list,
     * produced on demand using {@link List#subList(int, int)}, and are subject
     * to all the usual caveats about modification as explained in that API.
     * <p>
     * Adapted from http://code.google.com/p/guava-libraries/
     *
     * @param <T> the element type
     * @param list  the list to return consecutive sublists of
     * @param size  the desired size of each sublist (the last may be smaller)
     * @return a list of consecutive sublists
     * @throws NullPointerException if list is null
     * @throws IllegalArgumentException if size is not strictly positive
     * @since 4.0
     */
    public static <T> List<List<T>> partition(final List<T> list, final int size) {
        if (list == null) {
            throw new NullPointerException("List must not be null");
        }
        if (size <= 0) {
            throw new IllegalArgumentException("Size must be greater than 0");
        }
        return new Partition<>(list, size);
    }

    /**
     * Provides a partition view on a {@link List}. Nothing is copied: every
     * partition is obtained from the backing list's {@code subList} on demand.
     *
     * @since 4.0
     */
    private static class Partition<T> extends AbstractList<List<T>> {
        private final List<T> list;
        private final int size;

        private Partition(final List<T> list, final int size) {
            this.list = list;
            this.size = size;
        }

        /**
         * Returns the partition at the given position as a sublist view.
         *
         * @param index  the zero-based partition index
         * @return the sublist covering that partition
         * @throws IndexOutOfBoundsException if the index is negative or not
         *  less than {@link #size()}
         */
        @Override
        public List<T> get(final int index) {
            final int listSize = size();
            if (index < 0) {
                throw new IndexOutOfBoundsException("Index " + index + " must not be negative");
            }
            if (index >= listSize) {
                throw new IndexOutOfBoundsException("Index " + index + " must be less than size " +
                                                    listSize);
            }
            // index < ceil(n / size) implies start < n, so this product cannot overflow.
            final int start = index * size;
            // Clamp the partition length rather than the end index: "start + size" may
            // exceed Integer.MAX_VALUE for very large lists, whereas "n - start" never does.
            final int end = start + Math.min(size, list.size() - start);
            return list.subList(start, end);
        }

        /**
         * Returns the number of partitions: the backing list's size divided
         * by the partition size, rounded up.
         */
        @Override
        public int size() {
            final int total = list.size();
            // Integer ceiling division; avoids a round-trip through double.
            return total / size + (total % size == 0 ? 0 : 1);
        }

        /**
         * Returns {@code true} when the backing list is empty.
         */
        @Override
        public boolean isEmpty() {
            return list.isEmpty();
        }
    }
}
说明:
- 重点注意注释Adapted from http://code.google.com/p/guava-libraries/(改编自谷歌),也能发现代码和谷歌的com.google.common.collect.Lists#partition方法几乎一致,只是少了RandomAccessPartition的相关代码。
使用示例
// Build a list containing 1..400. The array is allocated once at its final size
// instead of being re-copied on every iteration.
Long[] idArray = new Long[400];
for (int i = 0; i < idArray.length; i++) {
    idArray[i] = i + 1L;
}
List<Long> allIdList = Arrays.asList(idArray);
// Split into partitions of 100 elements each
List<List<Long>> partitionIdList = ListUtils.partition(allIdList, 100);
注意事项
- 同谷歌的Lists#partition方法几乎一样。
平均分成N份
Stream的方法
使用示例
// Build a list containing 1..400. The array is allocated once at its final size
// instead of being re-copied on every iteration.
Long[] idArray = new Long[400];
for (int i = 0; i < idArray.length; i++) {
    idArray[i] = i + 1L;
}
List<Long> allIdList = Arrays.asList(idArray);
// Maximum number of elements per group
int maxSize = 10;
// Number of groups: the list size divided by maxSize, rounded up
int limit = (allIdList.size() + maxSize - 1) / maxSize;
// Stream.iterate produces the infinite sequence of group indexes 0, 1, 2, ...
List<List<Long>> partitionIdList = Stream.iterate(0, n -> n + 1)
        // Truncate the sequence to the number of groups
        .limit(limit)
        // Switch from a sequential to a parallel stream so groups can be built concurrently
        .parallel()
        // Map every group index to the sub-list it stands for
        .map(
                // skip(a * maxSize) drops the elements of all earlier groups,
                // so each sub-list starts at the right position
                a -> allIdList.stream().skip(a * maxSize)
                        // Cap each sub-list at maxSize elements
                        .limit(maxSize)
                        .parallel()
                        .collect(Collectors.toList())
        )
        .collect(Collectors.toList());
说明:
- 核心方法是 `Stream.iterate` 和 `map` 的组合使用,以及 `skip` 和 `limit` 的链式调用来生成所需的子列表。并行流的使用(`parallel()`)是为了提高分割操作的性能,尤其是在处理大型列表时。
注意事项
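- 需要注意的是,虽然使用了并行流,但这里的流是有序流,skip、limit 和 Collectors.toList() 都会保持遇到顺序(encounter order),因此各子列表之间以及子列表内部元素的顺序仍与原始列表一致;并行操作不保证的只是元素的处理顺序,而不是结果顺序。代价是 skip 和 limit 在有序并行流上的开销可能很大。此外,如果原始列表非常大,使用并行流可能会带来线程管理的开销,实际性能提升需要根据具体情况进行测试。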