拆分List的方法

付聪1210

已于 2024-08-27 07:17:46 修改

阅读量1.7k

点赞数 48

分类专栏：数据结构和算法　文章标签：list

于 2024-08-27 06:52:40 首次发布

本文链接：https://blog.csdn.net/Fu_Cong/article/details/141578591

版权

数据结构和算法专栏收录该内容

7 篇文章

订阅专栏

按固定条数拆分

谷歌的Lists#partition方法

方法引用地址：com.google.common.collect.Lists#partition

源码解读

Lists.java

package com.google.common.collect;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkElementIndex;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkPositionIndex;
import static com.google.common.base.Preconditions.checkPositionIndexes;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.CollectPreconditions.checkNonnegative;
import static com.google.common.collect.CollectPreconditions.checkRemove;

import com.google.common.annotations.Beta;
import com.google.common.annotations.GwtCompatible;
import com.google.common.annotations.GwtIncompatible;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Function;
import com.google.common.base.Objects;
import com.google.common.math.IntMath;
import com.google.common.primitives.Ints;
import java.io.Serializable;
import java.math.RoundingMode;
import java.util.AbstractList;
import java.util.AbstractSequentialList;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
import java.util.NoSuchElementException;
import java.util.RandomAccess;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.function.Predicate;
import javax.annotation.CheckForNull;
import org.checkerframework.checker.nullness.qual.Nullable;

/**
 * Static utility methods pertaining to {@link List} instances. Also see this class's counterparts
 * {@link Sets}, {@link Maps} and {@link Queues}.
 *
 * <p>See the Guava User Guide article on <a href=
 * "https://github.com/google/guava/wiki/CollectionUtilitiesExplained#lists">{@code Lists}</a>.
 *
 * @author Kevin Bourrillion
 * @author Mike Bostock
 * @author Louis Wasserman
 * @since 2.0
 */
@GwtCompatible(emulated = true)
@ElementTypesAreNonnullByDefault
public final class Lists {

    /** Not instantiable: this class only hosts static utility methods. */
    private Lists() {

    }

    /**
     * Returns consecutive {@linkplain List#subList(int, int) sublists} of a list, each of the same
     * size (the final list may be smaller). For example, partitioning a list containing {@code [a, b,
     * c, d, e]} with a partition size of 3 yields {@code [[a, b, c], [d, e]]} -- an outer list
     * containing two inner lists of three and two elements, all in the original order.
     *
     * <p>The outer list is unmodifiable, but reflects the latest state of the source list. The inner
     * lists are sublist views of the original list, produced on demand using {@link List#subList(int,
     * int)}, and are subject to all the usual caveats about modification as explained in that API.
     *
     * <p>When {@code list} implements {@link RandomAccess}, the returned outer list implements it
     * as well.
     *
     * @param list the list to return consecutive sublists of
     * @param size the desired size of each sublist (the last may be smaller)
     * @return a list of consecutive sublists
     * @throws NullPointerException if {@code list} is null
     * @throws IllegalArgumentException if {@code size} is nonpositive
     */
    public static <T extends @Nullable Object> List<List<T>> partition(List<T> list, int size) {
        checkNotNull(list);
        checkArgument(size > 0);
        return (list instanceof RandomAccess) ? new RandomAccessPartition<>(list, size) : new Partition<>(list, size);
    }

    /**
     * Lazy, read-only view of {@code list} as consecutive chunks of at most {@code size} elements.
     * No chunk is materialized until it is requested through {@link #get(int)}.
     */
    private static class Partition<T extends @Nullable Object> extends AbstractList<List<T>> {
        /** The backing list; never copied. */
        final List<T> list;
        /** Maximum number of elements per chunk. */
        final int size;

        Partition(List<T> list, int size) {
            this.list = list;
            this.size = size;
        }

        /**
         * Returns the chunk at position {@code index} as a {@code subList} view of the backing list.
         *
         * @param index zero-based chunk position, checked against {@link #size()}
         * @return the sublist view covering that chunk
         */
        @Override
        public List<T> get(int index) {
            checkElementIndex(index, size());
            int start = index * size;
            // The index check guarantees start < list.size(), so the subtraction is positive.
            // Clamping the chunk length first (rather than computing start + size) keeps the
            // arithmetic inside the int range even for very large lists and chunk sizes.
            int end = start + Math.min(size, list.size() - start);
            return list.subList(start, end);
        }

        /** Returns the number of chunks: the backing list's size divided by the chunk size, rounded up. */
        @Override
        public int size() {
            return IntMath.divide(list.size(), size, RoundingMode.CEILING);
        }

        /** Returns whether the backing list is empty, which means there are no chunks. */
        @Override
        public boolean isEmpty() {
            return list.isEmpty();
        }
    }

    /** {@link Partition} variant that additionally carries the {@link RandomAccess} marker. */
    private static class RandomAccessPartition<T extends @Nullable Object> extends Partition<T> implements RandomAccess {
        RandomAccessPartition(List<T> list, int size) {
            super(list, size);
        }
    }
}

RandomAccess.java

package java.util;

/**
 * Marker interface used by {@code List} implementations to indicate that
 * they support fast (generally constant time) random access.  The primary
 * purpose of this interface is to allow generic algorithms to alter their
 * behavior to provide good performance when applied to either random or
 * sequential access lists.
 *
 * <p>The best algorithms for manipulating random access lists (such as
 * {@code ArrayList}) can produce quadratic behavior when applied to
 * sequential access lists (such as {@code LinkedList}).  Generic list
 * algorithms are encouraged to check whether the given list is an
 * {@code instanceof} this interface before applying an algorithm that would
 * provide poor performance if it were applied to a sequential access list,
 * and to alter their behavior if necessary to guarantee acceptable
 * performance.
 *
 * <p>It is recognized that the distinction between random and sequential
 * access is often fuzzy.  For example, some {@code List} implementations
 * provide asymptotically linear access times if they get huge, but constant
 * access times in practice.  Such a {@code List} implementation
 * should generally implement this interface.  As a rule of thumb, a
 * {@code List} implementation should implement this interface if,
 * for typical instances of the class, this loop:
 * <pre>
 *     for (int i=0, n=list.size(); i &lt; n; i++)
 *         list.get(i);
 * </pre>
 * runs faster than this loop:
 * <pre>
 *     for (Iterator i=list.iterator(); i.hasNext(); )
 *         i.next();
 * </pre>
 *
 * <p>This interface is a member of the
 * <a href="{@docRoot}/../technotes/guides/collections/index.html">
 * Java Collections Framework</a>.
 *
 * @since 1.4
 */
public interface RandomAccess {
}

说明：

Lists#partition方法并不是直接返回一个简单的List<List<T>>，而是返回了一个 Partition 对象，这个对象是一个实现了List接口的自定义类。当调用 Lists.partition 方法时，得到的是一个“虚拟”的列表，其内部的get方法会在请求特定索引的元素时动态地返回原始列表的一个子列表。

这里的关键是Partition类继承自AbstractList并重写了get方法。当使用for-each循环或其他方式迭代这个返回的列表时，实际上是在迭代Partition对象。每次迭代访问到某个元素时，Partition#get方法会被调用来提供对应索引的子列表。
这种方式的优点是延迟计算（惰性求值）：只有实际需要某个分区时，它才会被创建。这可以提高效率，尤其是只需要列表中某些分区或者只需要迭代一次时。
当原始列表本身实现了RandomAccess接口时，partition方法返回的是RandomAccessPartition；它在Partition的基础上额外实现了RandomAccess标记接口，用于表示该分区视图同样支持快速（通常是常数时间）随机访问。

使用示例

// Build a list holding 1..400. The array is allocated once at its final size,
// so nothing is re-copied on each append and no third-party helper is needed.
Long[] idArray = new Long[400];
for (int i = 0; i < idArray.length; i++) {
    idArray[i] = i + 1L;
}
List<Long> allIdList = Arrays.asList(idArray);

// Split into partitions of 100 elements each.
List<List<Long>> partitionIdList = Lists.partition(allIdList, 100);

注意事项

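**不可修改的外层列表**：分区列表是不可修改的，这意味着不能直接添加或删除分区，但可以修改分区内部的元素，因为它们是原始列表的视图。
**子列表的修改问题**：虽然分区列表本身不可修改，但每个分区都是原始列表的视图，因此对分区的修改会反映到原始列表中。这一点在注释中（The inner lists are sublist views of the original list, produced on demand using List#subList(int, int), and are subject to all the usual caveats about modification as explained in that API.）也有提到，需要注意。
**性能和内存效率**：由于使用了subList，分区只是视图而不会复制元素，其性能和内存效率与原始列表的实现紧密相关。如果原始列表不支持快速随机访问（例如LinkedList），按索引访问元素的开销会随位置增大；在原始列表很大、分区很多时，可能会导致性能问题。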

阿帕奇的ListUtils#partition方法

方法引用地址：org.apache.commons.collections4.ListUtils#partition

源码解读

ListUtils.java

package org.apache.commons.collections4;

import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

import org.apache.commons.collections4.bag.HashBag;
import org.apache.commons.collections4.functors.DefaultEquator;
import org.apache.commons.collections4.list.FixedSizeList;
import org.apache.commons.collections4.list.LazyList;
import org.apache.commons.collections4.list.PredicatedList;
import org.apache.commons.collections4.list.TransformedList;
import org.apache.commons.collections4.list.UnmodifiableList;
import org.apache.commons.collections4.sequence.CommandVisitor;
import org.apache.commons.collections4.sequence.EditScript;
import org.apache.commons.collections4.sequence.SequencesComparator;

/**
 * Provides utility methods and decorators for {@link List} instances.
 *
 * @since 1.0
 */
public class ListUtils {

    /**
     * <code>ListUtils</code> should not normally be instantiated.
     */
    private ListUtils() {

    }

    /**
     * Returns consecutive {@link List#subList(int, int) sublists} of a
     * list, each of the same size (the final list may be smaller). For example,
     * partitioning a list containing {@code [a, b, c, d, e]} with a partition
     * size of 3 yields {@code [[a, b, c], [d, e]]} -- an outer list containing
     * two inner lists of three and two elements, all in the original order.
     * <p>
     * The outer list is unmodifiable, but reflects the latest state of the
     * source list. The inner lists are sublist views of the original list,
     * produced on demand using {@link List#subList(int, int)}, and are subject
     * to all the usual caveats about modification as explained in that API.
     * <p>
     * Adapted from http://code.google.com/p/guava-libraries/
     *
     * @param <T> the element type
     * @param list  the list to return consecutive sublists of
     * @param size  the desired size of each sublist (the last may be smaller)
     * @return a list of consecutive sublists
     * @throws NullPointerException if list is null
     * @throws IllegalArgumentException if size is not strictly positive
     * @since 4.0
     */
    public static <T> List<List<T>> partition(final List<T> list, final int size) {
        if (list == null) {
            throw new NullPointerException("List must not be null");
        }
        if (size <= 0) {
            throw new IllegalArgumentException("Size must be greater than 0");
        }
        return new Partition<>(list, size);
    }

    /**
     * Provides a partition view on a {@link List}: chunk {@code i} is the
     * {@code subList} of the backing list that starts at {@code i * size}.
     * Nothing is copied; each chunk is created when it is requested.
     * @since 4.0
     */
    private static class Partition<T> extends AbstractList<List<T>> {
        /** The backing list. */
        private final List<T> list;
        /** Maximum number of elements per chunk. */
        private final int size;

        private Partition(final List<T> list, final int size) {
            this.list = list;
            this.size = size;
        }

        /**
         * Returns the chunk at the given position as a sublist view.
         *
         * @param index zero-based chunk position
         * @return the sublist view covering that chunk
         * @throws IndexOutOfBoundsException if index is negative or not less
         *         than {@link #size()}
         */
        @Override
        public List<T> get(final int index) {
            final int listSize = size();
            if (index < 0) {
                throw new IndexOutOfBoundsException("Index " + index + " must not be negative");
            }
            if (index >= listSize) {
                throw new IndexOutOfBoundsException("Index " + index + " must be less than size " +
                                                    listSize);
            }
            final int start = index * size;
            // The bounds checks above guarantee start < list.size(), so the
            // subtraction is positive. Clamping the chunk length first (instead
            // of computing start + size) avoids int overflow, which would yield
            // a negative end index for very large lists and chunk sizes.
            final int end = start + Math.min(size, list.size() - start);
            return list.subList(start, end);
        }

        /**
         * Returns the number of chunks: the backing list's size divided by the
         * chunk size, rounded up.
         */
        @Override
        public int size() {
            return (int) Math.ceil((double) list.size() / (double) size);
        }

        /** Returns whether the backing list is empty, i.e. there are no chunks. */
        @Override
        public boolean isEmpty() {
            return list.isEmpty();
        }
    }

}

说明：

重点注意注释Adapted from http://code.google.com/p/guava-libraries/（改编自谷歌），也能发现代码和谷歌的com.google.common.collect.Lists#partition方法几乎一致，只是少了RandomAccessPartition的相关代码。

使用示例

// Build a list holding 1..400. The array is allocated once at its final size,
// so nothing is re-copied on each append and no third-party helper is needed.
Long[] idArray = new Long[400];
for (int i = 0; i < idArray.length; i++) {
    idArray[i] = i + 1L;
}
List<Long> allIdList = Arrays.asList(idArray);

// Split into partitions of 100 elements each.
List<List<Long>> partitionIdList = ListUtils.partition(allIdList, 100);

注意事项

同谷歌的Lists#partition方法几乎一样。

平均分成N份

Stream的方法

使用示例

// Build a list holding 1..400. The array is allocated once at its final size,
// so nothing is re-copied on each append and no third-party helper is needed.
Long[] idArray = new Long[400];
for (int i = 0; i < idArray.length; i++) {
    idArray[i] = i + 1L;
}
List<Long> allIdList = Arrays.asList(idArray);

// Split into groups of 10 elements.
int maxSize = 10;
// Number of groups, rounded up so a trailing partial group is kept.
int limit = (allIdList.size() + maxSize - 1) / maxSize;
// Stream.iterate produces the infinite sequence 0, 1, 2, ... of group indices.
List<List<Long>> partitionIdList = Stream.iterate(0, n -> n + 1)
    // Keep only the first `limit` indices.
    .limit(limit)
    // The pipeline stays sequential: the work per group is a small copy, and the
    // groups must come out in order, so parallel streams would only add overhead.
    .map(a -> {
        // Take the group's range directly instead of skipping over the prefix again.
        int from = a * maxSize;
        int to = Math.min(from + maxSize, allIdList.size());
        // Collect into a new list so each group is an independent copy, not a view.
        return allIdList.subList(from, to).stream().collect(Collectors.toList());
    })
    .collect(Collectors.toList());