分批截取list && 分批读取csv文件 && Stream 大数据量List分批处理切割
背景:
有时候我们的List集合中的数据量非常大,当需要进行数据库操作或远程调用时,大批数据一次性发送会降低吞吐量、影响性能,某些错误数据还可能造成所有数据的回滚。这时我们会希望将数据拆分成小集合进行分批处理,以降低内存消耗。
java 1.7分批处理List
/**
 * Demo: walks a list in consecutive fixed-size slices and prints each slice.
 */
public class ListSplitDemo {
    /**
     * Builds a list holding the strings "1" through "9" and prints it in
     * slices of at most five elements, one slice per output line.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<String> list = new ArrayList<>();
        for (int value = 1; value <= 9; value++) {
            list.add(String.valueOf(value));
        }

        final int total = list.size(); // total number of records
        final int splitSize = 5;       // maximum records per slice

        // Advancing the start offset by splitSize yields ceil(total / splitSize) slices;
        // the last slice is clamped to the end of the list.
        for (int from = 0; from < total; from += splitSize) {
            int to = Math.min(from + splitSize, total);
            System.out.println(list.subList(from, to));
        }
    }
}
/**
 * Splits {@code source} into {@code n} consecutive parts whose sizes differ by at most one.
 * The first {@code source.size() % n} parts receive one extra element. When {@code n}
 * exceeds the list size, the trailing parts are empty.
 *
 * <p>Each part is produced by {@link List#subList(int, int)}, so it is a view backed by
 * {@code source} rather than an independent copy.
 *
 * @param source the list to split
 * @param n      the number of parts to produce
 * @param <T>    the element type
 * @return the parts, in source order
 */
public static <T> List<List<T>> averageAssign(List<T> source, int n) {
    final int size = source.size();
    int extras = size % n;         // how many leading parts get one additional element
    final int baseLength = size / n; // minimum length of every part

    List<List<T>> parts = new ArrayList<>();
    int from = 0; // start index of the part currently being cut
    for (int part = 0; part < n; part++) {
        int to = from + baseLength;
        if (extras > 0) {
            to++;
            extras--;
        }
        parts.add(source.subList(from, to));
        from = to;
    }
    return parts;
}
分批读取csv文件:
/**
 * Demo: reads a CSV file and processes its records in fixed-size batches.
 */
public class CsvDemo {
    /**
     * Prints the record count and header count of the CSV file, then reads the records
     * and hands them to {@link #handle(List)} in batches of {@code size} rows. A final,
     * smaller batch is handed over when the record count is not a multiple of {@code size}.
     *
     * @param args unused
     * @throws Exception if the CSV file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String csvFile = "D:\\test\\test.csv";
        CsvReader reader = new CsvReader(csvFile, ',', Charset.forName("gbk"));
        try {
            // The header line is not a record; clamp at zero so a missing or empty
            // file does not report a negative count.
            long lineNumber = Math.max(0L, getLineNumber(new File(csvFile)) - 1);
            System.out.println("记录数:" + lineNumber + "条");
            reader.readHeaders();
            String[] headers = reader.getHeaders();
            System.out.println("标题个数:" + headers.length);

            int size = 2; // number of rows per batch
            List<String[]> list = new ArrayList<>(size);
            while (reader.readRecord()) {
                list.add(reader.getValues());
                // Flush purely on the buffer size. The batching must not depend on the
                // separately computed line count: doing so re-processed rows when the
                // file held fewer rows than one batch, and an inaccurate count could
                // skip or duplicate work.
                if (list.size() == size) {
                    handle(list);
                    list.clear();
                }
            }
            // Remaining rows that did not fill a whole batch.
            if (!list.isEmpty()) {
                handle(list);
            }
        } finally {
            // Release the underlying file handle even when reading fails.
            reader.close();
        }
    }

    /**
     * Processes one batch: prints every row of the batch on a single output line.
     *
     * @param list the rows of the batch
     */
    private static void handle(List<String[]> list) {
        for (String[] row : list) {
            System.out.print(Arrays.asList(row));
        }
        System.out.println();
    }

    /**
     * Counts the lines of a file by skipping through it with a {@link LineNumberReader}.
     *
     * @param file the file to inspect
     * @return the line number reached at the end of the file, or {@code 0} when the file
     *         does not exist or cannot be read
     */
    public static long getLineNumber(File file) {
        if (file.exists()) {
            // try-with-resources closes both readers (outer first) on every path.
            try (FileReader fileReader = new FileReader(file);
                 LineNumberReader lineNumberReader = new LineNumberReader(fileReader)) {
                // Skipping to the end makes the reader count every line it passes.
                lineNumberReader.skip(Long.MAX_VALUE);
                return lineNumberReader.getLineNumber();
            } catch (Exception e) {
                // Best effort: report the failure and fall through to the 0 default.
                e.printStackTrace();
            }
        }
        return 0;
    }
}
java 8 Stream 大数据量List分批处理切割
/**
 * Demo: cuts a list into fixed-size groups with the Stream API, shown in two variants.
 */
public class StreamHandList {
    /** Group size: the list is cut into groups of three elements. */
    private static final Integer MAX_SEND = 3;

    /**
     * Splits the numbers 1..7 into groups of {@link #MAX_SEND} twice, once per variant,
     * and prints each result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        List<Integer> numbers = Arrays.asList(1, 2, 3, 4, 5, 6, 7);
        int groupCount = countStep(numbers.size());

        // Variant 1: iterate over the group indices and append each group to a result list.
        List<List<Integer>> appended = new ArrayList<>();
        Stream.iterate(0, index -> index + 1)
                .limit(groupCount)
                .forEach(index -> appended.add(groupAt(numbers, index)));
        System.out.println(appended);

        // Variant 2: map every group index to its group and collect the groups directly.
        List<List<Integer>> mapped = Stream.iterate(0, index -> index + 1)
                .limit(groupCount)
                .parallel()
                .map(index -> numbers.stream()
                        .skip(index * MAX_SEND)
                        .limit(MAX_SEND)
                        .parallel()
                        .collect(Collectors.toList()))
                .collect(Collectors.toList());
        System.out.println(mapped);
    }

    /**
     * Copies the group with the given zero-based index out of {@code values}.
     */
    private static List<Integer> groupAt(List<Integer> values, Integer index) {
        return values.stream()
                .skip(index * MAX_SEND)
                .limit(MAX_SEND)
                .collect(Collectors.toList());
    }

    /**
     * Computes how many groups are needed for {@code size} elements
     * (integer division by {@link #MAX_SEND}, rounded up).
     */
    private static Integer countStep(Integer size) {
        int roundedUp = size + MAX_SEND - 1;
        return roundedUp / MAX_SEND;
    }
}