合并 CSV 表格 结构树
工具作用
如果你有一个分级/树结构的电子表格的 CSV 文件,而想要将一列中相同内容的许多单元格合并成一个单元格。
比如
id | name |
---|---|
1 | a |
1 | c |
1 | b |
合并为
下面这个工具类就能帮助到你。
依赖
<dependencies>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>3.15</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>3.15</version>
</dependency>
</dependencies>
工具代码
package com.henvealf;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
// 用于 CSV 单元格合并
// 注意最后一列不会合并
public class CSVMerger {
private BufferedReader reader = null;
private OutputStream outputStream = null;
private Workbook outWorkbook;
private Sheet outSheet;
private int length;
// 代表一行
private List<String[]> rows;
/**
* 用于 CSV 单元格合并,最后一列不合并
* @param inputFile 要合并的 CSV
* @param outputFile 合并后的 CSV
* @param length CSV 的列数
* @throws IOException
* @throws InvalidFormatException
*/
public CSVMerger(String inputFile, String outputFile, int length) throws IOException, InvalidFormatException {
reader = new BufferedReader(
new InputStreamReader(
new FileInputStream(inputFile),"utf-8")); // 解决乱码
outputStream = new FileOutputStream(outputFile);
InputStream inputStream = new FileInputStream(inputFile);
outWorkbook = new XSSFWorkbook();
outSheet = outWorkbook.createSheet();
rows = new ArrayList<String[]>();
this.length = length;
}
public void goClean() throws IOException {
String line = null;
// 要合并的起始与结束,最后一列不合并
int[] start = new int[ length -1 ];
int[] end = new int[ length - 1 ];
// 用于记录新表单的行数
int[] newRowNumber = new int[ length - 1 ];
// 第0行
String[] recordRow = reader.readLine().split(",");
// 设置第一行的内容
Row first = outSheet.createRow(0);
for (int i = 0; i < recordRow.length; i++) {
Cell cell = first.createCell(i);
cell.setCellValue(recordRow[i]);
}
int rowNumber = 0;
while ((line = reader.readLine()) != null) {
rowNumber ++;
// 当前行
String[] currRow = line.split(",");
// 新的行
Row newRow = outSheet.createRow(rowNumber);
// 先设置第三列
Cell lastCell = newRow.createCell(length - 1);
lastCell.setCellValue(currRow[length - 1]);
// 其他列
for (int col = 0; col < length - 1; col++) {
Cell newCell = newRow.createCell(col);
newCell.setCellValue(currRow[col]);
// 发现不一样的
if(! currRow[col].equals(recordRow[col])) {
// 所以上一行是结束行
end[col] = rowNumber - 1;
newRowNumber[col] ++;
// 超过一行,就合并
if( end[col] - start[col] > 0 ) {
outSheet.addMergedRegion(new CellRangeAddress(
start[col], //first row (0-based)
end[col], //last row (0-based)
col, //first column (0-based)
col //last column (0-based)
));
}
recordRow[col] = currRow[col];
// 重置起始标签
start[col] = rowNumber;
}
}
}
// 最后一次合并。
for (int col = 0; col < length - 1; col++) {
end[col] = rowNumber;
// 超过一行,就合并
if( end[col] - start[col] > 0 ) {
outSheet.addMergedRegion(new CellRangeAddress(
start[col], //first row (0-based)
end[col], //last row (0-based)
col, //first column (0-based)
col //last column (0-based)
));
}
}
outWorkbook.write(outputStream);
outWorkbook.close();
System.out.println("合并结束");
}
public static void main(String[] args) throws IOException, InvalidFormatException {
CSVMerger cleanCity = new CSVMerger("regions.csv", "merged_regions.csv",3);
cleanCity.goClean();
}
}
使用说明
注意看构造函数中的解释,这里的编码写死了,可自行修改。
By Henvealf
End