1: Dependency version used:
<dependency>
    <groupId>com.linuxense</groupId>
    <artifactId>javadbf</artifactId>
    <version>0.4.0</version>
</dependency>
2: Usage example:
/**
 * Write table data to a DBF file.
 * @param fileFullName target file path; its extension is replaced with .dbf
 * @param table        rows to write, one Map per row
 * @param columnTypes  column name -> java.sql.Types constant
 * @return true if the file was written successfully
 */
public static boolean tableSaveAsDbf(String fileFullName, List<Map<String, Object>> table, Map<String, Integer> columnTypes) {
    boolean res = false;
    try {
        File file = new File(fileFullName);
        String savePath = file.getParent();
        String dbfFileName = file.getName();
        dbfFileName = dbfFileName.contains(".") ? dbfFileName.substring(0, dbfFileName.lastIndexOf(".")) + ".dbf" : dbfFileName + ".dbf";
        File outputFile = new File(savePath, dbfFileName);
        DBFField[] fields = new DBFField[columnTypes.size()];
        int index = 0;
        for (Map.Entry<String, Integer> entry : columnTypes.entrySet()) {
            DBFField field = new DBFField();
            field.setName(entry.getKey());
            switch (entry.getValue()) {
                case java.sql.Types.INTEGER:
                    field.setDataType(DBFField.FIELD_TYPE_N);
                    field.setFieldLength(10);
                    break;
                case java.sql.Types.DECIMAL:
                    field.setDataType(DBFField.FIELD_TYPE_N);
                    field.setFieldLength(15);
                    field.setDecimalCount(2);
                    break;
                case java.sql.Types.VARCHAR:
                default:
                    field.setDataType(DBFField.FIELD_TYPE_C);
                    field.setFieldLength(50);
                    break;
            }
            fields[index++] = field;
        }
        try (OutputStream fos = new FileOutputStream(outputFile)) {
            DBFWriter writer = new DBFWriter();
            writer.setCharactersetName("GBK"); // set the character set
            writer.setFields(fields);
            for (Map<String, Object> row : table) {
                Object[] record = new Object[columnTypes.size()];
                int i = 0;
                for (String column : columnTypes.keySet()) {
                    Object value = row.get(column);
                    record[i++] = value != null ? value.toString() : null;
                }
                writer.addRecord(record);
            }
            writer.write(fos); // write out the file
            res = true;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return res;
}
/**
 * Infer each column's data type from the first row of the table data and return a map
 * of column name -> java.sql.Types constant.
 * @param table table data; only the first row is inspected
 * @return column name -> java.sql.Types constant (defaults to VARCHAR)
 */
public static Map<String, Integer> getColumnsAndType(List<Map<String, Object>> table) {
    Map<String, Integer> columnTypes = new LinkedHashMap<>();
    if (table != null && !table.isEmpty()) {
        Map<String, Object> firstRow = table.get(0);
        for (Map.Entry<String, Object> entry : firstRow.entrySet()) {
            Object val = entry.getValue();
            int sqlType = Types.VARCHAR;
            if (val instanceof Integer) {
                sqlType = Types.INTEGER;
            } else if (val instanceof Double || val instanceof Float) {
                sqlType = Types.DECIMAL;
            } else if (val instanceof String) {
                sqlType = Types.VARCHAR;
            }
            columnTypes.put(entry.getKey(), sqlType);
        }
    }
    return columnTypes;
}
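For example, a single sample row (hypothetical column names and values, shown only to illustrate the mapping; assumes the usual java.util imports) produces the following java.sql.Types constants:
Map<String, Object> first = new LinkedHashMap<>();
first.put("name", "张三");  // String  -> Types.VARCHAR (12)
first.put("age", 30);        // Integer -> Types.INTEGER (4)
first.put("score", 95.5);    // Double  -> Types.DECIMAL (3)
Map<String, Integer> types = getColumnsAndType(Collections.singletonList(first));
// types = {name=12, age=4, score=3}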
3: Problem (bug):
This version of JavaDBF has a bug when handling GBK-encoded text: it does not truncate character fields to the correct byte length.
4: Root cause:
writer.setCharactersetName("GBK") does not actually control the encoding used when records are written: this DBFWriter may still turn strings into bytes with a default encoding (such as ISO-8859-1 or UTF-8). So even though the character fields are declared as 50 bytes long, DBFWriter effectively truncates and pads by character count (1 character = 1 byte). Once a value contains GBK-encoded Chinese characters (2 bytes each), it cannot truncate or pad correctly, and the data ends up clipped or corrupted.
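The mismatch is easy to demonstrate (a minimal sketch; the sample string is arbitrary). GBK encodes a Chinese character as 2 bytes, so character count and byte count diverge as soon as Chinese text appears:
import java.nio.charset.Charset;

public class GbkLengthDemo {
    public static void main(String[] args) {
        String value = "浙江省ABC"; // 3 Chinese characters + 3 ASCII characters
        System.out.println(value.length());                                // 6 characters
        System.out.println(value.getBytes(Charset.forName("GBK")).length); // 9 bytes (3*2 + 3*1)
        // A DBF character field length is a byte count; truncating or padding by
        // character count therefore miscounts GBK text, which is the failure described above.
    }
}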
5: The most controllable fix:
Encode the string to GBK bytes ourselves, truncate and pad at the byte level, then build a String back from those bytes:
public static String truncateAndPad(String input, String charsetName, int maxBytes) {
    if (input == null) input = "";
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    try {
        for (int i = 0; i < input.length(); i++) {
            String ch = input.substring(i, i + 1);
            byte[] b = ch.getBytes(charsetName);
            if (bos.size() + b.length > maxBytes) break;
            bos.write(b);
        }
        byte[] resultBytes = bos.toByteArray();
        // Pad with spaces (a space is 0x20 in GBK)
        if (resultBytes.length < maxBytes) {
            int padLength = maxBytes - resultBytes.length;
            for (int i = 0; i < padLength; i++) {
                bos.write(0x20);
            }
            resultBytes = bos.toByteArray();
        }
        // Key point: rebuild the String from the GBK bytes and pass it to DBFWriter as-is
        return new String(resultBytes, charsetName);
    } catch (IOException e) { // covers UnsupportedEncodingException from getBytes and write(byte[])
        return input;
    }
}
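A quick check of the expected behavior (assuming the truncateAndPad method above is in scope; the sample strings and the 10-byte limit are arbitrary):
// "杭州市西湖区" is 6 characters = 12 GBK bytes; only the first 5 characters (10 bytes) fit.
String a = truncateAndPad("杭州市西湖区", "GBK", 10);
System.out.println(a.getBytes(java.nio.charset.Charset.forName("GBK")).length); // 10

// "杭州" is 4 GBK bytes; 6 spaces are appended to reach exactly 10 bytes.
String b = truncateAndPad("杭州", "GBK", 10);
System.out.println("[" + b + "]"); // [杭州      ]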
Then, in the write loop, replace the original record assignment with:
if (columnTypes.get(column) == Types.VARCHAR) {
    int fieldLen = fields[i].getFieldLength(); // field length in bytes
    strValue = truncateAndPad(strValue, "GBK", fieldLen);
}
record[i] = strValue;
6: Complete fixed code:
import java.io.*;
import java.sql.Types;
import java.util.*;
import com.linuxense.javadbf.*;

public class DbfExporter {

    public static boolean tableSaveAsDbf(String fileFullName, List<Map<String, Object>> table, Map<String, Integer> columnTypes) {
        boolean res = false;
        try {
            File file = new File(fileFullName);
            String savePath = file.getParent();
            String dbfFileName = file.getName();
            dbfFileName = dbfFileName.contains(".") ?
                    dbfFileName.substring(0, dbfFileName.lastIndexOf(".")) + ".dbf" :
                    dbfFileName + ".dbf";
            File outputFile = new File(savePath, dbfFileName);

            // Keep the column order stable
            List<String> columnOrder = new ArrayList<>(columnTypes.keySet());

            // Build the field definitions
            DBFField[] fields = new DBFField[columnOrder.size()];
            for (int i = 0; i < columnOrder.size(); i++) {
                String colName = columnOrder.get(i);
                Integer sqlType = columnTypes.get(colName);
                DBFField field = new DBFField();
                field.setName(colName);
                switch (sqlType) {
                    case Types.INTEGER:
                        field.setDataType(DBFField.FIELD_TYPE_N);
                        field.setFieldLength(10);
                        break;
                    case Types.DECIMAL:
                        field.setDataType(DBFField.FIELD_TYPE_N);
                        field.setFieldLength(15);
                        field.setDecimalCount(2);
                        break;
                    case Types.VARCHAR:
                    default:
                        field.setDataType(DBFField.FIELD_TYPE_C);
                        field.setFieldLength(50); // note: this is a byte count, not a character count
                        break;
                }
                fields[i] = field;
            }

            // Write the DBF file
            try (OutputStream fos = new FileOutputStream(outputFile)) {
                DBFWriter writer = new DBFWriter();
                writer.setCharactersetName("GBK"); // still set, to declare that the data is GBK-encoded
                writer.setFields(fields);
                for (Map<String, Object> row : table) {
                    Object[] record = new Object[columnOrder.size()];
                    for (int i = 0; i < columnOrder.size(); i++) {
                        String column = columnOrder.get(i);
                        Object value = row.get(column);
                        int sqlType = columnTypes.get(column);
                        if (sqlType == Types.INTEGER || sqlType == Types.DECIMAL) {
                            // DBFWriter expects numeric (N) field values as Double, not String
                            record[i] = value == null ? null : Double.valueOf(value.toString());
                        } else {
                            // Character (C) fields: truncate and pad to the field's byte length in GBK
                            String strValue = value != null ? value.toString() : "";
                            int fieldLen = fields[i].getFieldLength(); // field length in bytes
                            record[i] = truncateAndPad(strValue, "GBK", fieldLen);
                        }
                    }
                    writer.addRecord(record);
                }
                writer.write(fos);
                res = true;
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return res;
    }

    /**
     * Truncate and pad by GBK byte length so the string fits the field's byte size exactly.
     */
    public static String truncateAndPad(String input, String charsetName, int maxBytes) {
        if (input == null) input = "";
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try {
            for (int i = 0; i < input.length(); i++) {
                String ch = input.substring(i, i + 1);
                byte[] b = ch.getBytes(charsetName);
                if (bos.size() + b.length > maxBytes) break;
                bos.write(b);
            }
            byte[] resultBytes = bos.toByteArray();
            // Pad with spaces (0x20 in GBK)
            if (resultBytes.length < maxBytes) {
                int padLen = maxBytes - resultBytes.length;
                for (int i = 0; i < padLen; i++) {
                    bos.write(0x20);
                }
                resultBytes = bos.toByteArray();
            }
            return new String(resultBytes, charsetName);
        } catch (IOException e) { // covers UnsupportedEncodingException from getBytes and write(byte[])
            return input;
        }
    }

    /**
     * Same type-inference helper as in section 2: infer each column's java.sql.Types
     * constant from the first row of the table data.
     */
    public static Map<String, Integer> getColumnsAndType(List<Map<String, Object>> table) {
        Map<String, Integer> columnTypes = new LinkedHashMap<>();
        if (table != null && !table.isEmpty()) {
            Map<String, Object> firstRow = table.get(0);
            for (Map.Entry<String, Object> entry : firstRow.entrySet()) {
                Object val = entry.getValue();
                int sqlType = Types.VARCHAR;
                if (val instanceof Integer) {
                    sqlType = Types.INTEGER;
                } else if (val instanceof Double || val instanceof Float) {
                    sqlType = Types.DECIMAL;
                } else if (val instanceof String) {
                    sqlType = Types.VARCHAR;
                }
                columnTypes.put(entry.getKey(), sqlType);
            }
        }
        return columnTypes;
    }
}
7: Example invocation:
List<Map<String, Object>> table = new ArrayList<>();
Map<String, Object> row = new LinkedHashMap<>(); // keeps the column order stable
row.put("省份", "浙江省");
row.put("城市", "杭州市石景山区");
table.add(row);
// Infer each column's type from the first row and build the column-name -> type map
Map<String, Integer> columnsAndType = DbfExporter.getColumnsAndType(table);
// Write the DBF file
DbfExporter.tableSaveAsDbf(path, table, columnsAndType);
8: Result:
The String -> GBK byte array -> fixed-length String conversion is now done manually, so the output no longer depends on DBFWriter's internal encoding behavior (the bug is bypassed).
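To spot-check the output, the file can be read back with javadbf's DBFReader (a minimal sketch against the same 0.4.0 API; the file path is hypothetical, and character values may still carry the trailing padding spaces written above):
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Arrays;
import com.linuxense.javadbf.DBFReader;

public class DbfReadCheck {
    public static void main(String[] args) throws Exception {
        try (InputStream in = new FileInputStream("D:/out/demo.dbf")) { // hypothetical path
            DBFReader reader = new DBFReader(in);
            reader.setCharactersetName("GBK"); // decode character fields as GBK
            System.out.println("records: " + reader.getRecordCount());
            Object[] row;
            while ((row = reader.nextRecord()) != null) {
                // Character values may include the padding written by truncateAndPad
                System.out.println(Arrays.toString(row));
            }
        }
    }
}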