用javadbf 写 .dbf 文件使用GBK编码写入字符时无法正确裁剪长度

1:使用的版本:

        <dependency>
            <groupId>com.linuxense</groupId>
            <artifactId>javadbf</artifactId>
            <version>0.4.0</version>
        </dependency>

2:使用示例:

/**
     * Writes a table to a .dbf file with javadbf's {@code DBFWriter}, using the GBK character set.
     *
     * <p>The file is created in the parent directory of {@code fileFullName}; the last extension
     * of the file name is replaced by {@code .dbf} (or {@code .dbf} is appended when there is none).
     * {@code INTEGER} columns become numeric fields of length 10, {@code DECIMAL} columns numeric
     * fields of length 15 with 2 decimals, and every other type a character field of length 50.
     *
     * <p>Character values are handed to the writer unmodified; this is the variant the surrounding
     * article uses to demonstrate the GBK length problem.
     *
     * @param fileFullName path whose directory and base name determine the output file
     * @param table        rows to write, each keyed by column name
     * @param columnTypes  column name to {@link java.sql.Types} constant; its iteration order
     *                     defines the field order
     * @return {@code true} if the file was written; {@code false} if any exception occurred
     *         (its stack trace is printed)
     */
    public static boolean tableSaveAsDbf(String fileFullName, List<Map<String, Object>> table, Map<String, Integer> columnTypes) {
        boolean res = false;
        try {
            File file = new File(fileFullName);
            String savePath = file.getParent();
            String dbfFileName = file.getName();
            dbfFileName = dbfFileName.contains(".") ? dbfFileName.substring(0, dbfFileName.lastIndexOf(".")) + ".dbf" : dbfFileName + ".dbf";
            File outputFile = new File(savePath, dbfFileName);

            DBFField[] fields = new DBFField[columnTypes.size()];
            int index = 0;
            for (Map.Entry<String, Integer> entry : columnTypes.entrySet()) {
                DBFField field = new DBFField();
                field.setName(entry.getKey());
                switch (entry.getValue()) {
                    case java.sql.Types.INTEGER:
                        field.setDataType(DBFField.FIELD_TYPE_N);
                        field.setFieldLength(10);
                        break;
                    case java.sql.Types.DECIMAL:
                        field.setDataType(DBFField.FIELD_TYPE_N);
                        field.setFieldLength(15);
                        field.setDecimalCount(2);
                        break;
                    case java.sql.Types.VARCHAR:
                    default:
                        field.setDataType(DBFField.FIELD_TYPE_C);
                        field.setFieldLength(50);
                        break;
                }
                fields[index++] = field;
            }

            try (OutputStream fos = new FileOutputStream(outputFile)) {
                DBFWriter writer = new DBFWriter();
                writer.setCharactersetName("GBK"); // character set used by the writer
                writer.setFields(fields);

                for (Map<String, Object> row : table) {
                    Object[] record = new Object[columnTypes.size()];
                    int i = 0;
                    for (String column : columnTypes.keySet()) {
                        Object value = row.get(column);
                        if (value == null) {
                            record[i] = null;
                        } else if (fields[i].getDataType() == DBFField.FIELD_TYPE_N) {
                            // JavaDBF expects java.lang.Double for numeric fields; a String is
                            // rejected by addRecord. A blank value is written as an empty field.
                            if (value instanceof Number) {
                                record[i] = Double.valueOf(((Number) value).doubleValue());
                            } else {
                                String text = value.toString().trim();
                                record[i] = text.isEmpty() ? null : Double.valueOf(text);
                            }
                        } else {
                            record[i] = value.toString();
                        }
                        i++;
                    }
                    writer.addRecord(record);
                }

                writer.write(fos); // flush header and records to the file
                res = true;
            }

        } catch (Exception e) {
            e.printStackTrace();
        }
        return res;
    }
  /**
     * Infers a SQL type for every column from the first row of the table.
     *
     * <p>{@code Integer} values map to {@link Types#INTEGER}, {@code Double} and {@code Float}
     * values to {@link Types#DECIMAL}; every other value, including {@code null}, maps to
     * {@link Types#VARCHAR}.
     *
     * @param table rows keyed by column name; may be {@code null} or empty
     * @return column name to SQL type, in the iteration order of the first row; empty when
     *         {@code table} is {@code null} or has no rows
     */
    public static Map<String, Integer> getColumnsAndType(List<Map<String, Object>> table) {
        Map<String, Integer> inferred = new LinkedHashMap<>();
        if (table == null || table.isEmpty()) {
            return inferred;
        }

        for (Map.Entry<String, Object> cell : table.get(0).entrySet()) {
            Object sample = cell.getValue();
            int type;
            if (sample instanceof Integer) {
                type = Types.INTEGER;
            } else if (sample instanceof Double || sample instanceof Float) {
                type = Types.DECIMAL;
            } else {
                // Strings, nulls and every other class are treated as text.
                type = Types.VARCHAR;
            }
            inferred.put(cell.getKey(), type);
        }
        return inferred;
    }

3:问题(BUG):

这个版本的 JavaDBF 在处理 GBK 编码的字符时有一个 bug:无法按字节长度正确地裁剪字符串

4:根本原因确认:

writer.setCharactersetName("GBK") 并没有真正控制写入时的字符编码,使用的 DBFWriter 仍可能以默认编码(如 ISO-8859-1 或 UTF-8)将字符串处理为字节。因此:

即使设置了字段长度为 50 字节,DBFWriter 实际是按字符长度(1 字符 = 1 字节)来处理,遇到 GBK 编码的汉字(通常是 2 字节),它无法正确截断、填充,于是直接裁剪或出错。

5:最可控的解决方法:

先将字符串按 GBK 编码为字节,按字节长度截断并补足空格,再用这些字节反向构建 String:

/**
 * Truncates a string to at most {@code maxBytes} bytes in the given charset without splitting a
 * character, right-pads the result with spaces (0x20) up to {@code maxBytes} bytes, and decodes
 * the bytes back into a {@code String}.
 *
 * <p>Padding writes the single byte 0x20, so the charset is expected to encode a space that way
 * (true for GBK and other ASCII-compatible charsets).
 *
 * @param input       text to fit; {@code null} is treated as the empty string
 * @param charsetName name of the charset used to measure, encode and decode, e.g. {@code "GBK"}
 * @param maxBytes    target length in bytes
 * @return the truncated and padded text; {@code input} itself (empty string for {@code null})
 *         when {@code charsetName} is not supported
 */
public static String truncateAndPad(String input, String charsetName, int maxBytes) {
    if (input == null) {
        input = "";
    }

    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    try {
        int i = 0;
        while (i < input.length()) {
            // Step by code point so a surrogate pair is encoded as one character instead of
            // two separately unencodable halves.
            int charCount = Character.charCount(input.codePointAt(i));
            byte[] b = input.substring(i, i + charCount).getBytes(charsetName);
            if (bos.size() + b.length > maxBytes) {
                break;
            }
            // write(byte[], int, int) declares no IOException, unlike the inherited write(byte[]).
            bos.write(b, 0, b.length);
            i += charCount;
        }

        // Pad with spaces up to the requested byte length.
        while (bos.size() < maxBytes) {
            bos.write(0x20);
        }

        // Rebuild the string from the sized bytes before handing it to DBFWriter.
        return new String(bos.toByteArray(), charsetName);

    } catch (UnsupportedEncodingException e) {
        return input;
    }
}

替换写入逻辑中这一段:

// Decide by the field's actual DBF type rather than the declared SQL type: the field setup maps
// every type other than INTEGER/DECIMAL to a character field through its `default` branch.
if (fields[i].getDataType() == DBFField.FIELD_TYPE_C) {
    int fieldLen = fields[i].getFieldLength(); // field length in bytes
    strValue = truncateAndPad(strValue, "GBK", fieldLen);
}
record[i] = strValue;

6:修复后完整代码:

import java.io.*;
import java.sql.Types;
import java.util.*;
import com.linuxense.javadbf.*;

public class DbfExporter {

    /** Character set used both by the DBF writer and for sizing character values. */
    private static final String CHARSET_NAME = "GBK";

    /**
     * Writes a table to a .dbf file with javadbf's {@code DBFWriter}, using the GBK character set.
     *
     * <p>The file is created in the parent directory of {@code fileFullName}; the last extension
     * of the file name is replaced by {@code .dbf} (or {@code .dbf} is appended when there is none).
     * {@code INTEGER} columns become numeric fields of length 10, {@code DECIMAL} columns numeric
     * fields of length 15 with 2 decimals, and every other type a character field of 50 bytes.
     * Character values are truncated and space-padded to the field's byte length before they are
     * handed to the writer.
     *
     * @param fileFullName path whose directory and base name determine the output file
     * @param table        rows to write, each keyed by column name
     * @param columnTypes  column name to {@link Types} constant; its iteration order defines the
     *                     field order
     * @return {@code true} if the file was written; {@code false} if any exception occurred
     *         (its stack trace is printed)
     */
    public static boolean tableSaveAsDbf(String fileFullName, List<Map<String, Object>> table, Map<String, Integer> columnTypes) {
        boolean res = false;
        try {
            File file = new File(fileFullName);
            String baseName = file.getName();
            int dot = baseName.lastIndexOf('.');
            if (dot >= 0) {
                baseName = baseName.substring(0, dot);
            }
            File outputFile = new File(file.getParent(), baseName + ".dbf");

            // Snapshot the column order once so field definitions and record values stay aligned.
            List<String> columnOrder = new ArrayList<>(columnTypes.keySet());

            // Field definitions
            DBFField[] fields = new DBFField[columnOrder.size()];
            for (int i = 0; i < fields.length; i++) {
                String colName = columnOrder.get(i);
                DBFField field = new DBFField();
                field.setName(colName);

                switch (columnTypes.get(colName)) {
                    case Types.INTEGER:
                        field.setDataType(DBFField.FIELD_TYPE_N);
                        field.setFieldLength(10);
                        break;
                    case Types.DECIMAL:
                        field.setDataType(DBFField.FIELD_TYPE_N);
                        field.setFieldLength(15);
                        field.setDecimalCount(2);
                        break;
                    case Types.VARCHAR:
                    default:
                        field.setDataType(DBFField.FIELD_TYPE_C);
                        field.setFieldLength(50); // length in bytes, not characters
                        break;
                }

                fields[i] = field;
            }

            // Write the DBF file
            try (OutputStream fos = new FileOutputStream(outputFile)) {
                DBFWriter writer = new DBFWriter();
                writer.setCharactersetName(CHARSET_NAME); // still required: the data is GBK
                writer.setFields(fields);

                for (Map<String, Object> row : table) {
                    Object[] record = new Object[fields.length];
                    for (int i = 0; i < fields.length; i++) {
                        record[i] = toFieldValue(
                                row.get(columnOrder.get(i)),
                                fields[i].getDataType(),
                                fields[i].getFieldLength());
                    }
                    writer.addRecord(record);
                }

                writer.write(fos);
                res = true;
            }

        } catch (Exception e) {
            e.printStackTrace();
        }

        return res;
    }

    /**
     * Converts a cell value into the Java type the writer expects for the field.
     *
     * <p>Character fields get a string sized to the field's byte length. Every other field is
     * treated as numeric (the only other kind this class defines): JavaDBF expects
     * {@code java.lang.Double} there, so numbers are widened, text is parsed, and {@code null} or
     * blank input becomes {@code null} (an empty field).
     *
     * <p>NOTE(review): the article reports that DBFWriter sizes character data by char count
     * rather than byte count. If so, long all-CJK values may still be cut by the writer even after
     * padding here - verify with values close to the field length.
     */
    private static Object toFieldValue(Object value, byte dataType, int fieldLength) {
        // Decide by the DBF type: the `default` branch above also produces character fields.
        if (dataType == DBFField.FIELD_TYPE_C) {
            String text = value != null ? value.toString() : "";
            return truncateAndPad(text, CHARSET_NAME, fieldLength);
        }
        if (value == null) {
            return null;
        }
        if (value instanceof Number) {
            return Double.valueOf(((Number) value).doubleValue());
        }
        String text = value.toString().trim();
        return text.isEmpty() ? null : Double.valueOf(text);
    }

    /**
     * Truncates a string to at most {@code maxBytes} bytes in the given charset without splitting
     * a character, right-pads the result with spaces (0x20) up to {@code maxBytes} bytes, and
     * decodes the bytes back into a {@code String}.
     *
     * <p>Padding writes the single byte 0x20, so the charset is expected to encode a space that
     * way (true for GBK and other ASCII-compatible charsets).
     *
     * @param input       text to fit; {@code null} is treated as the empty string
     * @param charsetName name of the charset used to measure, encode and decode
     * @param maxBytes    target length in bytes
     * @return the truncated and padded text; {@code input} itself (empty string for {@code null})
     *         when {@code charsetName} is not supported
     */
    public static String truncateAndPad(String input, String charsetName, int maxBytes) {
        if (input == null) {
            input = "";
        }

        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        try {
            int i = 0;
            while (i < input.length()) {
                // Step by code point so a surrogate pair is encoded as one character instead of
                // two separately unencodable halves.
                int charCount = Character.charCount(input.codePointAt(i));
                byte[] b = input.substring(i, i + charCount).getBytes(charsetName);
                if (bos.size() + b.length > maxBytes) {
                    break;
                }
                // write(byte[], int, int) declares no IOException, unlike write(byte[]).
                bos.write(b, 0, b.length);
                i += charCount;
            }

            // Pad with spaces up to the requested byte length.
            while (bos.size() < maxBytes) {
                bos.write(0x20);
            }

            return new String(bos.toByteArray(), charsetName);

        } catch (UnsupportedEncodingException e) {
            return input;
        }
    }
}

7:调用示例:

List<Map<String, Object>> table = new ArrayList<>();

// LinkedHashMap keeps insertion order, so the column order derived from this row is
// deterministic; HashMap iteration order is unspecified.
Map<String, Object> row = new LinkedHashMap<>();
row.put("省份", "浙江省");
row.put("城市", "杭州市石景山区");
table.add(row);
// Infer each column's data type from the first row; yields a column-name -> type mapping.
Map<String, Integer> columnsAndType = DbfUtils.getColumnsAndType(table);
// Write the .dbf file
DbfUtils.tableSaveAsDbf(path, table, columnsAndType);

8:结果:

手动控制了 字符串 -> GBK 字节数组 -> 固定长度字符串 这个过程:

不依赖 DBFWriter 的内部编码行为(绕过 bug)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

admiraldeworm

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值