Parquet 文件转 JSONObject

/**
     * Reads every record of a Parquet file, converts each record to a JSON string and
     * sends it to Kafka.
     *
     * <p>Each record is flattened with {@link #converterType2Java}: one JSON entry per
     * field declared in the record's schema.
     *
     * @param kafkaTemplate template handed to {@code sendKafka} for publishing
     * @param topicName     Kafka topic the JSON messages are sent to
     * @param inPath        path of the Parquet file to read
     * @return the number of records read and sent (despite the variable name, this is a
     *         row count, not a byte size)
     * @throws Exception if reading the file, converting a record or sending fails
     */
    public int parquetReader(KafkaTemplate<String, String> kafkaTemplate, String topicName, String inPath) throws Exception {
        int fileSize = 0;

        GroupReadSupport readSupport = new GroupReadSupport();

        // try-with-resources: the reader holds an open file handle and must be closed even
        // when conversion or sending throws part-way through the file.
        try (ParquetReader<Group> reader = ParquetReader.builder(readSupport, new Path(inPath)).build()) {
            Group line;

            // read() returns null once the file is exhausted, so an empty file simply yields 0.
            while ((line = reader.read()) != null) {
                JSONObject jsonObject = new JSONObject();

                for (Type type : line.getType().getFields()) {
                    converterType2Java(line, type, jsonObject);
                }

                this.sendKafka(kafkaTemplate, topicName, jsonObject.toJSONString());
                fileSize++;
            }
        }

        logger.info("***sendKafka, filePath:{}, fileSize:{}***", inPath, fileSize);
        return fileSize;
    }

/**
     * Converts one field of a Parquet record and stores it in {@code jsonObject} under
     * the field's name.
     *
     * <p>Primitive fields are stored as strings. Group (nested) fields are converted
     * recursively into a nested {@code JSONObject}. Only the first occurrence (index 0)
     * of a field is read; a field with no value in this record is left out entirely.
     *
     * @param line       the record (or nested group) that holds the field
     * @param type       schema type of the field to convert
     * @param jsonObject target object that receives the converted value
     */
    public static void converterType2Java(Group line, Type type, JSONObject jsonObject) {
        String fieldName = type.getName();

        // Check presence first: an absent optional field needs no type inspection at all.
        if (line.getFieldRepetitionCount(fieldName) == 0) {
            return;
        }

        // asPrimitiveType() throws ClassCastException on group types, so nested groups are
        // handled separately by converting their own fields into a nested JSON object.
        if (!type.isPrimitive()) {
            Group nested = line.getGroup(fieldName, 0);
            JSONObject nestedJson = new JSONObject();
            for (Type childType : nested.getType().getFields()) {
                converterType2Java(nested, childType, nestedJson);
            }
            jsonObject.put(fieldName, nestedJson);
            return;
        }

        String value = "";
        String fieldType = type.asPrimitiveType().getPrimitiveTypeName().name();

        switch (fieldType) {
            case "BOOLEAN":
                value = String.valueOf(line.getBoolean(fieldName, 0));
                break;
            case "INT32":
                value = String.valueOf(line.getInteger(fieldName, 0));
                break;
            case "INT64":
                value = String.valueOf(line.getLong(fieldName, 0));
                break;
            case "INT96":
                // INT96 is treated as a timestamp and emitted as epoch milliseconds.
                value = String.valueOf(getTimestampMillis(line.getInt96(fieldName, 0)));
                break;
            case "FLOAT":
                value = String.valueOf(line.getFloat(fieldName, 0));
                break;
            case "DOUBLE":
                value = String.valueOf(line.getDouble(fieldName, 0));
                break;
            case "FIXED_LEN_BYTE_ARRAY":
                // Only DECIMAL-annotated fixed-length arrays are decoded; any other
                // fixed-length array keeps the empty-string placeholder.
                if (type.getOriginalType() != null && "DECIMAL".equals(type.getOriginalType().name())) {
                    int precision = type.asPrimitiveType().getDecimalMetadata().getPrecision();
                    int scale = type.asPrimitiveType().getDecimalMetadata().getScale();
                    BigDecimal decimalValue = binaryToDecimal(precision, scale, line.getBinary(fieldName, 0).getBytes());

                    // Plain, locale-independent rendering with exactly `scale` fraction
                    // digits. A DecimalFormat pattern would follow the default locale
                    // (e.g. a comma separator) and leaves a trailing separator when
                    // scale is 0.
                    value = decimalValue.setScale(scale, java.math.RoundingMode.HALF_EVEN).toPlainString();
                }
                break;
            default:
                // Remaining primitive types (e.g. BINARY) are read as strings.
                value = line.getString(fieldName, 0);
        }

        jsonObject.put(fieldName, value);
    }

    /** Julian day number that corresponds to the Unix epoch day (1970-01-01). */
    private static final int JULIAN_EPOCH_OFFSET_DAYS = 2440588;
    /** Nanoseconds in one millisecond (1,000,000); must be derived from MILLISECONDS, not MICROSECONDS. */
    private static final long NANOS_PER_MILLISECOND = TimeUnit.MILLISECONDS.toNanos(1);
    /** Milliseconds in one day. */
    private static final long MILLIS_IN_DAY = TimeUnit.DAYS.toMillis(1);

    /**
     * Converts a 12-byte INT96 timestamp into milliseconds since the Unix epoch.
     *
     * <p>The value is read as an 8-byte nanoseconds-of-day followed by a 4-byte Julian
     * day number, both little-endian.
     *
     * @param timestampBinary the raw INT96 value
     * @return epoch milliseconds, or {@code 0} when the value is not exactly 12 bytes long
     */
    public static long getTimestampMillis(Binary timestampBinary) {
        if (timestampBinary.length() != 12) {
            return 0;
        }
        byte[] bytes = timestampBinary.getBytes();

        // fromBytes takes its arguments most-significant byte first, so passing the bytes
        // in reverse order decodes the little-endian fields.
        long timeOfDayNanos = Longs.fromBytes(bytes[7], bytes[6], bytes[5], bytes[4], bytes[3], bytes[2], bytes[1], bytes[0]);
        int julianDay = Ints.fromBytes(bytes[11], bytes[10], bytes[9], bytes[8]);

        return julianDayToMillis(julianDay) + (timeOfDayNanos / NANOS_PER_MILLISECOND);
    }

    /**
     * Converts a Julian day number to epoch milliseconds at the start of that day.
     *
     * @param julianDay Julian day number
     * @return milliseconds between the Unix epoch and the start of {@code julianDay}
     */
    private static long julianDayToMillis(int julianDay) {
        // Widen before subtracting so the day difference is computed in long arithmetic.
        return ((long) julianDay - JULIAN_EPOCH_OFFSET_DAYS) * MILLIS_IN_DAY;
    }

    /**
     * Decodes a big-endian two's-complement unscaled integer into a {@link BigDecimal}
     * with the given scale.
     *
     * <p>The conversion is exact: the unscaled value is never routed through
     * {@code double}, so no digits are lost and the result always carries {@code scale}.
     *
     * @param precision declared decimal precision; values up to 18 digits fit in a long
     * @param scale     number of digits to the right of the decimal point
     * @param bytes     big-endian two's-complement bytes of the unscaled value
     * @return the decoded decimal; zero for an empty array when {@code precision <= 18}
     * @throws NumberFormatException if {@code bytes} is empty and {@code precision > 18}
     */
    static BigDecimal binaryToDecimal(int precision, int scale, byte[] bytes) {
        // Fast path: up to 18 digits always fit in a long, provided the array itself is
        // no wider than a long. Wider arrays fall through to BigInteger.
        if (precision <= 18 && bytes.length <= Long.BYTES) {
            // Starting from the (signed) first byte sign-extends the value for free.
            long unscaled = bytes.length == 0 ? 0L : bytes[0];

            for (int i = 1; i < bytes.length; i++) {
                unscaled = (unscaled << 8) | (bytes[i] & 0xff);
            }

            return BigDecimal.valueOf(unscaled, scale);
        }

        return new BigDecimal(new BigInteger(bytes), scale);
    }

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值