背景
由于业务需要,要将SparkSQL中的Array、Map、Struct使用Avro序列化成一个大的字节数组进行存储。但在序列化过程中会涉及Java的BigDecimal类型,根据Avro官网的说明,定义schema如下:
{
"namespace":"com.bugboy.avro.bean",
"type":"record",
"name":"DecimalDemo",
"fields":[
{"name":"id", "type":"string"},
{"name":"value","type":{"type":"bytes","logicalType": "decimal","precision": 10,"scale": 2}}
]
}
使用该schema序列化时,还是遇到了很多问题,比如java.math.BigDecimal不能强转成ByteBuffer等等,这里就不细说了。反正就是官网上没有找到具体实现方案。
方案
也许源码中会有对应的demo,但由于时间紧迫,没办法自己看源码,就使用面向debug编程的笨办法,一步一步地设置、摸索。经过多次尝试之后,最终得到如下方案,用于对包含BigDecimal的Record(对应SparkSQL中的Struct)进行序列化与反序列化。
package com.bugboy.avro.bean;
import org.apache.avro.Conversions;
import org.apache.avro.LogicalTypes;
import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.EncoderFactory;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.math.BigDecimal;
/**
 * Demonstrates round-tripping an Avro generic record that contains a
 * {@link BigDecimal} field stored as {@code bytes} with the {@code decimal}
 * logical type.
 *
 * <p>The record is built against a programmatically constructed schema,
 * written to a byte array with the Avro binary encoding, read back, and its
 * fields are printed for verification.
 */
public class SerRecord {

    /** Precision of the decimal logical type attached to the "value" field. */
    private static final int DECIMAL_PRECISION = 10;

    /** Scale of the decimal logical type attached to the "value" field. */
    private static final int DECIMAL_SCALE = 2;

    /**
     * Serializes a sample record to bytes, deserializes it again and prints the
     * "id" and "value" fields of the decoded record.
     *
     * @param args unused
     * @throws IOException if Avro fails to encode or decode the record
     */
    public static void main(String[] args) throws IOException {
        Schema schema = buildSchema();

        // Register the decimal conversion on a private GenericData instead of the
        // shared GenericData.get() singleton that the no-arg datum writer/reader
        // use. This keeps the demo from changing global state and makes it
        // explicit that BOTH the writer and the reader need the conversion.
        // Without it, writing fails because a BigDecimal cannot be cast to ByteBuffer.
        GenericData data = new GenericData();
        data.addLogicalTypeConversion(new Conversions.DecimalConversion());

        // Prepare the record.
        GenericData.Record record = new GenericData.Record(schema);
        record.put("id", "001");
        // Build the decimal from a string so the value is exact and its scale (2)
        // matches the scale declared in the schema.
        record.put("value", new BigDecimal("67.78"));

        byte[] bytes = serialize(record, schema, data);
        GenericData.Record newRecord = deserialize(bytes, schema, data);

        // Print the decoded values for verification.
        System.out.println(newRecord.get("id"));
        System.out.println(newRecord.get("value"));
    }

    /**
     * Builds the record schema: a string "id" and a decimal(10, 2) "value".
     * The same schema could also be obtained by parsing a JSON definition.
     */
    private static Schema buildSchema() {
        SchemaBuilder.BaseTypeBuilder<Schema> builder = SchemaBuilder.builder();
        // addToSchema attaches the logical type to the bytes schema and returns it.
        Schema decimalSchema =
                LogicalTypes.decimal(DECIMAL_PRECISION, DECIMAL_SCALE).addToSchema(builder.bytesType());
        return builder.record("Hello").namespace("").fields()
                .name("id").type(builder.stringType()).noDefault()
                .name("value").type(decimalSchema).noDefault()
                .endRecord();
    }

    /** Encodes {@code record} with the Avro binary encoding and returns the bytes. */
    private static byte[] serialize(GenericData.Record record, Schema schema, GenericData data)
            throws IOException {
        // The schema is supplied up front; a writer without a schema fails with a
        // NullPointerException on write.
        GenericDatumWriter<GenericData.Record> writer = new GenericDatumWriter<>(schema, data);
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        BinaryEncoder encoder = EncoderFactory.get().directBinaryEncoder(baos, null);
        writer.write(record, encoder);
        // Flush before reading the stream so no encoded bytes are left behind.
        encoder.flush();
        return baos.toByteArray();
    }

    /** Decodes a record from {@code bytes}, using {@code schema} as writer and reader schema. */
    private static GenericData.Record deserialize(byte[] bytes, Schema schema, GenericData data)
            throws IOException {
        // Writer and reader schema are both required; a reader without them fails
        // with a NullPointerException on read.
        GenericDatumReader<GenericData.Record> reader = new GenericDatumReader<>(schema, schema, data);
        ByteArrayInputStream bais = new ByteArrayInputStream(bytes);
        BinaryDecoder decoder = DecoderFactory.get().directBinaryDecoder(bais, null);
        return reader.read(null, decoder);
    }
}
pom依赖如下:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the Avro BigDecimal serialization demo. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.bugboy</groupId>
<artifactId>avro-bean-serder</artifactId>
<version>1.0.0</version>
<properties>
<!-- Single Avro version shared by the avro library and the avro-maven-plugin below. -->
<avro-version>1.8.2</avro-version>
</properties>
<dependencies>
<!-- Avro runtime: schema, generic record, encoder/decoder and logical-type classes. -->
<dependency>
<groupId>org.apache.avro</groupId>
<artifactId>avro</artifactId>
<version>${avro-version}</version>
</dependency>
<!-- NOTE(review): not referenced directly by the SerRecord demo; presumably needed by
     Avro's date/time logical-type support in this Avro version. Confirm before removing. -->
<dependency>
<groupId>joda-time</groupId>
<artifactId>joda-time</artifactId>
<version>2.9.4</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compile with Java 8 source and target levels. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<!-- Runs the plugin's "schema" goal in the generate-sources phase, reading from
     src/main/avro and writing its output into src/main/java. -->
<plugin>
<groupId>org.apache.avro</groupId>
<artifactId>avro-maven-plugin</artifactId>
<version>${avro-version}</version>
<executions>
<execution>
<phase>generate-sources</phase>
<goals>
<goal>schema</goal>
</goals>
<configuration>
<sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory>
<outputDirectory>${project.basedir}/src/main/java/</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
写在最后
过年不能回家,好烦啊!!!!!