Avro提供了两种序列化的方式:
avro-specific:
和thrift一样可以通过idl方式生成代码,生成命令:
java -jar avro-tools-1.7.4.jar compile schema data.avsc src/
avro-generic:
支持schema的动态加载,不需要重新编译就可以处理新的数据源
avro的数据类型:
基本类型:
null: no value
boolean: a binary value
int: 32-bit signed integer
long: 64-bit signed integer
float: single precision (32-bit) IEEE 754 floating-point number
double: double precision (64-bit) IEEE 754 floating-point number
bytes: sequence of 8-bit unsigned bytes
string: unicode character sequence
复杂类型:
avro支持6种复杂类型:records, enums, arrays, maps, unions, fixed
下面是一个record类型的定义示例:
{
"type": "record", // 指定record类型,(required)
"name": "AvroData", // 生成的类名称,(required)
"namespace": "com.wuwen", // 命名空间,对应生成类的包名,(optional)
"doc": "avro schema test.", // 用来描述该schema的,(optional)
"aliases": ["avroaliases"], // name的别名,(optional)
"fields": [ // 字段列表,(required)
{"name": "GUID", "type": "string"}, // 定义一个字段名为GUID,基本数据类型为string的字段
{"name": "Content", "type": "string"},
{"name": "Time", "type": "long"}
]
}
例子:
package com.wuwen;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.file.FileReader;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.apache.avro.util.Utf8;
public class AvroDemo {
public void serializeGeneric() throws IOException {
Schema schema = Schema.parse(new File("E:/avro/AvroData.avsc"));
GenericRecord datum = new GenericData.Record(schema);
datum.put("GUID", new Utf8("1234567"));
datum.put("Content", new Utf8("Avro测试"));
datum.put("Time", 20130305134700L);
// Serialize it.
ByteArrayOutputStream out = new ByteArrayOutputStream();
DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
Encoder encoder = EncoderFactory.get().binaryEncoder(out, null);
writer.write(datum, encoder);
encoder.flush();
out.close();
// Deserialize it.
DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(out.toByteArray(), null);
GenericRecord result = reader.read(null, decoder);
System.out.printf("GUID: %s, Content: %s, Time: %s\n", result.get("GUID"), result.get("Content"), result.get("Time"));
}
public void serializeSpecific() throws IOException {
AvroData datum = new AvroData();
datum.setGUID(new Utf8("1234567"));
datum.setContent(new Utf8("1234567"));
datum.setTime(20130305134700L);
File tmpFile = new File("E:/avro/myAvroExample.avro");
// Serialize it.
DataFileWriter<AvroData> writer = new DataFileWriter<AvroData>(new SpecificDatumWriter<AvroData>(AvroData.class));
writer.create(AvroData.SCHEMA$, tmpFile);
writer.append(datum);
writer.close();
// Deserialize it.
FileReader<AvroData> reader = DataFileReader.openReader(tmpFile, new SpecificDatumReader<AvroData>(AvroData.class));
while (reader.hasNext()) {
AvroData result = reader.next();
System.out.printf("GUID: %s, Content: %s, Time: %s\n", result.getGUID(), result.getContent(), result.getTime());
}
reader.close();
}
public static void main(String[] args) throws IOException {
AvroDemo example = new AvroDemo();
System.out.println("Generic");
example.serializeGeneric();
System.out.println("Specific");
example.serializeSpecific();
}
}