Avro 特点:
1. 丰富的数据结构
2. 紧凑、快速、二进制数据格式
3. 容器文件,持久化数据
4. 支持RPC
5. 代码生成是可选的,便于和动态语言进行整合
Avro 与 Thrift、Protobuf 对比:
1. Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages.
2. Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size.
3. No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names.
以下是下载安装步骤:
1. 下载avro-tools-1.8.1.jar, 导入开发包
http://avro.apache.org/releases.html
https://mirrors.tuna.tsinghua.edu.cn/apache/avro/avro-1.8.1/java/
导入开发包:
<dependency> <groupId>org.apache.avro</groupId> <artifactId>avro</artifactId> <version>1.8.1</version> </dependency>
2. 定义schema文件user.avsc
{"namespace": "example.avro",
"type": "record",
"name": "User",
"fields": [
{"name": "name", "type": "string"},
{"name": "favorite_number", "type": ["int", "null"]},
{"name": "favorite_color", "type": ["string", "null"]}
]
}
3. 生成java代码(注意:紧接着的示例 SerializationByAvroSchema 使用 GenericRecord,并不依赖生成的代码)
语法:java -jar /path/to/avro-tools-1.8.1.jar compile schema <schema file> <destination>
java -jar /path/to/avro-tools-1.8.1.jar compile schema user.avsc .
/**
 * Avro example that uses the generic API ({@code GenericRecord}) together with a schema
 * parsed at runtime from {@code D:/tmp/user.avsc}; no generated classes are referenced.
 */
public class SerializationByAvroSchema {

    /**
     * Entry point: prints the records stored in {@code D:/tmp/users.avro}.
     * {@link #writeAvro()} is not invoked here, so that file must already exist.
     */
    public static void main(String[] args) throws Exception {
        readAvro();
    }

    /**
     * Builds two generic records from the parsed schema and writes them to
     * {@code D:/tmp/users.avro}.
     *
     * @throws Exception if the schema cannot be read or the data file cannot be written
     */
    static void writeAvro() throws Exception {
        Schema schema = new Schema.Parser().parse(new File("D:/tmp/user.avsc"));

        GenericRecord user1 = new GenericData.Record(schema);
        user1.put("name", "Alyssa");
        user1.put("favorite_number", 256);
        user1.put("favorite_color", "red");

        GenericRecord user2 = new GenericData.Record(schema);
        user2.put("name", "Alyssa2");
        user2.put("favorite_number", 258);
        user2.put("favorite_color", "green");

        DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
        // try-with-resources closes the writer even when create/append throws,
        // so the underlying file handle is never leaked.
        try (DataFileWriter<GenericRecord> dataFileWriter =
                new DataFileWriter<GenericRecord>(datumWriter)) {
            dataFileWriter.create(schema, new File("D:/tmp/users.avro"));
            dataFileWriter.append(user1);
            dataFileWriter.append(user2);
        }
    }

    /**
     * Reads every record from {@code D:/tmp/users.avro} and prints it to standard output.
     *
     * @throws Exception if the schema or the data file cannot be read
     */
    static void readAvro() throws Exception {
        Schema schema = new Schema.Parser().parse(new File("D:/tmp/user.avsc"));
        File file = new File("D:/tmp/users.avro");
        DatumReader<GenericRecord> userDatumReader = new GenericDatumReader<GenericRecord>(schema);

        // try-with-resources closes the reader even when iteration throws.
        try (DataFileReader<GenericRecord> dataFileReader =
                new DataFileReader<GenericRecord>(file, userDatumReader)) {
            GenericRecord user = null;
            while (dataFileReader.hasNext()) {
                // The previous record is handed back to next(...) so the reader may reuse it.
                user = dataFileReader.next(user);
                System.out.println(user);
            }
        }
    }
}
4. 使用生成的 User 类,静态实现(该示例依赖第3步生成的java代码)
/**
 * Avro example that uses the specific API: records are instances of the {@code User}
 * class, written with {@code SpecificDatumWriter} and read with {@code SpecificDatumReader}.
 */
public class SerializationByAvroClass {

    /**
     * Entry point. Intentionally does nothing by default; uncomment one of the calls
     * below to run the corresponding example.
     */
    public static void main(String[] args) throws Exception {
        // writeAvro();
        // readAvro();
    }

    /**
     * Creates three {@code User} objects (via setters, the all-args constructor and the
     * builder) and writes them to {@code D:/tmp/users.avro}.
     *
     * @throws Exception if the data file cannot be written
     */
    static void writeAvro() throws Exception {
        // favorite_color is left unset on this record.
        User user1 = new User();
        user1.setName("Alyssa");
        user1.setFavoriteNumber(256);

        User user2 = new User("Ben", 7, "red");

        User user3 = User.newBuilder()
                .setName("Charlie")
                .setFavoriteColor("blue")
                .setFavoriteNumber(null)
                .build();

        DatumWriter<User> userDatumWriter = new SpecificDatumWriter<User>(User.class);
        // try-with-resources closes the writer even when create/append throws,
        // so the underlying file handle is never leaked.
        try (DataFileWriter<User> dataFileWriter = new DataFileWriter<User>(userDatumWriter)) {
            dataFileWriter.create(user1.getSchema(), new File("D:/tmp/users.avro"));
            dataFileWriter.append(user1);
            dataFileWriter.append(user2);
            dataFileWriter.append(user3);
        }
    }

    /**
     * Reads every {@code User} from {@code D:/tmp/users.avro} and prints it to standard output.
     *
     * @throws Exception if the data file cannot be read
     */
    static void readAvro() throws Exception {
        File file = new File("D:/tmp/users.avro");
        DatumReader<User> userDatumReader = new SpecificDatumReader<User>(User.class);

        // try-with-resources closes the reader even when iteration throws.
        try (DataFileReader<User> dataFileReader =
                new DataFileReader<User>(file, userDatumReader)) {
            User user = null;
            while (dataFileReader.hasNext()) {
                // The previous record is handed back to next(...) so the reader may reuse it.
                user = dataFileReader.next(user);
                System.out.println(user);
            }
        }
    }
}