1、什么是串行化
串行化(序列化)简而言之就是将对象转换成字节流(二进制格式),以便存放到磁盘上或通过网络传输;紧凑的二进制格式可以减小存储空间和网络传输量,从而提高效率。串行化在分布式编程中尤为重要:如果不采用高效的串行化方式,传输效率将受到限制。
2、google的protobuf
1.下载google protobuf.
protoc-2.5.0-win32.zip
添加pom.xml依赖
<dependencies>
    <!-- Protocol Buffers Java runtime; same 2.5.0 release as the protoc compiler downloaded above. -->
    <dependency>
        <groupId>com.google.protobuf</groupId>
        <artifactId>protobuf-java</artifactId>
        <version>2.5.0</version>
    </dependency>

    <!-- JUnit 4, which supplies the @Test annotation used by the demo classes. -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.11</version>
    </dependency>
</dependencies>
2.设计对象
3.描述对象
// Address-book message definitions compiled with protoc into Java classes.
package tutorial;

// Generated Java code goes into this package, nested inside one outer class.
option java_package = "com.example.tutorial";
option java_outer_classname = "AddressBookProtos";

// One contact entry.
message Person {
    required string name  = 1;
    required int32  id    = 2;
    optional string email = 3;

    // Kind of phone line a number belongs to.
    enum PhoneType {
        MOBILE = 0;
        HOME   = 1;
        WORK   = 2;
    }

    // A phone number together with its kind; the kind is HOME when not set.
    message PhoneNumber {
        required string    number = 1;
        optional PhoneType type   = 2 [default = HOME];
    }

    // Zero or more phone numbers for this person.
    repeated PhoneNumber phone = 4;
}

// A collection of Person entries.
message AddressBook {
    repeated Person person = 1;
}
4.编译描述
cmd>protoc --java_out . xxx.proto
5.导入源代码到项目中
6.使用对象
/**
 * Round-trip demo for the protoc-generated {@code AddressBookProtos.Person} message:
 * {@link #write()} serializes one person to a file and {@link #read()} parses it back.
 */
public class TestProtoBuf {

    /** File shared by {@link #write()} and {@link #read()}. */
    private static final String DATA_FILE = "d:/prototbuf.data";

    /**
     * Builds a {@code Person} with one HOME phone number and writes it to {@link #DATA_FILE}.
     *
     * @throws Exception if the file cannot be opened or written
     */
    @Test
    public void write() throws Exception {
        AddressBookProtos.Person.PhoneNumber phone =
                AddressBookProtos.Person.PhoneNumber.newBuilder()
                        .setNumber("+351 999 999 999")
                        .setType(AddressBookProtos.Person.PhoneType.HOME)
                        .build();

        AddressBookProtos.Person john = AddressBookProtos.Person.newBuilder()
                .setId(12345)
                .setName("tomas")
                .setEmail("123@123.123")
                .addPhone(phone)
                .build();

        // try-with-resources: the stream is closed even if writeTo fails,
        // so the file handle is not leaked.
        try (FileOutputStream out = new FileOutputStream(DATA_FILE)) {
            john.writeTo(out);
        }
    }

    /**
     * Parses the {@code Person} stored in {@link #DATA_FILE} and prints its name.
     * Expects {@link #write()} to have produced the file beforehand.
     *
     * @throws Exception if the file is missing or cannot be parsed
     */
    @Test
    public void read() throws Exception {
        try (FileInputStream in = new FileInputStream(DATA_FILE)) {
            AddressBookProtos.Person john = AddressBookProtos.Person.parseFrom(in);
            System.out.println(john.getName());
        }
    }
}
3、avro
1.数据串行化系统
2.自描述语言.
数据结构和数据都存在文件中。跨语言。
使用json格式定义schema,数据本身以紧凑的二进制格式存储。
3.可压缩 + 可切割。
使用avro
a)定义schema
b)编译schema,生成java类
emp.avsc
{
"namespace": "Tutorialspoint",
"type": "record",
"name": "Employee",
"fields": [
{"name": "name", "type": "string"},
{"name": "id", "type": "int"},
{"name": "salary", "type": "int"},
{"name": "age", "type": "int"},
{"name": "address", "type": "string"}
]
}
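生成java类:编译后会按schema中的namespace生成对应的包目录,并按schema中的name生成java类。下文代码引用的是Tutorialspoint.Employee,即Tutorialspoint目录下的Employee.java,因此schema中的namespace应为"Tutorialspoint"、name应为"Employee"。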
cmd>java -jar avro-tools-1.7.7.jar compile schema emp.avsc .
c)使用java类
d)单元测试
package cn.ctgu.avrodemo.test;
import Tutorialspoint.Employee;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.avro.specific.SpecificDatumWriter;
import org.junit.Test;
import java.io.File;
import java.util.Iterator;
/**
 * Avro serialization demos in two flavours: with the schema-compiled {@code Employee}
 * class ({@link #write()} / {@link #read()}) and with {@link GenericRecord} driven directly
 * by the {@code emp.avsc} schema file ({@link #writeInSchema()} / {@link #readInSchema()}).
 *
 * <p>Each read test expects the file produced by the matching write test to exist.
 */
public class TestAvro {

    /** Container file written by {@link #write()} and read by {@link #read()}. */
    private static final File SPECIFIC_FILE = new File("J:\\Program\\file\\avro\\el.avro");

    /** Container file written by {@link #writeInSchema()} and read by {@link #readInSchema()}. */
    private static final File GENERIC_FILE = new File("J:\\Program\\file\\avro\\el2.avro");

    /** The .avsc schema definition parsed at runtime by the generic (no code generation) tests. */
    private static final File SCHEMA_FILE =
            new File("F:\\徐培成——spark\\线路一全\\05-avro和protobuf\\emp.avsc");

    /** Number of copies of the record appended by {@link #write()}. */
    private static final int SPECIFIC_RECORD_COUNT = 4;

    /** Number of copies of the record appended by {@link #writeInSchema()}. */
    private static final int GENERIC_RECORD_COUNT = 5;

    /**
     * Serializes several copies of one {@code Employee} to {@link #SPECIFIC_FILE}.
     *
     * @throws Exception if the file cannot be created or a record cannot be written
     */
    @Test
    public void write() throws Exception {
        // Build the object to serialize.
        // NOTE(review): assumes Employee was generated from the emp.avsc fields
        // (name, id, salary, age, address) - confirm. None of them is nullable or has a
        // default, so every field is populated; a null string field makes append() fail.
        Employee el = new Employee();
        el.setName("tomas");
        el.setId(1);
        el.setSalary(3000);
        el.setAge(12);
        el.setAddress("beijing");

        DatumWriter<Employee> empDatumWriter = new SpecificDatumWriter<Employee>(Employee.class);

        // try-with-resources closes (and thereby flushes) the file even when append fails.
        try (DataFileWriter<Employee> empFileWriter = new DataFileWriter<Employee>(empDatumWriter)) {
            empFileWriter.create(el.getSchema(), SPECIFIC_FILE);
            for (int i = 0; i < SPECIFIC_RECORD_COUNT; i++) {
                empFileWriter.append(el);
            }
        }
    }

    /**
     * Deserializes every {@code Employee} in {@link #SPECIFIC_FILE} and prints its name.
     *
     * @throws Exception if the file is missing or cannot be decoded
     */
    @Test
    public void read() throws Exception {
        DatumReader<Employee> empDatumReader = new SpecificDatumReader<Employee>(Employee.class);

        // The reader holds an open file handle, so it is closed via try-with-resources.
        try (DataFileReader<Employee> dataReader =
                new DataFileReader<Employee>(SPECIFIC_FILE, empDatumReader)) {
            for (Employee employee : dataReader) {
                System.out.println(employee.getName());
            }
        }
    }

    /**
     * Writes records using only the schema file - no generated classes required.
     * A {@link GenericRecord} plays the role that {@code Employee} plays in {@link #write()}.
     *
     * @throws Exception if the schema cannot be parsed or the file cannot be written
     */
    @Test
    public void writeInSchema() throws Exception {
        Schema schema = new Schema.Parser().parse(SCHEMA_FILE);

        // Keys must match the schema's field names exactly (they are case-sensitive):
        // putting an unknown key such as "Name" is rejected by GenericData.Record.
        // All five fields are set because the schema declares no defaults or nullable types.
        GenericRecord el = new GenericData.Record(schema);
        el.put("name", "ramu");
        el.put("id", 1);
        el.put("salary", 30000);
        el.put("age", 25);
        el.put("address", "chennai");

        DatumWriter<GenericRecord> empDatumWriter = new SpecificDatumWriter<GenericRecord>(schema);
        try (DataFileWriter<GenericRecord> empFileWriter =
                new DataFileWriter<GenericRecord>(empDatumWriter)) {
            empFileWriter.create(schema, GENERIC_FILE);
            for (int i = 0; i < GENERIC_RECORD_COUNT; i++) {
                empFileWriter.append(el);
            }
        }
    }

    /**
     * Reads records using only the schema file - no generated classes required.
     * This is the approach the original notes recommend.
     *
     * @throws Exception if the schema cannot be parsed or the file cannot be decoded
     */
    @Test
    public void readInSchema() throws Exception {
        Schema schema = new Schema.Parser().parse(SCHEMA_FILE);
        DatumReader<GenericRecord> empDatumReader = new SpecificDatumReader<GenericRecord>(schema);

        try (DataFileReader<GenericRecord> r =
                new DataFileReader<GenericRecord>(GENERIC_FILE, empDatumReader)) {
            while (r.hasNext()) {
                GenericRecord rec = r.next();
                // Look up by the schema's exact field name; an unknown key yields null.
                System.out.println(rec.get("name"));
            }
        }
    }
}