hadoop笔记五:对象的序列化和比较

1.序列化和反序列化

1)序列化相关的接口和类

Java中的类通过实现Serializable接口来支持序列化。

hadoop中的类通过实现Writable接口来支持序列化。

hadoop对应java基本数据类型实现序列化类:

203047_ERcw_3100849.png

Writable接口中定义了两个方法:

203355_j5vf_3100849.png

readFields(DataInput in )反序列化方法,write(DataOutput out)序列化方法。

官网例子:

/**
 * Minimal Writable example that carries a counter and a timestamp.
 * The two fields are written and read back in the same order.
 */
public class MyWritable implements Writable {

	// Payload fields.
	private int counter;
	private long timestamp;

	/**
	 * Serialization: writes this object's fields to the output stream,
	 * counter first, then timestamp.
	 *
	 * @param out stream that receives the field values
	 * @throws IOException if the underlying stream fails
	 */
	public void write(DataOutput out) throws IOException {
		out.writeInt(counter);
		out.writeLong(timestamp);
	}

	/**
	 * Deserialization: overwrites this object's fields with values read from
	 * the input stream, in the order used by {@code write}.
	 *
	 * @param in stream to read the field values from
	 * @throws IOException if the underlying stream fails
	 */
	public void readFields(DataInput in) throws IOException {
		counter = in.readInt();
		timestamp = in.readLong();
	}

	/**
	 * Convenience factory: creates a new instance and populates it from the stream.
	 *
	 * @param in stream to read the field values from
	 * @return the populated instance
	 * @throws IOException if the underlying stream fails
	 */
	public static MyWritable read(DataInput in) throws IOException {
		MyWritable instance = new MyWritable();
		instance.readFields(in);
		return instance;
	}

}

2)通过实例比较java和hadoop序列化差别

通过hadoop的IntWritable和java的Integer对比 

package com.jf.hdfs;

import java.io.ByteArrayOutputStream;
import java.io.ObjectOutputStream;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;

/**
 * Prints the number of bytes produced when a Hadoop IntWritable and a
 * java.lang.Integer holding the same value are written through an
 * ObjectOutputStream backed by an in-memory buffer.
 */
public class SerializationCompare {

	/**
	 * Serializes a Hadoop value into a byte array. Writable is the common
	 * interface of Hadoop's serializable types.
	 *
	 * @param writable value whose {@code write} method is invoked
	 * @return the bytes collected in the in-memory buffer
	 * @throws Exception if writing to the stream fails
	 */
	public static byte[] serialize(Writable writable) throws Exception {
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
		// try-with-resources closes (and thereby flushes) the stream even if write() throws.
		try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
			writable.write(oos);
		}
		// Read the buffer only after the stream has been closed so nothing is left unflushed.
		return baos.toByteArray();
	}

	/**
	 * Writes the int value of the given Integer through an ObjectOutputStream
	 * and returns the resulting bytes.
	 *
	 * @param integer value to write; it is unboxed, so it must not be null
	 * @return the bytes collected in the in-memory buffer
	 * @throws Exception if writing to the stream fails
	 */
	public static byte[] serialize(Integer integer) throws Exception {
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
		try (ObjectOutputStream oos = new ObjectOutputStream(baos)) {
			// NOTE(review): writeInt stores only the unboxed primitive. To measure real
			// Java object serialization of an Integer, writeObject(integer) would be needed;
			// kept as writeInt here so the printed result stays the same.
			oos.writeInt(integer);
		}
		return baos.toByteArray();
	}

	/**
	 * Serializes the value 200 both ways and prints the two byte-array lengths.
	 */
	public static void main(String[] args) throws Exception {
		IntWritable intWritable = new IntWritable(200);
		byte[] bytes = serialize(intWritable);
		System.out.println("hadoop序列化:"+bytes.length);

		// Integer.valueOf replaces the deprecated Integer(int) constructor.
		Integer integer = Integer.valueOf(200);
		byte[] bytes2 = serialize(integer);
		System.out.println("java序列化:"+bytes2.length);

	}
}

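执行结果:两者长度相同,都是10个字节。这是因为上面两个方法都使用了ObjectOutputStream,并且都只写入了一个int:4字节的流头 + 2字节的块数据头 + 4字节的int值。要体现真正的差别,hadoop序列化应使用DataOutputStream(IntWritable只占4个字节),java序列化应使用writeObject写入Integer对象(约81个字节)。因此在大数据场景下hadoop的序列化更紧凑、更占优势。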

hadoop序列化:10
java序列化:10

3)hadoop中复杂对象类型序列化

package com.jf.hdfs;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

/**
 * Demo that round-trips a Student through write/readFields using in-memory
 * streams and prints the serialized bytes and the restored field values.
 */
public class ObjecSerialize {

	public static void main(String[] args) throws Exception {
		// Build the sample list of Text values first.
		List<Text> tags = new ArrayList<Text>();
		tags.add(new Text("学校"));
		tags.add(new Text("年纪"));
		tags.add(new Text("班级"));

		// Populate the object that will be serialized.
		Student source = new Student();
		source.setId(new IntWritable(10001));
		source.setName(new Text("sean"));
		source.setGender(true);
		source.setList(tags);

		// Serialization: write the object into an in-memory byte buffer.
		ByteArrayOutputStream sink = new ByteArrayOutputStream();
		source.write(new DataOutputStream(sink));
		byte[] payload = sink.toByteArray();
		System.out.println("序列化之后结果:" + Arrays.toString(payload) + ",字节数组长度:" + payload.length);

		// Deserialization: fill a fresh object from the same bytes.
		Student restored = new Student();
		restored.readFields(new DataInputStream(new ByteArrayInputStream(payload)));

		// The demo prints exactly the three list entries that were added above.
		List<Text> items = restored.getList();
		String joined = items.get(0).toString() + "," + items.get(1).toString() + "," + items.get(2).toString();
		System.out.println("反序列化ID=" + restored.getId().get()
				+ ",name=" + restored.getName().toString()
				+ ",gender=" + restored.isGender()
				+ ",list=[" + joined + "]");
	}
}

/**
 * Writable made of two Hadoop writables (id, name), a primitive boolean flag
 * and a list of Text values. Fields are written and read in the order
 * id, name, gender, list size, list elements.
 */
class Student implements Writable {

	private IntWritable id;

	private Text name;

	private boolean gender;

	private List<Text> list = new ArrayList<Text>();

	/**
	 * Creates an empty student. id and name are allocated here because
	 * readFields fills the existing instances in place.
	 */
	Student() {
		id = new IntWritable();
		name = new Text();
	}

	/**
	 * Copy constructor. Every field is copied by value: assigning
	 * {@code student.id} or {@code student.name} directly would share the same
	 * mutable writable between both objects, which must be avoided.
	 *
	 * @param student the instance to copy
	 */
	Student(Student student) {
		id = new IntWritable(student.id.get());
		name = new Text(student.name.toString());
		// gender and list are part of the object state too and must be copied as well.
		gender = student.gender;
		if (student.list != null) {
			for (Text item : student.list) {
				// New Text per element so the two lists do not share mutable entries.
				list.add(new Text(item.toString()));
			}
		}
	}

	/**
	 * Serialization: writes all fields of this object to the stream.
	 *
	 * @param out stream that receives the data
	 * @throws IOException if the underlying stream fails
	 */
	public void write(DataOutput out) throws IOException {
		id.write(out);
		name.write(out);
		// The primitive flag is wrapped in a writable for the write.
		BooleanWritable genderWritable = new BooleanWritable(gender);
		genderWritable.write(out);

		// A collection is written as its element count followed by each element,
		// so that readFields knows how many elements to read back.
		int size = list.size();
		new IntWritable(size).write(out);
		for (int i = 0; i < size; i++) {
			Text text = list.get(i);
			text.write(out);
		}
	}

	/**
	 * Deserialization: reads the binary data from the stream into this object,
	 * in the same order used by {@code write}. Any previous list content is discarded.
	 *
	 * @param in stream to read from
	 * @throws IOException if the underlying stream fails
	 */
	public void readFields(DataInput in) throws IOException {
		id.readFields(in);
		name.readFields(in);
		// Read the flag as a writable, then store it in the primitive field.
		BooleanWritable bw = new BooleanWritable();
		bw.readFields(in);
		gender = bw.get();

		// Read the element count first, then that many elements.
		IntWritable size = new IntWritable();
		size.readFields(in);
		list.clear();
		for (int i = 0; i < size.get(); i++) {
			Text text = new Text();
			text.readFields(in);
			list.add(text);
		}
	}

	public IntWritable getId() {
		return id;
	}

	public void setId(IntWritable id) {
		this.id = id;
	}

	public Text getName() {
		return name;
	}

	public void setName(Text name) {
		this.name = name;
	}

	public boolean isGender() {
		return gender;
	}

	public void setGender(boolean gender) {
		this.gender = gender;
	}

	public List<Text> getList() {
		return list;
	}

	public void setList(List<Text> list) {
		this.list = list;
	}
}

执行结果:

序列化之后结果:[0, 0, 39, 17, 4, 115, 101, 97, 110, 1, 0, 0, 0, 3, 6, -27, -83, -90, -26, -96, -95, 6, -27, -71, -76, -25, -70, -86, 6, -25, -113, -83, -25, -70, -89],字节数组长度:35
反序列化ID=10001,name=sean,gender=true,list=[学校,年纪,班级]

2.对象比较

1)WritableComparable

WritableComparable<T>接口继承了Comparable<T>和Writable接口,因此共有三个方法:从Writable继承过来的readFields和write,以及从Comparable<T>继承过来的compareTo。

官网提供例子:

package com.jf.hdfs;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;

/**
 * Example key type that can be serialized and ordered.
 * Ordering is by counter only; the hash code combines counter and timestamp.
 */
public class MyWritableComparable implements WritableComparable {

	private int counter;
	private long timestamp;

	/**
	 * Serialization: writes counter, then timestamp.
	 */
	public void write(DataOutput out) throws IOException {
		out.writeInt(counter);
		out.writeLong(timestamp);
	}

	/**
	 * Deserialization: reads counter, then timestamp.
	 */
	public void readFields(DataInput in) throws IOException {
		counter = in.readInt();
		timestamp = in.readLong();
	}

	/**
	 * Orders instances by ascending counter.
	 *
	 * @param o the object to compare with; it is cast to MyWritableComparable
	 * @return -1, 0 or 1 when this counter is smaller than, equal to or larger than the other one
	 */
	public int compareTo(Object o) {
		// NOTE(review): timestamp is not part of the ordering although hashCode uses it.
		MyWritableComparable other = (MyWritableComparable) o;
		if (this.counter < other.counter) {
			return -1;
		}
		if (this.counter > other.counter) {
			return 1;
		}
		return 0;
	}

	/**
	 * Combines counter and the folded 64-bit timestamp with the multiplier 31.
	 */
	public int hashCode() {
		// Fold the upper 32 bits of the long into the lower 32 bits.
		int timestampHash = (int) (timestamp ^ (timestamp >>> 32));
		return 31 * (31 + counter) + timestampHash;
	}

}

2)RawComparator

RawComparator<T>接口继承了java.util.Comparator<T>接口,除了从Comparator<T>继承过来的两个方法compare和equals之外,它自己也定义了一个方法compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2),有6个参数。该方法是在字节流的层面上去做比较,不需要先反序列化成对象。b1:第一个对象所在的字节数组,s1:从b1的哪个位置开始比较,l1:比较多少个字节;b2、s2、l2是第二个对象对应的三个参数。

3)WritableComparator

WritableComparator类,实现了Comparator、Configurable、RawComparator三个接口。

构造方法

224944_fPe1_3100849.png

部分实现方法

225105_Eo0C_3100849.png

4)hadoop中已经实现了一些可以序列化又可以比较的类

225307_zX15_3100849.png

5)比较两个对象大小

有两种方式,一种是该类实现WritableComparable接口,另一种是通过实现一个比较器(例如继承WritableComparator类)去进行比较。

这里通过WritableComparable接口实现一个自定义类的比较方法。

package com.jf.hdfs;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;

/**
 * Serializable and comparable account value made of a code, a name and a gender flag.
 * Instances are ordered by code, then name, then gender.
 */
public class AccountWritable implements WritableComparable<AccountWritable> {

	private IntWritable code;
	private Text name;
	private BooleanWritable gender;

	/**
	 * Creates an empty account. The three writables are allocated here because
	 * readFields fills the existing instances in place.
	 */
	AccountWritable() {
		code = new IntWritable();
		name = new Text();
		gender = new BooleanWritable();
	}

	/**
	 * Copy constructor: a constructor whose parameter type is the class itself.
	 * All fields are copied by value, not by reference.
	 *
	 * @param accountWritable the instance to copy
	 */
	AccountWritable(AccountWritable accountWritable) {
		code = new IntWritable(accountWritable.code.get());
		name = new Text(accountWritable.name.toString());
		gender = new BooleanWritable(accountWritable.gender.get());
	}

	/**
	 * Sets all three fields at once. The values are copied into new writables
	 * so that this object does not share mutable state with the arguments.
	 */
	public void set(IntWritable code, Text name, BooleanWritable gender) {
		this.code = new IntWritable(code.get());
		this.name = new Text(name.toString());
		this.gender = new BooleanWritable(gender.get());
	}

	/**
	 * Serialization: writes code, name and gender to the output stream.
	 */
	public void write(DataOutput out) throws IOException {
		code.write(out);
		name.write(out);
		gender.write(out);
	}

	/**
	 * Deserialization: reads code, name and gender from the input stream,
	 * in the order used by {@code write}.
	 */
	public void readFields(DataInput in) throws IOException {
		code.readFields(in);
		name.readFields(in);
		gender.readFields(in);
	}

	/**
	 * Compares by code first, then by name, then by gender.
	 *
	 * @param o the account to compare with
	 * @return the result of the first field comparison that is not 0, or 0 if all three are equal
	 */
	public int compareTo(AccountWritable o) {
		int result = this.code.compareTo(o.code);
		if (result == 0) {
			result = this.name.compareTo(o.name);
			if (result == 0) {
				result = this.gender.compareTo(o.gender);
			}
		}
		return result;
	}

	/**
	 * Two accounts are equal when compareTo reports 0, i.e. code, name and
	 * gender all compare as equal. Defined together with hashCode so that
	 * equal accounts always produce the same hash.
	 */
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (!(obj instanceof AccountWritable)) {
			return false;
		}
		return compareTo((AccountWritable) obj) == 0;
	}

	/**
	 * Hash built from code and name.
	 */
	public int hashCode() {
		final int prime = 31;
		int result = 1;
		result = prime * result + code.get();
		// Use the String hash directly. Folding an int with h ^ (h >>> 32) must not be
		// used: int shift distances are taken modulo 32, so that expression is always 0
		// and the name would not contribute to the hash at all.
		result = prime * result + name.toString().hashCode();
		return result;
	}

	public IntWritable getCode() {
		return code;
	}

	// NOTE(review): unlike set(...), the single-field setters store the passed reference.
	public void setCode(IntWritable code) {
		this.code = code;
	}

	public Text getName() {
		return name;
	}

	public void setName(Text name) {
		this.name = name;
	}

	public BooleanWritable getGender() {
		return gender;
	}

	public void setGender(BooleanWritable gender) {
		this.gender = gender;
	}
}

测试:

public static void main(String[] args) {
		// Create two accounts and give them identical field values.
		AccountWritable left = new AccountWritable();
		AccountWritable right = new AccountWritable();
		left.set(new IntWritable(30), new Text("sean"), new BooleanWritable(true));
		right.set(new IntWritable(30), new Text("sean"), new BooleanWritable(true));

		// Print the comparison result of the two accounts.
		int order = left.compareTo(right);
		System.out.println(order);
	}

 

转载于:https://my.oschina.net/u/3100849/blog/1614203

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值