Hadoop是用Java语言实现的,然而它的基本数据类型却不是标准的Java对象,而是对它们的一层封装,以便支持序列化。
序列化是指将结构化对象转换为字节流,以便于在网络上进行传输或写到磁盘进行永久存储
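Hadoop常用的数据类型有:BooleanWritable、ByteWritable、IntWritable、LongWritable、FloatWritable、DoubleWritable、Text、NullWritable 等。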
这一套数据类型虽然能满足大部分的需求,但有些情况下要写出更灵活的程序,还是需要定制自己的Writable类型
实现的数据类型如果要作为value, 则只需要实现 Writable 接口
实现的数据类型如果要作为key, 则必须要实现 WritableComparable 接口
/**
 * Serialization contract: a type that can write its state to a binary stream
 * and later restore that state from such a stream.
 */
public interface Writable {
    /**
     * Restores this object's fields by parsing them from the given binary input.
     *
     * @param in the binary input to read the serialized fields from
     * @throws IOException if reading from {@code in} fails
     */
    void readFields(DataInput in) throws IOException;

    /**
     * Serializes this object's fields into the given binary output.
     *
     * @param out the binary output to write the fields to
     * @throws IOException if writing to {@code out} fails
     */
    void write(DataOutput out) throws IOException;
}
方法readFields用于从DataInput二进制流中解析数据
方法write用于向DataOutput二进制流中封装数据
例如将一个三维坐标结构体struct point3d { float x; float y; float z;}定义为Hadoop数据类型,并作为value值,实现如下:
public class Point3D implements Writable {
public float x;
public float y;
public float z;
public float x;
public float y;
public float z;
public Point3D(float x, float y, float z) {
this.x = x;
this.y = y;
this.z = z;
}
this.x = x;
this.y = y;
this.z = z;
}
public Point3D() {
this(0.0f, 0.0f, 0.0f);
}
this(0.0f, 0.0f, 0.0f);
}
public void write(DataOutput out) throws IOException {
out.writeFloat(x);
out.writeFloat(y);
out.writeFloat(z);
}
out.writeFloat(x);
out.writeFloat(y);
out.writeFloat(z);
}
public void readFields(DataInput in) throws IOException {
x = in.readFloat();
y = in.readFloat();
z = in.readFloat();
}
x = in.readFloat();
y = in.readFloat();
z = in.readFloat();
}
public String toString() {
return Float.toString(x) + ", "
+ Float.toString(y) + ", "
+ Float.toString(z);
}
}
return Float.toString(x) + ", "
+ Float.toString(y) + ", "
+ Float.toString(z);
}
}
但是,如果该数据类型将要作为key, 则必须要实现 WritableComparable 接口,要实现的方法有:
readFields()
write()
toString()
compareTo() // Key排序时将要使用
equals()
hashCode() // Partitioner将要使用
set()
get()
仍旧以上述为例,
将一个三维坐标结构体struct point3d { float x; float y; float z;}定义为hadoop数据类型,并作为Key值
public class Point3D implements WritableComparable {
public float x;
public float y;
public float z;
public float x;
public float y;
public float z;
public Point3D(float x, float y, float z) {
this.x = x;
this.y = y;
this.z = z;
}
this.x = x;
this.y = y;
this.z = z;
}
public Point3D() {
this(0.0f, 0.0f, 0.0f);
}
this(0.0f, 0.0f, 0.0f);
}
public void write(DataOutput out) throws IOException {
out.writeFloat(x);
out.writeFloat(y);
out.writeFloat(z);
}
out.writeFloat(x);
out.writeFloat(y);
out.writeFloat(z);
}
public void readFields(DataInput in) throws IOException {
x = in.readFloat();
y = in.readFloat();
z = in.readFloat();
}
x = in.readFloat();
y = in.readFloat();
z = in.readFloat();
}
public String toString() {
return Float.toString(x) + ", "
+ Float.toString(y) + ", "
+ Float.toString(z);
}
return Float.toString(x) + ", "
+ Float.toString(y) + ", "
+ Float.toString(z);
}
/** return the Euclidean distance from (0, 0, 0) */
public float distanceFromOrigin() {
return (float)Math.sqrt(x*x + y*y + z*z);
}
public float distanceFromOrigin() {
return (float)Math.sqrt(x*x + y*y + z*z);
}
public int compareTo(Point3D other) {
float myDistance = distanceFromOrigin();
float otherDistance = other.distanceFromOrigin();
float myDistance = distanceFromOrigin();
float otherDistance = other.distanceFromOrigin();
return Float.compare(myDistance, otherDistance);
}
}
public boolean equals(Object o) {
if (!(o instanceof Point3D)) {
return false;
}
if (!(o instanceof Point3D)) {
return false;
}
Point3D other = (Point3D)o;
return this.x == other.x && this.y == other.y
&& this.z == other.z;
}
return this.x == other.x && this.y == other.y
&& this.z == other.z;
}
public int hashCode() {
return Float.floatToIntBits(x)
^ Float.floatToIntBits(y)
^ Float.floatToIntBits(z);
}
}
return Float.floatToIntBits(x)
^ Float.floatToIntBits(y)
^ Float.floatToIntBits(z);
}
}