读代码-VectorWritable

package org.apache.mahout.math;
public final class VectorWritable extends Configured implements Writable

VectorWritable 类包裹了Vector,提供了读写能力
// The wrapped vector; readFields() replaces it with the vector it deserializes.
private Vector vector;
// NOTE(review): presumably selects the float (lax precision) encoding when this
// instance serializes itself — not used in the code shown here; confirm against write().
private boolean writesLaxPrecision;

标志常量按 1、2、4、8 取值(各占一个二进制位),通过位运算进行组合和检测
// Bit flags OR-ed together into the single header byte that writeVector() emits
// and readFields() decodes.

// Set when vector.isDense() is true: every element is written, without indices.
public static final int FLAG_DENSE = 0x01;
// Set when vector.isSequentialAccess() is true: sparse indices are delta-coded.
public static final int FLAG_SEQUENTIAL = 0x02;
// Set when the vector is a NamedVector: its name is appended after the elements.
public static final int FLAG_NAMED = 0x04;
// Set when values are written as 4-byte floats instead of 8-byte doubles.
public static final int FLAG_LAX_PRECISION = 0x08;
// Number of flag bits defined above; readFields() rejects a header byte that has
// any bit set beyond these.
public static final int NUM_FLAGS = 4;


先写一个 Byte 作为向量属性描述,包含是否稠密、是否顺序访问、是否带名称以及是否采用低精度,各标志通过"或"操作组合。
再写向量长度(size)。
如果稠密,则遍历每个向量元素,根据精度要求写 float 或 double。
如果稀疏,先写非默认元素个数,再用非 0 元素迭代器遍历:
如顺序访问,则在每个元素前写 index 相对上一个元素的偏移量(差值编码);
如随机访问,则在每个元素前写 index 本身。
最后,如果含名称,写出名称。

/**
 * Serializes {@code vector} to {@code out}.
 *
 * <p>Layout, in order: one header byte of OR-ed {@code FLAG_*} bits, the vector
 * size as an unsigned varint, the elements, and finally the name (via
 * {@code writeUTF}) when the vector is a {@code NamedVector}.
 *
 * <p>Dense vectors emit every element value with no index. Sparse vectors emit
 * the non-default element count followed by (index, value) pairs from
 * {@code iterateNonZero()}; for sequential-access vectors the index is stored
 * as the difference from the previously written index.
 *
 * @param out          destination stream
 * @param vector       vector to serialize
 * @param laxPrecision when true, values are narrowed to {@code float} before writing
 * @throws IOException if writing to {@code out} fails
 */
public static void writeVector(DataOutput out, Vector vector, boolean laxPrecision) throws IOException {
  boolean dense = vector.isDense();
  boolean sequential = vector.isSequentialAccess();
  boolean named = vector instanceof NamedVector;

  // Assemble the header byte one flag at a time.
  int header = 0;
  if (dense) {
    header |= FLAG_DENSE;
  }
  if (sequential) {
    header |= FLAG_SEQUENTIAL;
  }
  if (named) {
    header |= FLAG_NAMED;
  }
  if (laxPrecision) {
    header |= FLAG_LAX_PRECISION;
  }
  out.writeByte(header);

  Varint.writeUnsignedVarInt(vector.size(), out);

  if (dense) {
    for (Vector.Element entry : vector) {
      double value = entry.get();
      if (laxPrecision) {
        out.writeFloat((float) value);
      } else {
        out.writeDouble(value);
      }
    }
  } else {
    Varint.writeUnsignedVarInt(vector.getNumNondefaultElements(), out);
    int previousIndex = 0;
    for (Iterator<Vector.Element> entries = vector.iterateNonZero(); entries.hasNext();) {
      Vector.Element entry = entries.next();
      int index = entry.index();
      // Sequential vectors store the gap to the previous index; others store the index itself.
      Varint.writeUnsignedVarInt(sequential ? index - previousIndex : index, out);
      previousIndex = index;
      double value = entry.get();
      if (laxPrecision) {
        out.writeFloat((float) value);
      } else {
        out.writeDouble(value);
      }
    }
  }

  if (named) {
    out.writeUTF(((NamedVector) vector).getName());
  }
}




readFields 是写的逆过程。
读入属性字节,用"与"操作检验各标志位。
读入向量长度。
如果稠密,依次读入全部元素并构造 DenseVector。
如果稀疏,先读入非默认元素个数,然后:
如果顺序访问则构造 SequentialAccessSparseVector,index 由偏移量逐个累加还原;
如果随机访问则构造 RandomAccessSparseVector,直接读入 index。
如果含名称,则读入名称并包裹成 NamedVector。


/**
 * Deserializes a vector from {@code in} and stores it in this writable's
 * {@code vector} field. This mirrors the layout produced by {@code writeVector}:
 * header flag byte, size varint, elements, optional name.
 *
 * @param in source stream positioned at the header byte
 * @throws IOException if reading from {@code in} fails
 * @throws IllegalArgumentException if the header byte has bits set outside the
 *         {@code NUM_FLAGS} known flags
 */
@Override
public void readFields(DataInput in) throws IOException {
  int flags = in.readByte();
  // Explicit check instead of Preconditions.checkArgument: Guava's message template
  // only substitutes %s, so the former "%d" placeholder was never filled in, and the
  // binary string was built on every successful read as well.
  if (flags >> NUM_FLAGS != 0) {
    throw new IllegalArgumentException("Unknown flags set: " + Integer.toString(flags, 2));
  }
  boolean dense = (flags & FLAG_DENSE) != 0;
  boolean sequential = (flags & FLAG_SEQUENTIAL) != 0;
  boolean named = (flags & FLAG_NAMED) != 0;
  boolean laxPrecision = (flags & FLAG_LAX_PRECISION) != 0;

  int size = Varint.readUnsignedVarInt(in);
  Vector v;
  if (dense) {
    // Dense layout: exactly `size` values, no indices.
    double[] values = new double[size];
    for (int i = 0; i < size; i++) {
      values[i] = laxPrecision ? in.readFloat() : in.readDouble();
    }
    v = new DenseVector(values);
  } else {
    // Sparse layout: element count, then (index, value) pairs.
    int numNonDefaultElements = Varint.readUnsignedVarInt(in);
    v = sequential
        ? new SequentialAccessSparseVector(size, numNonDefaultElements)
        : new RandomAccessSparseVector(size, numNonDefaultElements);
    if (sequential) {
      // Indices were delta-coded; rebuild each one by accumulating the deltas.
      int lastIndex = 0;
      for (int i = 0; i < numNonDefaultElements; i++) {
        int delta = Varint.readUnsignedVarInt(in);
        int index = lastIndex + delta;
        lastIndex = index;
        double value = laxPrecision ? in.readFloat() : in.readDouble();
        v.setQuick(index, value);
      }
    } else {
      // Indices were stored as-is.
      for (int i = 0; i < numNonDefaultElements; i++) {
        int index = Varint.readUnsignedVarInt(in);
        double value = laxPrecision ? in.readFloat() : in.readDouble();
        v.setQuick(index, value);
      }
    }
  }
  if (named) {
    String name = in.readUTF();
    v = new NamedVector(v, name);
  }
  vector = v;
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值