Hadoop的Text类型是将字符串用UTF-8编码转换成bytes位数组。
/**
* Converts the provided String to bytes using the* UTF-8 encoding. If <code>replace</code> is true, then
* malformed input is replaced with the
* substitution character, which is U+FFFD. Otherwise the
* method throws a MalformedInputException.
* @return ByteBuffer: bytes stores at ByteBuffer.array()
* and length is ByteBuffer.limit()
*/
public static ByteBuffer encode(String string, boolean replace)
throws CharacterCodingException {
CharsetEncoder encoder = ENCODER_FACTORY.get();
if (replace) {
encoder.onMalformedInput(CodingErrorAction.REPLACE);
encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
}
ByteBuffer bytes =
encoder.encode(CharBuffer.wrap(string.toCharArray()));
if (replace) {
encoder.onMalformedInput(CodingErrorAction.REPORT);
encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
}
return bytes;
}