\xE4\xBD\xA0\xE5\xA5\xBD 解码
感谢 http://www.bejson.com/convert/ox2str/
p e4 bd a0
P -100 -61 -32
P 11100100 10111101 10100000
J -28 -67 -96
J 10011100 11000011 11100000
p e5 a5 bd
P -101 -37 -61
P 11100101 10100101 10111101
J -27 -91 -67
J 10011011 11011011 11000011
Java:你好[-28, -67, -96, -27, -91, -67]
\xE4\xBD\xA0\xE5\xA5\xBD -> e4bda0e5a5bd -> 你好
与python的关系
python2
普通字符串常量,默认utf-8
>>> a = 'aa'
>>> type(a[0])
<type 'str'>
bytes
>>> bytes #(str == bytes)为 True
<type 'str'>
bytearray
<type 'bytearray'>
r'xxx';
非转义的原始字符串
u'xxx';
不仅仅是针对中文, 可以针对任何的字符串, 代表对该字符串进行unicode编码。
python3
普通字符串常量 <class 'str'>,默认unicode
>>> a = 'aa'
>>> type(a[0])
<class 'str'>
bytes :<class 'bytes'>
bytearray:<class 'bytearray'>
r'xxx';
u'xxx';
b'xxx';
python3.x里默认的str是unicode, bytes是(py2.x)的str, b""前缀代表的就是bytes
python2.x里, b前缀没什么具体意义, 只是为了兼容python3.x的这种写法
从HBase中取出来的字符串, 其中的中文不能直接显示,
所以这里实现了对 \x 转义的中文与英文字符混合编码的解析。
#-*-coding:utf-8-*-
# python2 实现
def dvstr(r):
    """Decode a string containing literal ``\\xNN`` escapes into raw bytes.

    Every four-character sequence ``\\x`` + two hex digits is replaced by the
    single byte it names; every other character is copied through as the byte
    with the same ordinal.  The result is a byte string (``str`` in Python 2),
    so escaped UTF-8 text comes back as UTF-8 encoded bytes.

    A backslash that is not followed by ``x`` and two hex digits (including a
    truncated escape at the end of the input) is copied through literally
    instead of raising.

    :param r: text mixing plain ASCII characters and ``\\xNN`` escapes.
    :return: the decoded byte string.
    """
    hex_digits = "0123456789abcdefABCDEF"
    out = bytearray()
    n = len(r)
    i = 0
    while i < n:
        if (
            r[i] == "\\"
            and i + 3 < n
            and r[i + 1] == "x"
            and r[i + 2] in hex_digits
            and r[i + 3] in hex_digits
        ):
            out.append(int(r[i + 2:i + 4], 16))
            i += 4
        else:
            # Append the byte value directly; going through hex() text breaks
            # for ordinals below 0x10, which render as a single hex digit.
            out.append(ord(r[i]))
            i += 1
    return bytes(out)
def evstr(s):
    """Encode a byte string so that non-printable bytes become ``\\xNN`` text.

    Bytes in the range 1..126 are copied through unchanged; every other byte
    (0 and 127..255) is written as a backslash, ``x`` and two lowercase hex
    digits.  Backslashes in the input are copied through unescaped.

    :param s: byte string to encode (``str`` in Python 2).
    :return: the escaped byte string.
    """
    out = bytearray()
    for byte in bytearray(s):
        if 0 < byte < 127:
            out.append(byte)
        else:
            # %02x keeps two digits even for byte 0, where hex() yields '0x0'.
            out.extend(b"\\x%02x" % byte)
    return bytes(out)
# Demo: turn the escaped form back into readable text and print it.
rstr = r'\xe4\xbd\xa0\xe5\xa5\xbd\xe5\x95\x8a121A\xe5\x8f\xb7'
sstr = dvstr(rstr)
print(sstr)

# Demo: escape readable text into its \xNN form and print it.
sstr = "你好啊121A号"
rstr = evstr(sstr)
print(rstr)
# python3 实现
def dvstr(r: str) -> str:
    """Decode text containing literal ``\\xNN`` escapes into a Unicode string.

    Every four-character sequence ``\\x`` + two hex digits contributes the
    single byte it names; every other character contributes its UTF-8
    encoding.  The collected bytes are then decoded as UTF-8.

    A backslash that is not followed by ``x`` and two hex digits (including a
    truncated escape at the end of the input) is kept literally instead of
    raising.

    :param r: text mixing plain characters and ``\\xNN`` escapes.
    :return: the decoded string.
    :raises UnicodeDecodeError: if the resulting bytes are not valid UTF-8.
    """
    hex_digits = "0123456789abcdefABCDEF"
    out = bytearray()
    n = len(r)
    i = 0
    while i < n:
        if (
            r[i] == "\\"
            and i + 3 < n
            and r[i + 1] == "x"
            and r[i + 2] in hex_digits
            and r[i + 3] in hex_digits
        ):
            out.append(int(r[i + 2:i + 4], 16))
            i += 4
        else:
            # Encode the character itself; slicing hex(ord(c)) text breaks for
            # code points below 0x10 and truncates those above 0xff.
            out.extend(r[i].encode("utf-8"))
            i += 1
    return out.decode("utf-8")
def evstr(s: str) -> str:
    """Encode a string so that non-printable UTF-8 bytes become ``\\xNN`` text.

    The input is encoded as UTF-8.  Bytes in the range 1..126 are emitted as
    the corresponding ASCII character; every other byte (0 and 127..255) is
    written as a backslash, ``x`` and two lowercase hex digits.  Backslashes
    in the input are emitted unescaped.

    :param s: text to encode.
    :return: ASCII-only text with ``\\xNN`` escapes.
    """
    parts = []
    for byte in s.encode("utf-8"):
        if 0 < byte < 127:
            parts.append(chr(byte))
        else:
            # %02x keeps two digits even for byte 0, where hex() yields '0x0'.
            parts.append("\\x%02x" % byte)
    return "".join(parts)
# Demo: escape readable text into its \xNN form and print it.
sstr = "你A"
rstr = evstr(sstr)
print(rstr)

# Demo: turn the escaped form back into readable text and print it.
rstr = r'\xe4\xbd\xa0A'
sstr = dvstr(rstr)
print(sstr)
java端解码
/**
 * Decodes a string containing literal {@code \xNN} escapes.
 *
 * <p>Each sequence of a backslash, {@code x} and two hex digits is replaced
 * by the single byte it names; all other bytes of the UTF-8 encoded input are
 * copied through. The collected bytes are then decoded as UTF-8.
 *
 * <p>A backslash that is not followed by {@code x} and two hex digits
 * (including a truncated escape at the end of the input) is kept literally.
 * Leading and trailing whitespace in the data is preserved.
 *
 * @param rstr text mixing plain characters and {@code \xNN} escapes
 * @return the decoded string
 */
public static String drstr(String rstr) {
    // Use UTF-8 explicitly so the result does not depend on the platform charset.
    final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
    byte[] bs = rstr.getBytes(utf8);
    // The decoded form is never longer than the escaped form.
    ByteBuffer buf = ByteBuffer.allocate(bs.length);
    int i = 0;
    while (i < bs.length) {
        if (bs[i] == '\\' && i + 3 < bs.length && bs[i + 1] == 'x') {
            int hi = Character.digit(bs[i + 2], 16);
            int lo = Character.digit(bs[i + 3], 16);
            if (hi >= 0 && lo >= 0) {
                buf.put((byte) ((hi << 4) | lo));
                i += 4;
                continue;
            }
        }
        buf.put(bs[i]);
        i++;
    }
    // Decode only the bytes actually written; the rest of the backing array is
    // zero padding, so there is no need to trim() it away afterwards.
    return new String(buf.array(), 0, buf.position(), utf8);
}
/**
* @ 字符串重编码
*/
public static String erstr(String str) {
final byte[] prefix = "\\x".getBytes();
ByteBuffer buf = ByteBuffer.allocate(str.length()*24);//
for (int i = 0; i < str.length(); i++) {
char ch = str.charAt(i);
if (ch >= 0 && ch <= 127) {
Integer num = (int) ch;
buf.put(num.byteValue());
} else {
byte[] cbs = String.valueOf(ch).getBytes();
for (byte b : cbs) {
int v = b & 0xFF;
String hv = Integer.toHexString(v).toUpperCase();
buf.put(prefix);
buf.put(hv.getBytes());
}
}
}
return new String(buf.array()).trim();
}