问题描述
<?xml version="1.0" encoding="hz-gb-2312"?>
<Informations>
<Information>
<~{Pr:E~}>1</~{Pr:E~}>
<~{PEO"DZH]~}>2016/7/17 9:45:16,~{11>)NwU>119c3!356`#,A+;(GEIO6+NwK+7=OrRT<0H};78(B7TQ5@S56B!#A+;(3XNwB76SN29}=p<R4eGE!"NwH};7PBPKGEVAAy@oGEK+7=OrM(PP;:B}!#~}</~{PEO"DZH]~}>
<~{PEO"5H<6~} />
</Information>
<sign>
<~{G)JU1jV>~}>1</~{G)JU1jV>~}>
</sign>
</Informations>
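上面是一段需要解码的文本信息,其中声明的 encoding 是 hz-gb-2312。
查了一下,这是简体中文的 HZ 编码(RFC 1843),用 7 位 ASCII 字符来表示 GB2312 汉字。
解码方法是:取出 ~{ 和 ~} 之间的字符,把每个字符的 byte 值加 128,得到正常的 GB2312 byte 数组,再用 new String(byte[], "GBK") 转成字符串;~{ ~} 之外的内容保持原样。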
我的代码
import java.io.UnsupportedEncodingException;
/**
 * Decoder for Simplified-Chinese text in the HZ encoding
 * (declared in XML as encoding="hz-gb-2312").
 *
 * <p>HZ transports GB2312 double-byte characters as 7-bit ASCII. A GB segment is
 * wrapped in the markers "~{" and "~}"; adding 128 to every byte between the
 * markers yields the ordinary GB2312 bytes, which are then decoded with the GBK
 * charset (a superset of GB2312). Text outside the markers is plain ASCII.
 *
 * @author lingling.zhang
 */
public class HzUtil {
    /** Escape sequence that opens a GB segment. */
    static final String S_MARK = "~{";
    /** Escape sequence that closes a GB segment. */
    static final String E_MARK = "~}";

    /**
     * Decodes an HZ-encoded string.
     *
     * <p>Example: the segment ~{PEO"DZH]~} becomes the GB2312 bytes
     * D0 C5 CF A2 C4 DA C8 DD, i.e. the four characters "信息内容".
     *
     * <p>Outside GB segments the two ASCII-mode escapes of the HZ specification are
     * honoured: "~~" produces a single literal '~', and '~' followed by a line feed
     * is a line continuation that produces nothing. Any other '~' is copied through
     * unchanged.
     *
     * @param rawStr the HZ-encoded text; must not be {@code null}
     * @return the decoded text
     * @throws HzDecodeException if a "~{" marker has no matching "~}"
     * @throws UnsupportedEncodingException if the runtime lacks the US-ASCII or GBK charset
     */
    public static String decode(String rawStr) throws HzDecodeException, UnsupportedEncodingException {
        final int len = rawStr.length();
        StringBuilder sb = new StringBuilder(len);
        int pos = 0;
        while (pos < len) {
            int tilde = rawStr.indexOf('~', pos);
            if (tilde == -1) {
                break;
            }
            // Plain ASCII text before the escape character is copied verbatim.
            sb.append(rawStr, pos, tilde);
            if (rawStr.startsWith(S_MARK, tilde)) {
                int end = rawStr.indexOf(E_MARK, tilde + 2);
                if (end == -1) {
                    // Start marker without a matching end marker.
                    throw new HzDecodeException("can not find endMark " + E_MARK
                            + ", startMark index = " + tilde + ",rawStr=" + rawStr);
                }
                sb.append(decodeItem(rawStr.substring(tilde + 2, end)));
                pos = end + 2;
            } else if (rawStr.startsWith("~~", tilde)) {
                // Escaped tilde. Consuming both characters also prevents "~~{" from
                // being mistaken for the start of a GB segment.
                sb.append('~');
                pos = tilde + 2;
            } else if (rawStr.startsWith("~\n", tilde)) {
                // Line continuation: swallowed by the decoder.
                pos = tilde + 2;
            } else {
                // Not a recognised escape; keep the tilde as-is.
                sb.append('~');
                pos = tilde + 1;
            }
        }
        sb.append(rawStr, pos, len);
        return sb.toString();
    }

    /**
     * Decodes the content of a single GB segment, i.e. the characters found between
     * "~{" and "~}" (markers excluded).
     *
     * @param input the 7-bit characters of one GB segment
     * @return the text obtained by setting the high bit of every byte and decoding as GBK
     * @throws UnsupportedEncodingException if the runtime lacks the US-ASCII or GBK charset
     */
    public static String decodeItem(String input) throws UnsupportedEncodingException {
        // HZ payload is 7-bit by definition; pin the charset instead of relying on
        // the platform default, which may map characters to different byte values.
        byte[] ascii = input.getBytes("US-ASCII");
        byte[] gb = new byte[ascii.length];
        for (int i = 0; i < ascii.length; i++) {
            // US-ASCII bytes are 0..127, so OR-ing the high bit equals adding 128.
            gb[i] = (byte) (ascii[i] | 0x80);
        }
        return new String(gb, "GBK");
    }

    /**
     * Demo: decodes a sample HZ-encoded XML document and prints the result.
     *
     * @param args unused
     * @throws UnsupportedEncodingException if the runtime lacks the US-ASCII or GBK charset
     * @throws HzDecodeException if the sample contains an unterminated GB segment
     */
    public static void main(String[] args) throws UnsupportedEncodingException, HzDecodeException {
        // Every "~{" in the sample must be closed by "~}"; a truncated closer makes the
        // decoder treat the following markup as GB data or fail on a missing end marker.
        String str = "<?xml version=\"1.0\" encoding=\"hz-gb-2312\"?> <Informations> <Information> <~{Pr:E~}>1</~{Pr:E~}> <~{PEO\"DZH]~}>2016/7/17 9:45:16,~{11>)NwU>119c3!356`#,A+;(GEIO6+NwK+7=OrRT<0H};78(B7TQ5@S56B!#A+;(3XNwB76SN29}=p<R4eGE!\"NwH};7PBPKGEVAAy@oGEK+7=OrM(PP;:B}!#~}</~{PEO\"DZH]~}> <~{PEO\"5H<6~} /> </Information> <sign> <~{G)JU1jV>~}>1</~{G)JU1jV>~}> </sign> </Informations> ";
        String res = HzUtil.decode(str);
        System.out.println(res);
    }
}