实例代码如下:
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.io.Text;
public class TravelLabel {
// Matching rule for the UTF-8 check: the whole string must be made up of characters in
// \x00-\x7f, or of a character in \xe0-\xef followed by exactly two characters in \x80-\xbf
// (the byte layout of ASCII and of three-byte UTF-8 sequences, seen as one char per byte).
// NOTE(review): two-byte (\xc2-\xdf lead) and four-byte (\xf0-\xf4 lead) sequences are not
// matched, and the empty string does not match because of the trailing `+` - confirm intended.
protected static String encodeReg = "^(?:[\\x00-\\x7f]|[\\xe0-\\xef][\\x80-\\xbf]{2})+$";
// The following three methods determine whether a URL's encoding is UTF-8 or GBK.
/**
 * Reports whether the unescaped form of {@code string} matches {@code encodeReg} in full.
 *
 * <p>The pattern is compiled from the current value of {@code encodeReg} on every call, the
 * input is passed through {@code unescape}, and the result must match the pattern from its
 * first to its last character.
 *
 * @param string the text to test; it is handed to {@code unescape} as-is
 * @return {@code true} when the entire unescaped text matches the pattern, {@code false}
 *     otherwise
 */
public static Boolean isUTF8(String string) {
    // Evaluation order is kept: compile the pattern, then unescape, then match.
    return Pattern.compile(encodeReg)
            .matcher(unescape(string))
            .matches();
}
/**
 * Reports whether {@code string} survives a GBK encode/decode round trip unchanged, i.e.
 * whether every character in it is representable in GBK.
 *
 * <p>Characters that GBK cannot encode are replaced during encoding, so the decoded text
 * differs from the input and the method returns {@code false}.
 *
 * @param string the text to test; must not be {@code null}
 * @return {@code true} if encoding to GBK and decoding from GBK yields an equal string
 * @throws UnsupportedEncodingException if the runtime does not provide the GBK charset
 * @throws NullPointerException if {@code string} is {@code null}
 */
public static boolean isGBK(String string) throws UnsupportedEncodingException {
    final String charsetName = "GBK";
    // Decode with the same charset that was used to encode. The single-argument
    // String(byte[]) constructor decodes with the platform default charset, which made the
    // result depend on the machine (non-ASCII text always failed on a UTF-8 default).
    String roundTripped = new String(string.getBytes(charsetName), charsetName);
    return string.equals(roundTripped);
}
p