2015-02-03 回答
package test;
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small collection of static helpers for inspecting text encodings:
 * hex dumps of encoded bytes, backslash-u escaping and unescaping, and a
 * rough "is this string garbled" heuristic.
 *
 * <p>The class and method names are intentionally left in their original
 * all-lowercase spelling so that existing callers keep compiling.
 */
public class codeutil {

    /** Hex digit alphabet, indexed by nibble value (0-15). */
    private final static String hexstring = "0123456789abcdef";

    /** Matches whitespace runs; compiled once instead of on every call. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s*|\t*|\r*|\n*");

    /** Matches any character in the Unicode punctuation category (P). */
    private static final Pattern PUNCTUATION = Pattern.compile("\\p{P}");

    /**
     * Demo entry point: escapes a two-character Chinese sample, unescapes it
     * again, dumps its UTF-8 bytes as hex, prints a Java byte-array literal
     * for those bytes and finally decodes the bytes back to text.
     *
     * @param args unused
     * @throws UnsupportedEncodingException if the "utf-8" charset name is not
     *         supported by the runtime
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        // The sample is the two characters U+6D4B U+8BD5, written as escapes
        // so this file compiles regardless of the source-file encoding.
        String chinastring = "\u6d4b\u8bd5";
        String unicode = converttounicode(chinastring);
        System.out.println(unicode);
        System.out.println(unicodetochinese(unicode));

        String _x16 = convertto16code(chinastring, "utf-8").toLowerCase();
        System.out.println(_x16);

        String[] sby = converttobitcode(_x16);
        StringBuilder subf = new StringBuilder();
        subf.append("byte[] bytes = {");
        byte[] bytes = new byte[sby.length];
        for (int i = 0; i < sby.length; i++) {
            // Integer.decode understands the "0x" prefix produced above.
            int icode = Integer.decode(sby[i]).intValue();
            bytes[i] = (byte) icode;
            subf.append("(byte)").append(sby[i]).append(",");
        }
        // Drop the trailing comma before closing the literal.
        subf.deleteCharAt(subf.length() - 1);
        subf.append("};");
        System.out.println(subf.toString());
        System.out.println(new String(bytes, "utf-8"));
    }

    /**
     * Encodes a string with the given charset and returns the bytes as a
     * lowercase hex string, two digits per byte.
     *
     * @param str      text to encode; must not be null
     * @param encoding charset name, e.g. "utf-8" for Chinese text
     * @return hex representation of the encoded bytes
     * @throws UnsupportedEncodingException if {@code encoding} is not a
     *         supported charset name
     */
    public static String convertto16code(String str, String encoding)
            throws UnsupportedEncodingException {
        byte[] bytes = str.getBytes(encoding);
        StringBuilder sb = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            // Masking before shifting avoids sign extension of negative bytes.
            sb.append(hexstring.charAt((b & 0xf0) >> 4));
            sb.append(hexstring.charAt(b & 0x0f));
        }
        return sb.toString();
    }

    /**
     * Replaces every char above 0xFF with a backslash-u escape made of
     * exactly four lowercase hex digits; all other chars are copied as is.
     *
     * <p>The escape is zero-padded to four digits so that a following hex
     * letter cannot be mistaken for part of the code unit.
     *
     * @param s text to escape; may be null
     * @return the escaped text, or null when {@code s} is null
     */
    public static String converttounicode(String s) {
        if (s == null) {
            return null;
        }
        StringBuilder sb = new StringBuilder(s.length());
        for (int i = 0; i < s.length(); i++) {
            char c = s.charAt(i);
            if (c > 0xff) {
                String hex = Integer.toHexString(c);
                sb.append("\\u");
                for (int pad = hex.length(); pad < 4; pad++) {
                    sb.append('0');
                }
                sb.append(hex);
            } else {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Splits a hex string into one "0x"-prefixed token per byte
     * (two hex digits each).
     *
     * @param str_16 hex string with an even number of digits
     * @return the tokens in order, or null when {@code str_16} is null,
     *         empty, or has an odd length
     */
    public static String[] converttobitcode(String str_16) {
        // isEmpty() rather than == "": reference comparison misses empty
        // strings that are not the interned literal.
        if (str_16 == null || str_16.isEmpty() || str_16.length() % 2 != 0) {
            return null;
        }
        String[] result = new String[str_16.length() / 2];
        for (int i = 0; i < result.length; i++) {
            result[i] = "0x" + str_16.substring(i * 2, i * 2 + 2);
        }
        return result;
    }

    /**
     * Reverses {@link #converttounicode(String)}: each backslash-u escape
     * followed by one to four hex digits is replaced by the char it names;
     * every other character, including text before, between and after
     * escapes, is copied unchanged.
     *
     * @param unicodestr escaped text; may be null
     * @return the unescaped text, or null when {@code unicodestr} is null
     */
    public static String unicodetochinese(String unicodestr) {
        if (unicodestr == null) {
            return null;
        }
        int len = unicodestr.length();
        StringBuilder buffer = new StringBuilder(len);
        int i = 0;
        while (i < len) {
            char c = unicodestr.charAt(i);
            if (c == '\\' && i + 1 < len && unicodestr.charAt(i + 1) == 'u') {
                int j = i + 2;
                int code = 0;
                int digits = 0;
                // At most four digits: one escape names one UTF-16 code unit.
                while (j < len && digits < 4) {
                    int d = hexvalue(unicodestr.charAt(j));
                    if (d < 0) {
                        break;
                    }
                    code = (code << 4) | d;
                    j++;
                    digits++;
                }
                if (digits > 0) {
                    buffer.append((char) code);
                    i = j;
                    continue;
                }
                // No hex digit followed: fall through and copy literally.
            }
            buffer.append(c);
            i++;
        }
        return buffer.toString();
    }

    /** Returns the value of an ASCII hex digit, or -1 if {@code c} is not one. */
    private static int hexvalue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }

    /**
     * Tells whether a char lies in one of the Unicode blocks this class
     * treats as Chinese text: the CJK ideograph blocks listed below plus
     * general punctuation, CJK symbols/punctuation and half/full-width forms.
     *
     * @param c char to classify
     * @return true when {@code c} belongs to one of those blocks
     */
    public static boolean ischinese(char c) {
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Heuristic check for garbled text. Whitespace and punctuation are
     * stripped first; of the remaining chars, those that are neither a
     * letter/digit nor accepted by {@link #ischinese(char)} are counted, and
     * the text is reported as garbled when their share exceeds 0.4.
     *
     * <p>This method has no side effects; it does not print anything.
     *
     * @param str text to inspect; must not be null
     * @return true when the share of unexpected chars is above the threshold;
     *         false otherwise, including when nothing is left after stripping
     * @throws NullPointerException if {@code str} is null
     */
    public static boolean ismessycode(String str) {
        String after = WHITESPACE.matcher(str).replaceAll("");
        String temp = PUNCTUATION.matcher(after).replaceAll("");
        char[] ch = temp.trim().toCharArray();
        if (ch.length == 0) {
            // Nothing left to judge; avoids a 0/0 division below.
            return false;
        }
        float chlength = ch.length;
        float count = 0;
        for (char c : ch) {
            if (!Character.isLetterOrDigit(c) && !ischinese(c)) {
                count = count + 1;
            }
        }
        // Kept in float on purpose: the comparison against the double 0.4
        // then behaves exactly as before for ratios at the boundary.
        float result = count / chlength;
        return result > 0.4;
    }
}
理解了上面这段代码,解决你的问题就很容易了。