对汉字字符串按照拼音排序
seagull 发表于 2005-3-11 8:56:00
今天上水木发现有人问怎样对汉字字符串按照拼音排序,查了一遍文档,发现 Java 缺省对字符串是按照每个 Unicode 字符的编码大小排序的。接着查到(要是没有了 Google,世界将会怎样?)有一个 java.text.Collator 类,但是论坛上有人说这个类没用,和缺省的排序效果一样,要用 C# 写一个类生成 Java 程序。这要是真的,那还学 Java 干嘛?
于是把别人的Test.java改了改,分别用指定的Locale和不指定Locale试了试,结果都是OK的:
import java.util.*;
import java.text.*;
/**
 * Small console demo that sorts the same four Chinese names three ways:
 * with the natural {@code String} ordering, with the collator for
 * {@link java.util.Locale#CHINA}, and with the collator of the JVM's
 * default locale. Each variant prints one name per line followed by an
 * empty line.
 */
public class Test2 {

    /** Returns a fresh, unsorted copy of the sample names used by every demo. */
    private static String[] sampleNames() {
        return new String[] {"张三", "李四", "王五", "刘六"};
    }

    /** Prints every entry on its own line, then a blank separator line. */
    private static void printLines(String[] lines) {
        int index = 0;
        while (index < lines.length) {
            System.out.println(lines[index]);
            index++;
        }
        System.out.println();
    }

    /** Sorts the sample names with {@code String}'s natural ordering and prints them. */
    public static void testDefault() {
        String[] names = sampleNames();
        Arrays.sort(names);
        printLines(names);
    }

    /**
     * Sorts the sample names with the collator obtained for
     * {@code Locale.CHINA} and prints them. Other locales can be tried by
     * changing the argument passed to {@code Collator.getInstance}.
     */
    public static void testChinaLocale() {
        Collator chinaCollator = Collator.getInstance(java.util.Locale.CHINA);
        String[] names = sampleNames();
        Arrays.sort(names, chinaCollator);
        printLines(names);
    }

    /**
     * Sorts the sample names with the collator of the default locale
     * ({@code Collator.getInstance()} without arguments) and prints them.
     */
    public static void testDefaultLocale() {
        Collator defaultCollator = Collator.getInstance();
        String[] names = sampleNames();
        Arrays.sort(names, defaultCollator);
        printLines(names);
    }

    /** Runs the three demos in order: natural, China locale, default locale. */
    public static void main(String args[]) {
        testDefault();
        testChinaLocale();
        testDefaultLocale();
    }
}
输出结果:
刘六
张三
李四
王五

李四
刘六
王五
张三

李四
刘六
王五
张三
看来Java还不是那么差嘛。至于那位老兄,估计是缺省的Locale有问题。
下面是java.net上Google到的对字符串中的字符按照拼音排序的测试代码:
import java.util.*;
import java.io.*;
import java.text.*;
/**
 * Comparator that orders text with the collation rules obtained for
 * {@link java.util.Locale#CHINA}.
 *
 * <p>It accepts either {@link CollationKey} objects (compared through their
 * source strings) or plain {@link String}s, so it can be handed directly to
 * {@code Arrays.sort} for both kinds of array. {@link #doSort(String)} uses it
 * to sort the individual characters of a string and write them to the file
 * {@code cjk.txt}.
 */
public class CharSort implements Comparator {

    /** Collator used for every comparison. */
    java.text.RuleBasedCollator collator; // you can set your rules for the instance "collator"

    /** Creates a sorter backed by the collator for {@code Locale.CHINA}. */
    CharSort() {
        // try testing various locales
        collator = (RuleBasedCollator) java.text.Collator.getInstance(java.util.Locale.CHINA);
    }

    /**
     * Sorts the characters of {@code str} with this comparator and writes them,
     * followed by a line separator, to the relative path {@code cjk.txt}
     * encoded as UTF-8. An existing file is overwritten.
     *
     * @param str the text whose characters are to be sorted
     * @throws java.io.IOException if the file cannot be opened or written
     */
    public void doSort(String str) throws java.io.IOException {
        java.text.CollationKey[] keys = toCollationKeys(str);
        java.util.Arrays.sort(keys, this);
        BufferedWriter bw = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream("cjk.txt"), "UTF8"));
        try {
            for (int i = 0; i < keys.length; i++) {
                bw.write(keys[i].getSourceString());
            }
            bw.newLine();
        } finally {
            // Close even when a write fails so the file handle is not leaked.
            bw.close();
        }
    }

    /**
     * Builds one collation key per Unicode code point of {@code text}.
     * Splitting on code points rather than {@code char}s keeps surrogate
     * pairs (characters outside the BMP) together.
     */
    private java.text.CollationKey[] toCollationKeys(String text) {
        int count = text.codePointCount(0, text.length());
        java.text.CollationKey[] keys = new java.text.CollationKey[count];
        int start = 0;
        for (int i = 0; i < count; i++) {
            int end = text.offsetByCodePoints(start, 1);
            keys[i] = collator.getCollationKey(text.substring(start, end));
            start = end;
        }
        return keys;
    }

    /**
     * Compares two values with the collator.
     *
     * @param c1 a {@link CollationKey} or {@link String}
     * @param c2 a {@link CollationKey} or {@link String}
     * @return negative, zero or positive as {@code c1} sorts before, equal to
     *         or after {@code c2}
     * @throws IllegalArgumentException if either argument is {@code null} or
     *         of any other type
     */
    public int compare(Object c1, Object c2) throws IllegalArgumentException {
        return collator.compare(textOf(c1), textOf(c2));
    }

    /** Extracts the string to collate from a supported argument type. */
    private static String textOf(Object value) {
        if (value instanceof CollationKey) {
            return ((CollationKey) value).getSourceString();
        }
        if (value instanceof String) {
            return (String) value;
        }
        throw new IllegalArgumentException(
                "expected a CollationKey or String but got "
                        + (value == null ? "null" : value.getClass().getName()));
    }

    /**
     * Tells whether the two values collate as equal. This is a two-argument
     * helper and does not override {@link Object#equals(Object)}.
     *
     * @throws IllegalArgumentException under the same conditions as
     *         {@link #compare(Object, Object)}
     */
    public boolean equals(Object c1, Object c2) {
        return this.compare(c1, c2) == 0;
    }

    /**
     * Demo entry point: writes the sorted characters of a sample sentence to
     * {@code cjk.txt}, then sorts three sample names and prints them.
     */
    public static void main(String[] args) throws java.lang.Exception {
        CharSort chSort = new CharSort();
        // NOTE(review): the literals below were mis-encoded in the original
        // listing (GBK bytes shown as Latin-1) and have been restored; the
        // third word of the sentence was partly lost and is reconstructed.
        String str = " 我觉得我完成这个没用到什么工具";
        chSort.doSort(str);
        //System.out.println(str);
        String[] arr = {"章三", "里斯", "王五"};
        java.util.Arrays.sort(arr, chSort);
        for (int i = 0; i < arr.length; i++) {
            System.out.println(arr[i]);
        }
    }
}