/// <summary>
/// Encodes a few sample strings (ASCII-only and Chinese) with the ASCII,
/// UTF-8 and "Unicode" (UTF-16 in .NET) encodings, then prints the byte
/// count, the raw bytes, and the string decoded back from those bytes.
/// </summary>
private static void ShowCode() {
    string[] samples = { "b", "abcd", "乙", "甲乙丙丁" };

    // Parallel arrays: modeNames[k] is the label printed for encodings[k].
    string[] modeNames = { "ASCII", "UTF8", "Unicode" };
    Encoding[] encodings = { Encoding.ASCII, Encoding.UTF8, Encoding.Unicode };

    foreach (string sample in samples) {
        for (int k = 0; k < encodings.Length; k++) {
            // Round-trip the sample through the current encoding.
            byte[] encoded = encodings[k].GetBytes(sample);
            string decoded = encodings[k].GetString(encoded, 0, encoded.Length);

            Console.WriteLine("Mode: {0}, String: {1}, Buffer.Length: {2}",
                modeNames[k], sample, encoded.Length);

            // Dump every byte as a decimal value followed by a single space.
            Console.WriteLine("Buffer:");
            foreach (byte value in encoded) {
                Console.Write(value + " ");
            }

            // The leading "\n" ends the byte line; the trailing one leaves a blank separator line.
            Console.WriteLine("\nRetrived: {0}\n", decoded);
        }
    }
}
程序输出为:
Mode: ASCII, String: b, Buffer.Length: 1 Buffer: 98 Retrived: b Mode: UTF8, String: b, Buffer.Length: 1 Buffer: 98 Retrived: b Mode: Unicode, String: b, Buffer.Length: 2 Buffer: 98 0 Retrived: b Mode: ASCII, String: abcd, Buffer.Length: 4 Buffer: 97 98 99 100 Retrived: abcd Mode: UTF8, String: abcd, Buffer.Length: 4 Buffer: 97 98 99 100 Retrived: abcd Mode: Unicode, String: abcd, Buffer.Length: 8 Buffer: 97 0 98 0 99 0 100 0 Retrived: abcd Mode: ASCII, String: 乙, Buffer.Length: 1 Buffer: 63 Retrived: ? Mode: UTF8, String: 乙, Buffer.Length: 3 Buffer: 228 185 153 Retrived: 乙 Mode: Unicode, String: 乙, Buffer.Length: 2 Buffer: 89 78 Retrived: 乙 Mode: ASCII, String: 甲乙丙丁, Buffer.Length: 4 Buffer: 63 63 63 63 Retrived: ???? Mode: UTF8, String: 甲乙丙丁, Buffer.Length: 12 Buffer: 231 148 178 228 185 153 228 184 153 228 184 129 Retrived: 甲乙丙丁 Mode: Unicode, String: 甲乙丙丁, Buffer.Length: 8 Buffer: 50 117 89 78 25 78 1 78 Retrived: 甲乙丙丁
大体上可以得出这么几个结论:
- ASCII不能保存中文:非ASCII字符在编码时会被替换为“?”(字节值63),解码后无法还原(貌似谁都知道=_-`)。
- UTF8是变长编码。在对ASCII字符编码时,UTF8更省空间,只占1个字节,与ASCII编码方式和长度相同;Unicode(在.NET中即UTF-16小端序)在对ASCII字符编码时,占用2个字节,且第2个字节补零。
- UTF8在对常用中文字符编码时需要占用3个字节;Unicode(UTF-16)对常用中文字符编码则只需要2个字节。(位于基本多文种平面之外的生僻字在两种编码下都需要4个字节。)