关于SQL Server数据表中乱码的情况,在涉及不同语言版本的SQL交互中容易碰到,有时间测试了一下,并放上结果与大家讨论。
--
[测试一]
create database latin_db collate Latin1_General_BIN
go
use latin_db
go
set nocount on
create table t1 (
id int ,
notes nchar ( 5 )
)
go
insert into t1 values ( 1 , ' abcde ' ) -- ok
insert into t1 values ( 2 , ' abcdef ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t1 values ( 3 , ' 上海多美n ' ) -- ok
insert into t1 values ( 4 , ' 上海多美好 ' ) -- ok
insert into t1 values ( 5 , ' 上海多美好d ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t1 values ( 6 ,N ' 上海多美好 ' ) -- ok
select * from t1
/**/ /*
id notes
----------- -----
1 abcde
3 ????n --非unicode下 这部分汉字在db code page没有匹配 随后以?(单字节)替代
4 ?????
6 上海多美好
*/
create table t2 (
id int ,
notes char ( 11 )
)
go
insert into t2 values ( 1 , ' abcdeabcdef ' ) -- ok
insert into t2 values ( 2 , ' abcdeabcdefg ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t2 values ( 3 , ' 上海多美好上海多美好好 ' ) -- ok
insert into t2 values ( 4 , ' 上海多美好上海多美好d ' ) -- ok
insert into t2 values ( 5 , ' 上海多美好上海多美好好好 ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t2 values ( 6 ,N ' 上海多美好上海多美好好 ' ) -- ok
select * from t2
/**/ /*
id notes
----------- -----------
1 abcdeabcdef
3 ??????????? --非unicode下 汉字仍无法匹配
4 ??????????d
6 ??????????? --汉字以unicode在db code page中得以解析 但存储的过程中发生困难
*/
-- [测试二]
create database chinese_db collate Chinese_PRC_BIN
go
use chinese_db
go
set nocount on
create table t1 (
id int ,
notes nchar ( 5 )
)
go
insert into t1 values ( 1 , ' abcde ' ) -- ok
insert into t1 values ( 2 , ' abcdef ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t1 values ( 3 , ' 上海多美n ' ) -- ok
insert into t1 values ( 4 , ' 上海多美好 ' ) -- ok
insert into t1 values ( 5 , ' 上海多美好d ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t1 values ( 6 ,N ' 上海多美好 ' ) -- ok
select * from t1
/**/ /*
id notes
----------- -----
1 abcde
3 上海多美n
4 上海多美好 --与测试一结果不同,非unicode编码能在db code page中被解析
6 上海多美好
*/
create table t2 (
id int ,
notes char ( 11 )
)
go
insert into t2 values ( 1 , ' abcdeabcdef ' ) -- ok
insert into t2 values ( 2 , ' abcdeabcdefg ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t2 values ( 3 , ' 上海多美好上海多美好好 ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t2 values ( 4 , ' 上海多美好上海多美好d ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t2 values ( 5 , ' 上海多美 ' ) -- ok
insert into t2 values ( 6 , ' 上海多美好 ' ) -- ok
insert into t2 values ( 7 , ' 上海多美好上 ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t2 values ( 8 ,N ' 上海多美好 ' ) -- ok
insert into t2 values ( 9 ,N ' 上海多美好上 ' )
-- string or binary data would be truncated. the statement has been terminated.
select * from t2
/**/ /*
id notes
----------- -----------
1 abcdeabcdef --2~4不能通过的原因是数据超长
5 ÉϺ£¶àÃÀ --非unicode汉字被解析 但在以char方式存储上发生困难
6 ÉϺ£¶àÃÀºÃ
8 ÉϺ£¶àÃÀºÃ
*/
-- 注:以上内容在 Windows 2003 EE ENG SP1 + SQL 2000 EE ENG SP4上测试 OS上已装中文支持
create database latin_db collate Latin1_General_BIN
go
use latin_db
go
set nocount on
create table t1 (
id int ,
notes nchar ( 5 )
)
go
insert into t1 values ( 1 , ' abcde ' ) -- ok
insert into t1 values ( 2 , ' abcdef ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t1 values ( 3 , ' 上海多美n ' ) -- ok
insert into t1 values ( 4 , ' 上海多美好 ' ) -- ok
insert into t1 values ( 5 , ' 上海多美好d ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t1 values ( 6 ,N ' 上海多美好 ' ) -- ok
select * from t1
/**/ /*
id notes
----------- -----
1 abcde
3 ????n --非unicode下 这部分汉字在db code page没有匹配 随后以?(单字节)替代
4 ?????
6 上海多美好
*/
create table t2 (
id int ,
notes char ( 11 )
)
go
insert into t2 values ( 1 , ' abcdeabcdef ' ) -- ok
insert into t2 values ( 2 , ' abcdeabcdefg ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t2 values ( 3 , ' 上海多美好上海多美好好 ' ) -- ok
insert into t2 values ( 4 , ' 上海多美好上海多美好d ' ) -- ok
insert into t2 values ( 5 , ' 上海多美好上海多美好好好 ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t2 values ( 6 ,N ' 上海多美好上海多美好好 ' ) -- ok
select * from t2
/**/ /*
id notes
----------- -----------
1 abcdeabcdef
3 ??????????? --非unicode下 汉字仍无法匹配
4 ??????????d
6 ??????????? --汉字以unicode在db code page中得以解析 但存储的过程中发生困难
*/
-- [测试二]
create database chinese_db collate Chinese_PRC_BIN
go
use chinese_db
go
set nocount on
create table t1 (
id int ,
notes nchar ( 5 )
)
go
insert into t1 values ( 1 , ' abcde ' ) -- ok
insert into t1 values ( 2 , ' abcdef ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t1 values ( 3 , ' 上海多美n ' ) -- ok
insert into t1 values ( 4 , ' 上海多美好 ' ) -- ok
insert into t1 values ( 5 , ' 上海多美好d ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t1 values ( 6 ,N ' 上海多美好 ' ) -- ok
select * from t1
/**/ /*
id notes
----------- -----
1 abcde
3 上海多美n
4 上海多美好 --与测试一结果不同,非unicode编码能在db code page中被解析
6 上海多美好
*/
create table t2 (
id int ,
notes char ( 11 )
)
go
insert into t2 values ( 1 , ' abcdeabcdef ' ) -- ok
insert into t2 values ( 2 , ' abcdeabcdefg ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t2 values ( 3 , ' 上海多美好上海多美好好 ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t2 values ( 4 , ' 上海多美好上海多美好d ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t2 values ( 5 , ' 上海多美 ' ) -- ok
insert into t2 values ( 6 , ' 上海多美好 ' ) -- ok
insert into t2 values ( 7 , ' 上海多美好上 ' )
-- string or binary data would be truncated. the statement has been terminated.
insert into t2 values ( 8 ,N ' 上海多美好 ' ) -- ok
insert into t2 values ( 9 ,N ' 上海多美好上 ' )
-- string or binary data would be truncated. the statement has been terminated.
select * from t2
/**/ /*
id notes
----------- -----------
1 abcdeabcdef --2~4不能通过的原因是数据超长
5 ÉϺ£¶àÃÀ --非unicode汉字被解析 但在以char方式存储上发生困难
6 ÉϺ£¶àÃÀºÃ
8 ÉϺ£¶àÃÀºÃ
*/
-- 注:以上内容在 Windows 2003 EE ENG SP1 + SQL 2000 EE ENG SP4上测试 OS上已装中文支持
通过以上测试,我们似乎可以认为字符类型(char/varchar/nchar/nvarchar)以如下方式被存储
1 字符串本身需要被操作系统解析
2 字符串解析的结果会跟数据库code page做匹配, 无论能否匹配, 数据库需要计算字符串的长度是否可以被当前字段容纳, 如果不可以容纳, 放弃该语句, 如果可以容纳, 以字段类型进行存储
欢迎大家讨论。