注意:本文适用于使用 INSERT 插入数据时,出现 Incorrect string value: '\xF0\x9F\x92\xBC' for column 'Rins' at row 1 错误的情况。
最近在写一个爬虫爬取医学相关网站,其中遇到了 Incorrect string value: '\xF0\x9F\x92\xBC' for column 'Rins' at row 1 错误。
经过一番谷歌百度后得知:MySQL 数据库的 utf8 字符集(即 utf8mb3)每个字符最多只支持 3 个字节的 UTF-8 编码,当遇到需要 4 个字节编码的字符(例如 emoji 表情或部分生僻字,上面的 \xF0\x9F\x92\xBC 就是一个 4 字节的 emoji)时就会报上述错误。
解决方法就是在创建表的时候,将默认的 utf8 字符集改为 utf8mb4,即为需要存储此类字符的列指定 CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci。
具体如下所示:
-- Table `cdd`: one row per entry, holding its url, title, names and a set of
-- long descriptive text sections.
--
-- Every character column is stored as utf8mb4. MySQL's `utf8` charset is
-- limited to 3 bytes per character, so inserting a 4-byte UTF-8 sequence
-- (e.g. '\xF0\x9F\x92\xBC') into a `utf8` column fails with
-- "Incorrect string value". The table default is utf8mb4 as well, so columns
-- without an explicit charset (`url`, `title`, `english_name`) are covered too.
CREATE TABLE `cdd` (
    `cdd_ID` int(11) NOT NULL,
    -- The next three columns inherit the table default charset/collation
    -- (utf8mb4 / utf8mb4_general_ci) declared at the bottom of this statement.
    `url` varchar(255) NOT NULL,
    `title` varchar(255) NOT NULL,
    `english_name` varchar(255) NOT NULL COMMENT '英文名',
    -- Keeps its unicode_ci collation family, now on the 4-byte charset.
    `alias` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL COMMENT '别名',
    `department` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '疾病分类',
    `icd_num` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT 'ICD号',
    `summary` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '概述',
    `epidemiology` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '流行病学',
    `pathogen` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL,
    `nosogenesis` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '发病机制',
    `clinical_manifestation` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '临床表现',
    `complication` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '并发症',
    `laboratory_examination` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '实验室检查',
    `auxiliary_examination` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '辅助检查',
    `diagnose` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '诊断',
    `antidiastole` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '鉴别诊断',
    `cure` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '治疗',
    `prognosis` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '预后',
    `precaution` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '预防',
    PRIMARY KEY (`cdd_ID`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;