什么是JSON
- JSON(JavaScript Object Notation)是一种轻量级的数据交换语言,并且是独立于语言的文本格式。
- 一些NoSQL数据库选择JSON作为其数据存储格式,比如:MongoDB、CouchDB等。
- MySQL5.7.x开始支持JSON数据类型。
{
"firstName": "John", -- Key : Value 格式
"lastName": "Smith",
"sex": "male",
"age": 25,
"address": -- Key : Value ; 其中 Value 也是一个 Key-Value 的结构
{
"streetAddress": "21 2nd Street",
"city": "New York",
"state": "NY",
"postalCode": "10021"
},
"phoneNumber":
[
{
"type": "home",
"number": "212 555-1234"
},
{
"type": "fax",
"number": "646 555-4567"
}
]
}
结构化和非结构化
结构化
- 二维表结构(行和列)
- 使用SQL语句进行操作
非结构化
- 使用Key-Value格式定义数据,无结构定义
- Value可以嵌套Key-Value格式的数据
- 使用JSON进行实现
--
-- SQL创建User表
--
create table user (
id bigint not null auto_increment,
user_name varchar(10),
age int,
primary key(id)
);
#
# JSON定义的User表
#
db.user.insert({
user_name:"tom",
age:30
})
db.createCollection("user")
JSON VS BLOB
JSON
- JSON数据可以做有效性检查;
- JSON使得查询性能提升;
- JSON支持部分属性索引,通过虚拟列的功能可以对JSON中的部分数据进行索引;
BLOB
- BLOB类型无法在数据库层做约束性检查;
- BLOB进行查询,需要遍历所有字符串;
- BLOB做只能做指定长度的索引;
5.7之前,只能把JSON当作BLOB进行存储。数据库层面无法对JSON数据做一些操作,只能由应用程序处理。
JSON实践
创建json表&插入数据
--
-- 创建带json字段的表
--
mysql> create table user (
-> uid int auto_increment,
-> data json,
-> primary key(uid)
-> );
--
-- 插入json数据
--
mysql> insert into user values (
-> null, -- 自增长数据,可以插入null
-> '{
'> "name":"tom",
'> "age":18,
'> "address":"SZ"
'> }'
-> );
mysql> insert into user values (
-> 0,
-> '{
'> "name":"jim",
'> "mail":"jim@163.com"
'> }'
-> );
mysql> insert into user values ( null, "can you insert it?");
ERROR 3140 (22032): Invalid JSON text: "Invalid value." at position 0 in value for column 'user.data'.
mysql> select * from user;
+-----+----------------------------------------------------+
| uid | data |
+-----+----------------------------------------------------+
| 1 | {"age": 18, "name": "jack", "address": "hangzhuo"} |
| 2 | {"mail": "jim@163.com", "name": "jim"} |
+-----+----------------------------------------------------+
JSON常用函数
json_extract
--
-- 使用json_extract提取数据
-- 原型 : JSON_EXTRACT(json_doc, path[, path] ...)
--
mysql> select json_extract('[10, 20, [30, 40]]', '$[1]');
+--------------------------------------------+
| json_extract('[10, 20, [30, 40]]', '$[1]') |
+--------------------------------------------+
| 20 | -- 从list中抽取 下标 为1的元素(下标从0开始)
+--------------------------------------------+
mysql> select
-> json_extract(data, '$.name'), -- 提起name字段的数据
-> json_extract(data, '$.address') -- 提取address字段的数据
-> from user;
+------------------------------+---------------------------------+
| json_extract(data, '$.name') | json_extract(data, '$.address') |
+------------------------------+---------------------------------+
| "jack" | "hangzhuo" |
| "jim" | NULL | jim 没有address字段,填充了NULL
+------------------------------+---------------------------------+
json_object
--
-- json_object 将list(K-V对)封装成json格式
-- 原型 : JSON_OBJECT([key, val[, key, val] ...])
--
mysql> select json_object("name", "jery", "email", "jery@163.com", "age",33);
+----------------------------------------------------------------+
| json_object("name", "jery", "email", "jery@163.com", "age",33) |
+----------------------------------------------------------------+
| {"age": 33, "name": "jery", "email": "jery@163.com"} | -- 封装成了K-V对
+----------------------------------------------------------------+
mysql> insert into user values (
-> null,
-> json_object("name", "jery", "email", "jery@163.com", "age",33) -- 进行封装
-> );
mysql> select * from user;
+-----+------------------------------------------------------+
| uid | data |
+-----+------------------------------------------------------+
| 1 | {"age": 18, "name": "jack", "address": "hangzhuo"} |
| 2 | {"mail": "jim@163.com", "name": "jim"} |
| 3 | {"age": 33, "name": "jery", "email": "jery@163.com"} |
+-----+------------------------------------------------------+
json_insert
--
-- json_insert 插入数据
-- 原型 : JSON_INSERT(json_doc, path, val[, path, val] ...)
--
mysql> set @j = '{ "a": 1, "b": [2, 3]}';
mysql> select json_insert(@j, '$.a', 10, '$.c', '[true, false]');
+----------------------------------------------------+
| json_insert(@j, '$.a', 10, '$.c', '[true, false]') |
+----------------------------------------------------+
| {"a": 1, "b": [2, 3], "c": "[true, false]"} | -- a还是=1,存在的被忽略,不受影响
+----------------------------------------------------+ -- c之前不存在,则插入
mysql> update user set data = json_insert(data, "$.address_2", "BJ") where uid = 1; -- 插入 addres_2
Query OK, 1 row affected (0.07 sec)
Rows matched: 1 Changed: 1 Warnings: 0
mysql> select * from user;
+-----+-----------------------------------------------------------------------+
| uid | data |
+-----+-----------------------------------------------------------------------+
| 1 | {"age": 18, "name": "jack", "address": "hangzhuo", "address_2": "BJ"} | -- 增加了addres_2 : "BJ"
| 2 | {"mail": "jim@163.com", "name": "jim"} |
| 3 | {"age": 33, "name": "jery", "email": "jery@163.com"} |
+-----+-----------------------------------------------------------------------+
json_merge
--
-- json_merge 合并数据并返回。注意:原数据不受影响
-- 原型 : JSON_MERGE(json_doc, json_doc[, json_doc] ...)
--
mysql> select json_merge('{"name": "x"}', '{"id": 47}'); -- 原来有两个JSON
+-------------------------------------------+
| json_merge('{"name": "x"}', '{"id": 47}') |
+-------------------------------------------+
| {"id": 47, "name": "x"} | -- 合并成一个
+-------------------------------------------+
mysql> select
-> json_merge(
-> json_extract(data, '$.address'), -- json 1
-> json_extract(data, '$.address_2')) -- jons 2
-> from user where uid = 1;
+---------------------------------------------------------------------------------+
| json_merge( json_extract(data, '$.address'), json_extract(data, '$.address_2')) |
+---------------------------------------------------------------------------------+
| ["SZ", "BJ"] | -- 合并成一个json
+---------------------------------------------------------------------------------+
mysql> select * from user;
+-----+-----------------------------------------------------------------------+
| uid | data |
+-----+-----------------------------------------------------------------------+
| 1 | {"age": 18, "name": "jack", "address": "hangzhuo", "address_2": "BJ"} | --不受影响
| 2 | {"mail": "jim@163.com", "name": "jim"} |
| 3 | {"age": 33, "name": "jery", "email": "jery@163.com"} |
+-----+-----------------------------------------------------------------------+
json_array_append
--
-- json_array_append 追加数据
-- 原型 : JSON_ARRAY_APPEND(json_doc, path, val[, path, val] ...)
-- json_append 在5.7.9 中重命名为 json_array_append
--
mysql> set @j = '["a", ["b", "c"], "d"]'; -- 下标为1的元素中只有["b", "c"]
mysql> select json_array_append(@j, '$[1]', 1);
+----------------------------------+
| json_array_append(@j, '$[1]', 1) |
+----------------------------------+
| ["a", ["b", "c", 1], "d"] | -- 现在插入了 数字 1
+----------------------------------+
mysql> update user set data = json_array_append(
-> data,
-> '$.address',
-> json_extract(data, '$.address_2'))
-> where uid = 1;
Query OK, 1 row affected (0.10 sec)
Rows matched: 1 Changed: 1 Warnings: 0
mysql> select * from user;
+-----+-------------------------------------------------------------------------------+
| uid | data |
+-----+-------------------------------------------------------------------------------+
| 1 | {"age": 18, "name": "jack", "address": ["hangzhuo", "BJ"], "address_2": "BJ"} | --address_2追加到address
| 2 | {"mail": "jim@163.com", "name": "jim"} |
| 3 | {"age": 33, "name": "jery", "email": "jery@163.com"} |
+-----+-------------------------------------------------------------------------------+
json_remove
--
-- json_remove 从json记录中删除数据
-- 原型 : JSON_REMOVE(json_doc, path[, path] ...)
--
mysql> set @j = '["a", ["b", "c"], "d"]';
mysql> select json_remove(@j, '$[1]');
+-------------------------+
| json_remove(@j, '$[1]') |
+-------------------------+
| ["a", "d"] | -- 删除了下标为1的元素["b", "c"]
+-------------------------+
mysql> update user set data = json_remove(data, "$.address_2") where uid = 1;
mysql> select * from user;
+-----+------------------------------------------------------+
| uid | data |
+-----+------------------------------------------------------+
| 1 | {"age": 18, "name": "tom", "address": ["SZ", "BJ"]} | -- address_2 的字段删除了
| 2 | {"age": 28, "mail": "jim@163.com", "name": "jim"} |
| 4 | {"age": 33, "name": "jery", "email": "jery@163.com"} |
+-----+------------------------------------------------------+
官方文档:https://dev.mysql.com/doc/refman/8.0/en/json-function-reference.html
JSON创建索引
JSON类型数据本身无法直接创建索引,需要将需要索引的JSON数据重新生成虚拟列(Virtual Columns)之后,对该列进行索引
新建表时创建JSON索引
mysql> create table test_inex_1(
-> data json,
-> gen_col varchar(10) generated always as (json_extract(data, '$.name')), -- 抽取data中的name, 生成新的一列,名字为gen_col
-> index idx (gen_col) -- 将gen_col 作为索引
-> );
mysql> insert into test_inex_1(data) values ('{"name":"tom", "age":18, "address":"SH"}'), ('{"name":"jim", "age":28, "address":"SZ"}');
mysql> select * from test_inex_1;
+---------------------------------------------+---------+
| data | gen_col |
+---------------------------------------------+---------+
| {"age": 18, "name": "tom", "address": "SH"} | "tom" |
| {"age": 28, "name": "jim", "address": "SZ"} | "jim" |
+---------------------------------------------+---------+
mysql> select json_extract(data,"$.name") as username from test_index_1 where gen_col="tom"; -- 如果这样做,为空,原因如下
Empty set (0.00 sec)
mysql> select hex('"');
+----------+
| hex('"') |
+----------+
| 22 | -- 双引号的 16进制
+----------+
mysql> select hex(gen_col) from test_index_1;
+--------------+
| hex(gen_col) |
+--------------+
| 226A696D22 | -- 双引号本身也作为了存储内容
| 22746F6D22 |
+--------------+
mysql> select json_extract(data,"$.name") as username from test_index_1 where gen_col='"tom"'; -- 使用'"tome"',用单引号括起来
+----------+
| username |
+----------+
| "tom" | -- 找到了对应的数据
+----------+
mysql> explain select json_extract(data,"$.name") as username from test_inex_1 where gen_col="tom"\G
*************************** 1. row ***************************
id: 1
select_type: SIMPLE
table: test_inex_1
partitions: NULL
type: ref
possible_keys: idx
key: idx
key_len: 43
ref: const
rows: 1
filtered: 100.00
Extra: NULL
1 row in set, 1 warning (0.00 sec)
mysql> explain select json_extract(data,"$.name") as username from test_inex_1 where gen_col='"tom"'\G
*************************** 1. row ***************************
id: 1
select_type: SIMPLE
table: test_inex_1
partitions: NULL
type: ref
possible_keys: idx
key: idx
key_len: 43
ref: const
rows: 1
filtered: 100.00
Extra: NULL
建表的时候去掉双引用
mysql> create table test_index_2 (
-> data json,
-> gen_col varchar(10) generated always as (
-> json_unquote( -- 使用json_unquote函数进行去掉双引号
-> json_extract(data, "$.name")
-> )),
-> key idx(gen_col)
-> );
mysql> insert into test_index_2(data) values ('{"name":"tom", "age":18, "address":"SH"}'), ('{"name":"jim", "age":28, "address":"SZ"}');
mysql> select json_extract(data,"$.name") as username from test_index_2 where gen_col="tom"; -- 未加单引号
+----------+
| username |
+----------+
| "tom" |
+----------+
修改已存在的表创建JSON索引
原来的表
mysql> show create table user;
+-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| user | CREATE TABLE `user` (
`uid` int(11) NOT NULL AUTO_INCREMENT,
`data` json DEFAULT NULL,
PRIMARY KEY (`uid`)
) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci |
+-------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
mysql> select * from user;
+-----+------------------------------------------------------------+
| uid | data |
+-----+------------------------------------------------------------+
| 1 | {"age": 18, "name": "jack", "address": ["hangzhuo", "BJ"]} |
| 2 | {"mail": "jim@163.com", "name": "jim"} |
| 3 | {"age": 33, "name": "jery", "email": "jery@163.com"} |
+-----+------------------------------------------------------------+
mysql> alter table user
-> add user_name varchar(32)
-> generated always as (json_extract(data, '$.name')) virtual; -- virtual 关键字是不将该列的字段值存储,对应的是stored
mysql> select * from user;
+-----+------------------------------------------------------------+-----------+
| uid | data | user_name |
+-----+------------------------------------------------------------+-----------+
| 1 | {"age": 18, "name": "jack", "address": ["hangzhuo", "BJ"]} | "jack" |
| 2 | {"mail": "jim@163.com", "name": "jim"} | "jim" |
| 3 | {"age": 33, "name": "jery", "email": "jery@163.com"} | "jery" |
+-----+------------------------------------------------------------+-----------+
mysql> alter table user add index idx(user_name);
mysql> select * from user where user_name='"jim"'; -- 加单引号
+-----+----------------------------------------+-----------+
| uid | data | user_name |
+-----+----------------------------------------+-----------+
| 2 | {"mail": "jim@163.com", "name": "jim"} | "jim" |
+-----+----------------------------------------+-----------+
mysql> explain select * from user where user_name='"jim"'; -- 加单引号
+----+-------------+-------+------------+------+---------------+------+---------+-------+------+----------+-------+
| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |
+----+-------------+-------+------------+------+---------------+------+---------+-------+------+----------+-------+
| 1 | SIMPLE | user | NULL | ref | idx | idx | 131 | const | 1 | 100.00 | NULL |
+----+-------------+-------+------------+------+---------------+------+---------+-------+------+----------+-------+
mysql> show create table user;
+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Table | Create Table |
+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| user | CREATE TABLE `user` (
`uid` int(11) NOT NULL AUTO_INCREMENT,
`data` json DEFAULT NULL,
`user_name` varchar(32) GENERATED ALWAYS AS (json_extract(`data`,_utf8mb4'$.name')) VIRTUAL,
PRIMARY KEY (`uid`),
KEY `idx` (`user_name`)
) ENGINE=InnoDB AUTO_INCREMENT=4 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_0900_ai_ci |
+-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
官方文档:https://dev.mysql.com/doc/refman/8.0/en/create-table.html
官方文档:https://dev.mysql.com/doc/refman/8.0/en/json.html