PostgreSQL json jsonb 支持的value数据类型，如何构造一个jsonb

最新推荐文章于 2024-04-17 17:14:48 发布

Postgresql中国用户会

最新推荐文章于 2024-04-17 17:14:48 发布

阅读量7.6k

点赞数

分类专栏：数据库应用开发转载文章标签： postgresql 数据库 json 数据类型

转载同时被 2 个专栏收录

108 篇文章 0 订阅

订阅专栏

数据库应用开发

19 篇文章 0 订阅

订阅专栏

Postgres2015全国用户大会将于11月20至21日在北京丽亭华苑酒店召开。本次大会嘉宾阵容强大，国内顶级PostgreSQL数据库专家将悉数到场，并特邀欧洲、俄罗斯、日本、美国等国家和地区的数据库方面专家助阵:

Postgres-XC项目的发起人铃木市一(SUZUKI Koichi)
Postgres-XL的项目发起人Mason Sharp
pgpool的作者石井达夫(Tatsuo Ishii)
PG-Strom的作者海外浩平(Kaigai Kohei)
Greenplum研发总监姚延栋
周正中(德哥), PostgreSQL中国用户会创始人之一
汪洋，平安科技数据库技术部经理
……

 
 2015年度PG大象会报名地址：http://postgres2015.eventdove.com/
PostgreSQL中国社区： http://postgres.cn/
PostgreSQL专业1群： 3336901（已满）
PostgreSQL专业2群： 100910388
PostgreSQL专业3群： 150657323

PostgreSQL 支持JSON已久，那么JSON的value中支持哪些类型呢？

目前支持string, numeric, boolean (true, false), null。区分大小写哦。

* Recursive Descent parse routines. There is one for each structural

* element in a json document:

* - scalar (string, number, true, false, null)

* - array ( [ ] )

* - array element

* - object ( { } )

* - object field

static inline void

parse_scalar(JsonLexContext *lex, JsonSemAction *sem)

{

char *val = NULL;

json_scalar_action sfunc = sem->scalar;

char **valaddr;

JsonTokenType tok = lex_peek(lex);

valaddr = sfunc == NULL ? NULL : &val;

/* a scalar must be a string, a number, true, false, or null */

switch (tok)

{

case JSON_TOKEN_TRUE:

lex_accept(lex, JSON_TOKEN_TRUE, valaddr);

break;

case JSON_TOKEN_FALSE:

lex_accept(lex, JSON_TOKEN_FALSE, valaddr);

break;

case JSON_TOKEN_NULL:

lex_accept(lex, JSON_TOKEN_NULL, valaddr);

break;

case JSON_TOKEN_NUMBER:

lex_accept(lex, JSON_TOKEN_NUMBER, valaddr);

break;

case JSON_TOKEN_STRING:

lex_accept(lex, JSON_TOKEN_STRING, valaddr);

break;

default:

report_parse_error(JSON_PARSE_VALUE, lex);

}

if (sfunc != NULL)

(*sfunc) (sem->semstate, val, tok);

}

例子:


    
    
     
     postgres=# select jsonb '{"a": TRUE}';
    
    
    
    
     
     ERROR:  invalid input syntax for type json
    
    
    
    
     
     LINE 1: select jsonb '{"a": TRUE}';
    
    
    
    
     
                          ^
    
    
    
    
     
     DETAIL:  Token "TRUE" is invalid.
    
    
    
    
     
     CONTEXT:  JSON data, line 1: {"a": TRUE...
    
    
    
    
     
     postgres=# select jsonb '{"a": true}';
    
    
    
    
     
         jsonb    
    
    
    
    
     
     -------------
    
    
    
    
     
      {"a": true}
    
    
    
    
     
     (1 row)
    
    
    
    
     
     

    
    
    
    
     
     postgres=# select jsonb '{"a": false}';
    
    
    
    
     
         jsonb     
    
    
    
    
     
     --------------
    
    
    
    
     
      {"a": false}
    
    
    
    
     
     (1 row)
    
    
    
    
     
     

    
    
    
    
     
     postgres=# select jsonb '{"a": NULL}';
    
    
    
    
     
     ERROR:  invalid input syntax for type json
    
    
    
    
     
     LINE 1: select jsonb '{"a": NULL}';
    
    
    
    
     
                          ^
    
    
    
    
     
     DETAIL:  Token "NULL" is invalid.
    
    
    
    
     
     CONTEXT:  JSON data, line 1: {"a": NULL...
    
    
    
    
     
     postgres=# select jsonb '{"a": null}';
    
    
    
    
     
         jsonb    
    
    
    
    
     
     -------------
    
    
    
    
     
      {"a": null}
    
    
    
    
     
     (1 row)
    
    
    
    
     
     

    
    
    
    
     
     postgres=# select jsonb '{"a": 10.001}';
    
    
    
    
     
          jsonb     
    
    
    
    
     
     ---------------
    
    
    
    
     
      {"a": 10.001}
    
    
    
    
     
     (1 row)
    
    
    
    
     
     

    
    
    
    
     
     postgres=# select jsonb '{"a": "10.001"}';
    
    
    
    
     
           jsonb      
    
    
    
    
     
     -----------------
    
    
    
    
     
      {"a": "10.001"}
    
    
    
    
     
     (1 row)

src/backend/utils/adt/json.c

解析json,jsonb时，定义的token类型，这个和JSONB里面能存什么类型没有关系：

* The context of the parser is maintained by the recursive descent

* mechanism, but is passed explicitly to the error reporting routine

* for better diagnostics.

typedef enum /* contexts of JSON parser */

{

JSON_PARSE_VALUE, /* expecting a value */

JSON_PARSE_STRING, /* expecting a string (for a field name) */

JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */

JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */

JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */

JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */

JSON_PARSE_OBJECT_NEXT /* saw object value, expecting ',' or '}' */

JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */

JSON_PARSE_END /* saw the end of a document, expect nothing */

} JsonParseContext;

实际上jsonb里没有类型的概念，你前面看到的TOKEN实际上也就是字符串匹配而已，你可以理解为jsonb内部就是一个有一定规则的字符串。

那么我们怎么来构造一个JSONB类型，并且在使用时可以很好的处理里面的数据类型呢？

这里需要用到PostgreSQL的输入输出函数，一般每个类型都会有对应的in和out函数，例如：

pg_catalog | anyrange_in | anyrange | cstring, oid, integer | normal

pg_catalog | anyrange_out | cstring | anyrange | normal

pg_catalog | timestamptz_out | cstring | timestamp with time zone | normal

pg_catalog | timestamptz_in | timestamp with time zone | cstring, oid, integer | normal


  
  
   
   /* timestamptz_in()
  
  
  
  
   
    * Convert a string to internal form.
  
  
  
  
   
    */
  
  
  
  
   
   Datum
  
  
  
  
   
   timestamptz_in(PG_FUNCTION_ARGS)
  
  
  
  
   
   {
  
  
  
  
   
           char       *str = PG_GETARG_CSTRING(0);
  
  
  
  
   
   

  
  
  
  
   
   #ifdef NOT_USED
  
  
  
  
   
           Oid                     typelem = PG_GETARG_OID(1);
  
  
  
  
   
   #endif
  
  
  
  
   
           int32           typmod = PG_GETARG_INT32(2);

postgres=# select timestamptz_out(now());

timestamptz_out

-------------------------------

2015-09-24 19:44:16.076233+08

(1 row)

postgres=# select oid,* from pg_type where typname='timestamptz';

-[ RECORD 1 ]--+---------------------

oid | 1184

typname | timestamptz

typnamespace | 11

typowner | 10

typlen | 8

typbyval | t

typtype | b

typcategory | D

typispreferred | t

typisdefined | t

typdelim | ,

typrelid | 0

typelem | 0

typarray | 1185

typinput | timestamptz_in

typoutput | timestamptz_out

typreceive | timestamptz_recv

typsend | timestamptz_send

typmodin | timestamptztypmodin

typmodout | timestamptztypmodout

typanalyze | -

typalign | d

typstorage | p

typnotnull | f

typbasetype | 0

typtypmod | -1

typndims | 0

typcollation | 0

typdefaultbin |

typdefault |

typacl |

typmode表示精度


  
  
   
   
    
    postgres=# select timestamptz_in('2015-09-24 19:44:16.076233+08',1184,1);
   
   
   
   
    
    -[ RECORD 1 ]--+-------------------------
   
   
   
   
    
    timestamptz_in | 2015-09-24 19:44:16.1+08
   
   
  
  
  
  
   
   
    
    postgres=# select timestamptz_in('2015-09-24 19:44:16.076233+08',1184,6);
   
   
   
   
    
    -[ RECORD 1 ]--+------------------------------
   
   
   
   
    
    timestamptz_in | 2015-09-24 19:44:16.076233+08

使用format格式来构造jsonb类型是比较靠谱的。

代码见varlena.c

* Returns a formatted string

Datum

text_format(PG_FUNCTION_ARGS)

{

......

例子

构造一个jsonb的cstring


  
  
   
   postgres=# select format('{"K": "%s"}', int8range(1,10));
  
  
  
  
   
        format      
  
  
  
  
   
   -----------------
  
  
  
  
   
    {"K": "[1,10)"}
  
  
  
  
   
   (1 row)

使用jsonb_in把cstring转换为JSONB


  
  
   
   postgres=# select jsonb_in( format('{"K": "%s"}', int8range(1,10))::cstring );
  
  
  
  
   
       jsonb_in     
  
  
  
  
   
   -----------------
  
  
  
  
   
    {"K": "[1,10)"}
  
  
  
  
   
   (1 row)

取出K的element


  
  
   
   postgres=# select jsonb_in( format('{"K": "%s"}', int8range(1,10))::cstring ) ->> 'K';
  
  
  
  
   
    ?column? 
  
  
  
  
   
   ----------
  
  
  
  
   
    [1,10)
  
  
  
  
   
   (1 row)

将element转换回去


  
  
   
   postgres=# select (jsonb_in( format('{"K": "%s"}', int8range(1,10))::cstring ) ->> 'K') :: int8range;
  
  
  
  
   
    int8range 
  
  
  
  
   
   -----------
  
  
  
  
   
    [1,10)
  
  
  
  
   
   (1 row)

再举个例子


  
  
   
   
    
    postgres=# select format('{"K": "%s"}', '你好'::bytea);
   
   
   
   
    
             format          
   
   
   
   
    
    -------------------------
   
   
   
   
    
     {"K": "\xe4bda0e5a5bd"}
   
   
   
   
    
    (1 row)
   
   
  
  
  
  
   
   
    
    postgres=# select convert_from('\xe4bda0e5a5bd', 'utf8');
   
   
   
   
    
     convert_from 
   
   
   
   
    
    --------------
   
   
   
   
    
     你好
   
   
   
   
    
    (1 row)
   
   
  
  
  
  
   
   
    
    postgres=# \set VERBOSITY verbose
   
   
   
   
    
    postgres=# select jsonb_in( format('{"K": "%s"}', '你好'::bytea)::cstring );
   
   
   
   
    
    ERROR:  22P02: invalid input syntax for type json
   
   
   
   
    
    DETAIL:  Escape sequence "\x" is invalid.
   
   
   
   
    
    CONTEXT:  JSON data, line 1: {"K": "\x...
   
   
   
   
    
    LOCATION:  json_lex_string, json.c:890

非常遗憾，JSONB会处理escape，而\x是不合法的，报错了。这块代码可以改一改。

代码如下：

* The next token in the input stream is known to be a string; lex it.

static inline void

json_lex_string(JsonLexContext *lex)

{

......

switch (*s)

{

case '"':

case '\\':

case '/':

appendStringInfoChar(lex->strval, *s);

break;

case 'b':

appendStringInfoChar(lex->strval, '\b');

break;

case 'f':

appendStringInfoChar(lex->strval, '\f');

break;

case 'n':

appendStringInfoChar(lex->strval, '\n');

break;

case 'r':

appendStringInfoChar(lex->strval, '\r');

break;

case 't':

appendStringInfoChar(lex->strval, '\t');

break;

default:

/* Not a valid string escape, so error out. */

lex->token_terminator = s + pg_mblen(s);

ereport(ERROR,

(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),

errmsg("invalid input syntax for type json"),

errdetail("Escape sequence \"\\%s\" is invalid.",

extract_mb_char(s)),

report_json_context(lex)));

}

修改后，可以实现存储BYTEA，但是输出时还需要转换一下。


  
  
   
   
    
    postgres=# select jsonb_in( format('{"K": "%s"}', '你好'::bytea)::cstring ) ->> 'K';
   
   
   
   
    
       ?column?   
   
   
   
   
    
    --------------
   
   
   
   
    
     e4bda0e5a5bd
   
   
   
   
    
    (1 row)
   
   
  
  
  
  
   
   

  
  
  
  
   
   
    
    postgres=# select convert_from( byteain (('\x'||(jsonb_in( format('{"K": "%s"}', '你好'::bytea)::cstring ) ->> 'K'))::cstring), 'utf8'::name);
   
   
   
   
    
     convert_from 
   
   
   
   
    
    --------------
   
   
   
   
    
     你好
   
   
   
   
    
    (1 row)

用format来构造一个字符串，在里面使用%s填充需要的cstring，从而达到构造jsonb的目的。

这种方法同样适用构造JSONB中包含postgis的几何类型。

[参考]

1. http://www.postgresql.org/docs/9.4/static/functions-string.html

2. src/backend/utils/adt/json.c

* Lex one token from the input stream.

static inline void

json_lex(JsonLexContext *lex)

{

char *s;

int len;

/* Skip leading whitespace. */

s = lex->token_terminator;

len = s - lex->input;

while (len < lex->input_length &&

(*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r'))

{

if (*s == '\n')

++lex->line_number;

++s;

++len;

}

lex->token_start = s;

/* Determine token type. */

if (len >= lex->input_length)

{

lex->token_start = NULL;

lex->prev_token_terminator = lex->token_terminator;

lex->token_terminator = s;

lex->token_type = JSON_TOKEN_END;

}

else

switch (*s)

{

/* Single-character token, some kind of punctuation mark. */

case '{':

lex->prev_token_terminator = lex->token_terminator;

lex->token_terminator = s + 1;

lex->token_type = JSON_TOKEN_OBJECT_START;

lex->token_terminator = s + 1;

lex->token_type = JSON_TOKEN_OBJECT_START;

break;

case '}':

lex->prev_token_terminator = lex->token_terminator;

lex->token_terminator = s + 1;

lex->token_type = JSON_TOKEN_OBJECT_END;

break;

case '[':

lex->prev_token_terminator = lex->token_terminator;

lex->token_terminator = s + 1;

lex->token_type = JSON_TOKEN_ARRAY_START;

break;

case ']':

lex->prev_token_terminator = lex->token_terminator;

lex->token_terminator = s + 1;

lex->token_type = JSON_TOKEN_ARRAY_END;

break;

case ',':

lex->prev_token_terminator = lex->token_terminator;

lex->token_terminator = s + 1;

lex->token_type = JSON_TOKEN_COMMA;

break;

case ':':

lex->prev_token_terminator = lex->token_terminator;

lex->token_terminator = s + 1;

lex->token_type = JSON_TOKEN_COLON;

break;

case '"':

/* string */

json_lex_string(lex);

lex->token_type = JSON_TOKEN_STRING;

break;

case '-':

/* Negative number. */

json_lex_number(lex, s + 1, NULL);

lex->token_type = JSON_TOKEN_NUMBER;

break;

case '0':

case '1':

case '2':

case '3':

case '4':

case '5':

case '6':

case '7':

case '8':

case '9':

/* Positive number. */

json_lex_number(lex, s, NULL);

lex->token_type = JSON_TOKEN_NUMBER;

break;

default:

{

char *p;

{

char *p;

* We're not dealing with a string, number, legal

* punctuation mark, or end of string. The only legal

* tokens we might find here are true, false, and null,

* but for error reporting purposes we scan until we see a

* non-alphanumeric character. That way, we can report

* the whole word as an unexpected token, rather than just

* some unintuitive prefix thereof.

for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)

/* skip */ ;

* We got some sort of unexpected punctuation or an

* otherwise unexpected character, so just complain about

* that one character.

if (p == s)

{

lex->prev_token_terminator = lex->token_terminator;

lex->token_terminator = s + 1;

report_invalid_token(lex);

}

* We've got a real alphanumeric token here. If it

* happens to be true, false, or null, all is well. If

* not, error out.

lex->prev_token_terminator = lex->token_terminator;

lex->token_terminator = p;

if (p - s == 4)

{

if (memcmp(s, "true", 4) == 0)

lex->token_type = JSON_TOKEN_TRUE;

else if (memcmp(s, "null", 4) == 0)

lex->token_type = JSON_TOKEN_NULL;

else

report_invalid_token(lex);

}

else if (p - s == 5 && memcmp(s, "false", 5) == 0)

lex->token_type = JSON_TOKEN_FALSE;

else

report_invalid_token(lex);

}

} /* end of switch */

}

Postgresql中国用户会

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
PostgreSQL json jsonb 支持的value数据类型，如何构造一个jsonb

PostgreSQL 支持JSON已久，那么JSON的value中支持哪些类型呢？目前支持string, numeric, boolean (true, false), null。区分大小写哦。/* * Recursive Descent parse routines. There is one for each structural * element in a
复制链接

扫一扫