![](https://i-blog.csdnimg.cn/blog_migrate/85458856c557f88674ef240850c3be7f.png)
Postgres2015全国用户大会将于11月20至21日在北京丽亭华苑酒店召开。本次大会嘉宾阵容强大,国内顶级PostgreSQL数据库专家将悉数到场,并特邀欧洲、俄罗斯、日本、美国等国家和地区的数据库方面专家助阵:
- Postgres-XC项目的发起人铃木市一(SUZUKI Koichi)
- Postgres-XL的项目发起人Mason Sharp
- pgpool的作者石井达夫(Tatsuo Ishii)
- PG-Strom的作者海外浩平(Kaigai Kohei)
- Greenplum研发总监姚延栋
- 周正中(德哥), PostgreSQL中国用户会创始人之一
- 汪洋,平安科技数据库技术部经理
- ……
|
![]() |
目前支持string, numeric, boolean (true, false), null。 区分大小写哦。
/** Recursive Descent parse routines. There is one for each structural* element in a json document:* - scalar (string, number, true, false, null)* - array ( [ ] )* - array element* - object ( { } )* - object field*/static inline voidparse_scalar(JsonLexContext *lex, JsonSemAction *sem){char *val = NULL;json_scalar_action sfunc = sem->scalar;char **valaddr;JsonTokenType tok = lex_peek(lex);
valaddr = sfunc == NULL ? NULL : &val;
/* a scalar must be a string, a number, true, false, or null */switch (tok){case JSON_TOKEN_TRUE:lex_accept(lex, JSON_TOKEN_TRUE, valaddr);break;case JSON_TOKEN_FALSE:lex_accept(lex, JSON_TOKEN_FALSE, valaddr);break;case JSON_TOKEN_NULL:lex_accept(lex, JSON_TOKEN_NULL, valaddr);break;case JSON_TOKEN_NUMBER:lex_accept(lex, JSON_TOKEN_NUMBER, valaddr);break;case JSON_TOKEN_STRING:lex_accept(lex, JSON_TOKEN_STRING, valaddr);break;default:report_parse_error(JSON_PARSE_VALUE, lex);}
if (sfunc != NULL)(*sfunc) (sem->semstate, val, tok);}
例子:
postgres=# select jsonb '{"a": TRUE}';ERROR: invalid input syntax for type jsonLINE 1: select jsonb '{"a": TRUE}';^DETAIL: Token "TRUE" is invalid.CONTEXT: JSON data, line 1: {"a": TRUE...postgres=# select jsonb '{"a": true}';jsonb-------------{"a": true}(1 row)
postgres=# select jsonb '{"a": false}';jsonb--------------{"a": false}(1 row)
postgres=# select jsonb '{"a": NULL}';ERROR: invalid input syntax for type jsonLINE 1: select jsonb '{"a": NULL}';^DETAIL: Token "NULL" is invalid.CONTEXT: JSON data, line 1: {"a": NULL...postgres=# select jsonb '{"a": null}';jsonb-------------{"a": null}(1 row)
postgres=# select jsonb '{"a": 10.001}';jsonb---------------{"a": 10.001}(1 row)
postgres=# select jsonb '{"a": "10.001"}';jsonb-----------------{"a": "10.001"}(1 row)
src/backend/utils/adt/json.c
解析json,jsonb时,定义的token类型,这个和JSONB里面能存什么类型没有关系:
/** The context of the parser is maintained by the recursive descent* mechanism, but is passed explicitly to the error reporting routine* for better diagnostics.*/typedef enum /* contexts of JSON parser */{JSON_PARSE_VALUE, /* expecting a value */JSON_PARSE_STRING, /* expecting a string (for a field name) */JSON_PARSE_ARRAY_START, /* saw '[', expecting value or ']' */JSON_PARSE_ARRAY_NEXT, /* saw array element, expecting ',' or ']' */JSON_PARSE_OBJECT_START, /* saw '{', expecting label or '}' */JSON_PARSE_OBJECT_LABEL, /* saw object label, expecting ':' */JSON_PARSE_OBJECT_NEXT /* saw object value, expecting ',' or '}' */JSON_PARSE_OBJECT_COMMA, /* saw object ',', expecting next label */JSON_PARSE_END /* saw the end of a document, expect nothing */} JsonParseContext;
实际上jsonb里没有类型的概念,你前面看到的TOKEN实际上也就是字符串匹配而已,你可以理解为jsonb内部就是一个有一定规则的字符串。
那么我们怎么来构造一个JSONB类型,并且在使用时可以很好的处理里面的数据类型呢?
这里需要用到PostgreSQL的输入输出函数,一般每个类型都会有对应的in和out函数,例如:
pg_catalog | anyrange_in | anyrange | cstring, oid, integer | normalpg_catalog | anyrange_out | cstring | anyrange | normalpg_catalog | timestamptz_out | cstring | timestamp with time zone | normalpg_catalog | timestamptz_in | timestamp with time zone | cstring, oid, integer | normal
/* timestamptz_in()* Convert a string to internal form.*/Datumtimestamptz_in(PG_FUNCTION_ARGS){char *str = PG_GETARG_CSTRING(0);
#ifdef NOT_USEDOid typelem = PG_GETARG_OID(1);#endifint32 typmod = PG_GETARG_INT32(2);
postgres=# select timestamptz_out(now());timestamptz_out-------------------------------2015-09-24 19:44:16.076233+08(1 row)
postgres=# select oid,* from pg_type where typname='timestamptz';-[ RECORD 1 ]--+---------------------oid | 1184typname | timestamptztypnamespace | 11typowner | 10typlen | 8typbyval | ttyptype | btypcategory | Dtypispreferred | ttypisdefined | ttypdelim | ,typrelid | 0typelem | 0typarray | 1185typinput | timestamptz_intypoutput | timestamptz_outtypreceive | timestamptz_recvtypsend | timestamptz_sendtypmodin | timestamptztypmodintypmodout | timestamptztypmodouttypanalyze | -typalign | dtypstorage | ptypnotnull | ftypbasetype | 0typtypmod | -1typndims | 0typcollation | 0typdefaultbin |typdefault |typacl |
typmode表示精度
postgres=# select timestamptz_in('2015-09-24 19:44:16.076233+08',1184,1);-[ RECORD 1 ]--+-------------------------timestamptz_in | 2015-09-24 19:44:16.1+08postgres=# select timestamptz_in('2015-09-24 19:44:16.076233+08',1184,6);-[ RECORD 1 ]--+------------------------------timestamptz_in | 2015-09-24 19:44:16.076233+08
使用format格式来构造jsonb类型是比较靠谱的。
代码见varlena.c
/** Returns a formatted string*/Datumtext_format(PG_FUNCTION_ARGS){......
例子
构造一个jsonb的cstring
postgres=# select format('{"K": "%s"}', int8range(1,10));format-----------------{"K": "[1,10)"}(1 row)
使用jsonb_in把cstring转换为JSONB
postgres=# select jsonb_in( format('{"K": "%s"}', int8range(1,10))::cstring );jsonb_in-----------------{"K": "[1,10)"}(1 row)
取出K的element
postgres=# select jsonb_in( format('{"K": "%s"}', int8range(1,10))::cstring ) ->> 'K';?column?----------[1,10)(1 row)
将element转换回去
postgres=# select (jsonb_in( format('{"K": "%s"}', int8range(1,10))::cstring ) ->> 'K') :: int8range;int8range-----------[1,10)(1 row)
再举个例子
postgres=# select format('{"K": "%s"}', '你好'::bytea);format-------------------------{"K": "\xe4bda0e5a5bd"}(1 row)postgres=# select convert_from('\xe4bda0e5a5bd', 'utf8');convert_from--------------你好(1 row)postgres=# \set VERBOSITY verbosepostgres=# select jsonb_in( format('{"K": "%s"}', '你好'::bytea)::cstring );ERROR: 22P02: invalid input syntax for type jsonDETAIL: Escape sequence "\x" is invalid.CONTEXT: JSON data, line 1: {"K": "\x...LOCATION: json_lex_string, json.c:890
非常遗憾,JSONB会处理escape,而\x是不合法的,报错了。这块代码可以改一改。
代码如下:
/** The next token in the input stream is known to be a string; lex it.*/static inline voidjson_lex_string(JsonLexContext *lex){......switch (*s){case '"':case '\\':case '/':appendStringInfoChar(lex->strval, *s);break;case 'b':appendStringInfoChar(lex->strval, '\b');break;case 'f':appendStringInfoChar(lex->strval, '\f');break;case 'n':appendStringInfoChar(lex->strval, '\n');break;case 'r':appendStringInfoChar(lex->strval, '\r');break;case 't':appendStringInfoChar(lex->strval, '\t');break;default:/* Not a valid string escape, so error out. */lex->token_terminator = s + pg_mblen(s);ereport(ERROR,(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),errmsg("invalid input syntax for type json"),errdetail("Escape sequence \"\\%s\" is invalid.",extract_mb_char(s)),report_json_context(lex)));}
修改后,可以实现存储BYTEA,但是输出时还需要转换一下。
postgres=# select jsonb_in( format('{"K": "%s"}', '你好'::bytea)::cstring ) ->> 'K';?column?--------------e4bda0e5a5bd(1 row)
postgres=# select convert_from( byteain (('\x'||(jsonb_in( format('{"K": "%s"}', '你好'::bytea)::cstring ) ->> 'K'))::cstring), 'utf8'::name);convert_from--------------你好(1 row)
用format来构造一个字符串,在里面使用%s填充需要的cstring,从而达到构造jsonb的目的。
这种方法同样适用构造JSONB中包含postgis的几何类型。
2. src/backend/utils/adt/json.c
/** Lex one token from the input stream.*/static inline voidjson_lex(JsonLexContext *lex){char *s;int len;
/* Skip leading whitespace. */s = lex->token_terminator;len = s - lex->input;while (len < lex->input_length &&(*s == ' ' || *s == '\t' || *s == '\n' || *s == '\r')){if (*s == '\n')++lex->line_number;++s;++len;}lex->token_start = s;
/* Determine token type. */if (len >= lex->input_length){lex->token_start = NULL;lex->prev_token_terminator = lex->token_terminator;lex->token_terminator = s;lex->token_type = JSON_TOKEN_END;}elseswitch (*s){/* Single-character token, some kind of punctuation mark. */case '{':lex->prev_token_terminator = lex->token_terminator;lex->token_terminator = s + 1;lex->token_type = JSON_TOKEN_OBJECT_START;lex->token_terminator = s + 1;lex->token_type = JSON_TOKEN_OBJECT_START;break;case '}':lex->prev_token_terminator = lex->token_terminator;lex->token_terminator = s + 1;lex->token_type = JSON_TOKEN_OBJECT_END;break;case '[':lex->prev_token_terminator = lex->token_terminator;lex->token_terminator = s + 1;lex->token_type = JSON_TOKEN_ARRAY_START;break;case ']':lex->prev_token_terminator = lex->token_terminator;lex->token_terminator = s + 1;lex->token_type = JSON_TOKEN_ARRAY_END;break;case ',':lex->prev_token_terminator = lex->token_terminator;lex->token_terminator = s + 1;lex->token_type = JSON_TOKEN_COMMA;break;case ':':lex->prev_token_terminator = lex->token_terminator;lex->token_terminator = s + 1;lex->token_type = JSON_TOKEN_COLON;break;case '"':/* string */json_lex_string(lex);lex->token_type = JSON_TOKEN_STRING;break;case '-':/* Negative number. */json_lex_number(lex, s + 1, NULL);lex->token_type = JSON_TOKEN_NUMBER;break;case '0':case '1':case '2':case '3':case '4':case '5':case '6':case '7':case '8':case '9':/* Positive number. */json_lex_number(lex, s, NULL);lex->token_type = JSON_TOKEN_NUMBER;break;default:{char *p;{char *p;
/** We're not dealing with a string, number, legal* punctuation mark, or end of string. The only legal* tokens we might find here are true, false, and null,* but for error reporting purposes we scan until we see a* non-alphanumeric character. That way, we can report* the whole word as an unexpected token, rather than just* some unintuitive prefix thereof.*/for (p = s; p - s < lex->input_length - len && JSON_ALPHANUMERIC_CHAR(*p); p++)/* skip */ ;
/** We got some sort of unexpected punctuation or an* otherwise unexpected character, so just complain about* that one character.*/if (p == s){lex->prev_token_terminator = lex->token_terminator;lex->token_terminator = s + 1;report_invalid_token(lex);}
/** We've got a real alphanumeric token here. If it* happens to be true, false, or null, all is well. If* not, error out.*/lex->prev_token_terminator = lex->token_terminator;lex->token_terminator = p;if (p - s == 4){if (memcmp(s, "true", 4) == 0)lex->token_type = JSON_TOKEN_TRUE;else if (memcmp(s, "null", 4) == 0)lex->token_type = JSON_TOKEN_NULL;elsereport_invalid_token(lex);}else if (p - s == 5 && memcmp(s, "false", 5) == 0)lex->token_type = JSON_TOKEN_FALSE;elsereport_invalid_token(lex);
}} /* end of switch */}