

Built-in Functions内置函数

Aggregate Functions聚合函数


Returns true if at least one value of expr is true.
approx_count_distinct(expr[, relativeSD])
Returns the estimated cardinality by HyperLogLog++. relativeSD defines the maximum relative standard deviation allowed.
approx_percentile(col, percentage [, accuracy])
Returns the approximate percentile value of numeric column col at the given percentage. The value of percentage must be between 0.0 and 1.0. The accuracy parameter (default: 10000) is a positive numeric literal which controls approximation accuracy at the cost of memory. Higher value of accuracy yields better accuracy, 1.0/accuracy is the relative error of the approximation. When percentage is an array, each value of the percentage array must be between 0.0 and 1.0. In this case, returns the approximate percentile array of column col at the given percentage array.
Returns the mean calculated from values of a group.
Returns the bitwise OR of all non-null input values, or null if none.
Returns the bitwise XOR of all non-null input values, or null if none.
Returns true if all values of expr are true.
Returns true if at least one value of expr is true.
Collects and returns a list of non-unique elements.
Collects and returns a set of unique elements.
corr(expr1, expr2)
Returns Pearson coefficient of correlation between a set of number pairs.
Returns the total number of retrieved rows, including rows containing null.
count(expr[, expr…])
Returns the number of rows for which the supplied expression(s) are all non-null.
count(DISTINCT expr[, expr…])
Returns the number of rows for which the supplied expression(s) are unique and non-null.
Returns the number of TRUE values for the expression.
count_min_sketch(col, eps, confidence, seed)
Returns a count-min sketch of a column with the given eps, confidence and seed. The result is an array of bytes, which can be deserialized to a CountMinSketch before usage. Count-min sketch is a probabilistic data structure used for cardinality estimation using sub-linear space.
covar_pop(expr1, expr2)
Returns the population covariance of a set of number pairs.
covar_samp(expr1, expr2)
Returns the sample covariance of a set of number pairs.
Returns true if all values of expr are true.
first(expr[, isIgnoreNull])
Returns the first value of expr for a group of rows. If isIgnoreNull is true, returns only non-null values.
first_value(expr[, isIgnoreNull])
Returns the first value of expr for a group of rows. If isIgnoreNull is true, returns only non-null values.
Returns the kurtosis value calculated from values of a group.
last(expr[, isIgnoreNull])
Returns the last value of expr for a group of rows. If isIgnoreNull is true, returns only non-null values.
last_value(expr[, isIgnoreNull])
Returns the last value of expr for a group of rows. If isIgnoreNull is true, returns only non-null values.
Returns the maximum value of expr.
max_by(x, y)
Returns the value of x associated with the maximum value of y.
Returns the mean calculated from values of a group.
Returns the minimum value of expr.
min_by(x, y)
Returns the value of x associated with the minimum value of y.
percentile(col, percentage [, frequency])
Returns the exact percentile value of numeric column col at the given percentage. The value of percentage must be between 0.0 and 1.0. The value of frequency should be a positive integer.
percentile(col, array(percentage1 [, percentage2]…) [, frequency])
Returns the exact percentile value array of numeric column col at the given percentage(s). Each value of the percentage array must be between 0.0 and 1.0. The value of frequency should be a positive integer.
percentile_approx(col, percentage [, accuracy])
Returns the approximate percentile value of numeric column col at the given percentage. The value of percentage must be between 0.0 and 1.0. The accuracy parameter (default: 10000) is a positive numeric literal which controls approximation accuracy at the cost of memory. Higher value of accuracy yields better accuracy, 1.0/accuracy is the relative error of the approximation. When percentage is an array, each value of the percentage array must be between 0.0 and 1.0. In this case, returns the approximate percentile array of column col at the given percentage array.
Returns the skewness value calculated from values of a group.
Returns true if at least one value of expr is true.
std(expr)
Returns the sample standard deviation calculated from values of a group.
stddev(expr)
Returns the sample standard deviation calculated from values of a group.
stddev_pop(expr)
Returns the population standard deviation calculated from values of a group.
stddev_samp(expr)
Returns the sample standard deviation calculated from values of a group.
Returns the sum calculated from values of a group.
Returns the population variance calculated from values of a group.
var_samp(expr)
Returns the sample variance calculated from values of a group.
variance(expr)
Returns the sample variance calculated from values of a group.

-- any
SELECT any(col) FROM VALUES (true), (false), (false) AS tab(col);
|    true|

SELECT any(col) FROM VALUES (NULL), (true), (false) AS tab(col);
|    true|

SELECT any(col) FROM VALUES (false), (false), (NULL) AS tab(col);
|   false|

-- approx_count_distinct
SELECT approx_count_distinct(col1) FROM VALUES (1), (1), (2), (2), (3) tab(col1);
|                          3|

-- approx_percentile
SELECT approx_percentile(10.0, array(0.5, 0.4, 0.1), 100);
|approx_percentile(10.0, array(0.5, 0.4, 0.1), 100)|
|                                [10.0, 10.0, 10.0]|

SELECT approx_percentile(10.0, 0.5, 100);
|approx_percentile(10.0, CAST(0.5 AS DOUBLE), 100)|
|                                             10.0|

-- avg
SELECT avg(col) FROM VALUES (1), (2), (3) AS tab(col);
|     2.0|

SELECT avg(col) FROM VALUES (1), (2), (NULL) AS tab(col);
|     1.5|

-- bit_or
SELECT bit_or(col) FROM VALUES (3), (5) AS tab(col);
|          7|

-- bit_xor
SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col);
|           6|

-- bool_and
SELECT bool_and(col) FROM VALUES (true), (true), (true) AS tab(col);
|         true|

SELECT bool_and(col) FROM VALUES (NULL), (true), (true) AS tab(col);
|         true|

SELECT bool_and(col) FROM VALUES (true), (false), (true) AS tab(col);
|        false|

-- bool_or
SELECT bool_or(col) FROM VALUES (true), (false), (false) AS tab(col);
|        true|

SELECT bool_or(col) FROM VALUES (NULL), (true), (false) AS tab(col);
|        true|

SELECT bool_or(col) FROM VALUES (false), (false), (NULL) AS tab(col);
|       false|

-- collect_list
SELECT collect_list(col) FROM VALUES (1), (2), (1) AS tab(col);
|        [1, 2, 1]|

-- collect_set
SELECT collect_set(col) FROM VALUES (1), (2), (1) AS tab(col);
|          [1, 2]|

-- corr
SELECT corr(c1, c2) FROM VALUES (3, 2), (3, 3), (6, 4) as tab(c1, c2);
|                          0.8660254037844387|

-- count
SELECT count(*) FROM VALUES (NULL), (5), (5), (20) AS tab(col);
|       4|

SELECT count(col) FROM VALUES (NULL), (5), (5), (20) AS tab(col);
|         3|

SELECT count(DISTINCT col) FROM VALUES (NULL), (5), (5), (10) AS tab(col);
|count(DISTINCT col)|
|                  2|

-- count_if
SELECT count_if(col % 2 = 0) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col);
|count_if(((col % 2) = 0))|
|                        2|

SELECT count_if(col IS NULL) FROM VALUES (NULL), (0), (1), (2), (3) AS tab(col);
|count_if((col IS NULL))|
|                      1|

-- covar_pop
SELECT covar_pop(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2);
|covar_pop(CAST(c1 AS DOUBLE), CAST(c2 AS DOUBLE))|
|                               0.6666666666666666|

-- covar_samp
SELECT covar_samp(c1, c2) FROM VALUES (1,1), (2,2), (3,3) AS tab(c1, c2);
|covar_samp(CAST(c1 AS DOUBLE), CAST(c2 AS DOUBLE))|
|                                               1.0|

-- every
SELECT every(col) FROM VALUES (true), (true), (true) AS tab(col);
|      true|

SELECT every(col) FROM VALUES (NULL), (true), (true) AS tab(col);
|      true|

SELECT every(col) FROM VALUES (true), (false), (true) AS tab(col);
|     false|

-- first
SELECT first(col) FROM VALUES (10), (5), (20) AS tab(col);
|        10|

SELECT first(col) FROM VALUES (NULL), (5), (20) AS tab(col);
|      null|

SELECT first(col, true) FROM VALUES (NULL), (5), (20) AS tab(col);
|         5|

-- first_value
SELECT first_value(col) FROM VALUES (10), (5), (20) AS tab(col);
|              10|

SELECT first_value(col) FROM VALUES (NULL), (5), (20) AS tab(col);
|            null|

SELECT first_value(col, true) FROM VALUES (NULL), (5), (20) AS tab(col);
|               5|

-- kurtosis
SELECT kurtosis(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);
|kurtosis(CAST(col AS DOUBLE))|
|          -0.7014368047529618|

SELECT kurtosis(col) FROM VALUES (1), (10), (100), (10), (1) as tab(col);
|kurtosis(CAST(col AS DOUBLE))|
|          0.19432323191698986|

-- last
SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col);
|       20|

SELECT last(col) FROM VALUES (10), (5), (NULL) AS tab(col);
|     null|

SELECT last(col, true) FROM VALUES (10), (5), (NULL) AS tab(col);
|        5|

-- last_value
SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col);
|             20|

SELECT last_value(col) FROM VALUES (10), (5), (NULL) AS tab(col);
|           null|

SELECT last_value(col, true) FROM VALUES (10), (5), (NULL) AS tab(col);
|              5|

-- max
SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col);
|      50|

-- max_by
SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y);
|max_by(x, y)|
|           b|

-- mean
SELECT mean(col) FROM VALUES (1), (2), (3) AS tab(col);
|      2.0|

SELECT mean(col) FROM VALUES (1), (2), (NULL) AS tab(col);
|      1.5|

-- min
SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col);
|      -1|

-- min_by
SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y);
|min_by(x, y)|
|           a|

-- percentile
SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col);
|percentile(col, CAST(0.3 AS DOUBLE), 1)|
|                                    3.0|

SELECT percentile(col, array(0.25, 0.75)) FROM VALUES (0), (10) AS tab(col);
|percentile(col, array(0.25, 0.75), 1)|
|                           [2.5, 7.5]|

-- percentile_approx
SELECT percentile_approx(10.0, array(0.5, 0.4, 0.1), 100);
|percentile_approx(10.0, array(0.5, 0.4, 0.1), 100)|
|                                [10.0, 10.0, 10.0]|

SELECT percentile_approx(10.0, 0.5, 100);
|percentile_approx(10.0, CAST(0.5 AS DOUBLE), 100)|
|                                             10.0|

-- skewness
SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col);
|skewness(CAST(col AS DOUBLE))|
|           1.1135657469022013|

SELECT skewness(col) FROM VALUES (-1000), (-100), (10), (20) AS tab(col);
|skewness(CAST(col AS DOUBLE))|
|          -1.1135657469022011|

-- some
SELECT some(col) FROM VALUES (true), (false), (false) AS tab(col);
|     true|

SELECT some(col) FROM VALUES (NULL), (true), (false) AS tab(col);
|     true|

SELECT some(col) FROM VALUES (false), (false), (NULL) AS tab(col);
|    false|

-- std
SELECT std(col) FROM VALUES (1), (2), (3) AS tab(col);
|std(CAST(col AS DOUBLE))|
|                     1.0|

-- stddev
SELECT stddev(col) FROM VALUES (1), (2), (3) AS tab(col);
|stddev(CAST(col AS DOUBLE))|
|                        1.0|

-- stddev_pop
SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col);
|stddev_pop(CAST(col AS DOUBLE))|
|              0.816496580927726|

-- stddev_samp
SELECT stddev_samp(col) FROM VALUES (1), (2), (3) AS tab(col);
|stddev_samp(CAST(col AS DOUBLE))|
|                             1.0|

-- sum
SELECT sum(col) FROM VALUES (5), (10), (15) AS tab(col);
|      30|

SELECT sum(col) FROM VALUES (NULL), (10), (15) AS tab(col);
|      25|

SELECT sum(col) FROM VALUES (NULL), (NULL) AS tab(col);
|    null|

-- var_pop
SELECT var_pop(col) FROM VALUES (1), (2), (3) AS tab(col);
|var_pop(CAST(col AS DOUBLE))|
|          0.6666666666666666|

-- var_samp
SELECT var_samp(col) FROM VALUES (1), (2), (3) AS tab(col);
|var_samp(CAST(col AS DOUBLE))|
|                          1.0|

-- variance
SELECT variance(col) FROM VALUES (1), (2), (3) AS tab(col);
|variance(CAST(col AS DOUBLE))|
|                          1.0|

Window Functions窗口函数

Computes the position of a value relative to all values in the partition.
Computes the rank of a value in a group of values. The result is one plus the previously assigned rank value. Unlike the function rank, dense_rank will not produce gaps in the ranking sequence.
lag(input[, offset[, default]])
Returns the value of input at the offset-th row before the current row in the window. The default value of offset is 1 and the default value of default is null. If the value of input at the offset-th row is null, null is returned. If there is no such offset row (e.g., when the offset is 1, the first row of the window does not have any previous row), default is returned.
lead(input[, offset[, default]])
Returns the value of input at the offset-th row after the current row in the window. The default value of offset is 1 and the default value of default is null. If the value of input at the offset-th row is null, null is returned. If there is no such offset row (e.g., when the offset is 1, the last row of the window does not have any subsequent row), default is returned.
Divides the rows for each window partition into n buckets ranging from 1 to at most n.
Computes the percentage ranking of a value in a group of values.
rank()
Computes the rank of a value in a group of values. The result is one plus the number of rows preceding or equal to the current row in the ordering of the partition. The values will produce gaps in the sequence.
Assigns a unique, sequential number to each row, starting with one, according to the ordering of rows within the window partition.

Array Functions数组函数

array_contains(array, value)
Returns true if the array contains the value.
Removes duplicate values from the array.
array_except(array1, array2)
Returns an array of the elements in array1 but not in array2, without duplicates.
array_intersect(array1, array2)
Returns an array of the elements in the intersection of array1 and array2, without duplicates.
array_join(array, delimiter[, nullReplacement])
Concatenates the elements of the given array using the delimiter and an optional string to replace nulls. If no value is set for nullReplacement, any null value is filtered.
Returns the maximum value in the array. NULL elements are skipped.
Returns the minimum value in the array. NULL elements are skipped.
array_position(array, element)
Returns the (1-based) index of the first occurrence of element in the array, as a long.
array_remove(array, element)
Removes all elements equal to element from the array.
array_repeat(element, count)
Returns the array containing element count times.
array_union(array1, array2)
Returns an array of the elements in the union of array1 and array2, without duplicates.
arrays_overlap(a1, a2)
Returns true if a1 contains at least one non-null element that is also present in a2. If the arrays have no common element, both are non-empty, and either of them contains a null element, null is returned; otherwise false is returned.
arrays_zip(a1, a2, …)
Returns a merged array of structs in which the N-th struct contains all N-th values of input arrays.
concat(col1, col2, …, colN)
Returns the concatenation of col1, col2, …, colN.
Transforms an array of arrays into a single array.
Returns a reversed string or an array with reverse order of elements.
sequence(start, stop, step)
Generates an array of elements from start to stop (inclusive), incrementing by step. The type of the returned elements is the same as the type of argument expressions. Supported types are: byte, short, integer, long, date, timestamp. The start and stop expressions must resolve to the same type. If start and stop expressions resolve to the ‘date’ or ‘timestamp’ type then the step expression must resolve to the ‘interval’ type, otherwise to the same type as the start and stop expressions.
Returns a random permutation of the given array.
slice(x, start, length)
Subsets array x starting from index start (array indices start at 1, or starting from the end if start is negative) with the specified length.
sort_array(array[, ascendingOrder])
Sorts the input array in ascending or descending order according to the natural ordering of the array elements. Null elements will be placed at the beginning of the returned array in ascending order or at the end of the returned array in descending order.

-- array_contains
SELECT array_contains(array(1, 2, 3), 2);
|array_contains(array(1, 2, 3), 2)|
|                             true|

-- array_distinct
SELECT array_distinct(array(1, 2, 3, null, 3));
|array_distinct(array(1, 2, 3, CAST(NULL AS INT), 3))|
|                                          [1, 2, 3,]|

-- array_except
SELECT array_except(array(1, 2, 3), array(1, 3, 5));
|array_except(array(1, 2, 3), array(1, 3, 5))|
|                                         [2]|

-- array_intersect
SELECT array_intersect(array(1, 2, 3), array(1, 3, 5));
|array_intersect(array(1, 2, 3), array(1, 3, 5))|
|                                         [1, 3]|

-- array_join
SELECT array_join(array('hello', 'world'), ' ');
|array_join(array(hello, world),  )|
|                       hello world|

SELECT array_join(array('hello', null ,'world'), ' ');
|array_join(array(hello, CAST(NULL AS STRING), world),  )|
|                                             hello world|

SELECT array_join(array('hello', null ,'world'), ' ', ',');
|array_join(array(hello, CAST(NULL AS STRING), world),  , ,)|
|                                              hello , world|

-- array_max
SELECT array_max(array(1, 20, null, 3));
|array_max(array(1, 20, CAST(NULL AS INT), 3))|
|                                           20|

-- array_min
SELECT array_min(array(1, 20, null, 3));
|array_min(array(1, 20, CAST(NULL AS INT), 3))|
|                                            1|

-- array_position
SELECT array_position(array(3, 2, 1), 1);
|array_position(array(3, 2, 1), 1)|
|                                3|

-- array_remove
SELECT array_remove(array(1, 2, 3, null, 3), 3);
|array_remove(array(1, 2, 3, CAST(NULL AS INT), 3), 3)|
|                                              [1, 2,]|

-- array_repeat
SELECT array_repeat('123', 2);
|array_repeat(123, 2)|
|          [123, 123]|

-- array_union
SELECT array_union(array(1, 2, 3), array(1, 3, 5));
|array_union(array(1, 2, 3), array(1, 3, 5))|
|                               [1, 2, 3, 5]|

-- arrays_overlap
SELECT arrays_overlap(array(1, 2, 3), array(3, 4, 5));
|arrays_overlap(array(1, 2, 3), array(3, 4, 5))|
|                                          true|

-- arrays_zip
SELECT arrays_zip(array(1, 2, 3), array(2, 3, 4));
|arrays_zip(array(1, 2, 3), array(2, 3, 4))|
|                      [[1, 2], [2, 3], ...|

SELECT arrays_zip(array(1, 2), array(2, 3), array(3, 4));
|arrays_zip(array(1, 2), array(2, 3), array(3, 4))|
|                             [[1, 2, 3], [2, 3...|

-- concat
SELECT concat('Spark', 'SQL');
|concat(Spark, SQL)|
|          SparkSQL|

SELECT concat(array(1, 2, 3), array(4, 5), array(6));
|concat(array(1, 2, 3), array(4, 5), array(6))|
|                           [1, 2, 3, 4, 5, 6]|

-- flatten
SELECT flatten(array(array(1, 2), array(3, 4)));
|flatten(array(array(1, 2), array(3, 4)))|
|                            [1, 2, 3, 4]|

-- reverse
SELECT reverse('Spark SQL');
|reverse(Spark SQL)|
|         LQS krapS|

SELECT reverse(array(2, 1, 4, 3));
|reverse(array(2, 1, 4, 3))|
|              [3, 4, 1, 2]|

-- sequence
SELECT sequence(1, 5);
| sequence(1, 5)|
|[1, 2, 3, 4, 5]|

SELECT sequence(5, 1);
| sequence(5, 1)|
|[5, 4, 3, 2, 1]|

SELECT sequence(to_date('2018-01-01'), to_date('2018-03-01'), interval 1 month);
|sequence(to_date('2018-01-01'), to_date('2018-03-01'), INTERVAL '1 months')|
|                                                       [2018-01-01, 2018...|

-- shuffle
SELECT shuffle(array(1, 20, 3, 5));
|shuffle(array(1, 20, 3, 5))|
|              [5, 3, 20, 1]|

SELECT shuffle(array(1, 20, null, 3));
|shuffle(array(1, 20, CAST(NULL AS INT), 3))|
|                                [20, 1,, 3]|

-- slice
SELECT slice(array(1, 2, 3, 4), 2, 2);
|slice(array(1, 2, 3, 4), 2, 2)|
|                        [2, 3]|

SELECT slice(array(1, 2, 3, 4), -2, 2);
|slice(array(1, 2, 3, 4), -2, 2)|
|                         [3, 4]|

-- sort_array
SELECT sort_array(array('b', 'd', null, 'c', 'a'), true);
|sort_array(array(b, d, CAST(NULL AS STRING), c, a), true)|
|                                           [, a, b, c, d]|

Map Functions Map函数

map_concat(map, …)
Returns the union of all the given maps.
Returns an unordered array of all entries in the given map.
Returns a map created from the given array of entries.
Returns an unordered array containing the keys of the map.
Returns an unordered array containing the values of the map.

-- map_concat
SELECT map_concat(map(1, 'a', 2, 'b'), map(3, 'c'));
|map_concat(map(1, a, 2, b), map(3, c))|
|                  [1 -> a, 2 -> b, ...|

-- map_entries
SELECT map_entries(map(1, 'a', 2, 'b'));
|map_entries(map(1, a, 2, b))|
|            [[1, a], [2, b]]|

-- map_from_entries
SELECT map_from_entries(array(struct(1, 'a'), struct(2, 'b')));
|map_from_entries(array(struct(1, a), struct(2, b)))|
|                                   [1 -> a, 2 -> b]|

-- map_keys
SELECT map_keys(map(1, 'a', 2, 'b'));
|map_keys(map(1, a, 2, b))|
|                   [1, 2]|

-- map_values
SELECT map_values(map(1, 'a', 2, 'b'));
|map_values(map(1, a, 2, b))|
|                     [a, b]|

Date and Timestamp Functions日期函数

add_months(start_date, num_months)
Returns the date that is num_months after start_date.
current_date()
Returns the current date at the start of query evaluation.
current_date
Returns the current date at the start of query evaluation.
current_timestamp()
Returns the current timestamp at the start of query evaluation.
current_timestamp
Returns the current timestamp at the start of query evaluation.
date_add(start_date, num_days)
Returns the date that is num_days after start_date.
date_format(timestamp, fmt)
Converts timestamp to a value of string in the format specified by the date format fmt.
date_part(field, source)
Extracts a part of the date/timestamp or interval source.
date_sub(start_date, num_days)
Returns the date that is num_days before start_date.
date_trunc(fmt, ts)
Returns timestamp ts truncated to the unit specified by the format model fmt.
datediff(endDate, startDate)
Returns the number of days from startDate to endDate.
Returns the day of the week for date/timestamp (1 = Sunday, 2 = Monday, …, 7 = Saturday).
Returns the day of year of the date/timestamp.
from_unixtime(unix_time, format)
Returns unix_time in the specified format.
from_utc_timestamp(timestamp, timezone)
Given a timestamp like ‘2017-07-14 02:40:00.0’, interprets it as a time in UTC, and renders that time as a timestamp in the given time zone. For example, ‘GMT+1’ would yield ‘2017-07-14 03:40:00.0’.
Returns the hour component of the string/timestamp.
Returns the last day of the month which the date belongs to.
make_date(year, month, day)
Create date from year, month and day fields.
make_timestamp(year, month, day, hour, min, sec[, timezone])
Create timestamp from year, month, day, hour, min, sec and timezone fields.
Returns the minute component of the string/timestamp.
Returns the month component of the date/timestamp.
months_between(timestamp1, timestamp2[, roundOff])
Returns the number of months between timestamp1 and timestamp2. If timestamp1 is later than timestamp2, then the result is positive. If timestamp1 and timestamp2 are on the same day of month, or both are the last day of month, time of day will be ignored. Otherwise, the difference is calculated based on 31 days per month, and rounded to 8 digits unless roundOff=false.
next_day(start_date, day_of_week)
Returns the first date which is later than start_date and named as indicated.
Returns the current timestamp at the start of query evaluation.
Returns the quarter of the year for date, in the range 1 to 4.
Returns the second component of the string/timestamp.
to_date(date_str[, fmt])
Parses the date_str expression with the fmt expression to a date. Returns null with invalid input. By default, it follows casting rules to a date if the fmt is omitted.
to_timestamp(timestamp_str[, fmt])
Parses the timestamp_str expression with the fmt expression to a timestamp. Returns null with invalid input. By default, it follows casting rules to a timestamp if the fmt is omitted.
to_unix_timestamp(timeExp[, format])
Returns the UNIX timestamp of the given time.
to_utc_timestamp(timestamp, timezone)
Given a timestamp like ‘2017-07-14 02:40:00.0’, interprets it as a time in the given time zone, and renders that time as a timestamp in UTC. For example, ‘GMT+1’ would yield ‘2017-07-14 01:40:00.0’.
trunc(date, fmt)
Returns date with the time portion of the day truncated to the unit specified by the format model fmt.
unix_timestamp([timeExp[, format]])
Returns the UNIX timestamp of current or specified time.
Returns the day of the week for date/timestamp (0 = Monday, 1 = Tuesday, …, 6 = Sunday).
Returns the week of the year of the given date. A week is considered to start on a Monday and week 1 is the first week with >3 days.
Returns the year component of the date/timestamp.

-- add_months
SELECT add_months('2016-08-31', 1);
|add_months(CAST(2016-08-31 AS DATE), 1)|
|                             2016-09-30|

-- current_date
SELECT current_date();
|    2020-08-28|

SELECT current_date;
|    2020-08-28|

-- current_timestamp
SELECT current_timestamp();
| current_timestamp()|
|2020-08-28 10:16:...|

SELECT current_timestamp;
| current_timestamp()|
|2020-08-28 10:16:...|

-- date_add
SELECT date_add('2016-07-30', 1);
|date_add(CAST(2016-07-30 AS DATE), 1)|
|                           2016-07-31|

-- date_format
SELECT date_format('2016-04-08', 'y');
|date_format(CAST(2016-04-08 AS TIMESTAMP), y)|
|                                         2016|

-- date_part
SELECT date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456');
|date_part('YEAR', TIMESTAMP '2019-08-12 01:00:00.123456')|
|                                                     2019|

SELECT date_part('week', timestamp'2019-08-12 01:00:00.123456');
|date_part('week', TIMESTAMP '2019-08-12 01:00:00.123456')|
|                                                       33|

SELECT date_part('doy', DATE'2019-08-12');
|date_part('doy', DATE '2019-08-12')|
|                                224|

SELECT date_part('SECONDS', timestamp'2019-10-01 00:00:01.000001');
|date_part('SECONDS', TIMESTAMP '2019-10-01 00:00:01.000001')|
|                                                    1.000001|

SELECT date_part('days', interval 1 year 10 months 5 days);
|date_part('days', INTERVAL '1 years 10 months 5 days')|
|                                                     5|

SELECT date_part('seconds', interval 5 hours 30 seconds 1 milliseconds 1 microseconds);
|date_part('seconds', INTERVAL '5 hours 30.001001 seconds')|
|                                                 30.001001|

-- date_sub
SELECT date_sub('2016-07-30', 1);
|date_sub(CAST(2016-07-30 AS DATE), 1)|
|                           2016-07-29|

-- date_trunc
SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359');
|date_trunc(YEAR, CAST(2015-03-05T09:32:05.359 AS TIMESTAMP))|
|                                         2015-01-01 00:00:00|

SELECT date_trunc('MM', '2015-03-05T09:32:05.359');
|date_trunc(MM, CAST(2015-03-05T09:32:05.359 AS TIMESTAMP))|
|                                       2015-03-01 00:00:00|

SELECT date_trunc('DD', '2015-03-05T09:32:05.359');
|date_trunc(DD, CAST(2015-03-05T09:32:05.359 AS TIMESTAMP))|
|                                       2015-03-05 00:00:00|

SELECT date_trunc('HOUR', '2015-03-05T09:32:05.359');
|date_trunc(HOUR, CAST(2015-03-05T09:32:05.359 AS TIMESTAMP))|
|                                         2015-03-05 09:00:00|

SELECT date_trunc('MILLISECOND', '2015-03-05T09:32:05.123456');
|date_trunc(MILLISECOND, CAST(2015-03-05T09:32:05.123456 AS TIMESTAMP))|
|                                                  2015-03-05 09:32:...|

-- datediff
SELECT datediff('2009-07-31', '2009-07-30');
|datediff(CAST(2009-07-31 AS DATE), CAST(2009-07-30 AS DATE))|
|                                                           1|

SELECT datediff('2009-07-30', '2009-07-31');
|datediff(CAST(2009-07-30 AS DATE), CAST(2009-07-31 AS DATE))|
|                                                          -1|

-- dayofweek
SELECT dayofweek('2009-07-30');
|dayofweek(CAST(2009-07-30 AS DATE))|
|                                  5|

-- dayofyear
SELECT dayofyear('2016-04-09');
|dayofyear(CAST(2016-04-09 AS DATE))|
|                                100|

-- from_unixtime
SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss');
|from_unixtime(CAST(0 AS BIGINT), yyyy-MM-dd HH:mm:ss)|
|                                  1970-01-01 00:00:00|

-- from_utc_timestamp
SELECT from_utc_timestamp('2016-08-31', 'Asia/Seoul');
|from_utc_timestamp(CAST(2016-08-31 AS TIMESTAMP), Asia/Seoul)|
|                                          2016-08-31 09:00:00|

-- hour
SELECT hour('2009-07-30 12:58:59');
|hour(CAST(2009-07-30 12:58:59 AS TIMESTAMP))|
|                                          12|

-- last_day
SELECT last_day('2009-01-12');
|last_day(CAST(2009-01-12 AS DATE))|
|                        2009-01-31|

-- make_date
SELECT make_date(2013, 7, 15);
|make_date(2013, 7, 15)|
|            2013-07-15|

SELECT make_date(2019, 13, 1);
|make_date(2019, 13, 1)|
|                  null|

SELECT make_date(2019, 7, NULL);
|make_date(2019, 7, CAST(NULL AS INT))|
|                                 null|

SELECT make_date(2019, 2, 30);
|make_date(2019, 2, 30)|
|                  null|

-- make_timestamp
SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887);
|make_timestamp(2014, 12, 28, 6, 30, CAST(45.887 AS DECIMAL(8,6)))|
|                                             2014-12-28 06:30:...|

SELECT make_timestamp(2014, 12, 28, 6, 30, 45.887, 'CET');
|make_timestamp(2014, 12, 28, 6, 30, CAST(45.887 AS DECIMAL(8,6)), CET)|
|                                                  2014-12-28 05:30:...|

SELECT make_timestamp(2019, 6, 30, 23, 59, 60);
|make_timestamp(2019, 6, 30, 23, 59, CAST(60 AS DECIMAL(8,6)))|
|                                          2019-07-01 00:00:00|

SELECT make_timestamp(2019, 13, 1, 10, 11, 12, 'PST');
|make_timestamp(2019, 13, 1, 10, 11, CAST(12 AS DECIMAL(8,6)), PST)|
|                                                              null|

SELECT make_timestamp(null, 7, 22, 15, 30, 0);
|make_timestamp(CAST(NULL AS INT), 7, 22, 15, 30, CAST(0 AS DECIMAL(8,6)))|
|                                                                     null|

-- minute
SELECT minute('2009-07-30 12:58:59');
|minute(CAST(2009-07-30 12:58:59 AS TIMESTAMP))|
|                                            58|

-- month
SELECT month('2016-07-30');
|month(CAST(2016-07-30 AS DATE))|
|                              7|

-- months_between
SELECT months_between('1997-02-28 10:30:00', '1996-10-30');
|months_between(CAST(1997-02-28 10:30:00 AS TIMESTAMP), CAST(1996-10-30 AS TIMESTAMP), true)|
|                                                                                 3.94959677|

SELECT months_between('1997-02-28 10:30:00', '1996-10-30', false);
|months_between(CAST(1997-02-28 10:30:00 AS TIMESTAMP), CAST(1996-10-30 AS TIMESTAMP), false)|
|                                                                          3.9495967741935485|

-- next_day
SELECT next_day('2015-01-14', 'TU');
|next_day(CAST(2015-01-14 AS DATE), TU)|
|                            2015-01-20|

-- now
SELECT now();
|               now()|
|2020-08-28 10:16:...|

-- quarter
SELECT quarter('2016-08-31');
|quarter(CAST(2016-08-31 AS DATE))|
|                                3|

-- second
SELECT second('2009-07-30 12:58:59');
|second(CAST(2009-07-30 12:58:59 AS TIMESTAMP))|
|                                            59|

-- to_date
SELECT to_date('2009-07-30 04:17:52');
|to_date('2009-07-30 04:17:52')|
|                    2009-07-30|

SELECT to_date('2016-12-31', 'yyyy-MM-dd');
|to_date('2016-12-31', 'yyyy-MM-dd')|
|                         2016-12-31|

-- to_timestamp
SELECT to_timestamp('2016-12-31 00:12:00');
|to_timestamp('2016-12-31 00:12:00')|
|                2016-12-31 00:12:00|

SELECT to_timestamp('2016-12-31', 'yyyy-MM-dd');
|to_timestamp('2016-12-31', 'yyyy-MM-dd')|
|                     2016-12-31 00:00:00|

-- to_unix_timestamp
SELECT to_unix_timestamp('2016-04-08', 'yyyy-MM-dd');
|to_unix_timestamp(2016-04-08, yyyy-MM-dd)|
|                               1460073600|

-- to_utc_timestamp
SELECT to_utc_timestamp('2016-08-31', 'Asia/Seoul');
|to_utc_timestamp(CAST(2016-08-31 AS TIMESTAMP), Asia/Seoul)|
|                                        2016-08-30 15:00:00|

-- trunc
SELECT trunc('2019-08-04', 'week');
|trunc(CAST(2019-08-04 AS DATE), week)|
|                           2019-07-29|

SELECT trunc('2019-08-04', 'quarter');
|trunc(CAST(2019-08-04 AS DATE), quarter)|
|                              2019-07-01|

SELECT trunc('2009-02-12', 'MM');
|trunc(CAST(2009-02-12 AS DATE), MM)|
|                         2009-02-01|

SELECT trunc('2015-10-27', 'YEAR');
|trunc(CAST(2015-10-27 AS DATE), YEAR)|
|                           2015-01-01|

-- unix_timestamp
SELECT unix_timestamp();
|unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss)|
|                                              1598609785|

SELECT unix_timestamp('2016-04-08', 'yyyy-MM-dd');
|unix_timestamp(2016-04-08, yyyy-MM-dd)|
|                            1460073600|

-- weekday
SELECT weekday('2009-07-30');
|weekday(CAST(2009-07-30 AS DATE))|
|                                3|

-- weekofyear
SELECT weekofyear('2008-02-20');
|weekofyear(CAST(2008-02-20 AS DATE))|
|                                   8|

-- year
SELECT year('2016-07-30');
|year(CAST(2016-07-30 AS DATE))|
|                          2016|

JSON Functions JSON函数

from_json(jsonStr, schema[, options])
Returns a struct value with the given jsonStr and schema.
get_json_object(json_txt, path)
Extracts a JSON object from json_txt at the given path.
json_tuple(jsonStr, p1, p2, …, pn)
Returns a tuple like the function get_json_object, but it takes multiple names. All the input parameters and output column types are string.
schema_of_json(json[, options])
Returns the schema of a JSON string in DDL format.
to_json(expr[, options])
Returns a JSON string with a given struct value.

-- from_json
SELECT from_json('{"a":1, "b":0.8}', 'a INT, b DOUBLE');
|from_json({"a":1, "b":0.8})|
|                   [1, 0.8]|

SELECT from_json('{"time":"26/08/2015"}', 'time Timestamp', map('timestampFormat', 'dd/MM/yyyy'));
|from_json({"time":"26/08/2015"})|
|            [2015-08-26 00:00...|

-- get_json_object
SELECT get_json_object('{"a":"b"}', '$.a');
|get_json_object({"a":"b"}, $.a)|
|                              b|

-- json_tuple
SELECT json_tuple('{"a":1, "b":2}', 'a', 'b');
| c0| c1|
|  1|  2|

-- schema_of_json
SELECT schema_of_json('[{"col":0}]');
|schema_of_json([{"col":0}])|
|       array<struct<col:...|

SELECT schema_of_json('[{"col":01}]', map('allowNumericLeadingZeros', 'true'));
|schema_of_json([{"col":01}])|
|        array<struct<col:...|

-- to_json
SELECT to_json(named_struct('a', 1, 'b', 2));
|to_json(named_struct(a, 1, b, 2))|
|                    {"a":1,"b":2}|

SELECT to_json(named_struct('time', to_timestamp('2015-08-26', 'yyyy-MM-dd')), map('timestampFormat', 'dd/MM/yyyy'));
|to_json(named_struct(time, to_timestamp('2015-08-26', 'yyyy-MM-dd')))|
|                                                 {"time":"26/08/20...|

SELECT to_json(array(named_struct('a', 1, 'b', 2)));
|to_json(array(named_struct(a, 1, b, 2)))|
|                         [{"a":1,"b":2}]|

SELECT to_json(map('a', named_struct('b', 1)));
|to_json(map(a, named_struct(b, 1)))|
|                      {"a":{"b":1}}|

SELECT to_json(map(named_struct('a', 1),named_struct('b', 2)));
|to_json(map(named_struct(a, 1), named_struct(b, 2)))|
|                                     {"[1]":{"b":2}}|

SELECT to_json(map('a', 1));
|to_json(map(a, 1))|
|           {"a":1}|

SELECT to_json(array((map('a', 1))));
|to_json(array(map(a, 1)))|
|                [{"a":1}]|
