ClickHouse函数用法+操作集合

1.算数函数

–求和

SELECT plus(9, 88)as "plus 9+88", plus(10, -10) as "plus 10-10", plus(-10, -10) as " plus-10-10";
┌─plus 9+88─┬─plus 10-10─┬─ plus-10-10─┐
│        970-20 │
└───────────┴────────────┴─────────────┘

–差值

SELECT minus(10, 5)as "minus 10-5 ", minus(10, -10)as "minus 10-(-10)",minus(-10, -10)as "minus -10-(-10)";
┌─minus 10-5 ─┬─minus 10-(-10)─┬─minus -10-(-10)─┐
│           5200  │
└─────────────┴────────────────┴────────────-────┘

–积

SELECT multiply(12, 2), multiply(12, -2), multiply(-12, -2);
┌─multiply(12, 2)─┬─multiply(12, -2)─┬─multiply(-12, -2)─┐
│              24-2424 │
└─────────────────┴──────────────────┴───────────────────┘

–平均值

SELECT divide(12, 4), divide(10, 3), divide(2, 4), divide(-4, -2), divide(-4, 2), divide(-4.5, 3);
┌─divide(12, 4)─┬──────divide(10, 3)─┬─divide(2, 4)─┬─divide(-4, -2)─┬─divide(-4, 2)─┬─divide(-4.5, 3)─┐
│             33.33333333333333350.52-2-1.5 │
└───────────────┴────────────────────┴──────────────┴────────────────┴───────────────┴─────────────────┘
SELECT intDiv(10, 3), divide(10, 3); 
┌─intDiv(10, 3)─┬──────divide(10, 3)─┐
│             33.3333333333333335 │
└───────────────┴────────────────────┘

SELECT divide(10, 0), divide(-10, 0); -- 出现无穷大字符“ ∞ ”或“ -∞ ”
┌─divide(10, 0)─┬─divide(-10, 0)─┐
│           inf │           -inf │
└───────────────┴────────────────┘

SELECT divide(0, 0); -- 特殊字符(类似乱码)
┌─divide(0, 0)─┐
│          nan │
└──────────────┘

SELECT intDivOrZero(10, 0); -- 0
┌─intDivOrZero(10, 0)─┐
│                   0 │
└─────────────────────┘

–求余数

SELECT modulo(10, 3); 
┌─modulo(10, 3)─┐
│             1 │
└───────────────┘

SELECT modulo(10.5, 3); 
┌─modulo(10.5, 3)─┐
│             1.5 │
└─────────────────┘

–取反

SELECT negate(10), negate(-10); 
┌─negate(10)─┬─negate(-10)─┐
│        -1010 │
└────────────┴─────────────┘

–绝对值

SELECT abs(-10), abs(10);
┌─abs(-10)─┬─abs(10)─┐
│       1010 │
└──────────┴─────────┘

–最大公约数

SELECT gcd(12, 24), gcd(-12, -24), gcd(-12, 24);
┌─gcd(12, 24)─┬─gcd(-12, -24)─┬─gcd(-12, 24)─┐
│          121212 │
└─────────────┴───────────────┴──────────────┘

–最小公倍数

SELECT lcm(12, 24), lcm(-12, -24), lcm(-3, 4);
┌─lcm(12, 24)─┬─lcm(-12, -24)─┬─lcm(-3, 4)─┐
│          242412 │
└─────────────┴───────────────┴────────────┘

2.比较函数

–>>>>>> 比较函数(始终返回0表示false 或 1表示true)

SELECT 12 == 12, 12 != 10, 12 == 132, 12 != 12, 12 <> 12;
┌─equals(12, 12)─┬─notEquals(12, 10)─┬─equals(12, 132)─┬─notEquals(12, 12)─┬─notEquals(12, 12)─┐
│              11000 │
└────────────────┴───────────────────┴─────────────────┴───────────────────┴───────────────────┘

SELECT equals(12, 12), notEquals(12, 10), equals(12, 10), notEquals(12,123);
┌─equals(12, 12)─┬─notEquals(12, 10)─┬─equals(12, 10)─┬─notEquals(12, 123)─┐
│              1101 │
└────────────────┴───────────────────┴────────────────┴────────────────────┘

SELECT greater(12, 10), greater(10, 12), greater(12, 12);
┌─greater(12, 10)─┬─greater(10, 12)─┬─greater(12, 12)─┐
│               100 │
└─────────────────┴─────────────────┴─────────────────┘

SELECT greaterOrEquals(12,10), greaterOrEquals(12,12);
┌─greaterOrEquals(12, 10)─┬─greaterOrEquals(12, 12)─┐
│                       11 │
└─────────────────────────┴─────────────────────────┘

SELECT less(12, 21), less(12, 10), less(120, 120);
┌─less(12, 21)─┬─less(12, 10)─┬─less(120, 120)─┐
│            100 │
└──────────────┴──────────────┴────────────────┘

SELECT lessOrEquals(12, 120), lessOrEquals(12, 12);
┌─lessOrEquals(12, 120)─┬─lessOrEquals(12, 12)─┐
│                     11 │
└───────────────────────┴──────────────────────┘

3.逻辑函数

–>>>>>> 逻辑操作符(返回0表示false 或 1表示true)

SELECT 12==12 or 12!=10;
SELECT 12==12 and 12!=10;
SELECT not 12, not 0;
SELECT or(equals(12, 12), notEquals(12, 10)); --函数表示法:或
SELECT and(equals(12, 12), notEquals(12, 10));--函数表示法:且
SELECT not(12), not(0);

4.类型转换函数

SELECT toInt8(12.3334343), toFloat32(10.001), toFloat64(1.000040);
SELECT toString(now());
SELECT now() AS now_local, toString(now(), 'Asia/Yekaterinburg') AS now_yekat;
SELECT now() AS now_local, toDate(now()), toDateTime(now()), toUnixTimestamp(now());

SELECT '2021-03-31 16:00:00' AS timestamp, CAST(timestamp AS DateTime) AS datetime, CAST(timestamp AS Date) AS date, CAST(timestamp, 'String') AS string, CAST(timestamp, 'FixedString(22)') AS fixed_string;
┌─timestamp───────────┬────────────datetime─┬───────date─┬─string──────────────┬─fixed_string────────┐
│ 2021-03-31 16:00:002021-03-31 16:00:002021-03-312021-03-31 16:00:002021-03-31 16:00:00 │
└─────────────────────┴─────────────────────┴────────────┴─────────────────────┴─────────────────────┘


WITH 
    toDate('2021-03-31') AS date,
    toIntervalWeek(1) AS interval_week,
    toIntervalWeek(1) AS interval_to_week,
    toIntervalMonth(1) AS interval_to_month
SELECT 
    date + interval_week,
    date + interval_to_week,
    date + interval_to_month;

┌─plus(date, interval_week)─┬─plus(date, interval_to_week)─┬─plus(date, interval_to_month)─┐
│                2021-04-072021-04-072021-04-30 │
└───────────────────────────┴──────────────────────────────┴───────────────────────────────┘

WITH 
    toDateTime('2021-03-31 12:10:10') AS datetime,
    toIntervalHour(1) AS interval_hour,
    toIntervalHour(1) AS invterval_to_hour
SELECT 
    datetime + interval_hour,
    datetime + invterval_to_hour;

┌─plus(datetime, interval_hour)─┬─plus(datetime, invterval_to_hour)─┐
│           2021-03-31 13:10:102021-03-31 13:10:10 │
└───────────────────────────────┴───────────────────────────────────┘

5.时间日期函数

SELECT
toDateTime('2019-07-30 10:10:10') AS time,
toUnixTimestamp(time) as unixTimestamp,
toDate(time) as date_local,
toTime(time) as date_time,
toMonth(time) as get_month,
toQuarter(time) as get_quarter,
toHour(time) as get_hour,
toMinute(time) as get_minute,
toSecond(time) as get_second,
toDayOfYear(time) as "当前年份中的第几天",
toDayOfMonth(time) as "当前月份的第几天",
toDayOfWeek(time) as "星期",
toDate(time, 'Asia/Shanghai') AS date_shanghai,
toDateTime(time, 'Asia/Shanghai') AS time_shanghai,
toStartOfYear(time),
toStartOfMonth(time),
toStartOfQuarter(time),
toStartOfDay(time) AS cur_start_daytime,
toStartOfHour(time) as cur_start_hour,
toStartOfMinute(time) AS cur_start_minute,
toRelativeYearNum(time),
toRelativeQuarterNum(time)\G

Row 1:
──────
time:                                                    2021-03-31 10:10:10
unixTimestamp:                                           1617156610
date_local:                                              2021-03-31
date_time:                                               1970-01-02 10:10:10
get_month:                                               3
get_quarter:                                             1
get_hour:                                                10
get_minute:                                              10
get_second:                                              10
当前年份中的第几天:                                       90
当前月份的第几天:                                         31
星期:                                                    3
date_shanghai:                                           2021-03-31
time_shanghai:                                           2021-03-31 10:10:10
toStartOfYear(toDateTime('2021-03-31 10:10:10')):        2021-01-01
toStartOfMonth(toDateTime('2021-03-31 10:10:10')):       2021-03-01
toStartOfQuarter(toDateTime('2021-03-31 10:10:10')):     2021-01-01
cur_start_daytime:                                       2021-03-31 00:00:00
cur_start_hour:                                          2021-03-31 10:00:00
cur_start_minute:                                        2021-03-31 10:10:00
toRelativeYearNum(toDateTime('2021-03-31 10:10:10')):    2021
toRelativeQuarterNum(toDateTime('2021-03-31 10:10:10')): 8084


SELECT
toDateTime('2019-07-30 14:27:30') as time,
toISOYear(time) AS iso_year,
toISOWeek(time) AS iso_week,
now() AS cur_dateTime1, 
today() AS cur_dateTime2,
yesterday() AS yesterday,
toDate(time) as getY_M_d;

┌────────────────time─┬─iso_year─┬─iso_week─┬───────cur_dateTime1─┬─cur_dateTime2─┬──yesterday─┬───getY_M_d─┐
│ 2019-07-30 14:27:302019312021-03-31 16:48:132021-03-312021-03-302019-07-30 │
└─────────────────────┴──────────┴──────────┴─────────────────────┴───────────────┴────────────┴────────────┘


SELECT
now() as nowTime,
toYYYYMMDDhhmmss(nowTime),
toYYYYMMDD(nowTime),
toYYYYMM(nowTime);

┌─────────────nowTime─┬─toYYYYMMDDhhmmss(now())─┬─toYYYYMMDD(now())─┬─toYYYYMM(now())─┐
│ 2021-03-31 16:48:482021033116484820210331202103 │
└─────────────────────┴─────────────────────────┴───────────────────┴─────────────────┘


SELECT
now() as now_time,
toDateTime('2019-07-31 18:20:30') AS def_datetime,
formatDateTime(now_time, '%D') AS now_time_day_month_year,
formatDateTime(def_datetime, '%Y') AS def_datetime_year, 
formatDateTime(def_datetime, '%y') AS def_datetime_year_litter, 
formatDateTime(def_datetime, '%H') AS hour24,
formatDateTime(def_datetime, '%I') AS hour12, 
formatDateTime(def_datetime, '%p') AS PMorAM, 
formatDateTime(def_datetime, '%w') AS def_datetime_get_curWeek,
formatDateTime(def_datetime, '%F') AS def_datetime_get_date,
formatDateTime(def_datetime, '%T') AS def_datetime_get_time,
formatDateTime(def_datetime, '%M') AS def_datetime_get_minute,
formatDateTime(def_datetime, '%S') AS def_datetime_get_second;

Row 1:
──────
now_time:                 2021-03-31 16:51:02
def_datetime:             2019-07-31 18:20:30
now_time_day_month_year:  03/31/21
def_datetime_year:        2019
def_datetime_year_litter: 19
hour24:                   18
hour12:                   06
PMorAM:                   PM
def_datetime_get_curWeek: 3
def_datetime_get_date:    2019-07-31
def_datetime_get_time:    18:20:30
def_datetime_get_minute:  20
def_datetime_get_second:  30

– 1.跳转到之后的日期函数
– 第一种,日期格式(指定日期,需注意时区的问题)

WITH 
    toDate('2021-03-29') AS date,
    toDateTime('2021-03-29 00:00:00') AS date_time
SELECT 
    addYears(date, 1) AS add_years_with_date,
    addYears(date_time, 0) AS add_years_with_date_time;

┌─add_years_with_date─┬─add_years_with_date_time─┐
│          2022-03-292021-03-29 00:00:00 │
└─────────────────────┴──────────────────────────┘

– 第二种,日期格式(当前,本地时间)

WITH
toDate(now()) as date,
toDateTime(now()) as date_time
SELECT
now() as now_time,
addYears(date, 1) AS add_years_with_date,
addYears(date_time, 1) AS add_years_with_date_time,
addMonths(date, 1) AS add_months_with_date,
addMonths(date_time, 1) AS add_months_with_date_time,
addWeeks(date, 1) AS add_weeks_with_date,
addWeeks(date_time, 1) AS add_weeks_with_date_time,
addDays(date, 1) AS add_days_with_date,
addDays(date_time, 1) AS add_days_with_date_time,
addHours(date_time, 1) AS add_hours_with_date_time,
addMinutes(date_time, 1) AS add_minutes_with_date_time,
addSeconds(date_time, 10) AS add_seconds_with_date_time,
addQuarters(date, 1) AS add_quarters_with_date, 
addQuarters(date_time, 1) AS add_quarters_with_date_time;

Row 1:
──────
now_time:                    2021-03-31 16:53:59
add_years_with_date:         2022-03-31
add_years_with_date_time:    2022-03-31 16:53:59
add_months_with_date:        2021-04-30
add_months_with_date_time:   2021-04-30 16:53:59
add_weeks_with_date:         2021-04-07
add_weeks_with_date_time:    2021-04-07 16:53:59
add_days_with_date:          2021-04-01
add_days_with_date_time:     2021-04-01 16:53:59
add_hours_with_date_time:    2021-03-31 17:53:59
add_minutes_with_date_time:  2021-03-31 16:54:59
add_seconds_with_date_time:  2021-03-31 16:54:09
add_quarters_with_date:      2021-06-30
add_quarters_with_date_time: 2021-06-30 16:53:59

– 2.跳转到当前日期之前的函数(函数将Date/DateTime减去一段时间间隔,然后返回Date/DateTime)

WITH
toDate(now()) as date,
toDateTime(now()) as date_time
SELECT
subtractYears(date, 1) AS subtract_years_with_date,
subtractYears(date_time, 1) AS subtract_years_with_date_time,
subtractQuarters(date, 1) AS subtract_Quarters_with_date,
subtractQuarters(date_time, 1) AS subtract_Quarters_with_date_time,
subtractMonths(date, 1) AS subtract_Months_with_date,
subtractMonths(date_time, 1) AS subtract_Months_with_date_time,
subtractWeeks(date, 1) AS subtract_Weeks_with_date,
subtractWeeks(date_time, 1) AS subtract_Weeks_with_date_time,
subtractDays(date, 1) AS subtract_Days_with_date,
subtractDays(date_time, 1) AS subtract_Days_with_date_time,
subtractHours(date_time, 1) AS subtract_Hours_with_date_time,
subtractMinutes(date_time, 1) AS subtract_Minutes_with_date_time,
subtractSeconds(date_time, 1) AS subtract_Seconds_with_date_time;

Row 1:
──────
subtract_years_with_date:         2020-03-31
subtract_years_with_date_time:    2020-03-31 16:59:16
subtract_Quarters_with_date:      2020-12-31
subtract_Quarters_with_date_time: 2020-12-31 16:59:16
subtract_Months_with_date:        2021-02-28
subtract_Months_with_date_time:   2021-02-28 16:59:16
subtract_Weeks_with_date:         2021-03-24
subtract_Weeks_with_date_time:    2021-03-24 16:59:16
subtract_Days_with_date:          2021-03-30
subtract_Days_with_date_time:     2021-03-30 16:59:16
subtract_Hours_with_date_time:    2021-03-31 15:59:16
subtract_Minutes_with_date_time:  2021-03-31 16:58:16
subtract_Seconds_with_date_time:  2021-03-31 16:59:15

– 计算连个时刻在不同时间单位下的差值
– 第一种:指定时间计算差值示例

WITH
toDateTime('2019-07-30 10:10:10', 'Asia/Shanghai') as date_shanghai_one,
toDateTime('2020-10-31 11:20:30', 'Asia/Shanghai') as date_shanghai_two
SELECT
dateDiff('year', date_shanghai_one, date_shanghai_two) as diff_years,
dateDiff('month', date_shanghai_one, date_shanghai_two) as diff_months,
dateDiff('week', date_shanghai_one, date_shanghai_two) as diff_week,
dateDiff('day', date_shanghai_one, date_shanghai_two) as diff_days,
dateDiff('hour', date_shanghai_one, date_shanghai_two) as diff_hours,
dateDiff('minute', date_shanghai_one, date_shanghai_two) as diff_minutes,
dateDiff('second', date_shanghai_one, date_shanghai_two) as diff_seconds;

┌─diff_years─┬─diff_months─┬─diff_week─┬─diff_days─┬─diff_hours─┬─diff_minutes─┬─diff_seconds─┐
│          115654591101766103039661820 │
└────────────┴─────────────┴───────────┴───────────┴────────────┴──────────────┴──────────────┘

– 第二种:本地当前时间示例

WITH 
    toDateTime('2020-07-30 10:10:10', 'Asia/Shanghai') AS date_shanghai_one,
    toDateTime('2021-03-31 11:20:30', 'Asia/Shanghai') AS date_shanghai_two
SELECT 
    dateDiff('year', date_shanghai_one, date_shanghai_two) AS diff_years,
    dateDiff('month', date_shanghai_one, date_shanghai_two) AS diff_months,
    dateDiff('week', date_shanghai_one, date_shanghai_two) AS diff_week,
    dateDiff('day', date_shanghai_one, date_shanghai_two) AS diff_days,
    dateDiff('hour', date_shanghai_one, date_shanghai_two) AS diff_hours,
    dateDiff('minute', date_shanghai_one, date_shanghai_two) AS diff_minutes,
    dateDiff('second', date_shanghai_one, date_shanghai_two) AS diff_seconds;

┌─diff_years─┬─diff_months─┬─diff_week─┬─diff_days─┬─diff_hours─┬─diff_minutes─┬─diff_seconds─┐
│          1835244585735143021085820 │
└────────────┴─────────────┴───────────┴───────────┴────────────┴──────────────┴──────────────┘

– timeSlot(StartTime, Duration, [,Size])
– 它返回一个时间数组,其中包括从从“StartTime”开始到“StartTime + Duration 秒”内的所有符合“size”(以秒为单位)步长的时间点
– 作用:搜索在相应会话中综合浏览量是非常有用的。

SELECT 
    timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600)) AS dateTimeArray,
    dateTimeArray[0] AS arr_index_0,
    dateTimeArray[1] AS arr_index_1,
    dateTimeArray[2] AS arr_index_2,
    dateTimeArray[3] AS arr_index_3,
    dateTimeArray[4] AS arr_index_4

Row 1:
──────
dateTimeArray: ['2012-01-01 12:00:00','2012-01-01 12:30:00']
arr_index_0:   1970-01-01 08:00:00
arr_index_1:   2012-01-01 12:00:00
arr_index_2:   2012-01-01 12:30:00
arr_index_3:   1970-01-01 08:00:00
arr_index_4:   1970-01-01 08:00:00


SELECT
timeSlots(now(), toUInt32(600), 20) as dateTimeArray, 
dateTimeArray[1] as arr_index_1,
dateTimeArray[2] as arr_index_2,
dateTimeArray[3] as arr_index_3,
dateTimeArray[4] as arr_index_4,
dateTimeArray[5] as arr_index_5;

Row 1:
──────
dateTimeArray: ['2021-03-31 17:04:20','2021-03-31 17:04:40','2021-03-31 17:05:00','2021-03-31 17:05:20','2021-03-31 17:05:40','2021-03-31 17:06:00','2021-03-31 17:06:20','2021-03-31 17:06:40','2021-03-31 17:07:00','2021-03-31 17:07:20','2021-03-31 17:07:40','2021-03-31 17:08:00','2021-03-31 17:08:20','2021-03-31 17:08:40','2021-03-31 17:09:00','2021-03-31 17:09:20','2021-03-31 17:09:40','2021-03-31 17:10:00','2021-03-31 17:10:20','2021-03-31 17:10:40','2021-03-31 17:11:00','2021-03-31 17:11:20','2021-03-31 17:11:40','2021-03-31 17:12:00','2021-03-31 17:12:20','2021-03-31 17:12:40','2021-03-31 17:13:00','2021-03-31 17:13:20','2021-03-31 17:13:40','2021-03-31 17:14:00','2021-03-31 17:14:20']
arr_index_1:   2021-03-31 17:04:20
arr_index_2:   2021-03-31 17:04:40
arr_index_3:   2021-03-31 17:05:00
arr_index_4:   2021-03-31 17:05:20
arr_index_5:   2021-03-31 17:05:40

– 指定时间为基准,之后每个元素增加20秒

SELECT
timeSlots(toDateTime('2012-01-01 12:20:00'), toUInt32(600), 20) as cur_dateTimeArray, 
cur_dateTimeArray[0] as arr_index_0,
cur_dateTimeArray[1] as arr_index_1, 
cur_dateTimeArray[2] as arr_index_2,
cur_dateTimeArray[3] as arr_index_3,
cur_dateTimeArray[4] as arr_index_4,
cur_dateTimeArray[5] as arr_index_5; 

6.字符串函数

SELECT
length('hello world') as str_length, 
empty('hello world'),
notEmpty('hello world'),
lengthUTF8('hello world'), 
char_length('hello world'), 
character_length('hello world'), 
lower('abcd123--'),
upper('abcd123--'),
lowerUTF8('abcd123-/*\8asd-\\'),
upperUTF8('abcd123--'), 
isValidUTF8('abcd123--/*\*');
SELECT notEmpty(''), notEmpty(NULL), notEmpty('he'); 

str_length:                       11
empty('hello world'):             0
notEmpty('hello world'):          1
lengthUTF8('hello world'):        11
char_length('hello world'):       11
character_length('hello world'):  11
lower('abcd123--'):               abcd123--
upper('abcd123--'):               ABCD123--
lowerUTF8('abcd123-/*\\8asd-\\'): abcd123-/*\8asd-\
upperUTF8('abcd123--'):           ABCD123--
isValidUTF8('abcd123--/*\\*'):    1

– 2.字符串维度自定义安排

SELECT format('{1} {0} {1}', 'World', 'Hello')
┌─format('{1} {0} {1}', 'World', 'Hello')─┐
│ Hello World Hello                       │
└─────────────────────────────────────────┘

SELECT format('{0} {0} {1} {1}', 'one', 'two');
┌─format('{0} {0} {1} {1}', 'one', 'two')─┐
│ one one two two                         │
└─────────────────────────────────────────┘

SELECT format('{} {}', 'Hello', 'World');
┌─format('{} {}', 'Hello', 'World')─┐
│ Hello World                       │
└───────────────────────────────────┘

3.字符串拼接 concat(s1,s2,s3,…)


SELECT concat('Hello',' ','World', '!');
┌─concat('Hello', ' ', 'World', '!')─┐
│ Hello World!                       │
└────────────────────────────────────┘

SELECT concatAssumeInjective('Hello',' ','World', '!');
┌─concatAssumeInjective('Hello', ' ', 'World', '!')─┐
│ Hello World!                                      │
└───────────────────────────────────────────────────┘

– 4.字符串截取:substring(s, offset, length), mid(s, offset, length), substr(s, offset, length)
– 以字节为单位截取指定位置字符串,返回以‘offset’位置为开头,长度为‘length’的子串。‘offset’从1开始(与标准SQL相同)。‘offset’和‘length’参数必须是常量。

SELECT 
    substring('abcdefg', 1, 3),
    substring('你好,世界', 1, 3),
    substringUTF8('你好,世界', 1, 3)

┌─substring('abcdefg', 1, 3)─┬─substring('你好,世界', 1, 3)─┬─substringUTF8('你好,世界', 1, 3)─┐
│ abc                        │ 你                            │ 你好,                            │
└────────────────────────────┴───────────────────────────────┴───────────────────────────────────┘

– 5.字符串拼接:appendTrailingCharIfAbsent(s, c)
– 如果‘s’字符串非空并且末尾不包含‘c’字符,则将‘c’字符附加到末尾。

SELECT 
    appendTrailingCharIfAbsent('good', 'c'),
    appendTrailingCharIfAbsent('goodccc', 'c')

┌─appendTrailingCharIfAbsent('good', 'c')─┬─appendTrailingCharIfAbsent('goodccc', 'c')─┐
│ goodc                                   │ goodccc                                    │
└─────────────────────────────────────────┴────────────────────────────────────────────┘

– 6.字符串编码转换:convertCharset(s, from, to) 返回从‘from’中的编码转换为‘to’中的编码的字符串‘s’。

SELECT 
    convertCharset('hello', 'UTF8', 'Unicode'),
    convertCharset('hello', 'Unicode', 'UTF8'),
    convertCharset('hello', 'Unicode', 'ASCII'),
    convertCharset('hello', 'ascii', 'ascii'),
    convertCharset('hello', 'UTF8', 'UTF8')

Row 1:
──────
convertCharset('hello', 'UTF8', 'Unicode'):  ÿþhello
convertCharset('hello', 'Unicode', 'UTF8'):  桥汬�
convertCharset('hello', 'Unicode', 'ASCII'):  
convertCharset('hello', 'ascii', 'ascii'):   hello
convertCharset('hello', 'UTF8', 'UTF8'):     hello


SELECT 
    base64Encode('username+password'),
    base64Decode('dXNlcm5hbWUrcGFzc3dvcmQ='),
    tryBase64Decode('dXNlcm5hbWUrcGFzc3dvcmQ=')

┌─base64Encode('username+password')─┬─base64Decode('dXNlcm5hbWUrcGFzc3dvcmQ=')─┬─tryBase64Decode('dXNlcm5hbWUrcGFzc3dvcmQ=')─┐
│ dXNlcm5hbWUrcGFzc3dvcmQ=          │ username+password                        │ username+password                           │
└───────────────────────────────────┴──────────────────────────────────────────┴─────────────────────────────────────────────┘

– 7.判断字符串是否已什么结尾或结束,返回1:true,0:flase
– endsWith(s, suffix) 返回是否以指定的后缀结尾。如果字符串以指定的后缀结束,则返回1,否则返回0
– startWith(s, prefix) 返回是否以指定的前缀开头。如果字符串以指定的前缀开头,则返回1,否则返回0。

SELECT 
    endsWith('string', 'g'),
    startsWith('string', 'str')

┌─endsWith('string', 'g')─┬─startsWith('string', 'str')─┐
│                       11 │
└─────────────────────────┴─────────────────────────────┘

– 8.删除左侧空白字符
– trimLeft(s) 返回一个字符串,用于删除左侧的空白字符
– trimRight(s) 返回一个字符串,用于删除右侧的空白字符
– trimBoth(s) 返回一个字符串,用于删除左侧和右侧的空白字符

SELECT 
    trimLeft(' sdfdgs'),
    trimRight('abcd '),
    trimBoth(' abcd ')

┌─trimLeft(' sdfdgs')─┬─trimRight('abcd ')─┬─trimBoth(' abcd ')─┐
│ sdfdgs              │ abcd               │ abcd               │
└─────────────────────┴────────────────────┴────────────────────┘

7.字符串搜索函数

– pasition(haystack, needle), 显示needle在haystack的第一个出现的位置。

SELECT 
    POSITION('2121stringstrstrstrstr', 'str') AS positionSearch,
    POSITION('你好,hello,12323-你好,你,好sdfd*dg', '你,好'),
    positionUTF8('n12你好', '你好') AS positionUTF8,
    positionCaseInsensitive('ABCDCDEFABCD', 'bc') AS positionCaseInsensitive,
    locate('hellohellohellohello', 'ello')

Row 1:
──────
positionSearch:                                              5
POSITION('你好,hello,12323-你好,你,好sdfd*dg', '你,好'): 31
positionUTF8:                                                4
positionCaseInsensitive:                                     2
locate('hellohellohellohello', 'ello'):                      2

– multiSearchAllPositions(haystack, [needle1, needle2, …, needlen])
– 注意:在所有multiSearch*函数中,由于实现规范,needles的数量应小于2^8。
– 函数返回一个数组,其中包含所有匹配needlei的位置

SELECT 
    multiSearchAllPositions('goodnamegoodnamegoodhellohihihi', ['dn', 'good']) AS multiSearch,
    multiSearchAllPositionsCaseInsensitive('nameSsdfagpSSDFDFetgfderef', ['SS', 'fa']) AS multiCaseInsensitive,
    multiSearchAllPositionsUTF8('nameSsdfazz轴功率gpSSDFDFetgfderef', ['Ss', 'fa', 'zz轴']) AS multiSearchUTF8,
    multiSearchAllPositionsCaseInsensitiveUTF8('nameSsdfazz轴功率gpSSDFDFetgfderef', ['Ss', 'fa', 'zz轴']) AS multiCaseInsensitiveUTF8

┌─multiSearch─┬─multiCaseInsensitive─┬─multiSearchUTF8─┬─multiCaseInsensitiveUTF8─┐
│ [4,1][5,8][5,8,10][5,8,10]                 │
└─────────────┴──────────────────────┴─────────────────┴──────────────────────────┘

– 检查字符串是否与pattern正则表达式匹配。pattern可以是一个任意的re2正则表达式。re2正则表达式的语法比Perl正则表达式的语法存在更多限制。
– match(haystack, pattern) 匹配到了则返回1,否则返回0

SELECT 
    match('1232434sadgaDDFSrefds', '[0-9a-zA-Z]'),
    match('1232321', '[a-z]')

┌─match('1232434sadgaDDFSrefds', '[0-9a-zA-Z]')─┬─match('1232321', '[a-z]')─┐
│                                             10 │
└───────────────────────────────────────────────┴───────────────────────────┘

– 与match相同,但如果所有正则表达式都不匹配,则返回0;如果任何模式匹配,则返回1。它使用hyperscan库。对于在字符串中搜索子字符串的模式,最好使用“multisearchany”,因为它更高效。
– multiMatchAny(haystack, [pattern1, pattern2, …, patternn])
– 注意:任何haystack字符串的长度必须小于232字节,否则抛出异常。这种限制是因为hyperscan API而产生的。
– 多个正则表达式对原始字符进行匹配,如若只有一个正则表达式匹配上了则返回1,否则返回0

SELECT 
    multiMatchAny('abcABC', ['[0-9]', '[a-zA-Z]']) AS multiMatchAnyOne,
    multiMatchAny('123abcABC', ['[0-9]', '[a-zA-Z]']) AS multiMatchAnyTwo,
    multiMatchAnyIndex('123abcABC', ['[0-9]', '[a-zA-Z]']) AS multiMatchAnyIndex

┌─multiMatchAnyOne─┬─multiMatchAnyTwo─┬─multiMatchAnyIndex─┐
│                111 │
└──────────────────┴──────────────────┴────────────────────┘

– 模糊匹配:like()函数,注意大写敏感。
– % 表示任何字节数(包括零字符)
– _ 表示任何一个字节

SELECT 
    'hello' LIKE '%h%' AS LIKE_UP,
    'hello' LIKE 'he' AS like_low,
    'hello' NOT LIKE 'he' AS not_like,
    'hello' LIKE '%he%' AS like_litter,
    'adgadgadfa1232' LIKE '_12_' AS like_func,
    'sdfasdfasd' LIKE '[a-z]' AS like_func2,
    '1232423' NOT LIKE '[a-zA-Z]' AS not_like_func

┌─LIKE_UP─┬─like_low─┬─not_like─┬─like_litter─┬─like_func─┬─like_func2─┬─not_like_func─┐
│       1011001 │
└─────────┴──────────┴──────────┴─────────────┴───────────┴────────────┴───────────────┘

– 使用字符串截取字符串:extract(haystack, pattern)
– 使用正则表达式截取字符串。如果‘haystack’与‘pattern’不匹配,则返回空字符串。如果正则表达式中不包含子模式,它将获取与整个正则表达式匹配的子串。否则,它将获取与第一个子模式匹配的子串。

SELECT 
    extractAll('hellogoodaimantIdeaIDEAfasd123232', '[0-9]'),
    extractAll('12323dSDFRE', '[A-Z]'),
    extract('helloclickhouse', '[a-z]')

Row 1:
──────
extractAll('hellogoodaimantIdeaIDEAfasd123232', '[0-9]'): ['1','2','3','2','3','2']
extractAll('12323dSDFRE', '[A-Z]'):                       ['S','D','F','R','E']
extract('helloclickhouse', '[a-z]'):                      h

– ngramSearch(haystack, needle)
– 基于4-gram计算haystack和needle之间的距离:计算两个4-gram集合之间的对称差异,并用它们的基数和对其进行归一化。
– 返回0到1之间的任何浮点数 – 越接近0则表示越多的字符串彼此相似。
– 如果常量的needle或haystack超过32KB,函数将抛出异常。如果非常量的haystack或needle字符串超过32Kb,则距离始终为1。

SELECT 
    ngramDistance('hello123456789', '123') AS ngramDistance,
    ngramDistanceCaseInsensitive('hello123456789', '123') AS ngramDistanceCaseInsensitive,
    ngramDistanceUTF8('hello123456789', '123') AS ngramDistanceUTF8,
    ngramDistanceCaseInsensitiveUTF8('hello123456789', '123') AS ngramDistanceCaseInsensitiveUTF8

┌─ngramDistance─┬─ngramDistanceCaseInsensitive─┬─ngramDistanceUTF8─┬─ngramDistanceCaseInsensitiveUTF8─┐
│             110.846153860.84615386 │
└───────────────┴──────────────────────────────┴───────────────────┴──────────────────────────────────┘

– 注意:对于UTF-8,我们使用3-gram。所有这些都不是完全公平的n-gram距离。
– 我们使用2字节哈希来散列n-gram,然后计算这些哈希表之间的(非)对称差异 - 可能会发生冲突。
– 对于UTF-8不区分大小写的格式,我们不使用公平的tolower函数
– 我们将每个Unicode字符字节的第5位(从零开始)和字节的第一位归零
– 这适用于拉丁语,主要用于所有西里尔字母。

8.数学函数

SELECT
1 * e() AS E,
1 * pi() AS PI,
sqrt(25) AS sqrt_25, --接受一个数值类型的参数并返回它的平方根。
cbrt(27) AS cbrt_27, --接受一个数值类型的参数并返回它的立方根。
exp(10), --接受一个数值类型的参数并返回它的指数
exp10(10), --接受一个数值类型的参数并返回它的10的x次幂。
log(10) AS LOG,
log2(10) AS LOG2, --接受一个数值类型的参数并返回它的底2对数。
ln(e()) AS LOG10; --接受一个数值类型的参数并返回它的自然对数

SELECT 
    1 * e() AS E,
    1 * pi() AS PI,
    sqrt(25) AS sqrt_25,
    cbrt(27) AS cbrt_27,
    exp(10),
    exp10(10),
    log(10) AS LOG,
    log2(10) AS LOG2,
    ln(e()) AS LOG10

Row 1:
──────
E:         2.718281828459045
PI:        3.141592653589793
sqrt_25:   5
cbrt_27:   3.0000000000000004
exp(10):   22026.46579482316
exp10(10): 10000000000
LOG:       2.3025850938475476
LOG2:      3.321928094887362
LOG10:     0.9999999987491066

-- 示例:三西格玛准则
SELECT erf(3 / sqrt(2)); -- 0.997
SELECT
sin(90), -- 返回x的三角正弦值。
cos(90), -- 返回x的三角余弦值。
tan(90), -- 返回x的三角正切值
acos(0), -- 返回x的反三角余弦值。
asin(1), -- 返回x的反三角正弦值。
atan(45); -- 返回x的反三角正切值。

SELECT 
    sin(90),
    cos(90),
    tan(90),
    acos(0),
    asin(1),
    atan(45)

┌────────────sin(90)─┬─────────────cos(90)─┬────────────tan(90)─┬────────────acos(0)─┬────────────asin(1)─┬───────────atan(45)─┐
│ 0.8939966636005579-0.4480736161291701-1.9952004122082421.57079632679489661.57079632679489661.5485777614681775 │
└────────────────────┴─────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┘


-- pow(x, y), power(x, y) 接受x和y两个参数。返回x的y次方。
SELECT
pow(2, 3), -- 2的三次方
pow(3, 2); -- 3的平方

SELECT 
    pow(2, 3),
    pow(3, 2)

┌─pow(2, 3)─┬─pow(3, 2)─┐
│         89 │
└───────────┴───────────┘

SELECT
intExp2(4), --2^4 接受一个数值类型的参数并返回它的2的x次幂(UInt64)。
intExp10(2);--10^2 接受一个数值类型的参数并返回它的10的x次幂(UInt64)。

SELECT 
    intExp2(4),
    intExp10(2)

┌─intExp2(4)─┬─intExp10(2)─┐
│         16100 │
└────────────┴─────────────┘

9.取整函数

– 1.向下取整:floor(x[,N])

SELECT 
    floor(toFloat32(12.08098), 2),
    floor(toFloat32(12.2323), 2),
    floor(toFloat32(12.89788), -1),
    floor(toFloat32(12.0959), 3),
    floor(toFloat32(12.0987), 3),
    floor(10, 2)\G

Row 1:
──────
floor(toFloat32(12.08098), 2):  12.08
floor(toFloat32(12.2323), 2):   12.23
floor(toFloat32(12.89788), -1): 10
floor(toFloat32(12.0959), 3):   12.095
floor(toFloat32(12.0987), 3):   12.098
floor(10, 2):                   10

– 2.四舍五入:round(expression [, decimal_places])
– 如果decimal_places=0,则取整数;
– 如果>0,则将值舍入小数点右侧;
– 如果<0,则将小数点左侧的值四舍五入。

SELECT
round(toFloat32(12.1234), 3),
round(toFloat32(12.0025), 3),
round(toFloat32(12.0025), 4), 
round(toFloat32(12.0025002323), 100); 

┌─round(toFloat32(12.1234), 3)─┬─round(toFloat32(12.0025), 3)─┬─round(toFloat32(12.0025), 4)─┬─round(toFloat32(12.0025002323), 100)─┐
│                       12.12312.00212.002512.002501 │
└──────────────────────────────┴──────────────────────────────┴──────────────────────────────┴──────────────────────────────────────┘

--示例:
SELECT 
    round(toFloat32(10 / 3)),
    round(toFloat32(10 / 3), 2),
    round(toFloat32(10. / 3), 3),
    round(toFloat32(10. / 3), 6)

Row 1:
──────
round(toFloat32(divide(10, 3))):     3
round(toFloat32(divide(10, 3)), 2):  3.33
round(toFloat32(divide(10., 3)), 3): 3.333
round(toFloat32(divide(10., 3)), 6): 3.333333

–roundToExp2() 接受一个数字。如果数字小于1,则返回0。否则,它将数字向下舍入到最接近的(整个非负)2的x次幂。

SELECT
roundToExp2(12.0129), 
roundToExp2(toFloat32(0.01));

┌─roundToExp2(12.0129)─┬─roundToExp2(toFloat32(0.01))─┐
│                    80.0078125 │
└──────────────────────┴──────────────────────────────┘

–3.向上取整:ceil(x[, N]) 或者 ceiling(x[, N])

SELECT
ceil(12.34343, 3), 
ceil(toFloat64(12.34343), 3), 
ceil(toFloat32(12.34343), 3), 
ceil(12.0011, 3); 

┌─ceil(12.34343, 3)─┬─ceil(toFloat64(12.34343), 3)─┬─ceil(toFloat32(12.34343), 3)─┬─ceil(12.0011, 3)─┐
│            12.34412.34412.34412.002 │
└───────────────────┴──────────────────────────────┴──────────────────────────────┴──────────────────┘

10.随机函数

随机函数
– 解释:随机函数使用非加密方式生成【伪随机】数字。
– ① 所有随机函数都只接受一个参数或不接受任何参数。
– ② 您可以向它传递任何类型的参数,但传递的参数将不会使用在任何随机数生成过程中。
– ③ 此参数的唯一目的是防止公共子表达式消除,以便在相同的查询中使用相同的随机函数生成不同的随机数
– rand() 函数:返回一个UInt32类型的随机数字,所有UInt32类型的数字被生成的概率均相等。
– rand64() 函数:返回一个UInt64类型的随机数字,所有UInt64类型的数字被生成的概率均相等。
– randConstant() 函数:返回一个UInt32类型的随机数字,该函数不同之处在于仅为每个数据块参数一个随机数。

SELECT
rand(), 
rand(10), 
rand64(),
rand64(10),
randConstant(),
randConstant();

┌─────rand()─┬──rand(10)─┬─────────────rand64()─┬───────────rand64(10)─┬─randConstant()─┬─randConstant()─┐
│ 2049392190540304723139567648680508982121840888162783907290436242500773624250077 │
└────────────┴───────────┴──────────────────────┴──────────────────────┴────────────────┴────────────────┘

更多精彩内容请关注
在这里插入图片描述

  • 1
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值