一、建表
---建立一次性表格
drop table if exists table_name;
create table table_name as select from table_name... ;
create table if not exists table_name;
---建立累积插入表格
1、
create table table_name(a string , b string , c decimal);
insert overwrite table table_name partition(ds='${bizdate}') SELECT * from table_name1...;---partition(ds='${bizdate}')为分区时间
2、
create table if not exists table_name(stat_date string comment '统计日期')COMMENT '高计算费用---节点' PARTITIONED BY (ds STRING COMMENT 'yyyymmdd')LIFECYCLE 30; ---comment是备注、注释----的意思; partition by关键字是分析性函数的一部分,它和聚合函数(如group by)不同的地方在于它能返----回一个分组中的多条记录;LIFECYCLE表示表的生命周期,临时表生命周期不要设置太久。
INSERT OVERWRITE TABLE table_name PARTITION(ds = '${bizdate}')SELECT t1.store_id
二、获取文本部分内容
1、截取字符串
substr(col,1,10)
2、获取josn字段中的内容
select get_json_object(json '{"a":1, "b":2}', '$.a');
--返回{"b":"1","a":"2"}。一个JSON对象中可以出现相同的Key,可以成功解析,返回第一个Value值。换行符(\n)、引号(")等JSON保留字符使用字符串'\n'、'\"'显示。
select get_json_object('{"b":{"b":"1","a":"2"},"a":"2"}', '$.b');
---提取JSON对象src_json.json中的信息
src_json = {"store":
{"fruit":[{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}],
"bicycle":{"price":19.95,"color":"red"}
},
"email":"amy@only_for_json_udf_test.net",
"owner":"amy"
}
--提取owner字段信息,返回amy。
select get_json_object(src_json.json, '$.owner') from src_json
--提取store.fruit字段第一个数组信息,返回{"weight":8,"type":"apple"}。
select get_json_object(src_json.json, '$.store.fruit[0]') from src_json;
3、切割取值
SUBSTRING_INDEX(SUBSTRING_INDEX(index_list_06,'TOTAL:',-1),';',1) AS 实物数量;----AVAIL:2.0;AVAIL_NOMINUS:2.0;FROZEN:0.0;FROZEN_NOMINUS:0.0;TOTAL:2.0;TOTAL_NOMINUS:2.0
4、提取数组型JSON对象的信息
--返回2222。
select get_json_object('{"array":[["aaaa",1111],["bbbb",2222],["cccc",3333]]}','$.array[1][1]');
--返回["h0","h1","h2"]。
set odps.sql.udf.getjsonobj.new=true;
select get_json_object('{"aaa":"bbb","ccc":{"ddd":"eee","fff":"ggg","hhh":["h0","h1","h2"]},"iii":"jjj"}','$.ccc.hhh[*]');
--返回h1。
select get_json_object('{"aaa":"bbb","ccc":{"ddd":"eee","fff":"ggg","hhh":["h0","h1","h2"]},"iii":"jjj"}','$.ccc.hhh[1]');
5、提取带有.的JSON对象中的信息
--创建一张表。
create table mf_json (id string, json string);
--向表中插入数据,Key带.。
insert into table mf_json (id, json) values ("1", "{
\"China.beijing\":{\"school\":{\"id\":0,\"book\":[{\"title\": \"A\",
\"price\": 8.95},{\"title\": \"B\",\"price\": 10.2}]}}}");
--向表中插入数据,Key不带.。
insert into table mf_json (id, json) values ("2", "{
\"China_beijing\":{\"school\":{\"id\":0,\"book\":[{\"title\": \"A\",
\"price\": 8.95},{\"title\": \"B\",\"price\": 10.2}]}}}");
--取id的值,查询key为China.beijing,返回0。由于包含.,只能用['']来解析。
select get_json_object(json, "$['China.beijing'].school['id']") from mf_json where id =1;
--取id的值,查询key为China_beijing,返回0。查询方法有如下两种。
select get_json_object(json, "$['China_beijing'].school['id']") from mf_json where id =2;
select get_json_object(json, "$.China_beijing.school['id']") from mf_json where id =2;
6、JSON字符串涉及转义
set odps.sql.udf.getjsonobj.new=true;
--返回"1"。
select get_json_object('{"a":"\\"1\\"","b":"2"}', '$.a');
--返回'1'。
select get_json_object('{"a":"\'1\'","b":"2"}', '$.a');
三、时间处理
--获取时间对应星期几
WEEKDAY(TO_date(t.周度日期,'yyyy-mm-dd')) as 星期
--对应本年的周数
WEEKOFYEAR(TO_date(t.周度日期,'yyyy-mm-dd'))as 当年周数
--对应月份
substr(TO_date(t.日期,'yyyy-mm-dd'),1,7) as 月份
--当前日期
getdate();
to_char(getdate(), 'yyyy-MM-dd');
--滚动时间范围取值
sign_time >= DATEADD(TO_DATE(MAX_PT('table_name'),'YYYYMMdd'),-30,'dd') AND sign_time < DATEADD(TO_DATE(MAX_PT('table_name'),'YYYYMMdd'),1,'dd')
---时间对应天数的日期
DATEADD(GETDATE(),-14,'DAY')
四、其他
---UNION
select 月份,单据仓库code,仓库名称,sum(price) as 当期数, '越库配货额' as 指标项
from hema_fdc_outsource_data_handle_yqz where source_order_type = '越库'
group by 月份,单据仓库code,仓库名称
UNION
select 月份,单据仓库code,仓库名称,sum(price) as 当期数, '配货额' as 指标项
from hema_fdc_outsource_data_handle_yqz
---常规语句
select a , sum(b) as b * from table_name where a =''and b between 1 and 2 group by a order by sum(b) desc;
---显示分区
show PARTITIONS wdk_wms.adl_wdk_warehouse_sku_for_hmrex;
---case ... when ...then...end as
case col when <100 then '不及格'
when >=100 and < 150 then '良'
else '优秀'
end as col1
--IF函数
SUM(IF(c.item_status != -1, 1, 0)) AS released
SUM(IF((if_app = 'true' AND c.item_status = 1), 1, 0)) AS app_on_sale
regexp_extract(a.content, 'if_app":(.*?)(,|$)', 1) AS if_app
COUNT(DISTINCT CASE WHEN order_channel = 2 THEN to_char(gmt_create, 'yyyy-MM-dd') END) AS cnt_day_app
--窗口函数
SUM(A.总消耗量) OVER (partition by A.仓编码,A.商品编码 ORDER BY A.统计日期,A.仓编码,A.商品编码 rows between 6 preceding and current row)
--join
join\left join ...on \right join ... on
---字段处理
substr(col , 1,10)
get_json_object(col , '$.TAX_PRICE')
SUBSTRING_INDEX(SUBSTRING_INDEX(index_list_06,'TOTAL:',-1),';',1) AS 实物数量,
---分区选择
dt = max_pt('hm_ods.s_delivery_cargo_item_detail_wdk_ums_app')