1、常用日期函数
year()
month()
date()
unix_timestamp()
from_timestamp()
dateformat
2、使用java编写udf及使用的流程
继承UDF
编写返回值Intwritetable 形参类型为Text
1、java继承udf类编写udf函数(evaluate())(一个类一个方法)
2、打fat包(包括所有依赖文件)
3、把jar包放到linux上
–永久udf函数
4、在Linux命令行使用hdfs命令把jar上传到hdfs的路径
5、create function 函数名 as ‘方法的全类名’
using jar ‘hdfs jar包的hdfs路径’;
3、常用字符串
concat()
cast() 类型转换
separatorChar是字段之间的分隔符
quoteChar是包括字段的符号,比如单引号、双引号
escapeChar是不处理的字符
drop table ext_customer_details;
create external table if not exists ext_customer_details(
customer_id string,
first_name string,
last_name string,
email string,
gender string,
address string,
country string,
language string,
job string,
credit_type string,
credit_no string
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties(
"separatorChar"=",",
"escapeChar"="\\"
)
stored as textfile
drop table transaction_details;
create external table if not exists transaction_details(
transaction_id string,
customer_id string,
store_id string ,
price decimal(10,2) ,
product string,
purchase_date string,
purchase_time string ) partitioned by (month string)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties(
"separatorChar"=",",
"quoteChar"="\"",
"escapeChar"="\\"
)
stored as textfile;
drop table ext_transaction_details;
create external table if not exists ext_transaction_details(
transaction_id string,
customer_id string,
store_id string,
price decimal(10,2),
product string,
purchase_date string,
purchase_time string
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties(
"separatorChar"=",",
"quoteChar"="\"",
"escapeChar"="\\"
)
stored as textfile
location '/apps/shopping/transaction';
create external table if not exists ext_store_details(
store_id string,
store_name string,
employee_number int
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties(
"separatorChar"=",",
"escapeChar"="\\"
)
stored as textfile
location '/apps/shopping/detalis';
create external table if not exists ext_store_review (
stransaction_id string,
store_id string,
review_store string)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties(
"separatorChar"=",",
"escapeChar"="\\"
)
stored as textfile
location '/apps/shopping/review';
with base as (
select concat( transaction_id,"_",rank()
over(partition by transaction_id order by customer_id))
transaction_id,customer_id,store_id,price,
product,purchase_date,purchase_time,
concat(year(purchase_date),month(purchase_date)) purchase_month
from ext_transaction_details where transaction_id <> 'transaction_id')
insert into transaction_details partition(month) select * from base;