Hive基础+案例

h ive shell
查看所有数据库:show databases;
创建数据库:create database database_name;
切换数据库:use database_name;
查看所有表:show tables;
模糊查询表:show tables like '*name*';
查看所有的hive函数:show functions;
查看table的表结构: desc tablesname ;   desc formatted table_name;
查看表创建的脚本:show create table uv_etltest; 
删除表:drop table uv_etltest; 
查看分区信息:show partitions table_name; 

案例:countpv日志。l=字段匹配正则:m\.fang\.com/news/\w+/03_\d+.html,且l=不包含sf_source=的每个标黄的id的PV和UV。
日志格式: 8801b688-1492445566729-8c8230bd^u_50cfe454-1499154971810-2c970e6d^0^http^m.fang.com^/news/sh/03_25116147.html^sf_source=ttcollaborate^5^无^无^/^^0^无^122.194.3.70^0^1^0^1^[1]^无^2017-07-04^15^56^11^2017-07-04^15^56^11^^^

show databases;
create database uvtest;
use uvtest;
show tables;
desc uv_etltest;
show create table uv_etltest;
show partitions uv_etltest;

创建表:
CREATE EXTERNAL TABLE IF NOT EXISTS uv_etltest (
Visitor String
,Visit String
,IsFirstVisit int
,LocationProtocol String
,LocationDomain String
,LocationPath String
,LocationParametersString String
,RefererType String
,RefererProtocol String
,RefererDomain String
,RefererPath String
,RefererParametersString String
,SearchGroupType String
,SearchKeyword String
,IP String
,OpenPageType String
,UserType String
,UserInfo String
,Mouse int
,BIDs String
,BBSSign String
,ServerTimeDate date
,ServerTimeHour String
,ServerTimeMinute String
,ServerTimeSecond String
,ClientTimeDate date
,ClientTimeHour String
,ClientTimeMinute String
,ClientTimeSecond String
,Title String
,UrlOwner String
,PageID String
)
PARTITIONED BY (logdate INT)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '^'
LINES TERMINATED BY '\n'
STORED AS TEXTFILE

添加分区 :ALTER TABLE uv_etltest  ADD PARTITION (logdate= 20170704)  LOCATION '/logs/uv/etlbeginchar/20170704/';
修改分区的location :ALTER TABLE uv_etltest PARTITION (logdate='20170704') SET LOCATION '/logs/uv/etlbeginchar/20170704/';
修改分区的信息 : ALTER TABLE table_name PARTITION (logdate='20170704') RENAME TO PARTITION (logdate='20170504');
删除分区 :ALTER TABLE uv_etltest DROP IF EXISTS PARTITION (logdate='20170704');

查询结果
Select split(split(LocationPath,'03_')[1],'.html')[0] as id,count(Visitor) as pv,count(distinct(Visitor)) as uv from uv_etltest where logdate= 20170704 and concat(LocationDomain,LocationPath,LocationParametersString) not like '%sf_source=%' and concat(LocationDomain,LocationPath) regexp 'm\\.fang\\.com/news/\\w+/03_\\d+.html' group by split(split(LocationPath,'03_')[1],'.html')[0] order by pv desc;

结果存放在hdfs上,并以空格分开
set hive.merge.mapredfiles= true;
insert overwrite directory '/user/hive/warehouse/uvtest.db/uv_etltest' row format delimited fields terminated by "\t" 
Select split(split(LocationPath,'03_')[1],'.html')[0] as id,count(Visitor) as pv,count(distinct(Visitor)) as uv from uv_etltest where logdate= 20170704 and concat(LocationDomain,LocationPath,LocationParametersString) not like '%sf_source=%' and concat(LocationDomain,LocationPath) regexp 'm\\.fang\\.com/news/\\w+/03_\\d+.html' group by split(split(LocationPath,'03_')[1],'.html')[0] order by pv desc;

结果存放在新的hive表中
set hive.merge.mapredfiles= true; 
create table uv_etltestresult as
Select split(split(LocationPath,'03_')[1],'.html')[0] as id,count(Visitor) as pv,count(distinct(Visitor)) as uv from uv_etltest where logdate= 20170704 and concat(LocationDomain,LocationPath,LocationParametersString) not like '%sf_source=%' and concat(LocationDomain,LocationPath) regexp 'm\\.fang\\.com/news/\\w+/03_\\d+.html' group by split(split(LocationPath,'03_')[1],'.html')[0] order by pv desc;

修改表名称 :alter table  uv_etltest1 rename to uv_etltestresult;
Select * from uv_etltestresult where pv > 500;


评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值