hive shell
创建外表,指定目录
CREATE EXTERNAL TABLE IF NOT EXISTS songs2 (
sid string,
aid string,
ptime string,
sinit int,
language int,
gender int)
COMMENT 'This is the staging page view table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/bs/music/songs/';
导入HDFS文件(原文件消失)
LOAD DATA INPATH '/bs/music/input/mars_tianchi_songs.csv' OVERWRITE INTO TABLE songs2;
查看前10条数据
select * from songs2 limit 10;
创建外表,指定目录
CREATE EXTERNAL TABLE IF NOT EXISTS useraction (
uid string,
sid string,
btime string,
atype int,
ds string)
COMMENT 'This is the staging page view table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/bs/music/useraction/';
导入HDFS文件(原文件消失)
LOAD DATA INPATH '/bs/music/input/mars_tianchi_user_actions.csv' OVERWRITE INTO TABLE useraction;
select * from useraction limit 10;
表连接 小表在前
SELECT t1.*,t2.* FROM songs2 t1 JOIN useraction t2 on t1.sid=t2.sid;
Map join连接
SELECT /*+mapjoin(songs2)*/ t1.*,t2.* FROM songs2 t1 JOIN useraction t2 on t1.sid=t2.sid;
导出查询数据到hdfs
INSERT OVERWRITE DIRECTORY '/bs/music/data'
SELECT /*+mapjoin(songs2)*/ t1.*,t2.* FROM songs2 t1 JOIN useraction t2 on t1.sid=t2.sid;
查询结果保存到表
CREATE EXTERNAL TABLE IF NOT EXISTS usersongs (
sid string,
aid string,
ptime string,
sinit int,
language int,
gender int,
uid string,
sid2 string,
btime string,
atype int,
ds string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/bs/music/data/';
INSERT OVERWRITE TABLE usersongs
SELECT /*+mapjoin(songs2)*/ t1.*,t2.* FROM songs2 t1 JOIN useraction t2 on t1.sid=t2.sid;
查询结果保存到本地
hive -e "select * from usersongs limit 10" >> /opt/tools/test.csv
查看HDFS上文件的前5行
hadoop fs -text /bs/music/data/000000_0 |head -n 5
去重分组查询 同一aid的uid去重总量,sid的去重总量
select count(distinct uid),count(distinct sid),aid from usersongs where atype=1 group by aid;