下载链接:
放ubuntu主目录下
//创建文件夹
hdfs dfs -mkdir /hadoopclass/ESportEarnings hdfs dfs -mkdir /hadoop_class/esport
//从主目录导入文件到hadoopclass
hdfs dfs -copyFromLocal ~/ESportEarnings.csv /hadoopclass/ESportEarnings hdfs dfs -copyFromLocal ~/ESportEarnings1.csv /hadoopclass/esport
//删除hadoop中文件 hadoop fs -rm /hadoopclass/ESportEarnings/ESport_Earnings.csv
hive中操作
//创建表 create external table game ( IdNo int, TotalMoney double, GameName string, Genre string, PlayerNo double, TournamentNo double, TopCountry string, TopCountryEarnings double, Releaseyear double) row format delimited fields terminated by ',' location'/hadoopclass/ESport_Earnings';
//创建表2 create external table esports ( IdNo int, TotalMoney double, GameName string, Genre string, PlayerNo double, TournamentNo double, TopCountry string, TopCountryEarnings double, Releaseyear double ) row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde' stored as textfile location'/hadoopclass/esport';
//删除表 drop table game;
//忽略第一行表头 alter table eport set TBLPROPERTIES("skip.header.line.count"="1");
//什么游戏奖金多? SELECT GameName, MAX(TotalMoney) AS MaxEarnings FROM game GROUP BY GameName ORDER BY MaxEarnings DESC limit 3;
//有哪些国家是电竞大国,获得奖金最多。 SELECT TopCountry, SUM(TopCountryEarnings) AS Esportspowerhouse FROM game GROUP BY TopCountry ORDER BY Esportspowerhouse DESC limit 3;
//有哪些国家是电竞真大国,人均获得奖金最多。 select TopCountry,(sum(TopCountryEarnings)/sum(PlayerNo)) as Esportspowerhouse from game group by TopCountry order by Esportspowerhouse DESC limit 3;
//策略类游戏的趋势,每年的增长率0 select Genre,Releaseyear from game where Genre = "Strategy" order by Releaseyear DESC limit 10;
SELECT Releaseyear, SUM(TotalMoney) FROM esports WHERE Genre='Strategy' GROUP BY Releaseyear;
//创建中间表 CREATE TABLE gamestrategyyearly AS SELECT ReleaseYear, SUM(TotalMoney) AS TotalMoneySum FROM esports WHERE Genre = 'Strategy' GROUP BY ReleaseYear;
//搜索 SELECT t2.releaseyear,(t1.money-t2.money)/t2.money FROM gamestrategyyearly t1,gamestrategyyearly t2 WHERE t1.releaseyear = t2.releaseyear-1;
select * from gamestrategyyearly;
//最近一年,什么游戏的增长势头最强劲
过滤2020,2019
按类型分组
对总奖金求和
CREATE TABLE esport_2020 AS SELECT Genre, SUM(TotalMoney) AS money FROM esports WHERE releaseyear=2020 GROUP BY Genre; //2020年各种游戏的奖金总和
CREATE TABLE esport_2019 AS SELECT Genre, SUM(TotalMoney) AS money FROM esports WHERE releaseyear=2019 GROUP BY Genre; //2020年各种游戏的奖金总和
SELECT t1.Genre, (t1.money-t2.money)/t2.money as per FROM esport2020 t1,esport2019 t2 WHERE t1.Genre = t2.Genre ORDER BY per DESC; //增长率