Ip访问频率
Hive中创建日志表
hive> create table KINDWEB_access_log_5_20(ip string, s1 string, s2 string, date string, way string, url string, w1 string, w2 string, w3 string) row format delimited fields terminated by ' '; |
导入日志数据到hive
hive> load data inpath '/testdata/accessdata/KINDWEB/201505/KINDWEB_access_log.2015-05-20.txt' overwrite into table KINDWEB_access_log_5_20; |
查看表结构
hive> desc KINDWEB_access_log_5_20; |
查看表数据
hive> select * from KINDWEB_access_log_5_20; |
查看表前10条数据
hive> select * from KINDWEB_access_log_5_20 limit 10; |
select查询ip访问频率
hive> select ip,count(*) from kindweb_access_log_5_20 group by ip; |
创建Ip访问频率视图
hive> create view view_kindweb_access_log_5_20(ip,count) as select ip,count(*) from kindweb_access_log_5_20 group by ip; OK ip _c1 Time taken: 0.666 seconds hive> |
或者
hive> create view view_kindweb_access_log_5_20_order(ip,count) as select ip,count(*) ip_count from kindweb_access_log_5_20 group by ip order by ip_count; OK ip ip_count Time taken: 0.087 seconds hive> select * from view_kindweb_access_log_5_20_order limit 10; |
查看IP访问频率视图前10条数据
创建hive表存放IP访问频率数据
hive> create table tb_kindweb_access_log_5_20_order(ip string,ip_count string) row format delimited fields terminated by ' '; |
hive> insert into tb_kindweb_access_log_5_20_order select ip,count(*) ip_count from kindweb_access_log_5_20 group by ip order by ip_count; |