项目实战day4

将csv文件导入到hive上

先将csv文件存进虚拟机里面

再将csv文件存入集群里面

在pycharm中连接hive中,并进行建数据库与建表

create external table if not exists 10w (
msg_time string comment "消息发送时间"
, sender_name string comment "发送人昵称"
, sender_account string comment "发送人账号"
, sender_sex string comment "发送人性别"
, sender_ip string comment "发送人ip地址"
, sender_os string comment "发送人操作系统"
, sender_phonetype string comment "发送人手机型号"
, sender_network string comment "发送人网络类型"
, sender_gps string comment "发送人的GPS定位"
, receiver_name string comment "接收人昵称"
, receiver_ip string comment "接收人IP"
, receiver_account string comment "接收人账号"
, receiver_os string comment "接收人操作系统"
, receiver_phonetype string comment "接收人手机型号"
, receiver_network string comment "接收人网络类型"
, receiver_gps string comment "接收人的GPS定位"
, receiver_sex string comment "接收人性别"
, msg_type string comment "消息类型"
, distance string comment "双方距离"
, message string comment "消息内容"
) ROW FORMAT DELIMITED FIELDS TERMINATED BY '' STORED AS TEXTFILE;
load data inpath '/chat_data-10W.csv' into table 10w;

load data inpath 'csv文件在集群中的位置' into table 表名;

最后成果如下图

数据查询

统计今日信息总量

select
    substr(msg_time,1,10)as message_date,
       count(*) as daily_message_count
from 10w
group by
    substr(msg_time,1,10);

统计每小时消息量,发送和接收用户数

create table if not exists 10w2 comment '每小时消息量趋势'
as select dayinfo, hourinfo,
count(*) as total_msg_cnt,
count(distinct sender_account) as sender_usr_cnt,
count(distinct receiver_account) as receiver_usr_cnt
from 10w
group by dayinfo, hourinfo;

统计今日发送消息最多的Top10用户

select
    sender_account,
       count(*) as message_count
from
    10w
where
    substr(msg_time,1,10) = '2023-01-01'
group by
    sender_account
order by
    message_count desc
limit 10

统计发送人的设备操作系统分布情况

select
    sender_os as operating_system,
    count(*) as count
from
    10w
group by
    sender_os
order by
    count desc

统计发送人的手机型号分布情况

select
    sender_phonetype as phone_model,
    count(*) as model_count
from 10w
group by sender_phonetype
order by model_count desc

  • 9
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 3
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 3
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值