Day4:大数据处理

                ​​​​​​​        ​​​​​​​        ​​​​​​​      

创建数据库:

create database  if not exists myhive2;

连接数据库:

use myhive2;

创建一个收发人基本信息详情表:

create table myhive2.news(
    send_date STRING comment '日期',
    id STRING  comment '发件人姓名' ,
    send_num STRING comment '发件人手机号',
    send_sex STRING comment '发件人性别',
    send_ip STRING comment '发件人ip地址',
    send_from STRING comment '发件人手机系统',
    send_phone STRING comment  '发件人手机型号',
    send_network STRING comment '发件人手机网络',
    send_site STRING comment '发件人地址',

    re_id STRING comment '收件人姓名' ,
    re_ip STRING comment '收件人ip地址',
    re_num STRING comment '收件人手机号',
    re_from STRING comment '收件人手机系统',
    re_phone STRING comment  '收件人手机型号',
    re_network STRING comment '收件人手机网络',
    re_site STRING comment '收件人地址',
    re_sex STRING comment '收件人性别',

    type STRING comment '文件类型',
    distance STRING comment '距离',
    content STRING comment '发送内容'

)
    COMMENT '信息表';

插入用户信息:

load data local inpath '/chat_data-10W.csv' into table myhive2.news;

(自备的数据chat_data-10W.csv)

实战数据处理:

1.统计今日总消息量
SELECT COUNT(*)
FROM myhive2.news;
2.统计今日发送和接收用户数
SELECT
    COUNT(DISTINCT id) AS total_unique_senders,
    COUNT(DISTINCT re_id) AS total_unique_receivers
FROM myhive2.news;
3.统计每小时消息量,发送和接收用户量
SELECT
    DATE_FORMAT(send_date, 'yyyy-MM-dd HH') AS hour,
    COUNT(*) AS message_count,
    COUNT(DISTINCT id) AS unique_senders,
    COUNT(DISTINCT re_id) AS unique_receivers
FROM myhive2.news
GROUP BY DATE_FORMAT(send_date, 'yyyy-MM-dd HH')
ORDER BY hour;
4.查询每个人发了多少条信息
select id,count(*) from myhive2.news group by id;
SELECT
    id AS person_name,
    COUNT(*) AS message_count
FROM myhive2.news
GROUP BY id
ORDER BY message_count DESC;
5.统计发送人的设备操作系统分布情况
SELECT
    send_from AS operating_system,
    COUNT(*) AS count
FROM myhive2.news
GROUP BY send_from;
6.统计发送人的手机型号分布情况
SELECT send_phone, COUNT(*) AS count
FROM myhive2.news
GROUP BY send_phone;
7.统计发送人的网络类型分布情况
SELECT send_network, COUNT(*) AS count
FROM myhive2.news
GROUP BY send_network;
8.统计发送消息各地区数据量
SELECT
    send_site AS region,
    COUNT(*) AS message_count
FROM myhive2.news
GROUP BY send_site;
  • 9
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值