大数据综合实例—陌陌聊天数据分析
1. 需求分析

- 数据大小:14万条
- 列分隔符:制表符 \t

2. 建库建表与加载数据
drop database if exists db_msg cascade;
create database db_msg;
use db_msg;
create table db_msg.tb_msg_source(
msg_time string comment "消息发送时间"
, sender_name string comment "发送人昵称"
, sender_account string comment "发送人账号"
, sender_sex string comment "发送人性别"
, sender_ip string comment "发送人ip地址"
, sender_os string comment "发送人操作系统"
, sender_phonetype string comment "发送人手机型号"
, sender_network string comment "发送人网络类型"
, sender_gps string comment "发送人的GPS定位"
, receiver_name string comment "接收人昵称"
, receiver_ip string comment "接收人IP"
, receiver_account string comment "接收人账号"
, receiver_os string comment "接收人操作系统"
, receiver_phonetype string comment "接收人手机型号"
, receiver_network string comment "接收人网络类型"
, receiver_gps string comment "接收人的GPS定位"
, receiver_sex string comment "接收人性别"
, msg_type string comment "消息类型"
, distance string comment "双方距离"
, message string comment "消息内容"
)
row format delimited fields terminated by '\t';