数仓实战之亚马逊全球跨境电商分析平台

一、Sqoop框架
 --1. 验证sqoop
cd /export/server/sqoop-1.4.7

bin/sqoop list-databases \
--connect jdbc:mysql://node1:3306/ \
--username root --password 123456


二、拉链表-Demo
-- 1、MySQL创建商品表
-- 创建数据库 
-- Create the source database and product table on the MySQL side.
-- Note: the time columns are stored as varchar in the source system,
-- formatted 'yyyy-MM-dd'; they are compared as strings downstream.
CREATE DATABASE demo; -- create the product table 
create table if not exists demo.product( 
goods_id varchar(50), -- product id 
goods_status varchar(50), -- product status 
createtime varchar(50), -- product creation date 
modifytime varchar(50) -- product last-modified date 
) ; 

--2、Hive-ODS层建表
-- 创建表
create database if not exists demo;

-- 创建ods层表
create table if not exists demo.ods_product(
  goods_id string,        -- 商品编号
  goods_status string,    -- 商品状态
  createtime string,      -- 商品创建时间
  modifytime string       -- 商品修改时间
)
partitioned by (dt string)   --按照天分区
row format delimited fields terminated by '\t';

--3、Hive dw层创建拉链表
-- 创建拉链表
create table if not exists demo.dw_product(
  goods_id string,        -- 商品编号
  goods_status string,    -- 商品状态
  createtime string,      -- 商品创建时间
  modifytime string,       -- 商品修改时间
  dw_start_date string,   -- 生效日期
  dw_end_date string      -- 失效日期
)
row format delimited fields terminated by '\t';

--4、MySQL数据库导入12月20日数据(4条数据)
insert into demo.product(goods_id, goods_status, createtime, modifytime) values
('001', '待审核', '2020-12-18', '2020-12-20'),
('002', '待售', '2020-12-19', '2020-12-20'),
('003', '在售', '2020-12-20', '2020-12-20'),
('004', '已删除', '2020-12-15', '2020-12-20');

--5、使用sqoop进行全量同步MySQL数据到Hive ods层表,创建Hive分区,并导入mysql的数据
-- 创建分区
alter table demo.ods_product  add  partition (dt='2020-12-20');

--将mysql数据导入Hive
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/demo \
--username root \
--password 123456 \
--table product \
--m 1  \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/demo.db/ods_product/dt=2020-12-20

--6 编写SQL从ods导入dw当天最新的数据

-- 从ods层导入dw当天最新数据
insert overwrite table demo.dw_product
select
  goods_id,                -- 商品编号
  goods_status,            -- 商品状态
  createtime,              -- 商品创建时间
  modifytime,              -- 商品修改时间
  modifytime as dw_start_date,    -- 生效日期
   '9999-12-31' as dw_end_date     -- 失效日期
from
  demo.ods_product
where
  dt = '2020-12-20';

--7、增量导入2020年12月21日数据- MySQL数据库导入12月21日数据(6条数据)
--模拟更新数据
UPDATE demo.product SET goods_status = '待售', modifytime = '2020-12-21' WHERE goods_id = '001';
INSERT INTO demo.product(goods_id, goods_status, createtime, modifytime) VALUES
('005', '待审核', '2020-12-21', '2020-12-21'),
('006', '待审核', '2020-12-21', '2020-12-21');

--8、使用sqoop开发增量同步MySQL数据到Hive ods层表
--Hive创建分区
alter table demo.ods_product add  partition (dt='2020-12-21');
--增量导入
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/demo \
--username root \
--password 123456 \
--target-dir /user/hive/warehouse/demo.db/ods_product/dt=2020-12-21 \
--query "select * from product where modifytime = '2020-12-21' and \$CONDITIONS" \
--delete-target-dir \
--fields-terminated-by '\t' \
--m 1 

--8、编写SQL处理dw层历史数据,重新计算之前的dw_end_date

-- 各层当前数据情况:
--  app 层
--  dw  层  (2020-12-20)   dw_product
--  ods 层  (2020-12-20 、 2020-12-21)
-- Recompute the expiry date (dw_end_date) of the zipper (SCD2) table:
-- any product that reappears in today's ODS increment gets its currently
-- open version closed out with today's date.
-- (fix: the statement was missing its terminating semicolon, which breaks
--  script execution because the next line is a new section.)
select
  t1.goods_id,                -- product id
  t1.goods_status,            -- product status
  t1.createtime,              -- product creation date
  t1.modifytime,              -- product last-modified date
  t1.dw_start_date,           -- effective-from date (never recomputed)
  case when (t2.goods_id is not null and t1.dw_end_date > '2020-12-21')
  then '2020-12-21'           -- close the superseded version
  else t1.dw_end_date         -- otherwise keep the existing expiry date
  end as dw_end_date          -- recomputed expiry date
from
  demo.dw_product t1
  left join
  (select * from demo.ods_product where dt='2020-12-21') t2
   on t1.goods_id = t2.goods_id;

--9、合并当天最新的数据和历史数据

insert overwrite table demo.dw_product
select
  t1.goods_id,                -- 商品编号
  t1.goods_status,            -- 商品状态
  t1.createtime,              -- 商品创建时间
  t1.modifytime,              -- 商品修改时间
  t1.dw_start_date,           -- 生效日期(生效日期无需重新计算)
  case when (t2.goods_id is not null and t1.dw_end_date > '2020-12-21')
  then '2020-12-21'
  else t1.dw_end_date
  end as dw_end_date       -- 更新生效日期(需要重新计算)
from
  demo.dw_product t1
  left join
  (select * from demo.ods_product where dt='2020-12-21') t2
   on t1.goods_id = t2.goods_id
union all
select 
  goods_id,                -- 商品编号
  goods_status,            -- 商品状态
  createtime,              -- 商品创建时间
  modifytime,              -- 商品修改时间
  modifytime as dw_start_date,  -- 生效日期
   '9999-12-31' as dw_end_date   -- 失效日期
from
  demo.ods_product where dt='2020-12-21'
order by dw_start_date, goods_id;

--10、拉链表查询
-- (fix: table name qualified with the demo database, matching every other
--  statement in this section; the bare name fails unless `use demo` was run.)
-- Point-in-time query: state of every product as of 2020-12-20
select * from demo.dw_product where dw_start_date <= '2020-12-20' and dw_end_date >= '2020-12-20' order by goods_id;
-- Current state of every product ('9999-12-31' marks the open version)
select * from demo.dw_product where dw_end_date = '9999-12-31' order by goods_id;

三、业务数据分析
--1、项目环境初始化-导入Mysql业务数据
mysql -uroot –p

mysql>source /export/data/hivedatas/10tables.sql;

--2、项目环境初始化-Hive创建分层数据库和表
--使用zeppelin连接Hive,并执行以下语句
create database if not exists itcast_ods;
create database if not exists itcast_dw;
create database if not exists itcast_app;

--3、创建ods层数据表
hive -f /export/data/hive_sql/ods_create_table.hql

--4、 在订单表中创建 2021-01-09分区
alter table itcast_ods.itcast_orders  add partition(dt='2021-01-09');

--5、导入itcast_orders表数据,这里我们先只导入修改时间为2021-01-09日以及之前的数据,后期我们再增量导入其他数据
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_orders/dt=2021-01-09 \
--query "select * from itcast_orders WHERE modifiedTime <= '2021-01-09 23:59:59' and \$CONDITIONS" \
--delete-target-dir \
--fields-terminated-by '\t' \
--m 1 

--6、ODS层数据采集-其他事实表采集
--    订单明细事实表采集
/export/server/sqoop-1.4.7/bin/sqoop import  \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_order_goods  \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_order_goods/ \
--delete-target-dir \
--fields-terminated-by '\t' \
--m 1

--订单退货表数据采集
/export/server/sqoop-1.4.7/bin/sqoop import  \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_order_refunds \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_order_refunds \
--delete-target-dir \
--fields-terminated-by '\t' \
--m 1


--7、ODS层数据采集-维度表数据采集
/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_goods \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_goods \
--delete-target-dir \
--fields-terminated-by '\t' \
--m 1

/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_goods_cats \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_goods_cats \
--m 1  


/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_org \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_org \
--m 1  


/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_payments \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_payments \
--m 1  


/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_shops \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_shops \
--m 1  


/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_user_address \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_user_address \
--m 1  


/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_users \
--delete-target-dir \
--fields-terminated-by '\t' \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_users \
--m 1

--8、测试数据是否都正确被加载
select * from itcast_ods.itcast_orders limit 10;
select * from itcast_ods.itcast_goods limit 10;
select * from itcast_ods.itcast_order_goods limit 10;
select * from itcast_ods.itcast_shops limit 10;
select * from itcast_ods.itcast_goods_cats limit 10;
select * from itcast_ods.itcast_org limit 10;
select * from itcast_ods.itcast_order_refunds limit 10;
select * from itcast_ods.itcast_users limit 10;
select * from itcast_ods.itcast_user_address limit 10;
select * from itcast_ods.itcast_payments limit 10;


-----------------------------------业务数据分析-DW层数据加载-------------------------------
--订单事实表数据加载-拉链表
--1、创建dw层订单拉链表
-- 创建dw层订单事实表--带有分区字段
-- dw-layer order fact table (zipper/SCD2), partitioned by day.
DROP TABLE IF EXISTS itcast_dw.dw_fact_orders;
create  table itcast_dw.dw_fact_orders (
  orderId            bigint,
  orderNo            string,
  userId             bigint,
  orderStatus        bigint,
  goodsMoney         double,
  deliverType        bigint,
  deliverMoney       double,
  totalMoney         double,
  realTotalMoney     double,
  payType            bigint,
  isPay              bigint,
  areaId             bigint,
  userAddressId      bigint,
  areaIdPath         string,
  userName           string,
  userAddress        string,
  userPhone          string,
  orderScore         bigint,
  isInvoice          bigint,
  invoiceClient      string,
  orderRemarks       string,
  orderSrc           bigint,
  needPay            double,
  payRand            bigint,
  orderType          bigint,
  isRefund           bigint,
  isAppraise         bigint,
  cancelReason       bigint,
  rejectReason       bigint,
  rejectOtherReason  string,
  isClosed           bigint,
  goodsSearchKeys    string,
  orderunique        string,
  isFromCart         bigint,   -- fix: the original DDL omitted this column's type (syntax error)
  receiveTime        string,
  deliveryTime       string,
  tradeNo            string,
  dataFlag           bigint,
  createTime         string,
  settlementId       bigint,
  commissionFee      double,
  scoreMoney         double,
  useScore           bigint,
  orderCode          string,
  extraJson          string,
  orderCodeTargetId  bigint,
  noticeDeliver      bigint,
  invoiceJson        string,
  lockCashMoney      double,
  payTime            string,
  isBatch            bigint,
  totalPayFee        bigint,
  modifiedTime       string,
  dw_start_date      string,   -- SCD2 effective-from date
  dw_end_date        string    -- SCD2 expiry date; '9999-12-31' marks the open version
)
partitioned by (dt string) -- partitioned by day
row format delimited fields terminated by '\t';

--2、第一次全量数据导入dw层拉链表
 --订单表数据:ods层导入dw层
insert overwrite table itcast_dw.dw_fact_orders  partition(dt='2021-01-09')
select
orderId,          
orderNo,          
userId,           
orderStatus,      
goodsMoney,       
deliverType,      
deliverMoney,     
totalMoney,       
realTotalMoney,   
payType,          
isPay,            
areaId,           
userAddressId,    
areaIdPath,       
userName,         
userAddress,      
userPhone,        
orderScore,       
isInvoice,        
invoiceClient,    
orderRemarks,     
orderSrc,         
needPay,          
payRand,          
orderType,        
isRefund,         
isAppraise,       
cancelReason,     
rejectReason,     
rejectOtherReason,
isClosed,         
goodsSearchKeys,  
orderunique,      
isFromCart,       
receiveTime,      
deliveryTime,     
tradeNo,          
dataFlag,         
createTime,       
settlementId,     
commissionFee,    
scoreMoney,       
useScore,         
orderCode,        
extraJson,        
orderCodeTargetId,
noticeDeliver,    
invoiceJson,      
lockCashMoney,    
payTime,          
isBatch,          
totalPayFee,      
modifiedTime,
--增加开始时间
date_format(modifiedTime,'yyyy-MM-dd') as dw_start_date,
--增加结束时间
'9999-12-31' as dw_end_date
from itcast_ods.itcast_orders where dt="2021-01-09";

--3、模拟新增和修改数据
--在mysql中添加模拟增量数据,修改orderId=1的订单状态,添加新订单数据
--将资料中的itcast_orders_append_update.sql脚本上传到/export/data/hive_sql/目录

mysql -uroot -p
source /export/data/hive_sql/itcast_orders_append_update.sql

--4、Sqoop抽取增量数据导入ods层
-- 在订单表中创建 2021-01-10分区
alter table itcast_ods.itcast_orders  add partition(dt='2021-01-10');

/export/server/sqoop-1.4.7/bin/sqoop import \
--connect jdbc:mysql://node1:3306/itcast_shop \
--username root \
--password 123456 \
--table itcast_orders \
--target-dir /user/hive/warehouse/itcast_ods.db/itcast_orders/dt=2021-01-10 \
--check-column modifiedTime \
--incremental lastmodified \
--last-value "2021-01-10 00:00:00" \
--fields-terminated-by '\t' \
--m 1 \
--append 

--5、Merge the ODS increment into the dw-layer zipper table.
-- Enable dynamic partitioning: when inserting from a partitioned source,
-- Hive routes each row to the target partition named by the LAST column of
-- the SELECT list, creating partitions on demand.
-- (fix: the property name was misspelled "hive.exec.dynamici.partition",
--  so the setting silently had no effect.)
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;

--合并历史数据和最新数据到拉链表sql
insert overwrite  table itcast_dw.dw_fact_orders
select
dw.orderId            ,
dw.orderNo            ,
dw.userId             ,
dw.orderStatus        ,
dw.goodsMoney         ,
dw.deliverType        ,
dw.deliverMoney       ,
dw.totalMoney         ,
dw.realTotalMoney     ,
dw.payType            ,
dw.isPay              ,
dw.areaId             ,
dw.userAddressId      ,
dw.areaIdPath         ,
dw.userName           ,
dw.userAddress        ,
dw.userPhone          ,
dw.orderScore         ,
dw.isInvoice          ,
dw.invoiceClient      ,
dw.orderRemarks       ,
dw.orderSrc           ,
dw.needPay            ,
dw.payRand            ,
dw.orderType          ,
dw.isRefund           ,
dw.isAppraise         ,
dw.cancelReason       ,
dw.rejectReason       ,
dw.rejectOtherReason  ,
dw.isClosed           ,
dw.goodsSearchKeys    ,
dw.orderunique        ,
dw.isFromCart         ,
dw.receiveTime        ,
dw.deliveryTime       ,
dw.tradeNo            ,
dw.dataFlag           ,
dw.createTime         ,
dw.settlementId       ,
dw.commissionFee      ,
dw.scoreMoney         ,
dw.useScore           ,
dw.orderCode          ,
dw.extraJson          ,
dw.orderCodeTargetId  ,
dw.noticeDeliver      ,
dw.invoiceJson        ,
dw.lockCashMoney      ,
dw.payTime            ,
dw.isBatch            ,
dw.totalPayFee        ,
dw.modifiedTime ,
dw.dw_start_date,
--修改end_date
case when ods.orderid is not null and dw.dw_end_date ='9999-12-31'
then '2021-01-10'
else dw.dw_end_date
end as dw_end_date,
--动态分区需要的字段
dw.dt
from 
itcast_dw.dw_fact_orders  dw 
left join 
(select * from itcast_ods.itcast_orders where dt ='2021-01-10') ods
on dw.orderid=ods.orderid 
union all
--今天新增数据的插入动作
select
orderId            ,
orderNo            ,
userId             ,
orderStatus        ,
goodsMoney         ,
deliverType        ,
deliverMoney       ,
totalMoney         ,
realTotalMoney     ,
payType            ,
isPay              ,
areaId             ,
userAddressId      ,
areaIdPath         ,
userName           ,
userAddress        ,
userPhone          ,
orderScore         ,
isInvoice          ,
invoiceClient      ,
orderRemarks       ,
orderSrc           ,
needPay            ,
payRand            ,
orderType          ,
isRefund           ,
isAppraise         ,
cancelReason       ,
rejectReason       ,
rejectOtherReason  ,
isClosed           ,
goodsSearchKeys    ,
orderunique        ,
isFromCart         ,
receiveTime        ,
deliveryTime       ,
tradeNo            ,
dataFlag           ,
createTime         ,
settlementId       ,
commissionFee      ,
scoreMoney         ,
useScore           ,
orderCode          ,
extraJson          ,
orderCodeTargetId  ,
noticeDeliver      ,
invoiceJson        ,
lockCashMoney      ,
payTime            ,
isBatch            ,
totalPayFee        ,
modifiedTime       ,
--增加开始时间
date_format(modifiedTime,'yyyy-MM-dd') as dw_start_date,
--增加结束时间
'9999-12-31' as dw_end_date,
--指定动态分区使用的字段,动态分区的用法:就是查询字段的最后一个字段hive表进行解析然后存入指定分区
--此次数据分区按照订单的修改时间
date_format(modifiedTime,'yyyy-MM-dd') 
from itcast_ods.itcast_orders where dt="2021-01-10";


--6、验证数据查询拉链表数据
select * from itcast_dw.dw_fact_orders limit 15;


--7、其他事实表的加载
-- 创建dw层订单明细表
drop table if exists itcast_dw.dw_fact_order_goods;
create  table itcast_dw.dw_fact_order_goods(
    ogId            bigint,
    orderId         bigint,
    goodsId         bigint,
    goodsNum        bigint,
    goodsPrice      double,
    payPrice        double,
    goodsSpecId     bigint,
    goodsSpecNames  string,
    goodsName       string,
    goodsImg        string,
    extraJson       string,
    goodsType       bigint,
    commissionRate  double,
    goodsCode       string,
    promotionJson   string,
    createtime      string
)
row format delimited fields terminated by '\t';

--将ods层的数据全部加载到dw层
insert overwrite table itcast_dw.dw_fact_order_goods select * from itcast_ods.itcast_order_goods;

-- Create the dw-layer order refunds table.
-- (fix: the DROP referenced itcast_dw.itcast_order_refunds — a different
--  table than the one being created — so reruns would fail on the
--  already-existing dw_fact_order_refunds.)
drop table if exists itcast_dw.dw_fact_order_refunds;
create  table itcast_dw.dw_fact_order_refunds(
    id                bigint,
    orderId           bigint,
    goodsId           bigint,
    refundTo          bigint,
    refundReson       bigint,
    refundOtherReson  string,
    backMoney         double,
    refundTradeNo     string,
    refundRemark      string,
    refundTime        string,
    shopRejectReason  string,
    refundStatus      bigint,
    createTime        string,
    modifiedTime      string
)
row format delimited fields terminated by '\t';

--将ods层的数据全部加载到dw层
insert overwrite table itcast_dw.dw_fact_order_refunds select * from itcast_ods.itcast_order_refunds;

------------APP层指标开发-指标开发1-时间维度分析业务开发------------------------------
--1、创建app层数据表
drop table if exists itcast_app.app_order_date;
create table itcast_app.app_order_date(
  date_type string,   -- 时间维度类型
  date_val string,    -- 时间维度值
  order_count bigint, -- 订单笔数
  order_money double  -- 订单交易额
)
row format delimited fields terminated by '\t' ;

--2、创建日期维度表
DROP TABLE IF EXISTS itcast_dw.dw_dim_date;
CREATE TABLE itcast_dw.dw_dim_date(
  date_key string,
  date_value string,
  day_in_year string,
  day_in_month string,
  is_first_day_in_month string,
  is_last_day_in_month string,
  weekday string,
  week_in_month string,
  is_first_day_in_week string,
  is_dayoff string,
  is_workday string,
  is_holiday string,
  date_type string,
  month_number string,                        
  year string,
  year_month_number string,
  quarter_name string,
  quarter_number string,
  year_quarter string
)
row format delimited fields terminated by '\t';

--给表加载数据
load data local inpath '/export/data/hivedatas/dim_holiday.txt' into table itcast_dw.dw_dim_date;


--3、统计2020年期间每个季度的销售订单笔数、订单总额
insert overwrite table itcast_app.app_order_date 
select
 '1' as date_type,              -- 时间维度类型,1表示季度
  t1.year_quarter as date_val,  -- 季度名称
  count(orderid),               -- 订单总数
  sum(totalMoney)               -- 订单总额
from 
  itcast_dw.dw_dim_date t1
  left join
  (select * from itcast_dw.dw_fact_orders  where dt >='2020-01-01' and dw_end_date='9999-12-31') t2
   on t1.date_value = date_format(createtime, 'yyyy-MM-dd')
group by t1.year_quarter
order by t1.year_quarter;

-- Smoke test: quarterly rows.
-- (fix: app_order_date has no dt column/partition, so the original
--  `dt='2021-01-09'` predicate made the query fail; filter by type only.)
select * from itcast_app.app_order_date where date_type = '1';

--4、统计2020年期间每个月的销售订单笔数、订单总额
insert into table itcast_app.app_order_date
select
   '2' as date_type, --2表示月类型
  t1.year_month_number as date_val,
   count(orderid),                  -- 订单总数
   if(sum(totalMoney) is NULL,0,sum(totalMoney))                         -- 订单总额
from
  itcast_dw.dw_dim_date t1
  left join
    (select * from itcast_dw.dw_fact_orders  where dt >='2020-01-01' and dw_end_date='9999-12-31') t2
    on t1.date_value = date_format(createtime, 'yyyy-MM-dd')
group by 
  t1.year_month_number
order by t1.year_month_number;

---- 测试
select * from itcast_app.app_order_date where date_type = '2';

--5、统计2020年期间每周一到周日的销售订单笔数、订单总额
insert into table itcast_app.app_order_date
select
   '3' as date_type,
   t1.weekday as date_val,
    count(orderid),                  -- 订单总数
 if(sum(totalMoney) is NULL,0,sum(totalMoney))                            -- 订单总额
from 
  itcast_dw.dw_dim_date t1
  left join
  (select * from itcast_dw.dw_fact_orders  where dt >='2020-01-01' and dw_end_date='9999-12-31') t2
    on t1.date_value = date_format(createtime, 'yyyy-MM-dd')
group by
  t1.weekday
order by t1.weekday;

-- 测试
select * from itcast_app.app_order_date where  date_type = '3';


-- 统计2020年期间国家法定节假日、休息日、工作日的订单笔数、订单总额

insert into table itcast_app.app_order_date 
select
   '4' as date_type,
  t1.date_type as date_val,
   count(orderid) as order_count,                  -- 订单总数
 if(sum(totalMoney) is NULL,0,sum(totalMoney))  -- 订单总额
from 
  itcast_dw.dw_dim_date t1
  left join
  (select * from itcast_dw.dw_fact_orders  where dt >='2020-01-01' and dw_end_date='9999-12-31') t2
    on t1.date_value = date_format(createtime, 'yyyy-MM-dd')
group by
  t1.date_type
order by t1.date_type;

-- 测试
select * from itcast_app.app_order_date where date_type = '4';

------------APP层指标开发-指标开发2-订单分析地域、分类维度分析业务开发------------------------------
--1、创建app(数据集市层)订单分析表
DROP TABLE IF EXISTS itcast_app.app_trade_order;
create table if not exists itcast_app.app_trade_order(
  area_type string,               -- 区域范围:区域类型(全国、大区、城市)
  region_name string,             -- 区域名称
  city_name string,               -- 城市名称
  category_type string,           --分类类型(一级、二级)
  category_1st_name string,       -- 一级分类名称
  category_2d_name string,        -- 二级分类名称
  payment_name string,            -- 支付方式(所有、微信、支付宝、...)
  total_count bigint,             -- 订单数量
  total_goods_num bigint,         -- 商品数量
  total_money double              -- 支付金额
)
row format delimited fields terminated by '\t';


--2、商品分类维度数据表操作
  -- 1:表创建
  DROP TABLE IF EXISTS itcast_dw.dw_dim_goods_cats;
  create table if not exists itcast_dw.dw_dim_goods_cats(
    cat_3d_id string,                   -- 三级商品分类id
    cat_3d_name string,                 -- 三级商品分类名称
    cat_2d_id string,                    -- 二级商品分类Id
    cat_2d_name string,                  -- 二级商品分类名称
    cat_1t_id string,                   -- 一级商品分类id
    cat_1t_name string                  -- 一级商品分类名称
  )
  row format delimited fields terminated by '\t' ;
-- 2. 加载商品分类维度表数据
insert overwrite table itcast_dw.dw_dim_goods_cats
select 
  t3.catid as cat_3d_id,          -- 三级分类id
  t3.catname as cat_3d_name,      -- 三级分类名称
  t2.catid as cat_2d_id,          -- 二级分类id
  t2.catname as cat_2d_name,      -- 二级分类名称
  t1.catid as cat_1t_id,          -- 一级分类id
  t1.catname as cat_1t_name       -- 一级分类名称
from 
  (select catid, catname, parentid from itcast_ods.itcast_goods_cats where cat_level=3) t3 -- 商品三级分类数据
  left join
  (select catid, catname, parentid from itcast_ods.itcast_goods_cats where cat_level=2) t2 -- 商品二级分类数据
   on t3.parentid = t2.catid
  left join
  (select catid, catname, parentid from itcast_ods.itcast_goods_cats where cat_level=1) t1 -- 商品一级分类数据
   on t2.parentid = t1.catid;
   
-- 测试数据
select * from itcast_dw.dw_dim_goods_cats limit 5;

--3、支付方式维度数据表操作
-- 1. 创建支付方式维度表
drop table if exists itcast_dw.dw_dim_payments;
create table if not exists itcast_dw.dw_dim_payments(
  payment_id string,          -- 支付方式id
  payment_name string         -- 支付方式名称
)
row format delimited fields terminated by '\t' ;
 
-- 2. 加载支付方式维度数据
-- 需要额外添加一行数据 0 -> 其他
insert overwrite table itcast_dw.dw_dim_payments 
select 
  t1.id as payment_id,        -- 支付方式id
  t1.payName as payment_name  -- 支付方式名称
from 
  itcast_ods.itcast_payments  t1 ;
 
   -- 测试查询支付方式维度数据
select * from itcast_dw.dw_dim_payments limit 5;

--4、订单明细事实表拉宽
-- 4.1. 创建订单明细事实表
DROP TABLE IF EXISTS itcast_dw.dw_fact_order_goods_wide;
create table if not exists itcast_dw.dw_fact_order_goods_wide(
  order_id string,            -- 订单id
  goods_cat_3d_id string,     -- 商品三级分类id
  shop_id string,             -- 店铺id
  payment_id string,          -- 订单支付方式
  goods_num bigint,           -- 商品数量
  pay_money double,           -- 订单明细金额
  paytime string              -- 订单时间
)
row format delimited fields terminated by '\t' ;
-- 4.2、加载数据
insert overwrite table itcast_dw.dw_fact_order_goods_wide 
select
  t1.orderid as order_id,
  t3.goodscatid as goods_cat_3d_id,
  t3.shopid as shop_id,
  t1.paytype as payment_id,
  t2.goodsnum as goods_num,
  t2.goodsnum*t2.payprice  as pay_money,
  t1.paytime as paytime
from
  (select orderid, paytype, paytime from itcast_dw.dw_fact_orders  ) t1                     -- 订单表数据
 join
  (select orderid, goodsid, goodsnum, payprice from itcast_ods.itcast_order_goods) t2     -- 订单明细数
   on t1.orderid = t2.orderid
 join
  (select goodsid, shopid, goodscatid from itcast_ods.itcast_goods ) t3           -- 商品数量
   on t2.goodsid = t3.goodsid;
   
-- 测试数据
select * from itcast_dw.dw_fact_order_goods_wide limit 5;


--5、指标计算汇总计算-全国、一级商品分类维度交易信息
-- 获取全国、一级商品分类维度的分支付类型数据
-- 5.1 获取全国、一级商品分类维度的不分支付类型数据
insert into table itcast_app.app_trade_order 
select
   '全国' as area_type,
   '' as region_name,
   '' as city_name,
   '一级分类' as category_type,
  t1.cat_1t_name as category_1st_name,
   '' as category_2d_name,
   '所有' as payment_name,
   count(distinct t2.order_id) as total_count,
  sum(case when t2.goods_num is null
      then 0
      else t2.goods_num
      end
      ) as total_goods_num,
  sum(case when t2.pay_money is null
      then 0
      else t2.pay_money
      end
      ) as total_money
from
  (select * from itcast_dw.dw_dim_goods_cats) t1
  left join 
  (select * from itcast_dw.dw_fact_order_goods_wide) t2
   on t1.cat_3d_id = t2.goods_cat_3d_id
group by t1.cat_1t_name;

-- 测试
select * from itcast_app.app_trade_order where category_type = '一级分类' and payment_name = '所有';

-- 5、全国、二级商品分类维度的分支付类型数据

insert into table itcast_app.app_trade_order
select
   '全国' as area_type,
   '' as region_name,
   '' as city_name,
   '二级分类' as category_type,
  t3.cat_1t_name as category_1st_name,
  t3.cat_2d_name as category_2d_name,
  t1.payment_name as payment_name,
   count(distinct t2.order_id) as total_count,
  sum(t2.goods_num) as total_goods_num,
  sum(t2.pay_money) as total_money
from
  (select * from itcast_dw.dw_dim_payments ) t1
  left join
  (select * from itcast_dw.dw_fact_order_goods_wide ) t2
   on t1.payment_id = t2.payment_id
  left join
  (select * from itcast_dw.dw_dim_goods_cats) t3
   on t2.goods_cat_3d_id = t3.cat_3d_id
group by t1.payment_name, t3.cat_1t_name, t3.cat_2d_name;
 
-- 测试查询数据
select * from itcast_app.app_trade_order where  area_type = '全国' and category_type = '二级分类' limit 10;


------------APP层指标开发-指标开发3-用户订单指标业务开发------------------------------
--1、创建APP表
drop table if exists itcast_app.user_order_measure;
create table itcast_app.user_order_measure(
  userid string,                          -- 用户id
  username string,                        -- 用户名称
  first_paytime string,                   -- 第一次消费时间
  lastest_paytime string,                 -- 最近一次消费时间
  first_day_during_days bigint,           -- 首单距今时间
  lastest_day_durning_days bigint,        -- 尾单距今时间
  min_paymoney double,                    -- 最小消费金额
  max_paymoney double,                    -- 最大消费金额
  total_count_without_back bigint,        -- 累计消费次数(不含退拒)
  total_money_without_back double,        -- 累计消费金额(不含退拒)
  total_count_without_back_30 bigint,     -- 近30天累计消费次数(不含退拒)
  total_money_without_back_30 double,     -- 近30天累计消费金额(不含退拒)
  total_count_30 bigint,                  -- 近30天累计消费次数(含退拒)
  total_money_30 double,                  -- 近30天累计消费金额(含退拒)
  atv double,                             -- 客单价(含退拒)
  atv_withoutback double,                 -- 客单价(不含退拒)
  atv_60 double,                          -- 近60天客单价(含退拒)
  atv_60_withoutback double,              -- 近60天客单价(不含退拒)
  school_order_count bigint,              -- 学校下单总数
  company_order_count bigint,             -- 单位下单总数
  home_order_count bigint,                -- 家里下单总数
  am_order_count bigint,                  -- 上午下单总数
  pm_order_count bigint,                  -- 下午下单总数
  night_order_count bigint,               -- 晚上下单总数
  most_usual_address string,              -- 常用收货地址
  most_usual_paytype string               -- 常用支付方式
)
row format delimited fields terminated by '\t';


--2、创建用户维度表:itcast_dw.dw_dim_user

drop table if exists itcast_dw.dw_dim_users;
create table itcast_dw.dw_dim_users(
  userId          bigint,
  loginName       string,
  userSex         bigint
)
row format delimited fields terminated by '\t';

--3、加载数据
insert overwrite table itcast_dw.dw_dim_users
select 
  userId,   
  loginName,
  userSex  
from
  itcast_ods.itcast_users ;
   
--验证
select * from itcast_dw.dw_dim_users limit 10;


--4. Create the DW-layer user address dimension table
drop table if exists itcast_dw.dw_dim_user_address;
create table itcast_dw.dw_dim_user_address(
  addressId    bigint,   -- address id
  userId       bigint,   -- user id
  userName     string,   -- user name
  otherName   string,    -- address label (home / school / workplace ...)
  userPhone   string,    -- user phone
  areaIdPath   string,   -- area id path
  areaId       bigint,   -- area id
  userAddress string,    -- user address text
  isDefault    bigint,   -- default-address flag
  dataFlag     bigint,   -- data status flag
  createTime   string    -- creation time
)
row format delimited fields terminated by '\t';

--5. Load the dimension from ODS itcast_user_address (full overwrite)
insert overwrite table itcast_dw.dw_dim_user_address 
select 
addressId, --address id
userId, --user id
userName, --user name
otherName, --address label
userPhone, --user contact phone
areaIdPath, --area id path
areaId,     --area id
userAddress, --user address text
isDefault,  --default-address flag
dataFlag,   --data status flag
createTime  --creation time
from itcast_ods.itcast_user_address;


--6. Create the order wide fact table with time-window flags and address label
drop table if exists itcast_dw.dw_fact_order_wide;
create table itcast_dw.dw_fact_order_wide(
  orderId bigint, --order id
  orderStatus bigint, --order status
  payType bigint, --payment type
  userId bigint, --user id
  userAddressId bigint, --user address id
  payTime string, --payment time
  payMoney double, --payment amount
  createtime string, --creation time
  flag30 bigint, --within last 30 days flag
  flag60 bigint, --within last 60 days flag
  flag90 bigint, --within last 90 days flag
  flag180 bigint,--within last 180 days flag
  flagTimeBucket string, --time-of-day bucket (early morning, morning, ...)
  othername string --address label (home, school, workplace, ...)
)
row format delimited fields terminated by '\t';


--7. Load the wide fact table: take the currently-effective rows of the
--   order zipper table, derive recency flags and a time-of-day bucket,
--   and attach the address label via the address dimension.
insert overwrite table itcast_dw.dw_fact_order_wide
select
  t1.orderId,
  t1.orderStatus,
  t1.payType,
  t1.userId,
  t1.userAddressId,
  t1.payTime,
  t1.totalMoney,
  t1.createtime,
   --within last 30 days
  case when datediff(current_timestamp, t1.createtime) <= 30
      then 1
  else 0
  end as flag_30,
    --within last 60 days
  case when datediff(current_timestamp, t1.createtime) <= 60
      then 1
  else 0
  end as flag_60,
    --within last 90 days
  case when datediff(current_timestamp, t1.createtime) <= 90
      then 1
  else 0
  end as flag_90, 
    --within last 180 days
    --BUGFIX: was ">= 180", which flagged orders OLDER than 180 days and
    --contradicted the 30/60/90-day flags above.
  case when datediff(current_timestamp, t1.createtime) <= 180
  then 1
  else 0
  end as flag_180,
   --time-of-day bucket from the creation hour
  case when hour(t1.createtime) >= 0 and hour(t1.createtime) < 6
      then '凌晨'
  when hour(t1.createtime) >= 6 and hour(t1.createtime) < 12
      then '上午'
  when hour(t1.createtime) >= 12 and hour(t1.createtime) < 14
      then '中午'
  when hour(t1.createtime) >= 14 and hour(t1.createtime) < 18
      then '下午'
  else '晚上'
  end as flag_time_bucket,
   --address label: home / workplace / school
  t2.othername
from 
  --only the currently-effective zipper rows
  (select orderId,orderStatus,payType,userId,userAddressId,payTime,totalMoney,createtime from itcast_dw.dw_fact_orders 
where dw_end_date ='9999-12-31') t1  
left join
  (select * from itcast_dw.dw_dim_user_address) t2
   on t1.userAddressId = t2.addressId;
       
-- Sanity check
select * from itcast_dw.dw_fact_order_wide limit 5; 


--1. Metric set 1: first/last payment time, recency, min/max payment per user.
--   NOTE(review): aliases are swapped relative to later queries here —
--   t2 is the order wide table and t1 is the users dimension.
select
  t1.userid, --user id
  t1.loginname, --login name
  MIN(t2.payTime) as first_paytime, --first order time
  MAX(t2.payTime) as lastest_paytime, --latest order time
  DATEDIFF(CURRENT_TIMESTAMP, MIN(t2.payTime)) as first_day_during_days,--days since first order
  DATEDIFF(CURRENT_TIMESTAMP, MAX(t2.payTime)) as lastest_day_durning_days, --days since latest order
  MIN(t2.paymoney) as min_paymoney, --minimum payment amount
  MAX(t2.paymoney) as max_paymoney  --maximum payment amount
from
  (select * from itcast_dw.dw_fact_order_wide) as t2
  left join
    (select * from itcast_dw.dw_dim_users) as t1
   on t1.userId = t2.userId
group by t1.userid,t1.loginname;

--2. Metric set 2: the metrics above plus 30-day-window counts/amounts.
--   Order status 10/11 means returned/rejected (excluded where noted).
select
  t2.userid,
  t2.loginname,
  MIN(t1.payTime) as first_paytime,
  MAX(t1.payTime) as lastest_paytime,
  DATEDIFF(CURRENT_TIMESTAMP, MIN(t1.payTime)) as first_day_during_days,
  DATEDIFF(CURRENT_TIMESTAMP, MAX(t1.payTime)) as lastest_day_durning_days,
  MIN(t1.paymoney) as min_paymoney,
  MAX(t1.paymoney) as max_paymoney,
sum(
case when t1.orderstatus !=10 and t1.orderstatus !=11
  then 1
  else 0
  end
) as total_count_without_back,--lifetime order count excluding returns/rejections
sum(case when t1.orderstatus !=10 and t1.orderstatus !=11
  then t1.paymoney
  else 0
  end
  ) as total_money_without_back, --lifetime spend excluding returns/rejections

sum(case when t1.flag30 =1 and t1.orderstatus !=10 and t1.orderstatus !=11
  then 1
    else 0
    end
  ) as total_count_without_back_30,   --30-day order count excluding returns/rejections

  sum(case when t1.flag30 =1 and t1.orderstatus !=10 and t1.orderstatus !=11
  then t1.paymoney
    else 0
    end
  ) as total_money_without_back_30,-- 30-day spend excluding returns/rejections
sum(case when t1.flag30 =1 
  then 1
    else 0
    end
  ) as total_count_without_30,    --30-day order count INCLUDING returns/rejections
                                  --NOTE(review): alias says "without" but this counts all orders
  sum(case when t1.flag30 =1 
  then t1.paymoney
    else 0
    end
  ) as total_money_with_back_30    --30-day spend including returns/rejections
    
from 
(select * from itcast_dw.dw_fact_order_wide) t1 
left join  
(select * from itcast_dw.dw_dim_users) t2 on 
t1.userid=t2.userid
group by t2.userid,t2.loginname limit 5;

---------------------------------------------------
-- Same user metrics as above, but over the 180-day window (flag180).
-- Order status 10/11 means returned/rejected (excluded where noted).
select
  t2.userid,
  t2.loginname,
  MIN(t1.payTime) as first_paytime,
  MAX(t1.payTime) as lastest_paytime,
  DATEDIFF(CURRENT_TIMESTAMP, MIN(t1.payTime)) as first_day_during_days,
  DATEDIFF(CURRENT_TIMESTAMP, MAX(t1.payTime)) as lastest_day_durning_days,
  MIN(t1.paymoney) as min_paymoney,
  MAX(t1.paymoney) as max_paymoney,
sum(
case when t1.orderstatus !=10 and t1.orderstatus !=11
  then 1
  else 0
  end
) as total_count_without_back,--lifetime order count excluding returns/rejections
sum(case when t1.orderstatus !=10 and t1.orderstatus !=11
  then t1.paymoney
  else 0
  end
  ) as total_money_without_back, --lifetime spend excluding returns/rejections

sum(case when t1.flag180 =1 and t1.orderstatus !=10 and t1.orderstatus !=11
  then 1
    else 0
    end
  ) as total_count_without_back_180,   --180-day order count excluding returns/rejections
  
  sum(case when t1.flag180 =1 and t1.orderstatus !=10 and t1.orderstatus !=11
  then t1.paymoney
    else 0
    end
  ) as total_money_without_back_180,-- 180-day spend excluding returns/rejections
sum(case when t1.flag180 =1 
  then 1
    else 0
    end
  ) as total_count_without_180,    --180-day order count including returns/rejections
  sum(case when t1.flag180 =1 
  then t1.paymoney
    else 0
    end
  ) as total_money_with_back_180    --180-day spend including returns/rejections
    
from 
(select * from itcast_dw.dw_fact_order_wide) t1 
left join  
(select * from itcast_dw.dw_dim_users) t2 on 
t1.userid=t2.userid
group by t2.userid,t2.loginname limit 5;
-- BUGFIX: terminating semicolon was missing, which would make the next
-- statement parse as part of this one; comments also wrongly said "30天".


--3. Metric set 3: order counts per address label and per time-of-day bucket.
select
  t2.userid,
  t2.loginname,
SUM(case when t1.otherName = '学校'
      then 1
      else 0
      end) as school_order_count,      -- orders placed at school
  SUM(case when t1.otherName = '单位'
      then 1
      else 0
      end) as company_order_count,    -- orders placed at workplace
  SUM(case when t1.otherName = '家里'
      then 1
      else 0
      end) as home_order_count,        -- orders placed at home
  SUM(case when t1.flagTimeBucket = '上午'
      then 1
      else 0
      end) as am_order_count,          -- orders placed in the morning
  SUM(case when t1.flagTimeBucket = '下午'
      then 1
      else 0
      end) as pm_order_count,          -- orders placed in the afternoon
  SUM(case when t1.flagTimeBucket = '晚上'
      then 1
      else 0
      end) as night_order_count-- orders placed in the evening
from
(select * from itcast_dw.dw_fact_order_wide) t1 
left join  
(select * from itcast_dw.dw_dim_users) t2 on 
t1.userid=t2.userid
group by t2.userid,t2.loginname limit 5;


--4. Load all user-order metrics into the app-layer result table.
--   Columns map positionally onto itcast_app.user_order_measure;
--   atv / atv_withoutback / 60-day metrics and the "most usual" fields
--   are placeholders (0 / '') to be filled by later steps.
insert overwrite table itcast_app.user_order_measure 
select
 t2.userid,
  t2.loginname,
  MIN(t1.payTime) as first_paytime,
  MAX(t1.payTime) as lastest_paytime,
  DATEDIFF(CURRENT_TIMESTAMP, MIN(t1.payTime)) as first_day_during_days,
  DATEDIFF(CURRENT_TIMESTAMP, MAX(t1.payTime)) as lastest_day_durning_days,
  MIN(t1.paymoney) as min_paymoney,
  MAX(t1.paymoney) as max_paymoney,
sum(
case when t1.orderstatus !=10 and t1.orderstatus !=11
  then 1
  else 0
  end
) as total_count_without_back,--lifetime order count excluding returns/rejections
sum(case when t1.orderstatus !=10 and t1.orderstatus !=11
  then t1.paymoney
  else 0
  end
  ) as total_money_without_back, --lifetime spend excluding returns/rejections

sum(case when t1.flag30 =1 and t1.orderstatus !=10 and t1.orderstatus !=11
  then 1
    else 0
    end
  ) as total_count_without_back_30,   --30-day order count excluding returns/rejections

  sum(case when t1.flag30 =1 and t1.orderstatus !=10 and t1.orderstatus !=11
  then t1.paymoney
    else 0
    end
  ) as total_money_without_back_30,-- 30-day spend excluding returns/rejections
sum(case when t1.flag30 =1 
  then 1
    else 0
    end
  ) as total_count_without_30,    --30-day order count including returns/rejections
  sum(case when t1.flag30 =1 
  then t1.paymoney
    else 0
    end
  ) as total_money_with_back_30,    --30-day spend including returns/rejections
      0 as  atv,                        -- placeholder: average transaction value
      0 as atv_withoutback,             -- placeholder: ATV excluding returns/rejections
      0 as total_count_without_60,      -- placeholder: 60-day count
      0 as total_money_with_back_60,    -- placeholder: 60-day spend
    SUM(case when t1.otherName = '学校'
    then 1
    else 0
    end) as school_order_count,      -- orders placed at school
SUM(case when t1.otherName = '单位'
    then 1
    else 0
    end) as company_order_count,    -- orders placed at workplace
SUM(case when t1.otherName = '家里'
    then 1
    else 0
    end) as home_order_count,        -- orders placed at home
SUM(case when t1.flagTimeBucket = '上午'
    then 1
    else 0
    end) as am_order_count,          -- orders placed in the morning
SUM(case when t1.flagTimeBucket = '下午'
    then 1
    else 0
    end) as pm_order_count,          -- orders placed in the afternoon
SUM(case when t1.flagTimeBucket = '晚上'
    then 1
    else 0
    end) as night_order_count,-- orders placed in the evening
      --placeholder: most usual delivery address
    '' most_usual_address,
--placeholder: most usual payment type
    '' most_usual_paytype
from 
(select * from itcast_dw.dw_fact_order_wide) t1 
left join  
(select * from itcast_dw.dw_dim_users) t2 on 
t1.userid=t2.userid
group by t2.userid,t2.loginname;
-- BUGFIX: terminating semicolon was missing.

-------------------------Export result tables to MySQL----------------------------------
--1. Create the target database in MySQL
create database hive_app_result;
--2. Export the time-dimension business metrics
 --2.1 Create the MySQL target table
  create table hive_app_result.app_order_date(
    date_type varchar(20),   -- time dimension type
    date_val varchar(20),    -- time dimension value
    order_count bigint, -- order count
    order_money double  -- order revenue
  );
--2.2 Export the data (sqoop: Hive warehouse dir -> MySQL table)
  /export/server/sqoop-1.4.7/bin/sqoop export \
  --connect jdbc:mysql://192.168.88.100:3306/hive_app_result \
  --username root \
  --password 123456 \
  --table app_order_date \
  --input-fields-terminated-by '\t' \
  --export-dir /user/hive/warehouse/itcast_app.db/app_order_date

--3. Export the region / product-category metrics
 --3.1 Create the MySQL target table
  DROP TABLE IF EXISTS hive_app_result.app_trade_order;
  CREATE TABLE IF NOT EXISTS hive_app_result.app_trade_order(
    area_type VARCHAR(20),               -- area scope (nationwide / region / city)
    region_name VARCHAR(20),             -- region name
    city_name VARCHAR(20),               -- city name
    category_type VARCHAR(20),           -- category level type
    category_1st_name VARCHAR(20),       -- level-1 category name
    category_2d_name VARCHAR(20),        -- level-2 category name
    payment_name VARCHAR(20),            -- payment type (all / WeChat / Alipay / ...)
    total_count BIGINT,             -- order count
    total_goods_num BIGINT,         -- goods count
    total_money DOUBLE              -- payment amount
  );
--3.2 Export the data ('\N' encodes Hive NULLs)
  /export/server/sqoop-1.4.7/bin/sqoop export \
  --connect jdbc:mysql://192.168.88.100:3306/hive_app_result \
  --username root \
  --password 123456 \
  --table app_trade_order \
  --input-fields-terminated-by '\t' \
  --input-null-string '\\N' --input-null-non-string '\\N' \
  --export-dir /user/hive/warehouse/itcast_app.db/app_trade_order

  --4. Export the user order metrics
  --4.1 Create the MySQL target table
  CREATE TABLE hive_app_result.user_order_measure(
    userid VARCHAR(50),                          -- user id
    username VARCHAR(50),                        -- user name
    first_paytime VARCHAR(50),                   -- first payment time
    lastest_paytime VARCHAR(50),                 -- latest payment time
    first_day_during_days BIGINT,           -- days since first order
    lastest_day_durning_days BIGINT,        -- days since latest order
    min_paymoney DOUBLE,                    -- minimum payment amount
    max_paymoney DOUBLE,                    -- maximum payment amount
    total_count_without_back BIGINT,        -- lifetime order count (excl. returns/rejections)
    total_money_without_back DOUBLE,        -- lifetime spend (excl. returns/rejections)
    total_count_without_back_30 BIGINT,     -- 30-day order count (excl. returns/rejections)
    total_money_without_back_30 DOUBLE,     -- 30-day spend (excl. returns/rejections)
    total_count_30 BIGINT,                  -- 30-day order count (incl. returns/rejections)
    total_money_30 DOUBLE,                  -- 30-day spend (incl. returns/rejections)
    atv DOUBLE,                             -- average transaction value (incl. returns/rejections)
    atv_withoutback DOUBLE,                 -- average transaction value (excl. returns/rejections)
    atv_60 DOUBLE,                          -- 60-day ATV (incl. returns/rejections)
    atv_60_withoutback DOUBLE,              -- 60-day ATV (excl. returns/rejections)
    school_order_count BIGINT,              -- orders placed at school
    company_order_count BIGINT,             -- orders placed at workplace
    home_order_count BIGINT,                -- orders placed at home
    am_order_count BIGINT,                  -- orders placed in the morning
    pm_order_count BIGINT,                  -- orders placed in the afternoon
    night_order_count BIGINT,               -- orders placed in the evening
    most_usual_address VARCHAR(50),              -- most usual delivery address
    most_usual_paytype VARCHAR(50)               -- most usual payment type
  );

 --4.2 Export the data
 /export/server/sqoop-1.4.7/bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/hive_app_result \
--username root \
--password 123456 \
--table user_order_measure \
--input-fields-terminated-by '\t' \
--export-dir /user/hive/warehouse/itcast_app.db/user_order_measure

---------------Data visualization------------------------------------------------------
--1. Start superset
superset run -h 192.168.88.100 -p 8099 --with-threads --reload --debugger
--2. Open superset
 http://192.168.88.100:8099
  user: admin
  password: 123456
--3. MySQL connection URL for superset
 mysql+pymysql://root:123456@192.168.88.100/hive_app_result?charset=utf8


--4. Metric visualization
 --4.1 Quarterly order count / revenue for 2020 (date_type '1' = quarter)
   select * from hive_app_result.app_order_date where date_type = '1';

--4.2 Monthly order count for 2020 (date_type '2' = month);
--    build a real DATE from the 'YYYY-MM' value, e.g. 2020-01-01
 SELECT STR_TO_DATE(concat(date_val,"-01"),'%Y-%m-%d') AS TIMESTAMP, order_count,order_money 
 FROM hive_app_result.app_order_date 
 WHERE  date_type = '2';
 -- BUGFIX: terminating semicolon was missing

--4.3 Order amount by weekday for 2020 (date_type '3' = weekday, 1..7)
SELECT 
date_val,
CASE WHEN  date_val = '1' THEN '周一'
 WHEN  date_val = '2' THEN '周二'
 WHEN  date_val = '3' THEN '周三'
 WHEN  date_val = '4' THEN '周四'
 WHEN  date_val = '5' THEN '周五'
 WHEN  date_val = '6' THEN '周六'
 WHEN  date_val = '7' THEN '周日'
END AS week_val,
order_count,
order_money
FROM hive_app_result.app_order_date -- BUGFIX: was unqualified `app_order_date`
WHERE date_type = '3'
ORDER BY date_val ASC;
-- BUGFIX: terminating semicolon was missing

--4.4 Nationwide level-1 category metrics across all payment types
select * from hive_app_result.app_trade_order where category_type = '一级分类' and payment_name = '所有';

--4.5 Nationwide level-2 category metrics across all payment types
SELECT * FROM hive_app_result.app_trade_order WHERE  area_type = '全国' AND category_type = '二级分类';


四、业务数据分析
--1. Collect the web access logs into HDFS with Flume (agent "a1")
# Name the components on this agent
a1.sources = r1
a1.sinks = k1
a1.channels = c1

# TAILDIR source tails nginx access logs; position file enables resume
a1.sources.r1.type = TAILDIR
a1.sources.r1.positionFile = /var/log/flume/taildir_position.json
a1.sources.r1.filegroups = f1
a1.sources.r1.filegroups.f1 = /usr/local/nginx/logs/access*.log


# Describe the sink
a1.sinks.k1.type = hdfs
a1.sinks.k1.hdfs.path = /flume/web_log/2021-02-03
a1.sinks.k1.hdfs.filePrefix = events-

# Time-based directory rounding: 10-minute buckets
a1.sinks.k1.hdfs.round = true
a1.sinks.k1.hdfs.roundValue = 10
a1.sinks.k1.hdfs.roundUnit = minute

# File roll policy: by size (4 MiB) / event count only, never by interval
a1.sinks.k1.hdfs.rollInterval = 0
a1.sinks.k1.hdfs.rollSize = 4194304
a1.sinks.k1.hdfs.rollCount = 1000000
a1.sinks.k1.hdfs.idleTimeout= 10
a1.sinks.k1.hdfs.batchSize = 100
a1.sinks.k1.hdfs.useLocalTimeStamp = true
# Output file type: default is SequenceFile; DataStream writes plain text
a1.sinks.k1.hdfs.fileType = DataStream

# Use a channel which buffers events in memory
a1.channels.c1.type = memory
a1.channels.c1.capacity = 1000
a1.channels.c1.transactionCapacity = 100

# Bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1


--2. Start the Flume agent with the config above
 /export/server/flume-1.8.0/bin/flume-ng agent -c /export/server/flume-1.8.0/conf  -f /export/server/flume-1.8.0/conf/web_log.conf -n a1  -Dflume.root.logger=INFO,console


--3. Data preprocessing (MapReduce cleaning job)

 hadoop jar /export/data/mapreduce/web_log.jar cn.itcast.bigdata.weblog.pre.WeblogPreProcess   

 --Source path: /flume/web_log/2021-02-03
 --Target path: /output/web_log/pre_web_log

--原始日志数据
163.177.71.12 - - [01/Nov/2020:08:31:39 +0000] "HEAD / HTTP/1.1" 200 20 "-" "DNSPod-Monitor/1.0"
163.177.71.12 - - [01/Nov/2020:08:31:42 +0000] "HEAD / HTTP/1.1" 200 20 "-" "DNSPod-Monitor/1.0"
60.247.77.253 - - [01/Nov/2020:08:31:41 +0000] "GET /wp-content/uploads/2020/09/Hadoop-mahout-roadmap.png HTTP/1.1" 200 178536 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.89 Safari/537.1"

--预处理之后的日志数据
true163.177.71.12-2020-11-01 08:28:43/20020"-""DNSPod-Monitor/1.0"
true163.177.71.12-2020-11-01 08:28:46/20020"-""DNSPod-Monitor/1.0"
true60.247.77.253-2020-11-01 08:31:34/wp-content/uploads/2020/09/Hadoop-mahout-roadmap.png200178528"-""Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.1(KHTML,likeGecko)Chrome/21.0.1180.89Safari/537.1"

--pageviews模型
4e7f9897-fc23-4307-93cf-94c97ce1feea163.177.71.12-2020-11-01 08:28:43/13"-""DNSPod-Monitor/1.0"20200
4e7f9897-fc23-4307-93cf-94c97ce1feea163.177.71.12-2020-11-01 08:28:46/2173"-""DNSPod-Monitor/1.0"20200

4e7f9897-fc23-4307-93cf-94c97ce1aaaa163.177.71.12-2020-11-01 15:28:46/14"-""DNSPod-Monitor/1.0"20200
4e7f9897-fc23-4307-93cf-94c97ce1aaaa163.177.71.12-2020-11-01 15:28:50/260"-""DNSPod-Monitor/1.0"20200

0772cabd-5da2-47f7-95fc-484dce095ca360.247.77.253-2020-11-01 08:30:09/category/%e6%95%b0%e6%8d%ae%e6%8c%96%e6%8e%98/18"http://blog.fens.me/category/%e6%95%b0%e6%8d%ae%e6%8c%96%e6%8e%98/""Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.1(KHTML,likeGecko)Chrome/21.0.1180.89Safari/537.1"17504200
0772cabd-5da2-47f7-95fc-484dce095ca360.247.77.253-2020-11-01 08:30:17/hadoop-mahout-roadmap/21"http://blog.fens.me/category/%e6%95%b0%e6%8d%ae%e6%8c%96%e6%8e%98/""Mozilla/5.0(WindowsNT6.1;WOW64)AppleWebKit/537.1(KHTML,likeGecko)Chrome/21.0.1180.89Safari/537.1"10336200

--得到pageviews模型
hadoop jar /export/data/mapreduce/web_log.jar  cn.itcast.bigdata.weblog.clickstream.ClickStreamPageView   


--visits模型
00ebcd4c-9443-4112-828b-582b2e4e3c03183.60.9.2172020-11-01 07:10:572018-11-01 07:10:57/hadoop-hive-intro//hadoop-hive-intro/"-"12
0c371b1b-ccc7-4658-9c71-496e0ffc0f94115.236.11.1942020-11-01 12:25:572018-11-01 12:25:57/hadoop-mahout-roadmap//hadoop-mahout-roadmap/"http://f.dataguru.cn/thread-177375-1-1.html"8

--得到visits模型
hadoop jar /export/data/mapreduce/web_log.jar cn.itcast.bigdata.weblog.clickstream.ClickStreamVisit    

---4. Create the databases for the web-log pipeline (ods / dw / app layers)
create database if not exists  web_log_ods;
create database if not exists  web_log_dw;
create database if not exists  web_log_app;

--5. Create the ODS-layer tables

  --Cleaned raw access-log table
  drop table if exists web_log_ods.ods_weblog_origin;
  create table web_log_ods.ods_weblog_origin(
  valid string , --validity flag ('true'/'false' text)
  remote_addr string, --visitor ip
  remote_user string, --visitor user info
  time_local string, --request time
  request string,  --request url
  status string, --response status code
  body_bytes_sent string, --response bytes
  http_referer string, --referer url
  http_user_agent string --visitor user agent
  ) 
  partitioned by (dt string)
  row format delimited fields terminated by '\001';

  --Clickstream pageviews model table
drop table if exists web_log_ods.ods_click_pageviews;
create table  web_log_ods.ods_click_pageviews(
session string, --session id
remote_addr string, --visitor ip
remote_user string, --visitor user info
time_local string, --request time
request string, --request url
visit_step string, --step number within the visit
page_staylong string, --page stay time (seconds)
http_referer string, --referer url
http_user_agent string,--visitor user agent
body_bytes_sent string,--response bytes
status string --response status code
)
partitioned by (dt string)
row format delimited fields terminated by '\001';

--Clickstream visits model table
drop table if exists web_log_ods.ods_click_stream_visits;
create table web_log_ods.ods_click_stream_visits(
session     string, --session id
remote_addr string, --visitor ip
inTime      string, --session start time
outTime     string, --session end time
inPage      string, --session entry page
outPage     string, --session exit page
referal     string, --referer url
pageVisits  int --pages visited in the session
)
partitioned by (dt string)
row format delimited fields terminated by '\001';

--Load the preprocessed HDFS output into the ODS tables (one daily partition)

load data inpath '/output/web_log/pre_web_log' overwrite into table  web_log_ods.ods_weblog_origin partition(dt='2021-02-01');
 
load data inpath '/output/web_log/pageviews'overwrite into table web_log_ods.ods_click_pageviews partition(dt='2021-02-01');
 
load data inpath '/output/web_log/visits' overwrite into table web_log_ods.ods_click_stream_visits partition(dt='2021-02-01');

--DW-layer detail (wide) table: raw log fields plus parsed date/time and referer parts
drop table web_log_dw.dw_weblog_detail;
create table web_log_dw.dw_weblog_detail(
valid           string, --validity flag
remote_addr     string, --source ip
remote_user     string, --user identifier
time_local      string, --full access timestamp
daystr          string, --access date
timestr         string, --access time
month           string, --access month
day             string, --access day
hour            string, --access hour
request         string, --request url
status          string, --response code
body_bytes_sent string, --bytes transferred
http_referer    string, --referer url
ref_host        string, --referer host
ref_path        string, --referer path
ref_query       string, --referer query string
ref_query_id    string, --referer query "id" value
http_user_agent string --client user agent
)
partitioned by(dt string)
row format delimited fields terminated by '\001';

--Load the detail table: split time_local (format 'yyyy-MM-dd HH:mm:ss',
--e.g. '2020-11-01 08:28:43') into date/time parts and parse the referer.
--BUGFIX: the sample timestamp used to sit here as a bare (non-comment)
--line and would have broken script execution.
insert into table web_log_dw.dw_weblog_detail partition(dt='2021-02-01')
select c.valid,c.remote_addr,c.remote_user,c.time_local,
substring(c.time_local,1,10) as daystr, --e.g. 2020-11-01
substring(c.time_local,12) as tmstr, --e.g. 08:28:43
substring(c.time_local,6,2) as month, --e.g. 11
substring(c.time_local,9,2) as day,  --e.g. 01
substring(c.time_local,12,2) as hour, --e.g. 08
c.request,c.status,c.body_bytes_sent,c.http_referer,c.ref_host,c.ref_path,c.ref_query,c.ref_query_id,c.http_user_agent
from
(
    -- strip quotes from the referer, then split it into host/path/query/id
    SELECT
  a.valid,a.remote_addr,a.remote_user,a.time_local,
  a.request,a.status,a.body_bytes_sent,a.http_referer,a.http_user_agent,b.ref_host,b.ref_path,b.ref_query,b.ref_query_id
  FROM web_log_ods.ods_weblog_origin a LATERAL VIEW
  parse_url_tuple(regexp_replace(http_referer, "\"", ""), 'HOST', 'PATH','QUERY', 'QUERY:id') b as ref_host, ref_path, ref_query,
  ref_query_id
 ) c;

-- http://www.baidu.com/email/info/a.jpg?id=val1&key2=val2   www.baidu.com   /email/info/a.jpg id=val1&key2=val2  val1


--Run the detail-table SQL script
--BUGFIX: removed the stray trailing "1" that corrupted the command.
hive -f '/export/data/hive_sql/web_log_detail.sql'


--6、基础指标分析
192.168.88.5:
  8:00  a.html
  8:15  b.html
  8:50  c.html
  18:15 d.html

192.168.88.6:
  8:00  c.html
  8:15  d.html
  8:50  e.html
  18:15 f.html


 web_log_dw.dw_weblog_detail
 select * from  web_log_dw.dw_weblog_detail limit 10
 --浏览页面次数(pv)  --->6
 select count(*) as pvs from  web_log_dw.dw_weblog_detail where valid = true and dt='2021-02-01';

 --独立访客(uv) ----->2
 select count(distinct remote_addr) as uvs from  web_log_dw.dw_weblog_detail where valid = true and dt='2021-02-01';

 --访问次数(vv) ----->6
select count(session) from ods_click_stream_visits where  dt='2021-02-01' 

--Persist the basic metrics (pv / uv / vv) into the app layer
drop table if exists web_log_app.app_webflow_basic_info;
create table web_log_app.app_webflow_basic_info(date_val string,pvs bigint,uvs bigint,vvs bigint) partitioned by(dt string);
 
--Allow the cartesian product (both subqueries return exactly one row)
set spark.sql.crossJoin.enabled=true;
 
insert into table web_log_app.app_webflow_basic_info partition(dt='2021-02-01')
select '2021-02-01',a.*,b.* from
( -- BUGFIX: this opening parenthesis was missing, breaking the statement
   select count(*) as pvs,count(distinct remote_addr) as uvs from web_log_dw.dw_weblog_detail  where dt='2021-02-01'
) a 
join 
(
 select count(session) as vvs from web_log_ods.ods_click_stream_visits where dt='2021-02-01'
) b;

--Multi-dimensional statistics over the basic metrics (note the GROUP BY syntax)

--1. Multi-dimensional PV totals
--   Approach 1: query the web_log_dw.dw_weblog_detail table directly
--1.1 PVs per hour within one processing batch (one day)
--Hourly PVs for one day, stored per daily partition
drop table if exists web_log_app.app_pvs_everyhour_oneday;
create table web_log_app.app_pvs_everyhour_oneday(month string,day string,hour string,pvs bigint) partitioned by(dt string);
 
insert into table web_log_app.app_pvs_everyhour_oneday partition(dt='2021-02-01')
select a.month as month,a.day as day,a.hour as hour,count(*) as pvs from web_log_dw.dw_weblog_detail  a
where  a.dt='2021-02-01' group by a.month,a.day,a.hour;
 
--Daily PVs across all partitions
drop table if exists web_log_app.app_pvs_everyday;
create table web_log_app.app_pvs_everyday(pvs bigint,month string,day string);
 
insert into table web_log_app.app_pvs_everyday
select count(*) as pvs,a.month as month,a.day as day from web_log_dw.dw_weblog_detail  a
group by a.month,a.day;

--7. Composite metrics

--Average pages per visitor (average visit depth)
 --Requirement: average number of page requests per visitor today.
 --total page requests (pv) / distinct visitors (uv)
 
--BUGFIX: the drop targeted a misspelled table name
--("app_avgpv_user_everydayy") and never dropped the table created below.
drop table if exists web_log_app.app_avgpv_user_everyday;
create table web_log_app.app_avgpv_user_everyday(day string, avgpv string);
        
 
--Approach 1: reuse the precomputed basic metrics
insert into table web_log_app.app_avgpv_user_everyday
select '2021-02-01',pvs/uvs from web_log_app.app_webflow_basic_info;
 
--Approach 2: recompute directly from the detail table
 
insert  into table web_log_app.app_avgpv_user_everyday
select '2021-02-01',sum(b.pvs)/count(b.remote_addr) from
(select remote_addr,count(*) as pvs from web_log_dw.dw_weblog_detail where dt='2021-02-01' group by remote_addr) b;


--Average visit duration
 
    --Average time a visit (session) stays on the site.
    --Reflects how engaging the site is.
    --average visit duration = total stay time / number of visits.
 
--First compute the stay time of each session
 
select session, sum(page_staylong) as web_staylong from web_log_ods.ods_click_pageviews where dt='2021-02-01'
group by session;
 
 
--Then the average visit duration (seconds per session)
select
sum(a.web_staylong)/count(a.session)
from 
(select session, sum(page_staylong) as web_staylong from web_log_ods.ods_click_pageviews where dt='2021-02-01'
group by session) a;

--Bounce rate
    --Share of visits that viewed exactly one page before leaving,
    --relative to all visits. A key site-quality indicator.
 
--Bounce rate of the /hadoop-mahout-roadmap/ page
 
--Total visit count (vv)
--BUGFIX: table was unqualified; qualified with web_log_app like the rest of the script
select vvs from web_log_app.app_webflow_basic_info where dt='2021-02-01';
 
--Visits that viewed only /hadoop-mahout-roadmap/ and then left
select count(*) from web_log_ods.ods_click_stream_visits where dt='2021-02-01'
and pagevisits = 1 and outpage = "/hadoop-mahout-roadmap/";
 
 
--Combine both into the bounce-rate percentage
select
(b.nums/a.vvs)*100
from web_log_app.app_webflow_basic_info a join (select count(*) as nums from web_log_ods.ods_click_stream_visits where dt='2021-02-01'
and pagevisits = 1 and outpage = "/hadoop-mahout-roadmap/") b;

--Hottest pages
--Top-10 most requested pages
 
drop table if exists web_log_app.app_hotpages_everyday;
create table web_log_app.app_hotpages_everyday(day string,url string,pvs string);
 
--Approach 1: order by request count, take the top 10
insert into table web_log_app.app_hotpages_everyday
select '2021-02-01',a.request,a.request_counts from
(select request as request,count(request) as request_counts 
from web_log_dw.dw_weblog_detail where dt='2021-02-01' group by request having request is not null
) a
order by a.request_counts desc limit 10;

--Approach 2: RANK() window function
--BUGFIX: the outer query previously selected * (4 columns including rn)
--into the 3-column target table; project the 3 target columns explicitly.
insert into table web_log_app.app_hotpages_everyday
select b.day_val, b.request, b.request_counts from
(
SELECT 
  '2021-02-01' as day_val, a.request, a.request_counts,
  RANK() OVER( ORDER BY a.request_counts desc) AS rn 
  FROM 
  (
    select request as request,count(request) as request_counts 
    from web_log_dw.dw_weblog_detail where dt='2021-02-01' group by request having request is not null
  )a
)b
where b.rn <= 10
 ;


--Funnel model
--Load mock clickstream data for the funnel analysis

load data local inpath '/export/data/hivedatas/click-part-r-00000' overwrite into table web_log_ods.ods_click_pageviews partition(dt='2021-02-01');
 
----------------------------------------------------------
---1. Distinct visitor count for each funnel step
--UNION ALL merges the per-step SELECT results into a single result set
 
create table web_log_app.app_oute_numbs as 
select 'step1' as step,count(distinct remote_addr)  as numbs from web_log_ods.ods_click_pageviews where dt='2021-02-01' and request like '/item%'
union all
select 'step2' as step,count(distinct remote_addr)  as numbs from  web_log_ods.ods_click_pageviews where dt='2021-02-01' and request like '/category%'
union all
select 'step3' as step,count(distinct remote_addr)  as numbs from  web_log_ods.ods_click_pageviews where dt='2021-02-01' and request like '/order%'
union all
select 'step4' as step,count(distinct remote_addr)  as numbs from  web_log_ods.ods_click_pageviews where dt='2021-02-01' and request like '/index%';


--2. Ratio of each step's visitors to the first step's visitors
--Self-join of the step counts (4x4 cartesian product)
 
select rn.step as rnstep,rn.numbs as rnnumbs,rr.step as rrstep,rr.numbs as rrnumbs  from web_log_app.app_oute_numbs rn
inner join 
web_log_app.app_oute_numbs rr;


--visitors of each step / visitors of step 1 = absolute conversion ratio (%)
select tmp.rnstep,tmp.rnnumbs/tmp.rrnumbs * 100 as abs_rate
from
(
select rn.step as rnstep,rn.numbs as rnnumbs,rr.step as rrstep,rr.numbs as rrnumbs  from web_log_app.app_oute_numbs rn
inner join 
web_log_app.app_oute_numbs rr) tmp
where tmp.rrstep='step1';
-- BUGFIX: terminating semicolon was missing

--3. Leakage rate of each step relative to the previous step
--First, self-join the table to pair each step with its predecessor
--(stepN matched with stepN+1 via the digit at position 5 of the step name)
select rn.step as rnstep,rn.numbs as rnnumbs,rr.step as rrstep,rr.numbs as rrnumbs  from web_log_app.app_oute_numbs rn
inner join 
web_log_app.app_oute_numbs rr
where cast(substr(rn.step,5,1) as int)=cast(substr(rr.step,5,1) as int)-1;
 
--Then the per-step leakage rate is simply next-step count / previous-step count
select tmp.rrstep as step,tmp.rrnumbs/tmp.rnnumbs as leakage_rate
from
(
select rn.step as rnstep,rn.numbs as rnnumbs,rr.step as rrstep,rr.numbs as rrnumbs  from web_log_app.app_oute_numbs rn
inner join 
web_log_app.app_oute_numbs rr) tmp
where cast(substr(tmp.rnstep,5,1) as int)=cast(substr(tmp.rrstep,5,1) as int)-1;

--4. Combine both funnel metrics (absolute rate and leakage rate) into one table.
--   Left join because step1 has no predecessor and thus no leakage_rate.
drop table if exists web_log_app.app_bounce_rate;
create table web_log_app.app_bounce_rate 
(
 step_num string,   -- funnel step name ('step1'..'step4')
 numbs bigint,      -- distinct visitors at this step
 abs_rate double,   -- percentage relative to step1
 leakage_rate double-- percentage relative to the previous step (NULL for step1)
);
insert into table web_log_app.app_bounce_rate 
select abs.step,abs.numbs,abs.rate as abs_rate,rel.leakage_rate as leakage_rate
from 
(
-- percentage of each step's visitors relative to step1
select tmp.rnstep as step,tmp.rnnumbs as numbs,tmp.rnnumbs/tmp.rrnumbs * 100 as rate
from
(
select rn.step as rnstep,rn.numbs as rnnumbs,rr.step as rrstep,rr.numbs as rrnumbs  from web_log_app.app_oute_numbs rn
inner join 
web_log_app.app_oute_numbs rr) tmp
where tmp.rrstep='step1'
) abs
left outer join
(
-- percentage of each step's visitors relative to the previous step
select tmp.rrstep as step,tmp.rrnumbs/tmp.rnnumbs * 100 as leakage_rate
from
(
select rn.step as rnstep,rn.numbs as rnnumbs,rr.step as rrstep,rr.numbs as rrnumbs  from web_log_app.app_oute_numbs rn
inner join 
web_log_app.app_oute_numbs rr) tmp
where cast(substr(tmp.rnstep,5,1) as int)=cast(substr(tmp.rrstep,5,1) as int)-1
) rel
on abs.step=rel.step;


--8. Export the result tables to MySQL

--Create the MySQL result database and tables
CREATE DATABASE web_log_result;
drop table if exists web_log_result.app_webflow_basic_info;
-- NOTE(review): this MySQL table has 6 columns (MONTH, DAY, pv, uv, ip, vv)
-- but the Hive source table web_log_app.app_webflow_basic_info has only 4
-- (date_val, pvs, uvs, vvs) — the sqoop export below will likely fail or
-- misalign columns; confirm the intended schema.
CREATE TABLE web_log_result.app_webflow_basic_info(MONTH VARCHAR(50),DAY VARCHAR(50),pv BIGINT,uv BIGINT,ip BIGINT,vv BIGINT);

/export/server/sqoop-1.4.7/bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/web_log_result \
--username root \
--password 123456 \
--table app_webflow_basic_info \
--input-fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/web_log_app.db/app_webflow_basic_info/dt=2021-02-01

--Average-visit-depth result table
CREATE TABLE web_log_result.app_avgpv_user_everyday(
DAY VARCHAR(50),
avgpv VARCHAR(50)
);


/export/server/sqoop-1.4.7/bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/web_log_result \
--username root \
--password 123456 \
--table app_avgpv_user_everyday \
--input-fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/web_log_app.db/app_avgpv_user_everyday

--Hottest-pages result table
DROP TABLE IF EXISTS web_log_result.app_hotpages_everyday;
CREATE TABLE web_log_result.app_hotpages_everyday(
 day_value VARCHAR(20),
 url VARCHAR(100),
 pvs BIGINT
);


/export/server/sqoop-1.4.7/bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/web_log_result \
--username root \
--password 123456 \
--table app_hotpages_everyday \
--input-fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/web_log_app.db/app_hotpages_everyday \
--input-null-string '\\N' --input-null-non-string '\\N'

--Hourly-PVs result table
-- NOTE(review): this drop lacks IF EXISTS, unlike the others; it errors on first run
drop table web_log_result.app_pvs_everyhour_oneday;
create table web_log_result.app_pvs_everyhour_oneday
(
month varchar(20),
day varchar(20),
hour varchar(20),
pvs bigint
);

/export/server/sqoop-1.4.7/bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/web_log_result \
--username root \
--password 123456 \
--table app_pvs_everyhour_oneday \
--input-fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/web_log_app.db/app_pvs_everyhour_oneday/dt=2021-02-01 \
--input-null-string '\\N' --input-null-non-string '\\N'

--Funnel (bounce-rate) result table
drop table if exists web_log_result.app_bounce_rate;
create table web_log_result.app_bounce_rate 
(
 step_num varchar(50),
 numbs bigint,
 abs_rate double,
 leakage_rate double
);


/export/server/sqoop-1.4.7/bin/sqoop export \
--connect jdbc:mysql://192.168.88.100:3306/web_log_result \
--username root \
--password 123456 \
--table app_bounce_rate \
--input-fields-terminated-by '\001' \
--export-dir /user/hive/warehouse/web_log_app.db/app_bounce_rate \
--input-null-string '\\N' --input-null-non-string '\\N'


--9. Data visualization
--Start superset
superset run -h 192.168.88.100 -p 8099 --with-threads --reload --debugger
--MySQL connection URL for superset
  mysql+pymysql://root:123456@192.168.88.100/web_log_result?charset=utf8


--Total PV
  SELECT pv FROM web_log_result.app_webflow_basic_info;
--Hourly PV trend within one day
--BUGFIX: a space was missing between DAY and HOUR, producing strings like
--'2020-11-0108:00:00' that never match the '%Y-%m-%d %H:%i:%s' format,
--so STR_TO_DATE returned NULL for every row. Also qualified the table.
SELECT STR_TO_DATE(CONCAT('2020-',MONTH,'-',DAY,' ',HOUR,':00:00'),'%Y-%m-%d %H:%i:%s') AS TIMESTAMP ,pvs 
FROM web_log_result.app_pvs_everyhour_oneday WHERE MONTH='11' AND DAY = '01';


--Top-10 hottest pages per day
SELECT * FROM web_log_result.app_hotpages_everyday;

--Funnel model
SELECT * FROM web_log_result.app_bounce_rate ORDER BY step_num;

  • 0
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
亚马逊跨境电商运营实战pdf是一本专门介绍亚马逊跨境电商运营实战经验的电子书。该书内容详尽,涵盖了从开始准备跨境电商业务到具体操作细节等方方面面的实用建议和经验分享。 首先,该书首先介绍了亚马逊跨境电商的背景和市场前景,分析跨境电商的优势和机会。接着,作者讲解了如何选择适合销售的产品,分析了市场趋势和竞争情况,同时也提供了选择供应商和制定销售策略的指导。 在具体的运营方面,该书详细介绍了注册和设置卖家账号的步骤,讲解了亚马逊平台的规则和政策,以及如何创建和优化产品页面,提高产品的曝光度和销售量。此外,作者还分享了一些评价管理、物流配送、售后服务等方面的经验,帮助读者解决日常运营中可能遇到的问题。 同时,该书还强调了市场调研和数据分析的重要性,教读者如何通过亚马逊平台提供的工具和报告来了解产品和市场的情况,并根据数据做出调整和决策。 总的来说,亚马逊跨境电商运营实战pdf是一本实用性极高的电子书,对于想要进入跨境电商领域或已经在跨境电商运营中遇到问题的人来说,都是一本值得阅读和参考的指南。它不仅能帮助我们了解亚马逊平台的规则和运营要点,还能指导我们制定切实可行的运营策略,提高产品在亚马逊平台上的销售业绩。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

㫪旧

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值