老姜大数据金融项目

最新推荐文章于 2024-04-07 09:38:44 发布

紫金小飞侠

最新推荐文章于 2024-04-07 09:38:44 发布

阅读量2.4k

点赞数 5

分类专栏： # 老姜大数据

本文链接：https://blog.csdn.net/yangshengwei230612/article/details/119120233

版权

老姜大数据专栏收录该内容

6 篇文章

订阅专栏

1大数据金融项目背景

1.3商业智能（BI）的价值
出报告（非重点）
1.行业报告
2.竞争对手分析
3.自我认知
出报表
1.直面领导层的数据展现（驾驶舱）
2.为公司的发展提供数据决策支持
3.数据驱动
4.业务迭代
5.风险管理（金融）
1.4MIS与BIS
MIS偏向于出报表
BIS 偏向于出报告

2金融BI体系介绍

2.1什么是BI

可视化工作
1.专职的前端团队
2.使用一些开源/商业的BI工具

BI是什么？
1.BI本身是从数据采集到加工到展示的一系列的工作
2.对于大数据来说，BI是数据组织和加工（包括数仓的工作）

2.2数据仓库与BI之间的关系
数据仓库/数据集市/数据糊
1.ETL 数据的抽取，清洗（转换），加载
2.数仓数据的整理，建模
数据服务 BI，数据查询，建模分析。。。

一般BI工作流
在这里插入图片描述

6金融体系数仓搭建

6.1需求背景

消费金融业务
1.贷款的目的，用于消费的（买苹果手机，买笔记本电脑，给女朋友买个钻戒）
2.没有抵押的，纯信用的贷款模式
3.目前，支付宝，金条，滴滴，美团。。。。

6.3需求分析

在这里插入图片描述

6.4分层建模

在这里插入图片描述
ODS建表
小贴士：ODS层尽量采用String类型

DWD建表
DWD与ODS表结构应该一致

ODS、DWD、DWS、DWA
ODS存储原始数据
DWD存储聚合数据（面向数据聚合）
DWS业务聚合数据（面向业务主题聚合）
DWA存储应用数据（面向业务人员的数据集）

注册表：bu_reg


create table bu_reg(
id int(64) comment 'id'
,channel varchar(32) comment '渠道'
,phone varchar(64) comment '手机号'
,user_name varchar(64) comment '用户名'
,passwd varchar(64) comment '密码'
,create_time datetime comment '创建时间'
,status int(16) comment '状态'
)



load data infile '/root/data/bu_reg.csv' into table bu_reg fields terminated by ',' optionally enclosed by '"' escaped by '"' lines terminated by '\r\n';



create table ods.bu_reg(
id string comment 'id'
,channel string comment '渠道'
,phone string comment '手机号'
,user_name string comment '用户名'
,passwd string comment '密码'
,create_time string comment '创建时间'
,status string comment '状态'
)
PARTITIONED by (dt string)


create table ods.bu_apply(
id string comment 'id'
,phone string comment '手机号'
,card string comment 'idcard'
,status string comment '状态'
,credit_amount string comment '授信金额'
,create_time string comment '创建时间'
)
PARTITIONED by (dt string)



create table ods.bu_loan(
id string comment 'id'
,user_id string comment '注册编号'
,apply_id string comment '申请编号'
,status string comment '状态'
,create_time string comment '创建时间'
)
PARTITIONED by (dt string)


create table ods.bu_pay(
id string comment 'id'
,user_id string comment '注册编号'
,apply_id string comment '申请编号'
,loan_id string comment '放款编号'
,amount string comment '还款金额'
,create_time string comment '创建时间'
)
PARTITIONED by (dt string)


create table ods.bu_api(
id string comment 'id'
,apply_id string comment '申请编号'
,create_time string comment '创建时间'
,zm_score string comment '芝麻分'
,zf_score string comment '支付分'
,renhang string comment '人行信息(分)'
)
PARTITIONED by (dt string)

-- 加载数据到指定的分区里
load data local inpath '/root/data/bu_reg.csv' overwrite into table ods.bu_reg partition(dt=20210101);
load data local inpath '/root/data/bu_apply.csv' overwrite into table ods.bu_apply partition(dt=20210102);
load data local inpath '/root/data/bu_loan.csv' overwrite into table ods.bu_loan partition(dt=20210103);
load data local inpath '/root/data/bu_pay.csv' overwrite into table ods.bu_pay partition(dt=20210104);
load data local inpath '/root/data/bu_api.csv' overwrite into table ods.bu_api partition(dt=20210102);


-- 修改hive的字段分隔符
 alter table ods.bu_reg set serdeproperties('field.delim'=',');
 alter table ods.bu_apply set serdeproperties('field.delim'=',');
 alter table ods.bu_loan set serdeproperties('field.delim'=',');
 alter table ods.bu_pay set serdeproperties('field.delim'=',');
 alter table ods.bu_api set serdeproperties('field.delim'='\t');

-----------------------------------------
create table dwd.dwd_bu_reg like ods.bu_reg;
create table dwd.dwd_bu_apply like ods.bu_apply;
create table dwd.dwd_bu_loan like ods.bu_loan;
create table dwd.dwd_bu_pay like ods.bu_pay;
create table dwd.dwd_bu_api like ods.bu_api;	


-- 使用等于的方式删除分区
alter table ods.bu_reg drop partition(dt=20210405);
alter table ods.bu_apply drop partition(dt=20210405);
alter table ods.bu_loan drop partition(dt=20210405);
alter table ods.bu_pay drop partition(dt=20210405);
alter table ods.bu_api drop partition(dt=20210405);


load data local inpath '/root/data/bu_reg_20210102.csv' overwrite into table ods.bu_reg partition(dt=20210102);

-- 使用非等于的方式删除分区
alter table ods.bu_reg drop partition(dt>20210102);
alter table ods.bu_apply drop partition(dt!=20210102);
alter table ods.bu_loan drop partition(dt!=20210103);
alter table ods.bu_pay drop partition(dt!=20210104);
alter table ods.bu_api drop partition(dt!=20210102);

-- 查看表的分区-已经存在的分区
show partitions ods.bu_reg ;



-- dwd 0102的分区应该保存的是截止这天的所有数据
-- 方案1 全汇总
insert overwrite table dwd.dwd_bu_reg partition(dt=20210102)
select id,channel,phone,user_name,passwd,create_time,status from ods.bu_reg 

-- 方案2 累进

insert overwrite table dwd.dwd_bu_reg partition(dt=20210101)
select id,channel,phone,user_name,passwd,create_time,status from ods.bu_reg where dt=20210101


insert overwrite table dwd.dwd_bu_reg partition(dt=20210102)
select id,channel,phone,user_name,passwd,create_time,status from ods.bu_reg where dt=20210102
union all
select id,channel,phone,user_name,passwd,create_time,status from dwd.dwd_bu_reg where dt=20210101

# 如何不写这上面些字段呢？
-- 开启Hive正则表达式的支持 || 退出当前终端失效
set hive.support.quoted.identifiers=None;
insert overwrite table dwd.dwd_bu_reg partition(dt=20210101)
select `(dt)?+.+` from ods.bu_reg where dt=20210101

insert overwrite table dwd.dwd_bu_reg partition(dt=20210102)
select `(dt)?+.+` from ods.bu_reg where dt=20210102
union all
select `(dt)?+.+` from dwd.dwd_bu_reg where dt=20210101

------------------------------------------------------------------------------
# 通过hive -f 来执行一些SQL脚本
# 无参数的脚本
hive -f  jingrong_dwd.sql

# 有参数的脚本
hive -hivevar dt=20210103 -hivevar yesterday=20210102 -f  jingrong_dwd_wrok.sql

# 如何减少参数
#1 在传递前处理，在shell中处理
# jingrong_dwd_wrok.sh

dt=$1 # 接受脚本传入的第一个参数
yesterday=`date -d "${dt} -1 day" +%Y%m%d`
hive -hivevar dt=${dt} -hivevar yesterday=${yesterday} -f  jingrong_dwd_wrok.sql

#2 在SQL中处理
# 略

-- 通过hive -e 来执行一段代码
hive -e 'select * from dwd_bu_pay whre dt=20210104'

# DWS数据聚合

create table dws.apply_info(
id string comment 'id'
,phone string comment '手机号'
,card string comment 'idcard'
,status string comment '状态'
,credit_amount string comment '授信金额'
,day string comment '日期'
,tm string comment '时间'
,zm_score string comment '芝麻分'
,zf_score string comment '支付分'
,renhang string comment '人行Info'
)
PARTITIONED by (dt string)


insert overwrite table dws.apply_info partition(dt=20210104)
select 
a.id
,a.phone
,a.card
,a.status
,a.credit_amount
,regexp_replace(substr(a.create_time,1,10),'/','') as day
,substr(a.create_time,12,8) as day
,b.zm_score
,b.zf_score
,b.renhang
from (select * from dwd.dwd_bu_apply where dt=20210104 ) a 
left join 
(select * from dwd.dwd_bu_api where dt=20210104 ) b 
on a.id = b.apply_id



# DWA层

模拟需求：
	1. 我想看每天的注册人数，申请人数，授信人数
	2. 我想看注册申请转化率，申请授信转化率
	3. 授信用户的芝麻平均分，人行曾逾期用户的比例

-- 需求12：
-- 注册人数
select 
regexp_replace(substr(create_time,1,10),'/','') as day
,count(id) as cnt
from dwd.dwd_bu_reg where dt=20210104
group by regexp_replace(substr(create_time,1,10),'/','')



-- 申请人数
select 
day
,count(id) as cnt
from dws.apply_info where dt=20210104
group by day
-- 授信人数
select 
day
,count(id) as cnt
from dws.apply_info where dt=20210104 and status = 1
group by day

#-- 合并上述SQL
select 
day
,count(id) as apply_cnt
,count(case when status = 1 then id else null end) as pass_cnt
,count(case when status = 1 then id else null end)/count(id) as pass_rate
from dws.apply_info where dt=20210104
group by day

#-- 进一步合并注册申请的SQL
insert overwrite table dwa.bu_funnel partition(dt=20210104)
select
a.day
,nvl(b.reg_cnt,0)
,nvl(a.apply_cnt,0)
,nvl(a.pass_cnt,0)
,round(a.apply_cnt/b.reg_cnt,2) as rp_rate
,round(a.pass_cnt/a.apply_cnt,2) as pass_rate
from
(select 
day
,count(id) as apply_cnt
,count(case when status = 1 then id else null end) as pass_cnt
from dws.apply_info where dt=20210104
group by day ) a
left join
(select 
regexp_replace(substr(create_time,1,10),'/','') as day
,count(id) as reg_cnt
from dwd.dwd_bu_reg where dt=20210104
group by regexp_replace(substr(create_time,1,10),'/','') ) b
on a.day = b.day

-- 需求12汇总表
create table dwa.bu_funnel(
day string comment '日期'
,reg_cnt string comment '注册人数'
,apply_cnt string comment '申请人数'
,pass_cnt string comment '授信人数'
,rp_rate string comment '注册申请通过率'
,pass_rate string comment '申请授信通过率'
)
PARTITIONED by (dt string)

-- 需求3
select 
day
-- ,count(id) as pass_cnt
,round(avg(zm_score),2) as agv_zm
-- ,sum(get_jsonobject(renhang,'$.overdue')) as overdue_cnt
,round(sum(get_json_object(renhang,'$.overdue'))/count(id) ,2) as overdue_rate
from dws.apply_info where dt=20210104 and status = 1
group by day