使用Spark sql的shell转换Oracle sql的例子

使用Sparksql的shell转换Oracle的例子

//1.创建表
create table AUTO_PAID_CASE_TMP_01
(
branch_company_code VARCHAR(25),
policy_no VARCHAR(60),
product_code_detail VARCHAR(24),
accident_month VARCHAR(20),
paid_amount Double,
paid_insured Double,
paid_alae Double,
paid_amount_retain Double,
paid_insured_retain Double,
paid_alae_retain Double,
paid_num_noncancel Double,
paid_num_zero Double,
paid_num_cancel Double,
paid_num_noncancel_detail Double,
paid_num_zero_detail Double,
paid_num_cancel_detail Double,
estimate_amount Double,
estimate_amount_retain Double,
os_num Double,
os_num_detail Double
)row format delimited fields terminated by ‘,’ ;

//2创建表
create table AUTO_DL1_BASE_PREMIUM_TMP01
(
branch_company_code VARCHAR(21),
policy_no VARCHAR(60),
department_group_code VARCHAR(18),
department_code VARCHAR(24),
product_type VARCHAR(24),
estimate_product_type VARCHAR(500),
business_require_system2 VARCHAR(500),
product_code VARCHAR(24),
product_code_detail VARCHAR(24),
inception_month VARCHAR(6),
inquire_month VARCHAR(6),
selling_channel_type VARCHAR(6),
policy_type VARCHAR(3),
policy_status VARCHAR(15),
party_type CHAR(1),
premium_amount Double,
premium_amount_static Double,
premium_amount_retain Double,
premium_amount_retain_static Double,
premium_amount_withtax Double,
exposure Double,
exposure_detail Double,
gep Double,
nep Double,
ee Double,
ee_detail Double,
gep_withtax Double,
actuarial_usage_code VARCHAR(50)
)row format delimited fields terminated by ‘,’ ;

//3.导入hive数据
load data local inpath ‘/room/data/haha.csv’ overwrite into table AUTO_PAID_CASE_TMP_01;
(load data local inpath ‘文件路径’ overwrite into table 表名 ,其中overwrite是覆盖,不是追加!

//加上下面这句是导入本地文件的语句csv“,”隔开的形式。
//insert overwrite local directory “/tmp/out/”
//row format delimited fields terminated by “,”

//4.这是创建查询结果表正确格式
create table query_result as
select
auto_dl1_base_premium_tmp01.branch_company_code,
auto_dl1_base_premium_tmp01.policy_no,
auto_dl1_base_premium_tmp01.department_group_code,
auto_dl1_base_premium_tmp01.department_code,
auto_dl1_base_premium_tmp01.product_code,
auto_dl1_base_premium_tmp01.product_code_detail,
auto_dl1_base_premium_tmp01.inception_month,
auto_dl1_base_premium_tmp01.inquire_month,
auto_dl1_base_premium_tmp01.selling_channel_type,
auto_dl1_base_premium_tmp01.policy_type,
auto_dl1_base_premium_tmp01.policy_status,
auto_dl1_base_premium_tmp01.premium_amount,
auto_dl1_base_premium_tmp01.premium_amount_static,
auto_dl1_base_premium_tmp01.premium_amount_retain,
auto_dl1_base_premium_tmp01.premium_amount_retain_static,
auto_dl1_base_premium_tmp01.premium_amount_withtax,
auto_dl1_base_premium_tmp01.exposure,
auto_dl1_base_premium_tmp01.exposure_detail,
auto_dl1_base_premium_tmp01.gep,
auto_dl1_base_premium_tmp01.nep,
auto_dl1_base_premium_tmp01.ee,
auto_dl1_base_premium_tmp01.ee_detail,
auto_dl1_base_premium_tmp01.party_type,
auto_dl1_base_premium_tmp01.product_type,
auto_dl1_base_premium_tmp01.estimate_product_type,
auto_dl1_base_premium_tmp01.business_require_system2,

case when  first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.paid_amount else 0 end paid_amount,

 case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.paid_amount_retain else 0 end paid_amount_retain,

case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.paid_insured else 0 end paid_insured,         

case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.paid_alae else 0 end paid_alae,           

    case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.paid_num_noncancel else 0 end paid_num_noncancel,       

    case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.paid_num_zero else 0 end paid_num_zero,   

  case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.paid_num_cancel else 0 end paid_num_cancel,

   case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.paid_num_noncancel_detail else 0 end paid_num_noncancel_detail,

   case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.paid_num_zero_detail else 0 end paid_num_zero_detail,

   case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.paid_num_cancel_detail else 0 end paid_num_cancel_detail,

   case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.estimate_amount else 0 end estimate_amount,

   case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.estimate_amount_retain else 0 end estimate_amount_retain,

   case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.os_num else 0 end os_num,

   case when first_value(auto_dl1_base_premium_tmp01.inception_month)
          over(partition by auto_dl1_base_premium_tmp01.branch_company_code,
               auto_dl1_base_premium_tmp01.policy_no,
               auto_dl1_base_premium_tmp01.product_code_detail,
               auto_dl1_base_premium_tmp01.inquire_month order by auto_dl1_base_premium_tmp01.inception_month)=auto_dl1_base_premium_tmp01.inception_month 
       then auto_paid_case_tmp_01.os_num_detail else 0 end os_num_detail,


   auto_dl1_base_premium_tmp01.gep_withtax,
   auto_dl1_base_premium_tmp01.actuarial_usage_code
    from auto_dl1_base_premium_tmp01 
    left join auto_paid_case_tmp_01 
      on auto_dl1_base_premium_tmp01.branch_company_code = auto_paid_case_tmp_01.branch_company_code
     and auto_dl1_base_premium_tmp01.product_code_detail = auto_paid_case_tmp_01.product_code_detail
     and auto_dl1_base_premium_tmp01.policy_no = auto_paid_case_tmp_01.policy_no
     and auto_dl1_base_premium_tmp01.inquire_month = auto_paid_case_tmp_01.accident_month;

结果:
用时Time taken: 43.05 seconds
17/08/29 19:29:25 INFO CliDriver: Time taken: 43.05 seconds

//5.查询结果语句sql
//
select substr(query_result.inquire_month,1,4),sum(query_result.premium_amount_static),sum(query_result.premium_amount_retain_static),sum(query_result.paid_amount)
,sum(query_result.paid_amount_retain),sum(query_result.estimate_amount),sum(query_result.estimate_amount_retain) from
query_result group by substr(query_result.inquire_month,1,4);

//结果如下:
2016 5.666219326377354E7 5.293598994840718E7 2.481366084999998E7 2.2746898331373088E7 1128048.84 1054172.1800000002
2017 3.9573576133207515E7 3.624134659292713E7 7865497.719999997 7087857.122956341 5014556.81 4547917.7
2014 2.0922988189999998E7 1.956592757000001E7 7445313.239999999 6525843.269999999 0.0 0.0
2013 1385740.8700000003 1014179.1199999993 99992.49 48603.060000000005 0.0 0.0
2015 4.0925074010000005E7 3.812092436400004E7 1.756812285E7 1.6017133919999989E7 37612.8 37612.8
Time taken: 4.11 seconds, Fetched 5 row(s)
17/08/30 11:31:10 INFO CliDriver: Time taken: 4.11 seconds, Fetched 5 row(s)

ps此次使用如下内存参数查询的指令:
./spark-sql –master yarn –driver-memory 4G –executor-memory 3G –num-executors 4

经过多次调优,现有机器条件下(三台内存,11G,8G,8G)下以及3T硬盘空间,能够达到的最好效果,数据源于保险行业真实数据,不方便透露,260万条数据与64万条数据的关联产生结果表,速度还是非常不错的。
结果

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值