sql4学习

--整理市12-15年每天流感例数


----------------------------------------------##社保14-15年一年数据----------------------------------------------------


--观察hmrpt_xiamen1415_fwa_diagnose_code_desc表中diagnose_code是否唯一,不唯一需要去重,去重后与hmrpt_xiamen1415_fwa_bill_info合并
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
select count(1) as m,count(distinct diagnose_code) as n from *****159_ind.hmrpt_xiamen1415_fwa_diagnose_code_desc;
--1 32298 32298


--上述表diagnose_code唯一
--hmrpt_xiamen1415_fwa_bill_info中只有diagnose_code,向表中添加diagnose_code_name
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen1415_lg 
as select a.bill_no,a.pc_no,a.visit_no, case when (a.in_hosp_date is not null and a.in_hosp_date!="") then a.in_hosp_date else a.settle_date end as date,a.medical_org_name, a.diagnose_code, b.diagnose_code_name from *****159_ind.hmrpt_xiamen1415_fwa_bill_info a left join *****159_ind.hmrpt_xiamen1415_fwa_diagnose_code_desc b
on a.diagnose_code=b.diagnose_code;


--整理date为yyyymmdd 八位整数格式; 
--提取icd-10编码点号前部分
--定义流感,icd编码为J10,J11或者diagnose_code_name包含流感,流行性感冒


set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
drop table *****159_ind.xiamen1415_lg1;
create table *****159_ind.xiamen1415_lg1
as select a.bill_no,a.pc_no,a.visit_no, year(to_date(date))*10000+month(to_date(date))*100+day(to_date(date)) as date1,a.medical_org_name, a.diagnose_code,case when split(diagnose_code,"\\.")[0] in ("J10","J11") or diagnose_code_name rlike '.*(流感|流行性感冒).*' then 1 else 0 end as lg,
diagnose_code_name from *****159_ind.xiamen1415_lg a;


set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen1415_lg_tj
as select visit_no, min(date1) as date1, case when sum(lg)>0 then 1 else 0 end as lg from *****159_ind.xiamen1415_lg1
group by visit_no;


set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen1415_lg_tj1
as  select date1, sum(lg) as lg_n, count(1) as visit_n from *****159_ind.xiamen1415_lg_tj
group by date1
order by date1;


select * from *****159_ind.xiamen1415_lg_tj1;


--观察数据后,剔除不正常的日期
create table *****159_ind.xiamen1415_lg_tj2
as select * from *****159_ind.xiamen1415_lg_tj1 
where date1>=20140702  and date1<=20150811;


select * from *****159_ind.xiamen1415_lg_tj2;








----------------------------------------------社保13年一年数据----------------------------------------------------


--观察hmrpt_xiamen13_fwa_diagnose_code_desc表中diagnose_code是否唯一,不唯一需要去重,去重后与hmrpt_xiamen13_fwa_bill_info合并
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
select count(1) as m,count(distinct diagnose_code) as n from *****159_ind.hmrpt_xiamen13_fwa_diagnose_code_desc;
--1 32298 32298


--上述表diagnose_code唯一
--hmrpt_xiamen13_fwa_bill_info中只有diagnose_code,向表中添加diagnose_code_name
drop table *****159_ind.xiamen13_lg;


set hive.auto.convert.join=true;
set hive.exec.parallel=true;
set mapred.job.queue.name=queue_0539_01;
create table *****159_ind.xiamen13_lg 
as select a.bill_no,a.pc_no,a.visit_no, case when (a.in_hosp_date is not null and a.in_hosp_date!="") then a.in_hosp_date else a.settle_date end as date,a.medical_org_name, a.diagnose_code, b.diagnose_code_name from *****159_ind.hmrpt_xiamen13_fwa_bill_info a left join *****159_ind.hmrpt_xiamen13_fwa_diagnose_code_desc b
on a.diagnose_code=b.diagnose_code;


--整理date为yyyymmdd 八位整数格式; 
--提取icd-10编码点号前部分
--定义流感,icd编码为J10,J11或者diagnose_code_name包含流感,流行性感冒


set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
drop table *****159_ind.xiamen13_lg1;
create table *****159_ind.xiamen13_lg1
as select a.bill_no,a.pc_no,a.visit_no, year(to_date(date))*10000+month(to_date(date))*100+day(to_date(date)) as date1,a.medical_org_name, a.diagnose_code,case when split(diagnose_code,"\\.")[0] in ("J10","J11") or diagnose_code_name rlike '.*(流感|流行性感冒).*' then 1 else 0 end as lg,
diagnose_code_name from *****159_ind.xiamen13_lg a;


set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen13_lg_tj
as select visit_no, min(date1) as date1, case when sum(lg)>0 then 1 else 0 end as lg from *****159_ind.xiamen13_lg1
group by visit_no;


set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen13_lg_tj1
as  select date1, sum(lg) as lg_n, count(1) as visit_n from *****159_ind.xiamen13_lg_tj
group by date1
order by date1;


select * from *****159_ind.xiamen13_lg_tj1;


--观察数据后,剔除不正常的日期
create table *****159_ind.xiamen13_lg_tj2
as select * from *****159_ind.xiamen13_lg_tj1 
where date1>=20130702  and date1<=20140630;


select * from *****159_ind.xiamen13_lg_tj2;




desc formatted *****159_ind.hmrpt_xiamen12_fwa_diagnose_code_desc;;


select * from *****159_ind.hmrpt_xiamen12_fwa_diagnose_code_desc;








----------------------------------------------社保12一年数据----------------------------------------------------


--观察hmrpt_xiamen12_fwa_diagnose_code_desc表中diagnose_code是否唯一,不唯一需要去重,去重后与hmrpt_xiamen12_fwa_bill_info合并
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
select count(1) as m,count(distinct diagnose_code) as n from *****159_ind.hmrpt_xiamen12_fwa_diagnose_code_desc;
--1 0 0


--上述表无数据,用13年代替
--hmrpt_xiamen12_fwa_bill_info中只有diagnose_code,向表中添加diagnose_code_name
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen12_lg 
as select a.bill_no,a.pc_no,a.visit_no, case when (a.in_hosp_date is not null and a.in_hosp_date!="") then a.in_hosp_date else a.settle_date end as date,a.medical_org_name, a.diagnose_code, b.diagnose_code_name from *****159_ind.hmrpt_xiamen12_fwa_bill_info a left join *****159_ind.hmrpt_xiamen13_fwa_diagnose_code_desc b
on a.diagnose_code=b.diagnose_code;


--整理date为yyyymmdd 八位整数格式; 
--提取icd-10编码点号前部分
--定义流感,icd编码为J10,J11或者diagnose_code_name包含流感,流行性感冒


set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
drop table *****159_ind.xiamen12_lg1;
create table *****159_ind.xiamen12_lg1
as select a.bill_no,a.pc_no,a.visit_no, year(to_date(date))*10000+month(to_date(date))*100+day(to_date(date)) as date1,a.medical_org_name, a.diagnose_code,case when split(diagnose_code,"\\.")[0] in ("J10","J11") or diagnose_code_name rlike '.*(流感|流行性感冒).*' then 1 else 0 end as lg,
diagnose_code_name from *****159_ind.xiamen12_lg a;


set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen12_lg_tj
as select visit_no, min(date1) as date1, case when sum(lg)>0 then 1 else 0 end as lg from *****159_ind.xiamen12_lg1
group by visit_no;


set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen12_lg_tj1
as  select date1, sum(lg) as lg_n, count(1) as visit_n from *****159_ind.xiamen12_lg_tj
group by date1
order by date1;


select * from *****159_ind.xiamen12_lg_tj1;


--观察数据后,剔除不正常的日期
create table *****159_ind.xiamen12_lg_tj2
as select * from *****159_ind.xiamen12_lg_tj1 
where date1>=20120703  and date1<=20130630;


select * from *****159_ind.xiamen12_lg_tj2;






---所有表纵向合并
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen_all_lg_tj
as select * from *****159_ind.xiamen12_lg_tj2
union all select * from *****159_ind.xiamen13_lg_tj2
union all select * from *****159_ind.xiamen1415_lg_tj2;


select * from *****159_ind.xiamen_all_lg_tj;
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值