--整理市12-15年每天流感例数
----------------------------------------------##社保14-15年一年数据----------------------------------------------------
--观察hmrpt_xiamen1415_fwa_diagnose_code_desc表中diagnose_code是否唯一,不唯一需要去重,去重后与hmrpt_xiamen1415_fwa_bill_info合并
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
select count(1) as m,count(distinct diagnose_code) as n from *****159_ind.hmrpt_xiamen1415_fwa_diagnose_code_desc;
--1 32298 32298
--上述表diagnose_code唯一
--hmrpt_xiamen1415_fwa_bill_info中只有diagnose_code,向表中添加diagnose_code_name
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen1415_lg
as select a.bill_no,a.pc_no,a.visit_no, case when (a.in_hosp_date is not null and a.in_hosp_date!="") then a.in_hosp_date else a.settle_date end as date,a.medical_org_name, a.diagnose_code, b.diagnose_code_name from *****159_ind.hmrpt_xiamen1415_fwa_bill_info a left join *****159_ind.hmrpt_xiamen1415_fwa_diagnose_code_desc b
on a.diagnose_code=b.diagnose_code;
--整理date为yyyymmdd 八位整数格式;
--提取icd-10编码点号前部分
--定义流感,icd编码为J10,J11或者diagnose_code_name包含流感,流行性感冒
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
drop table *****159_ind.xiamen1415_lg1;
create table *****159_ind.xiamen1415_lg1
as select a.bill_no,a.pc_no,a.visit_no, year(to_date(date))*10000+month(to_date(date))*100+day(to_date(date)) as date1,a.medical_org_name, a.diagnose_code,case when split(diagnose_code,"\\.")[0] in ("J10","J11") or diagnose_code_name rlike '.*(流感|流行性感冒).*' then 1 else 0 end as lg,
diagnose_code_name from *****159_ind.xiamen1415_lg a;
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen1415_lg_tj
as select visit_no, min(date1) as date1, case when sum(lg)>0 then 1 else 0 end as lg from *****159_ind.xiamen1415_lg1
group by visit_no;
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen1415_lg_tj1
as select date1, sum(lg) as lg_n, count(1) as visit_n from *****159_ind.xiamen1415_lg_tj
group by date1
order by date1;
select * from *****159_ind.xiamen1415_lg_tj1;
--观察数据后,剔除不正常的日期
create table *****159_ind.xiamen1415_lg_tj2
as select * from *****159_ind.xiamen1415_lg_tj1
where date1>=20140702 and date1<=20150811;
select * from *****159_ind.xiamen1415_lg_tj2;
----------------------------------------------社保13年一年数据----------------------------------------------------
--观察hmrpt_xiamen13_fwa_diagnose_code_desc表中diagnose_code是否唯一,不唯一需要去重,去重后与hmrpt_xiamen13_fwa_bill_info合并
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
select count(1) as m,count(distinct diagnose_code) as n from *****159_ind.hmrpt_xiamen13_fwa_diagnose_code_desc;
--1 32298 32298
--上述表diagnose_code唯一
--hmrpt_xiamen13_fwa_bill_info中只有diagnose_code,向表中添加diagnose_code_name
drop table *****159_ind.xiamen13_lg;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
set mapred.job.queue.name=queue_0539_01;
create table *****159_ind.xiamen13_lg
as select a.bill_no,a.pc_no,a.visit_no, case when (a.in_hosp_date is not null and a.in_hosp_date!="") then a.in_hosp_date else a.settle_date end as date,a.medical_org_name, a.diagnose_code, b.diagnose_code_name from *****159_ind.hmrpt_xiamen13_fwa_bill_info a left join *****159_ind.hmrpt_xiamen13_fwa_diagnose_code_desc b
on a.diagnose_code=b.diagnose_code;
--整理date为yyyymmdd 八位整数格式;
--提取icd-10编码点号前部分
--定义流感,icd编码为J10,J11或者diagnose_code_name包含流感,流行性感冒
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
drop table *****159_ind.xiamen13_lg1;
create table *****159_ind.xiamen13_lg1
as select a.bill_no,a.pc_no,a.visit_no, year(to_date(date))*10000+month(to_date(date))*100+day(to_date(date)) as date1,a.medical_org_name, a.diagnose_code,case when split(diagnose_code,"\\.")[0] in ("J10","J11") or diagnose_code_name rlike '.*(流感|流行性感冒).*' then 1 else 0 end as lg,
diagnose_code_name from *****159_ind.xiamen13_lg a;
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen13_lg_tj
as select visit_no, min(date1) as date1, case when sum(lg)>0 then 1 else 0 end as lg from *****159_ind.xiamen13_lg1
group by visit_no;
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen13_lg_tj1
as select date1, sum(lg) as lg_n, count(1) as visit_n from *****159_ind.xiamen13_lg_tj
group by date1
order by date1;
select * from *****159_ind.xiamen13_lg_tj1;
--观察数据后,剔除不正常的日期
create table *****159_ind.xiamen13_lg_tj2
as select * from *****159_ind.xiamen13_lg_tj1
where date1>=20130702 and date1<=20140630;
select * from *****159_ind.xiamen13_lg_tj2;
desc formatted *****159_ind.hmrpt_xiamen12_fwa_diagnose_code_desc;;
select * from *****159_ind.hmrpt_xiamen12_fwa_diagnose_code_desc;
----------------------------------------------社保12一年数据----------------------------------------------------
--观察hmrpt_xiamen12_fwa_diagnose_code_desc表中diagnose_code是否唯一,不唯一需要去重,去重后与hmrpt_xiamen12_fwa_bill_info合并
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
select count(1) as m,count(distinct diagnose_code) as n from *****159_ind.hmrpt_xiamen12_fwa_diagnose_code_desc;
--1 0 0
--上述表无数据,用13年代替
--hmrpt_xiamen12_fwa_bill_info中只有diagnose_code,向表中添加diagnose_code_name
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen12_lg
as select a.bill_no,a.pc_no,a.visit_no, case when (a.in_hosp_date is not null and a.in_hosp_date!="") then a.in_hosp_date else a.settle_date end as date,a.medical_org_name, a.diagnose_code, b.diagnose_code_name from *****159_ind.hmrpt_xiamen12_fwa_bill_info a left join *****159_ind.hmrpt_xiamen13_fwa_diagnose_code_desc b
on a.diagnose_code=b.diagnose_code;
--整理date为yyyymmdd 八位整数格式;
--提取icd-10编码点号前部分
--定义流感,icd编码为J10,J11或者diagnose_code_name包含流感,流行性感冒
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
drop table *****159_ind.xiamen12_lg1;
create table *****159_ind.xiamen12_lg1
as select a.bill_no,a.pc_no,a.visit_no, year(to_date(date))*10000+month(to_date(date))*100+day(to_date(date)) as date1,a.medical_org_name, a.diagnose_code,case when split(diagnose_code,"\\.")[0] in ("J10","J11") or diagnose_code_name rlike '.*(流感|流行性感冒).*' then 1 else 0 end as lg,
diagnose_code_name from *****159_ind.xiamen12_lg a;
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen12_lg_tj
as select visit_no, min(date1) as date1, case when sum(lg)>0 then 1 else 0 end as lg from *****159_ind.xiamen12_lg1
group by visit_no;
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen12_lg_tj1
as select date1, sum(lg) as lg_n, count(1) as visit_n from *****159_ind.xiamen12_lg_tj
group by date1
order by date1;
select * from *****159_ind.xiamen12_lg_tj1;
--观察数据后,剔除不正常的日期
create table *****159_ind.xiamen12_lg_tj2
as select * from *****159_ind.xiamen12_lg_tj1
where date1>=20120703 and date1<=20130630;
select * from *****159_ind.xiamen12_lg_tj2;
---所有表纵向合并
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen_all_lg_tj
as select * from *****159_ind.xiamen12_lg_tj2
union all select * from *****159_ind.xiamen13_lg_tj2
union all select * from *****159_ind.xiamen1415_lg_tj2;
select * from *****159_ind.xiamen_all_lg_tj;
----------------------------------------------##社保14-15年一年数据----------------------------------------------------
--观察hmrpt_xiamen1415_fwa_diagnose_code_desc表中diagnose_code是否唯一,不唯一需要去重,去重后与hmrpt_xiamen1415_fwa_bill_info合并
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
select count(1) as m,count(distinct diagnose_code) as n from *****159_ind.hmrpt_xiamen1415_fwa_diagnose_code_desc;
--1 32298 32298
--上述表diagnose_code唯一
--hmrpt_xiamen1415_fwa_bill_info中只有diagnose_code,向表中添加diagnose_code_name
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen1415_lg
as select a.bill_no,a.pc_no,a.visit_no, case when (a.in_hosp_date is not null and a.in_hosp_date!="") then a.in_hosp_date else a.settle_date end as date,a.medical_org_name, a.diagnose_code, b.diagnose_code_name from *****159_ind.hmrpt_xiamen1415_fwa_bill_info a left join *****159_ind.hmrpt_xiamen1415_fwa_diagnose_code_desc b
on a.diagnose_code=b.diagnose_code;
--整理date为yyyymmdd 八位整数格式;
--提取icd-10编码点号前部分
--定义流感,icd编码为J10,J11或者diagnose_code_name包含流感,流行性感冒
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
drop table *****159_ind.xiamen1415_lg1;
create table *****159_ind.xiamen1415_lg1
as select a.bill_no,a.pc_no,a.visit_no, year(to_date(date))*10000+month(to_date(date))*100+day(to_date(date)) as date1,a.medical_org_name, a.diagnose_code,case when split(diagnose_code,"\\.")[0] in ("J10","J11") or diagnose_code_name rlike '.*(流感|流行性感冒).*' then 1 else 0 end as lg,
diagnose_code_name from *****159_ind.xiamen1415_lg a;
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen1415_lg_tj
as select visit_no, min(date1) as date1, case when sum(lg)>0 then 1 else 0 end as lg from *****159_ind.xiamen1415_lg1
group by visit_no;
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen1415_lg_tj1
as select date1, sum(lg) as lg_n, count(1) as visit_n from *****159_ind.xiamen1415_lg_tj
group by date1
order by date1;
select * from *****159_ind.xiamen1415_lg_tj1;
--观察数据后,剔除不正常的日期
create table *****159_ind.xiamen1415_lg_tj2
as select * from *****159_ind.xiamen1415_lg_tj1
where date1>=20140702 and date1<=20150811;
select * from *****159_ind.xiamen1415_lg_tj2;
----------------------------------------------社保13年一年数据----------------------------------------------------
--观察hmrpt_xiamen13_fwa_diagnose_code_desc表中diagnose_code是否唯一,不唯一需要去重,去重后与hmrpt_xiamen13_fwa_bill_info合并
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
select count(1) as m,count(distinct diagnose_code) as n from *****159_ind.hmrpt_xiamen13_fwa_diagnose_code_desc;
--1 32298 32298
--上述表diagnose_code唯一
--hmrpt_xiamen13_fwa_bill_info中只有diagnose_code,向表中添加diagnose_code_name
drop table *****159_ind.xiamen13_lg;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
set mapred.job.queue.name=queue_0539_01;
create table *****159_ind.xiamen13_lg
as select a.bill_no,a.pc_no,a.visit_no, case when (a.in_hosp_date is not null and a.in_hosp_date!="") then a.in_hosp_date else a.settle_date end as date,a.medical_org_name, a.diagnose_code, b.diagnose_code_name from *****159_ind.hmrpt_xiamen13_fwa_bill_info a left join *****159_ind.hmrpt_xiamen13_fwa_diagnose_code_desc b
on a.diagnose_code=b.diagnose_code;
--整理date为yyyymmdd 八位整数格式;
--提取icd-10编码点号前部分
--定义流感,icd编码为J10,J11或者diagnose_code_name包含流感,流行性感冒
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
drop table *****159_ind.xiamen13_lg1;
create table *****159_ind.xiamen13_lg1
as select a.bill_no,a.pc_no,a.visit_no, year(to_date(date))*10000+month(to_date(date))*100+day(to_date(date)) as date1,a.medical_org_name, a.diagnose_code,case when split(diagnose_code,"\\.")[0] in ("J10","J11") or diagnose_code_name rlike '.*(流感|流行性感冒).*' then 1 else 0 end as lg,
diagnose_code_name from *****159_ind.xiamen13_lg a;
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen13_lg_tj
as select visit_no, min(date1) as date1, case when sum(lg)>0 then 1 else 0 end as lg from *****159_ind.xiamen13_lg1
group by visit_no;
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen13_lg_tj1
as select date1, sum(lg) as lg_n, count(1) as visit_n from *****159_ind.xiamen13_lg_tj
group by date1
order by date1;
select * from *****159_ind.xiamen13_lg_tj1;
--观察数据后,剔除不正常的日期
create table *****159_ind.xiamen13_lg_tj2
as select * from *****159_ind.xiamen13_lg_tj1
where date1>=20130702 and date1<=20140630;
select * from *****159_ind.xiamen13_lg_tj2;
desc formatted *****159_ind.hmrpt_xiamen12_fwa_diagnose_code_desc;;
select * from *****159_ind.hmrpt_xiamen12_fwa_diagnose_code_desc;
----------------------------------------------社保12一年数据----------------------------------------------------
--观察hmrpt_xiamen12_fwa_diagnose_code_desc表中diagnose_code是否唯一,不唯一需要去重,去重后与hmrpt_xiamen12_fwa_bill_info合并
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
select count(1) as m,count(distinct diagnose_code) as n from *****159_ind.hmrpt_xiamen12_fwa_diagnose_code_desc;
--1 0 0
--上述表无数据,用13年代替
--hmrpt_xiamen12_fwa_bill_info中只有diagnose_code,向表中添加diagnose_code_name
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen12_lg
as select a.bill_no,a.pc_no,a.visit_no, case when (a.in_hosp_date is not null and a.in_hosp_date!="") then a.in_hosp_date else a.settle_date end as date,a.medical_org_name, a.diagnose_code, b.diagnose_code_name from *****159_ind.hmrpt_xiamen12_fwa_bill_info a left join *****159_ind.hmrpt_xiamen13_fwa_diagnose_code_desc b
on a.diagnose_code=b.diagnose_code;
--整理date为yyyymmdd 八位整数格式;
--提取icd-10编码点号前部分
--定义流感,icd编码为J10,J11或者diagnose_code_name包含流感,流行性感冒
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
drop table *****159_ind.xiamen12_lg1;
create table *****159_ind.xiamen12_lg1
as select a.bill_no,a.pc_no,a.visit_no, year(to_date(date))*10000+month(to_date(date))*100+day(to_date(date)) as date1,a.medical_org_name, a.diagnose_code,case when split(diagnose_code,"\\.")[0] in ("J10","J11") or diagnose_code_name rlike '.*(流感|流行性感冒).*' then 1 else 0 end as lg,
diagnose_code_name from *****159_ind.xiamen12_lg a;
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen12_lg_tj
as select visit_no, min(date1) as date1, case when sum(lg)>0 then 1 else 0 end as lg from *****159_ind.xiamen12_lg1
group by visit_no;
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen12_lg_tj1
as select date1, sum(lg) as lg_n, count(1) as visit_n from *****159_ind.xiamen12_lg_tj
group by date1
order by date1;
select * from *****159_ind.xiamen12_lg_tj1;
--观察数据后,剔除不正常的日期
create table *****159_ind.xiamen12_lg_tj2
as select * from *****159_ind.xiamen12_lg_tj1
where date1>=20120703 and date1<=20130630;
select * from *****159_ind.xiamen12_lg_tj2;
---所有表纵向合并
set mapred.job.queue.name=queue03;
set hive.auto.convert.join=true;
set hive.exec.parallel=true;
create table *****159_ind.xiamen_all_lg_tj
as select * from *****159_ind.xiamen12_lg_tj2
union all select * from *****159_ind.xiamen13_lg_tj2
union all select * from *****159_ind.xiamen1415_lg_tj2;
select * from *****159_ind.xiamen_all_lg_tj;