御膳房--操作
表名 描述 来源 所属包 所属项目 操作
item_feature 商品粒度相关特征 天池 查看包 tianchi_data (tianchi_data) 已授权
config 每个商品在全国和分仓区域的补少... 天池 查看包 tianchi_data (tianchi_data) 已授权
item_store_feature 商品和分仓区域粒度相关特征 天池 查看包 tianchi_data (tianchi_data) 已授权
数据开发-工作流-工作流节点
select
count(*)
from tianchi_data.config;
select
*
from tianchi_data.config limit 10;
创建结果输出的表target
create table if not exists target (
item_id bigint,
store_code string,
target Double);
表管理-筛选-输入target-
创建分仓结果输出的测试表target2
create table if not exists target2 (
item_id bigint,
store_code string,
target Double);
把查询结果保存到表
insert overwrite table target2
select item_id,store_code,qty_alipay_njhs from tianchi_data.item_store_feature limit 10;
查询表结构
desc tianchi_data.item_feature
desc tianchi_data.item_store_feature
查询新建的表
select
item_id,
store_code,
target
from target2;
删除表中所有数据
truncate table target2
创建全国结果输出的测试表target3
create table if not exists target3 (
item_id bigint,
target Double);
统计前两周的和——查询
insert overwrite table target3
select item_id,sum(qty_alipay_njhs) from tianchi_data.item_feature where thedate >='20151214' and thedate <='20151227' group by item_id;
insert overwrite table target2
select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;
实验数据操作表
create table if not exists test1 (
item_id bigint,
store_code string,
target Double);
Case when 的使用方法
--简单Case函数
CASE sex
WHEN '1' THEN '男'
WHEN '2' THEN '女'
ELSE '其他' END
--Case搜索函数
CASE WHEN sex = '1' THEN '男'
WHEN sex = '2' THEN '女'
ELSE '其他' END
---case a when b ---如果a=b返回then后面的结果
insert overwrite table test1
select item_id,case 100 when 100 then 'all' end ,target from target3 ;
查看
select count(*) from target
1389442
1189442
将test1数据追加到target2
insert into table target2
select item_id,store_code,target from test1
测试
select item_id,store_code,target from target2 where item_id=4
生成最终结果
insert into table target
select item_id,store_code,target from target2
UNION 操作符用于合并两个或多个 SELECT 语句的结果集。如果允许重复的值,请使用 UNION ALL
SELECT column_name(s) FROM table_name1
UNION ALL
SELECT column_name(s) FROM table_name2
mvn打包进入项目目录执行命令D:\eclipseHadoop1x\hadoop-test-indigoVersion-space\TianChiMapreduce
mvn clean package
配置请全部在base.mapred.xml中完成
MapOnly程序在Map阶段直接针对每一条输入记录进行处理并输出,后续无Combiner和Reducer。要编写MapOnly程序,只要做如下两点即可:
将base.mapred.xml中的Combiner和Reducer注释或删除
Mapper中要直接输出outputRecord,不要像普通MR程序输出Map的中间结果
如何读取本地文件
ODPS的MR程序不允许直接使用本地IO,如有资源文件,可放在src/main/resources/下,然后通过TaskContext的readResourceFileAsStream方法读取。
示例:
// 假设资源文件为src/main/resources/data.txt
String resourceFileName = "src/main/resources/data.txt";
BufferedInputStream bis = new BufferedInputStream(this.getClass().getClassLoader().getResourceAsStream(resourceFileName));
BufferedReader = new BufferedReader(new InputStreamReader(bis));
实验数据操作表
create table if not exists test2 (
item_id bigint,
store_code string,
target Double);
增加测试数据
insert into table test2
select item_id,store_code,target from target limit 20
输出结果
create table if not exists result (
str1 string,
str2 string);
参考示例
drop table if exists result;
create table if not exists result (str1 string,str2 string);
insert into table result
select
myudf(item_id) as (item_id)
from
(
select
*
from test2 tb2
where item_id=4
) tb1;
参考示例
SELECT
myudf(user_id,item_id,behavior_type,time,'2014-12-18',1) AS (user_id,item_id,time,feature) //返回的结果重命--方便插入其他表字段对应
FROM
(
SELECT
*
FROM
t_mj_p_user tb2 //tb2必须有防止报错
WHERE
time < '2014-12-18'
DISTRIBUTE BY user_id,item_id
SORT BY user_id,item_id,time DESC
) tb1; //tb1必须有防止报错
create table if not exists cn_submit (
item_id bigint,
store_code string,
target Double);
insert into table cn_submit
select * from target;
create table if not exists all_feature2 (
item_id bigint,
store_code string,
1w Double,
2w Double,
3w Double,
1m Double,
2m Double,
1d Double,
2d Double,
3d Double);
insert into table all_feature2
select item_id,case 100 when 100 then 'all' end as store_code,sum(dd),sum(dd2),sum(dd3),sum(dd4),sum(dd5),sum(dd6),sum(dd7),sum(dd8) from (
select item_id,sum(qty_alipay_njhs) as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151221' and thedate <='20151227' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,sum(qty_alipay_njhs) as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151214' and thedate <='20151227' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,sum(qty_alipay_njhs) as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151207' and thedate <='20151227' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,sum(qty_alipay_njhs) as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151130' and thedate <='20151227' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,sum(qty_alipay_njhs) as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151102' and thedate <='20151227' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,sum(qty_alipay_njhs) as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate ='20151227' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,sum(qty_alipay_njhs) as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate ='20151226' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,sum(qty_alipay_njhs) as dd8 from tianchi_data.item_feature where thedate ='20151225' group by item_id )
t group by item_id;
线性回归预测模型存放的地址
create table if not exists test3 (
item_id bigint,
store_code string,
target Double);
ceil取上整函数 floor向下取整 log取对数
insert into table test3
select item_id,store_code,floor(prediction_score) from test4;
insert into table test3
select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;
truncate table cn_submit;
insert into table cn_submit
select * from test3;
创建时间列表
create table if not exists datalist as
SELECT distinct(thedate) FROM tianchi_data.item_feature where thedate >='20141020' and thedate <='20151227' order by thedate limit 500;
清楚表重新生成
truncate table datalist;
insert into table datalist
SELECT distinct(thedate) FROM tianchi_data.item_feature where thedate >='20141020' and thedate <='20151227' order by thedate limit 500;
查询
select * from datalist;
create table if not exists datalist2 as
select thedate ,myudf() from datalist;
select * from datalist2;
创建要连接的表
create table if not exists item_feature as
select item_id,qty_alipay_njhs,thedate from tianchi_data.item_feature where thedate >='20141020' and thedate <='20151227'
表连接
create table if not exists item_feature2 as
select /* + mapjoin(a) */
a._c1 as tab,
b.item_id,
b.thedate,
b.qty_alipay_njhs
from datalist2 a join item_feature b
on a.thedate = b.thedate;
select * from item_feature2 where item_id='222784' order by thedate
按ceiling(tab/14)分组--31维度的特征
create table if not exists item_feature4 as
select item_id,ceil(tab/14) as tab1,sum(qty_alipay_njhs) as sum1 from item_feature2 group by item_id,ceil(tab/14)
特征建立组
create table if not exists item_feature5 as
create table if not exists test3_9 as
select item_id,store_code,target from test3 order by item_id;
truncate table cn_submit;
insert into table cn_submit
select * from test3_9;
insert into table cn_submit
select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;
模型组合
create table if not exists test3_91 as
select item_id,case 100 when 100 then 'all' end as store_code,floor(0.9*target) from target3;
truncate table cn_submit;
insert into table cn_submit
select * from test3_9;
insert into table cn_submit
select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;
将全国最大值缩小200 总结果1389442个 max1结果1389435个
少7个值
create table if not exists max1 as
select * from target2 where item_id not in ('985273') ;
将七个值取出
create table if not exists max1_7 as
select * from target2 where item_id ='985273';
将其中6个放入max1 结果1389441个
insert into table max1
select * from max1_7 where store_code not in ('all');
select count(*) from max1
将最大值减去200
select item_id,store_code,target-200 as target from max1_7 where item_id ='985273' and store_code='all';
提交结果
truncate table cn_submit;
insert into table cn_submit
select * from max1;
insert into table cn_submit
select item_id,store_code,target-200 as target from max1_7 where item_id ='985273' and store_code='all';
查看特征得知受12.25影响特别大
'985273'
将全国最大值0.97 总结果1389442个 max1结果1389435个
少7个值
create table if not exists max2 as
select * from target2 where item_id not in ('985273') ;
将七个值取出
create table if not exists max2_7 as
select * from target2 where item_id ='985273';
提交结果
truncate table cn_submit;
insert into table cn_submit
select * from max2;
insert into table cn_submit
select item_id,store_code,floor(0.97*target) as target from max2_7;
查看特征得知受12.25影响特别大 3d > (1d+2d)==>3d > 2*(1d+2d)
create table if not exists top50x as
select item_id,store_code,2w from all_feature2 where 3d > (1d+2d) order by 2w desc limit 50
create table if not exists max50 as
select * from target2 where item_id not in (
select item_id from top50x
) ;
50x7=350 1389442-350=1389092
将350个值取出
create table if not exists max50_7 as
select * from target2 where item_id in (select item_id from top50x);
提交结果
truncate table cn_submit;
insert into table cn_submit
select * from max50;
insert into table cn_submit
select item_id,store_code,floor(0.97*target) as target from max50_7;
查看特征得知受12.25影响特别大 3d > (1d+2d)==>3d > 2*(1d+2d)
create table if not exists top1000x as
select item_id,store_code,2w from all_feature2 where 3d > (1d+2d) order by 2w desc limit 1000;
create table if not exists max1000 as
select * from target2 where item_id not in (
select item_id from top1000x
) ;
将值取出
create table if not exists max1000_7 as
select * from target2 where item_id in (select item_id from top1000x);
提交结果
truncate table cn_submit;
insert into table cn_submit
select * from max1000;
insert into table cn_submit
select item_id,store_code,floor(0.95*target) as target from max1000_7;
平滑12.25后提交前两周和
create table if not exists pinghua3 as
select item_id,qty_alipay_njhs,thedate from tianchi_data.item_feature where thedate not in ('20151225');
insert into table pinghua3
select item_id,qty_alipay_njhs,case when 1=1 then '20151225' end as thedate from tianchi_data.item_feature where thedate ='20151226';
create table if not exists pinghua2 as
select item_id,store_code,qty_alipay_njhs,thedate from tianchi_data.item_store_feature where thedate not in ('20151225');
insert into table pinghua2
select item_id,store_code,qty_alipay_njhs,case when 1=1 then '20151225' end as thedate from tianchi_data.item_store_feature where thedate ='20151226';
truncate table cn_submit;
insert into table cn_submit
select item_id,store_code,sum(qty_alipay_njhs) from pinghua2 where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;
insert into table cn_submit
select item_id,case when 1=1 then 'all' end as store_code,sum(qty_alipay_njhs) from pinghua3 where thedate>='20151214' and thedate<='20151227' group by item_id;
表名 描述 来源 所属包 所属项目 操作
item_feature 商品粒度相关特征 天池 查看包 tianchi_data (tianchi_data) 已授权
config 每个商品在全国和分仓区域的补少... 天池 查看包 tianchi_data (tianchi_data) 已授权
item_store_feature 商品和分仓区域粒度相关特征 天池 查看包 tianchi_data (tianchi_data) 已授权
数据开发-工作流-工作流节点
select
count(*)
from tianchi_data.config;
select
*
from tianchi_data.config limit 10;
创建结果输出的表target
create table if not exists target (
item_id bigint,
store_code string,
target Double);
表管理-筛选-输入target-
创建分仓结果输出的测试表target2
create table if not exists target2 (
item_id bigint,
store_code string,
target Double);
把查询结果保存到表
insert overwrite table target2
select item_id,store_code,qty_alipay_njhs from tianchi_data.item_store_feature limit 10;
查询表结构
desc tianchi_data.item_feature
desc tianchi_data.item_store_feature
查询新建的表
select
item_id,
store_code,
target
from target2;
删除表中所有数据
truncate table target2
创建全国结果输出的测试表target3
create table if not exists target3 (
item_id bigint,
target Double);
统计前两周的和——查询
insert overwrite table target3
select item_id,sum(qty_alipay_njhs) from tianchi_data.item_feature where thedate >='20151214' and thedate <='20151227' group by item_id;
insert overwrite table target2
select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;
实验数据操作表
create table if not exists test1 (
item_id bigint,
store_code string,
target Double);
Case when 的使用方法
--简单Case函数
CASE sex
WHEN '1' THEN '男'
WHEN '2' THEN '女'
ELSE '其他' END
--Case搜索函数
CASE WHEN sex = '1' THEN '男'
WHEN sex = '2' THEN '女'
ELSE '其他' END
---case a when b ---如果a=b返回then后面的结果
insert overwrite table test1
select item_id,case 100 when 100 then 'all' end ,target from target3 ;
查看
select count(*) from target
1389442
1189442
将test1数据追加到target2
insert into table target2
select item_id,store_code,target from test1
测试
select item_id,store_code,target from target2 where item_id=4
生成最终结果
insert into table target
select item_id,store_code,target from target2
UNION 操作符用于合并两个或多个 SELECT 语句的结果集。如果允许重复的值,请使用 UNION ALL
SELECT column_name(s) FROM table_name1
UNION ALL
SELECT column_name(s) FROM table_name2
mvn打包进入项目目录执行命令D:\eclipseHadoop1x\hadoop-test-indigoVersion-space\TianChiMapreduce
mvn clean package
配置请全部在base.mapred.xml中完成
MapOnly程序在Map阶段直接针对每一条输入记录进行处理并输出,后续无Combiner和Reducer。要编写MapOnly程序,只要做如下两点即可:
将base.mapred.xml中的Combiner和Reducer注释或删除
Mapper中要直接输出outputRecord,不要像普通MR程序输出Map的中间结果
如何读取本地文件
ODPS的MR程序不允许直接使用本地IO,如有资源文件,可放在src/main/resources/下,然后通过TaskContext的readResourceFileAsStream方法读取。
示例:
// 假设资源文件为src/main/resources/data.txt
String resourceFileName = "src/main/resources/data.txt";
BufferedInputStream bis = new BufferedInputStream(this.getClass().getClassLoader().getResourceAsStream(resourceFileName));
BufferedReader = new BufferedReader(new InputStreamReader(bis));
实验数据操作表
create table if not exists test2 (
item_id bigint,
store_code string,
target Double);
增加测试数据
insert into table test2
select item_id,store_code,target from target limit 20
输出结果
create table if not exists result (
str1 string,
str2 string);
参考示例
drop table if exists result;
create table if not exists result (str1 string,str2 string);
insert into table result
select
myudf(item_id) as (item_id)
from
(
select
*
from test2 tb2
where item_id=4
) tb1;
参考示例
SELECT
myudf(user_id,item_id,behavior_type,time,'2014-12-18',1) AS (user_id,item_id,time,feature) //返回的结果重命--方便插入其他表字段对应
FROM
(
SELECT
*
FROM
t_mj_p_user tb2 //tb2必须有防止报错
WHERE
time < '2014-12-18'
DISTRIBUTE BY user_id,item_id
SORT BY user_id,item_id,time DESC
) tb1; //tb1必须有防止报错
create table if not exists cn_submit (
item_id bigint,
store_code string,
target Double);
insert into table cn_submit
select * from target;
create table if not exists all_feature2 (
item_id bigint,
store_code string,
1w Double,
2w Double,
3w Double,
1m Double,
2m Double,
1d Double,
2d Double,
3d Double);
insert into table all_feature2
select item_id,case 100 when 100 then 'all' end as store_code,sum(dd),sum(dd2),sum(dd3),sum(dd4),sum(dd5),sum(dd6),sum(dd7),sum(dd8) from (
select item_id,sum(qty_alipay_njhs) as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151221' and thedate <='20151227' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,sum(qty_alipay_njhs) as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151214' and thedate <='20151227' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,sum(qty_alipay_njhs) as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151207' and thedate <='20151227' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,sum(qty_alipay_njhs) as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151130' and thedate <='20151227' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,sum(qty_alipay_njhs) as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151102' and thedate <='20151227' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,sum(qty_alipay_njhs) as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate ='20151227' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,sum(qty_alipay_njhs) as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate ='20151226' group by item_id
union all
select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,sum(qty_alipay_njhs) as dd8 from tianchi_data.item_feature where thedate ='20151225' group by item_id )
t group by item_id;
线性回归预测模型存放的地址
create table if not exists test3 (
item_id bigint,
store_code string,
target Double);
ceil取上整函数 floor向下取整 log取对数
insert into table test3
select item_id,store_code,floor(prediction_score) from test4;
insert into table test3
select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;
truncate table cn_submit;
insert into table cn_submit
select * from test3;
创建时间列表
create table if not exists datalist as
SELECT distinct(thedate) FROM tianchi_data.item_feature where thedate >='20141020' and thedate <='20151227' order by thedate limit 500;
清楚表重新生成
truncate table datalist;
insert into table datalist
SELECT distinct(thedate) FROM tianchi_data.item_feature where thedate >='20141020' and thedate <='20151227' order by thedate limit 500;
查询
select * from datalist;
create table if not exists datalist2 as
select thedate ,myudf() from datalist;
select * from datalist2;
创建要连接的表
create table if not exists item_feature as
select item_id,qty_alipay_njhs,thedate from tianchi_data.item_feature where thedate >='20141020' and thedate <='20151227'
表连接
create table if not exists item_feature2 as
select /* + mapjoin(a) */
a._c1 as tab,
b.item_id,
b.thedate,
b.qty_alipay_njhs
from datalist2 a join item_feature b
on a.thedate = b.thedate;
select * from item_feature2 where item_id='222784' order by thedate
按ceiling(tab/14)分组--31维度的特征
create table if not exists item_feature4 as
select item_id,ceil(tab/14) as tab1,sum(qty_alipay_njhs) as sum1 from item_feature2 group by item_id,ceil(tab/14)
特征建立组
create table if not exists item_feature5 as
create table if not exists test3_9 as
select item_id,store_code,target from test3 order by item_id;
truncate table cn_submit;
insert into table cn_submit
select * from test3_9;
insert into table cn_submit
select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;
模型组合
create table if not exists test3_91 as
select item_id,case 100 when 100 then 'all' end as store_code,floor(0.9*target) from target3;
truncate table cn_submit;
insert into table cn_submit
select * from test3_9;
insert into table cn_submit
select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;
将全国最大值缩小200 总结果1389442个 max1结果1389435个
少7个值
create table if not exists max1 as
select * from target2 where item_id not in ('985273') ;
将七个值取出
create table if not exists max1_7 as
select * from target2 where item_id ='985273';
将其中6个放入max1 结果1389441个
insert into table max1
select * from max1_7 where store_code not in ('all');
select count(*) from max1
将最大值减去200
select item_id,store_code,target-200 as target from max1_7 where item_id ='985273' and store_code='all';
提交结果
truncate table cn_submit;
insert into table cn_submit
select * from max1;
insert into table cn_submit
select item_id,store_code,target-200 as target from max1_7 where item_id ='985273' and store_code='all';
查看特征得知受12.25影响特别大
'985273'
将全国最大值0.97 总结果1389442个 max1结果1389435个
少7个值
create table if not exists max2 as
select * from target2 where item_id not in ('985273') ;
将七个值取出
create table if not exists max2_7 as
select * from target2 where item_id ='985273';
提交结果
truncate table cn_submit;
insert into table cn_submit
select * from max2;
insert into table cn_submit
select item_id,store_code,floor(0.97*target) as target from max2_7;
查看特征得知受12.25影响特别大 3d > (1d+2d)==>3d > 2*(1d+2d)
create table if not exists top50x as
select item_id,store_code,2w from all_feature2 where 3d > (1d+2d) order by 2w desc limit 50
create table if not exists max50 as
select * from target2 where item_id not in (
select item_id from top50x
) ;
50x7=350 1389442-350=1389092
将350个值取出
create table if not exists max50_7 as
select * from target2 where item_id in (select item_id from top50x);
提交结果
truncate table cn_submit;
insert into table cn_submit
select * from max50;
insert into table cn_submit
select item_id,store_code,floor(0.97*target) as target from max50_7;
查看特征得知受12.25影响特别大 3d > (1d+2d)==>3d > 2*(1d+2d)
create table if not exists top1000x as
select item_id,store_code,2w from all_feature2 where 3d > (1d+2d) order by 2w desc limit 1000;
create table if not exists max1000 as
select * from target2 where item_id not in (
select item_id from top1000x
) ;
将值取出
create table if not exists max1000_7 as
select * from target2 where item_id in (select item_id from top1000x);
提交结果
truncate table cn_submit;
insert into table cn_submit
select * from max1000;
insert into table cn_submit
select item_id,store_code,floor(0.95*target) as target from max1000_7;
平滑12.25后提交前两周和
create table if not exists pinghua3 as
select item_id,qty_alipay_njhs,thedate from tianchi_data.item_feature where thedate not in ('20151225');
insert into table pinghua3
select item_id,qty_alipay_njhs,case when 1=1 then '20151225' end as thedate from tianchi_data.item_feature where thedate ='20151226';
create table if not exists pinghua2 as
select item_id,store_code,qty_alipay_njhs,thedate from tianchi_data.item_store_feature where thedate not in ('20151225');
insert into table pinghua2
select item_id,store_code,qty_alipay_njhs,case when 1=1 then '20151225' end as thedate from tianchi_data.item_store_feature where thedate ='20151226';
truncate table cn_submit;
insert into table cn_submit
select item_id,store_code,sum(qty_alipay_njhs) from pinghua2 where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;
insert into table cn_submit
select item_id,case when 1=1 then 'all' end as store_code,sum(qty_alipay_njhs) from pinghua3 where thedate>='20151214' and thedate<='20151227' group by item_id;