关闭

天池大数据比赛,菜鸟仓库比赛,御膳房操作

标签: 天池大数据比赛菜鸟仓库比赛御膳房操作
896人阅读 评论(0) 收藏 举报
分类:
御膳房--操作
表名 描述 来源 所属包 所属项目 操作
item_feature 商品粒度相关特征 天池 查看包 tianchi_data (tianchi_data) 已授权
config     每个商品在全国和分仓区域的补少... 天池 查看包 tianchi_data (tianchi_data) 已授权
item_store_feature 商品和分仓区域粒度相关特征 天池 查看包 tianchi_data (tianchi_data) 已授权
数据开发-工作流-工作流节点
select 
count(*)
from tianchi_data.config;


select 
*
from tianchi_data.config limit 10;


创建结果输出的表target
create table if not  exists target (
item_id bigint,
store_code string,
target Double); 


表管理-筛选-输入target-


创建分仓结果输出的测试表target2
create table if not  exists target2 (
item_id bigint,
store_code string,
target Double); 
把查询结果保存到表
insert overwrite table target2 
select item_id,store_code,qty_alipay_njhs from tianchi_data.item_store_feature limit 10;
查询表结构
desc tianchi_data.item_feature
desc tianchi_data.item_store_feature
查询新建的表
select 
item_id, 
store_code, 
target 
from target2;


删除表中所有数据
truncate  table  target2


创建全国结果输出的测试表target3
create table if not  exists target3 (
item_id bigint,
target Double); 
统计前两周的和——查询
insert overwrite table target3
select item_id,sum(qty_alipay_njhs) from tianchi_data.item_feature where thedate >='20151214' and thedate <='20151227' group by item_id;
insert overwrite table target2
select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;


实验数据操作表
create table if not  exists test1 (
item_id bigint,
store_code string,
target Double); 


Case when 的使用方法 
--简单Case函数
CASE sex
WHEN '1' THEN '男'
WHEN '2' THEN '女'
ELSE '其他' END
--Case搜索函数
CASE WHEN sex = '1' THEN '男'
WHEN sex = '2' THEN '女'
ELSE '其他' END
---case a when b ---如果a=b返回then后面的结果
insert overwrite table test1
select item_id,case 100 when 100 then 'all' end ,target from target3 ;
查看
select count(*) from  target 
1389442
1189442
将test1数据追加到target2 
insert into table target2 
select item_id,store_code,target from test1
测试
select item_id,store_code,target from target2 where item_id=4
生成最终结果
insert into table target
select item_id,store_code,target from target2


UNION 操作符用于合并两个或多个 SELECT 语句的结果集。如果允许重复的值,请使用 UNION ALL
SELECT column_name(s) FROM table_name1
UNION ALL
SELECT column_name(s) FROM table_name2




mvn打包进入项目目录执行命令D:\eclipseHadoop1x\hadoop-test-indigoVersion-space\TianChiMapreduce 
mvn clean package


配置请全部在base.mapred.xml中完成
MapOnly程序在Map阶段直接针对每一条输入记录进行处理并输出,后续无Combiner和Reducer。要编写MapOnly程序,只要做如下两点即可:


将base.mapred.xml中的Combiner和Reducer注释或删除
Mapper中要直接输出outputRecord,不要像普通MR程序输出Map的中间结果
如何读取本地文件


ODPS的MR程序不允许直接使用本地IO,如有资源文件,可放在src/main/resources/下,然后通过TaskContext的readResourceFileAsStream方法读取。
示例:
// 假设资源文件为src/main/resources/data.txt
String resourceFileName = "src/main/resources/data.txt";
BufferedInputStream bis = new BufferedInputStream(this.getClass().getClassLoader().getResourceAsStream(resourceFileName));
BufferedReader = new BufferedReader(new InputStreamReader(bis));


实验数据操作表
create table if not  exists test2 (
item_id bigint,
store_code string,
target Double); 
增加测试数据
insert into table test2
select item_id,store_code,target from target limit 20
输出结果
create table if not  exists result (
str1 string,
str2 string); 


参考示例
drop table if exists result;
create table if not  exists result (str1 string,str2 string); 


insert into table result 


select 
    myudf(item_id) as (item_id) 
from 
    (
    select 
        * 
    from test2 tb2 
    where item_id=4
    ) tb1;
参考示例
SELECT 
    myudf(user_id,item_id,behavior_type,time,'2014-12-18',1) AS (user_id,item_id,time,feature) //返回的结果重命--方便插入其他表字段对应
FROM 
    (
    SELECT 
        * 
    FROM 
        t_mj_p_user tb2   //tb2必须有防止报错
    WHERE 
        time < '2014-12-18' 
    DISTRIBUTE BY user_id,item_id 
    SORT BY user_id,item_id,time DESC
    ) tb1;  //tb1必须有防止报错


create table if not  exists cn_submit (
item_id bigint,
store_code string,
target Double); 
insert into table cn_submit
select * from target;




create table if not  exists all_feature2 (
item_id bigint,
store_code string,
1w Double,
2w Double,
3w Double,
1m Double,
2m Double,
1d Double,
2d Double,
3d Double); 




insert into table all_feature2 
select item_id,case 100 when 100 then 'all' end as store_code,sum(dd),sum(dd2),sum(dd3),sum(dd4),sum(dd5),sum(dd6),sum(dd7),sum(dd8) from (
        select item_id,sum(qty_alipay_njhs) as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151221' and thedate <='20151227' group by item_id 
        union all
        select item_id,case when 1=1 then 0 end as dd,sum(qty_alipay_njhs) as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151214' and thedate <='20151227' group by item_id 
union all
        select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,sum(qty_alipay_njhs) as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151207' and thedate <='20151227' group by item_id 
union all
        select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,sum(qty_alipay_njhs) as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151130' and thedate <='20151227' group by item_id 
union all
       select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,sum(qty_alipay_njhs) as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate >='20151102' and thedate <='20151227' group by item_id 
union all
        select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,sum(qty_alipay_njhs) as dd6,case when 1=1 then 0 end as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where  thedate ='20151227' group by item_id 
union all
        select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,sum(qty_alipay_njhs) as dd7,case when 1=1 then 0 end as dd8 from tianchi_data.item_feature where thedate ='20151226' group by item_id 
union all
        select item_id,case when 1=1 then 0 end as dd,case when 1=1 then 0 end as dd2 ,case when 1=1 then 0 end as dd3,case when 1=1 then 0 end as dd4,case when 1=1 then 0 end as dd5,case when 1=1 then 0 end as dd6,case when 1=1 then 0 end as dd7,sum(qty_alipay_njhs) as dd8 from tianchi_data.item_feature where thedate ='20151225' group by item_id )
        t  group by item_id;

线性回归预测模型存放的地址
create table if not  exists test3 (
item_id bigint,
store_code string,
target Double); 

ceil取上整函数  floor向下取整  log取对数
insert into table test3
select item_id,store_code,floor(prediction_score) from test4;


insert into table test3
select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;


truncate  table  cn_submit;
insert into table cn_submit
select * from test3;


创建时间列表
create table if not  exists datalist as
SELECT distinct(thedate)   FROM tianchi_data.item_feature where thedate >='20141020' and thedate <='20151227' order by thedate limit 500;
清楚表重新生成
truncate  table  datalist;
insert into table datalist
SELECT distinct(thedate)   FROM tianchi_data.item_feature where thedate >='20141020' and thedate <='20151227' order by thedate limit 500;
查询
select * from datalist;
create table if not  exists datalist2 as
select thedate ,myudf() from datalist;
select * from datalist2;
创建要连接的表
create table if not  exists item_feature as 
select item_id,qty_alipay_njhs,thedate from tianchi_data.item_feature where thedate >='20141020' and thedate <='20151227'
表连接
create table if not  exists item_feature2 as 
select /* + mapjoin(a) */
a._c1 as tab,
        b.item_id,
b.thedate,
        b.qty_alipay_njhs
    from datalist2 a join item_feature b
    on a.thedate = b.thedate;

select * from item_feature2 where item_id='222784' order by thedate
按ceiling(tab/14)分组--31维度的特征
create table if not  exists item_feature4 as 
select item_id,ceil(tab/14) as tab1,sum(qty_alipay_njhs) as sum1 from item_feature2 group by item_id,ceil(tab/14)
特征建立组
create table if not  exists item_feature5 as 


create table if not  exists test3_9  as
select item_id,store_code,target from test3 order by item_id;


truncate  table  cn_submit;
insert into table cn_submit
select * from test3_9;


insert into table cn_submit
select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;


模型组合
create table if not  exists test3_91  as
select item_id,case 100 when 100 then 'all' end as store_code,floor(0.9*target) from target3;
truncate  table  cn_submit;
insert into table cn_submit
select * from test3_9;


insert into table cn_submit
select item_id,store_code,sum(qty_alipay_njhs) from tianchi_data.item_store_feature where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;


将全国最大值缩小200  总结果1389442个  max1结果1389435个
少7个值
create table if not  exists max1  as
select * from target2 where item_id not in ('985273') ;
将七个值取出
create table if not  exists max1_7  as
select * from target2 where item_id ='985273';
将其中6个放入max1  结果1389441个
insert into table max1 
select * from  max1_7 where store_code not in ('all');


select count(*) from  max1
将最大值减去200
select item_id,store_code,target-200 as target from max1_7 where item_id ='985273' and store_code='all';


提交结果
truncate  table  cn_submit;


insert into table cn_submit
select * from max1;


insert into table cn_submit
select item_id,store_code,target-200 as target from max1_7 where item_id ='985273' and store_code='all';


查看特征得知受12.25影响特别大
'985273'




将全国最大值0.97 总结果1389442个  max1结果1389435个
少7个值
create table if not  exists max2  as
select * from target2 where item_id not in ('985273') ;
将七个值取出
create table if not  exists max2_7  as
select * from target2 where item_id ='985273';
提交结果
truncate  table  cn_submit;


insert into table cn_submit
select * from max2;


insert into table cn_submit
select item_id,store_code,floor(0.97*target) as target from max2_7;




查看特征得知受12.25影响特别大  3d > (1d+2d)==>3d > 2*(1d+2d)
create table if not  exists top50x  as
select item_id,store_code,2w  from all_feature2 where 3d > (1d+2d) order by 2w desc limit 50


create table if not  exists max50  as
select * from target2 where item_id not in (
select item_id from top50x
) ;


50x7=350  1389442-350=1389092


将350个值取出
create table if not  exists max50_7  as
select * from target2 where item_id in (select item_id from top50x);


提交结果
truncate  table  cn_submit;


insert into table cn_submit
select * from max50;


insert into table cn_submit
select item_id,store_code,floor(0.97*target) as target from max50_7;




查看特征得知受12.25影响特别大  3d > (1d+2d)==>3d > 2*(1d+2d)
create table if not  exists top1000x  as
select item_id,store_code,2w  from all_feature2 where 3d > (1d+2d) order by 2w desc limit 1000;


create table if not  exists max1000  as
select * from target2 where item_id not in (
select item_id from top1000x
) ;


将值取出
create table if not  exists max1000_7  as
select * from target2 where item_id in (select item_id from top1000x);


提交结果
truncate  table  cn_submit;


insert into table cn_submit
select * from max1000;


insert into table cn_submit
select item_id,store_code,floor(0.95*target) as target from max1000_7;




平滑12.25后提交前两周和
create table if not  exists pinghua3 as
select item_id,qty_alipay_njhs,thedate from tianchi_data.item_feature where thedate not in ('20151225');
insert into table pinghua3
select item_id,qty_alipay_njhs,case when 1=1 then '20151225' end as thedate from tianchi_data.item_feature where thedate ='20151226';
create table if not  exists pinghua2 as
select item_id,store_code,qty_alipay_njhs,thedate from tianchi_data.item_store_feature  where thedate not in ('20151225');
insert into table pinghua2
select item_id,store_code,qty_alipay_njhs,case when 1=1 then '20151225' end as thedate from tianchi_data.item_store_feature where thedate ='20151226';


truncate  table  cn_submit;
insert into table cn_submit
select item_id,store_code,sum(qty_alipay_njhs) from pinghua2 where thedate>='20151214' and thedate<='20151227' group by item_id,store_code;
insert into table cn_submit
select item_id,case when 1=1 then 'all' end as store_code,sum(qty_alipay_njhs) from pinghua3 where thedate>='20151214' and thedate<='20151227' group by item_id;
0
0

查看评论
* 以上用户言论只代表其个人观点,不代表CSDN网站的观点或立场
    个人资料
    • 访问:138521次
    • 积分:2574
    • 等级:
    • 排名:第14919名
    • 原创:159篇
    • 转载:14篇
    • 译文:0篇
    • 评论:37条
    github地址
    https://github.com/lytforgood
    最新评论