主要参考:
https://book.itxueyuan.com/3b7D/open
https://book.itxueyuan.com/3b7D/6AGg
基础数据
# cat /root/xytest/pig/data/demodata
xiaoxiao,12,12.1
aaa,13,1.1
kjkj,12,12.1
ddf,19,12.8
============group by==================
grunt> A = load '/root/xytest/pig/data/demodata' using PigStorage(',') as (name:chararray,age:int,gpa:float);
grunt> B = group A by age;
grunt> dump B;
(12,{(kjkj,12,12.1),(xiaoxiao,12,12.1)})
(13,{(aaa,13,1.1)})
(19,{(ddf,19,12.8)})
group 也支持按多列分组,此时分组键(group 字段)是由这些列组成的元组:
grunt> D = group A by (age,name);
grunt> dump D;
((12,kjkj),{(kjkj,12,12.1)})
((12,xiaoxiao),{(xiaoxiao,12,12.1)})
((13,aaa),{(aaa,13,1.1)})
((19,ddf),{(ddf,19,12.8)})
============cogroup==================
数据(注意:这里 demodata 第三列带有 f 后缀,与上文的基础数据不同;Pig 在把该列转换为 float 时会忽略末尾的 f/F,所以下面的结果仍显示为 12.1 等):
[root@cdh1 data]# cat demodata
xiaoxiao,12,12.1f
aaa,13,1.1f
kjkj,12,12.1f
ddf,19,12.8f
[root@cdh1 data]# cat demodata2
xiaoxiao,99,aaaaaaaaaaaa
aaa,88,bbbbbbbbbbb
kjkj,77,ccccccccccc
ddf,66,dddddddddd
grunt> A = load '/root/xytest/pig/data/demodata' using PigStorage(',') as (name:chararray,age:int,gpa:float);
grunt> B = load '/root/xytest/pig/data/demodata2' using PigStorage(',') as (name:chararray,score:int,address:chararray);
grunt> C = cogroup A by name,B by name;
grunt> dump C;
运行结果:
(aaa,{(aaa,13,1.1)},{(aaa,88,bbbbbbbbbbb)})
(ddf,{(ddf,19,12.8)},{(ddf,66,dddddddddd)})
(kjkj,{(kjkj,12,12.1)},{(kjkj,77,ccccccccccc)})
(xiaoxiao,{(xiaoxiao,12,12.1)},{(xiaoxiao,99,aaaaaaaaaaaa)})
查看 C 的结构(schema):
grunt> describe C;
C: {group: chararray,A: {(name: chararray,age: int,gpa: float)},B: {(name: chararray,score: int,address: chararray)}}