1.创建并上传数据到HDFS
2.加载
-- Load the input file from HDFS into the relation `records`, declaring a
-- three-field schema: year (chararray), temperature (int), quality (int).
-- No USING clause is given, so Pig's default load function is used.
records = LOAD 'hdfs://127.0.0.1:8020/pig/pigtestdata'
          AS (year:chararray, temperature:int, quality:int);
-- Print every tuple of `records` to the console (this triggers execution).
DUMP records;
-- Print the schema of `records`. Every Pig statement must end with ';',
-- otherwise Grunt keeps waiting for more input.
DESCRIBE records;
3.过滤
-- Keep only the tuples whose temperature is non-negative and whose
-- quality field equals 2.
filter_records = FILTER records BY (temperature >= 0) AND (quality == 2);
-- Print the filtered tuples; the trailing ';' is required to end the statement.
DUMP filter_records;
4.分组
-- Group the tuples of `records` by year. Each output tuple has two fields:
-- `group` (the year key) and `records` (a bag of the tuples for that year).
-- NOTE(review): this groups the unfiltered `records` relation, not
-- `filter_records` -- confirm that is intended.
group_records = GROUP records BY year;
-- Print the grouped relation. The alias to dump is `group_records`;
-- `group` is only the name of the key field inside it, not a relation.
DUMP group_records;
-- Print the schema of the grouped relation.
DESCRIBE group_records;
5.数据变换
-- For each year group, emit the group key and the maximum temperature found
-- in that group's bag of tuples (`records.temperature` projects the
-- temperature field out of the bag).
max_temperature = FOREACH group_records
                  GENERATE group, MAX(records.temperature);
-- Print the per-year maxima; the trailing ';' is required to end the statement.
DUMP max_temperature;