Pig Programming Applications

Querying Temperature Data

(1) Contents of the temperature data file temperature.txt

hadoop@ddai-desktop:~$ vim temperature.txt

hadoop@ddai-desktop:~$ more temperature.txt 
1990 	21
1990 	18
1991 	21
1992 	30
1992 	999
1990 	23

(2) Finding the highest temperature for each year

grunt> copyFromLocal temperature.txt /test
grunt> records = load '/test/temperature.txt' USING PigStorage(' ') 
as (year: chararray,temperature:int);
grunt> valid_records = filter records by temperature!=999;
grunt> grouped_records = group valid_records by year;
grunt> max_temperature = foreach grouped_records 
generate group,MAX(valid_records.temperature);
grunt> dump max_temperature;
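With the sample data above, the 999 sentinel is filtered out as invalid, so the dump should print one maximum per year:

(1990,23)
(1991,21)
(1992,30)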

Alternatively, run the query as a script:

hadoop@ddai-desktop:~$ vim max_temp.pig

records = load '/test/temperature.txt' USING PigStorage(' ') 
as (year: chararray,temperature:int);
valid_records = filter records by temperature!=999;
grouped_records = group valid_records by year;
max_temperature = foreach grouped_records 
generate group,MAX(valid_records.temperature);
dump max_temperature;
hadoop@ddai-desktop:~$ pig max_temp.pig 


Writing User-Defined Functions

Writing a Custom Filter Function

Open Eclipse, create a new "Map/Reduce Project" named "TempFilter", create the class "IsValidTemp", and add the required jar packages to the build path.


import java.io.IOException;
import org.apache.pig.FilterFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.Tuple;

public class IsValidTemp extends FilterFunc {
    @Override
    public Boolean exec(Tuple tuple) throws IOException {
        // An empty input tuple cannot hold a valid temperature.
        if (tuple == null || tuple.size() == 0)
            return false;
        try {
            Object obj = tuple.get(0);
            if (obj == null)
                return false;
            // Keep the record only when the temperature is not the 999 sentinel.
            int temperature = (Integer) obj;
            return temperature != 999;
        } catch (ExecException e) {
            throw new IOException(e);
        }
    }
}
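FilterFunc is simply an EvalFunc<Boolean>: Pig calls exec once for each tuple of arguments passed to the function and keeps the input record whenever it returns true, so the class only needs to encode the validity test itself.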

Right-click "TempFilter" → "src" → "IsValidTemp", choose "Export" → "Java" → "JAR file", enter "IsValidTemp", and export the jar.
Running the custom filter function jar:

grunt> copyFromLocal /home/hadoop/workspace/IsValidTemp.jar /test
grunt> register hdfs://ddai-master:9000/test/IsValidTemp.jar;
grunt> records = load '/test/temperature.txt' USING PigStorage(' ') as (year: chararray,temperature:int);
grunt> valid_records = filter records by IsValidTemp(temperature);
grunt> dump valid_records;
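With the sample file, the dump should list every record except the invalid (1992,999):

(1990,21)
(1990,18)
(1991,21)
(1992,30)
(1990,23)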


Writing a Custom Eval Function

Open Eclipse, create a new "Map/Reduce Project" named "EvalTemp", create the class "EvalTemp", and add the jar packages (same as above).

import java.io.IOException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;

public class EvalTemp extends EvalFunc<String> {
    @Override
    public String exec(Tuple tuple) throws IOException {
        if (tuple == null || tuple.size() == 0)
            return null;
        try {
            Object object = tuple.get(0);
            int temperature = (Integer) object;
            // Map the temperature to a category label.
            if (temperature >= 30) {
                return "Hot";
            } else if (temperature >= 10) {
                return "Moderate";
            } else {
                return "Cool";
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
}

Export EvalTemp.jar, then run the custom eval function jar:

grunt> copyFromLocal /home/hadoop/workspace/EvalTemp.jar /test
grunt> register hdfs://ddai-master:9000/test/EvalTemp.jar;
grunt> result1 = foreach valid_records generate year,temperature,EvalTemp(temperature);
grunt> dump result1;
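Applying EvalTemp's thresholds (>= 30 Hot, >= 10 Moderate, otherwise Cool) to the sample data, result1 should come out as:

(1990,21,Moderate)
(1990,18,Moderate)
(1991,21,Moderate)
(1992,30,Hot)
(1990,23,Moderate)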


Writing a Custom Load Function

Open Eclipse, create a new "Map/Reduce Project" named "WordCountLoad", create the class "WordCountLoad", and add the jar packages (same as above).

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.pig.LoadFunc;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigSplit;
import org.apache.pig.data.BagFactory;
import org.apache.pig.data.DataBag;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;

public class WordCountLoad extends LoadFunc {
    protected RecordReader reader;
    TupleFactory tupleFactory = TupleFactory.getInstance();
    BagFactory bagFactory = BagFactory.getInstance();

    @Override
    public InputFormat getInputFormat() throws IOException {
        // Read the input as plain text, one record per line.
        return new TextInputFormat();
    }

    @Override
    public Tuple getNext() throws IOException {
        try {
            if (!reader.nextKeyValue())
                return null; // no more records
            Text value = (Text) reader.getCurrentValue();
            String line = value.toString();
            // Split the line on whitespace and wrap each word in its own tuple.
            String[] words = line.split("\\s+");
            List<Tuple> tuples = new ArrayList<Tuple>();
            for (String word : words) {
                Tuple tuple = tupleFactory.newTuple();
                tuple.append(word);
                tuples.add(tuple);
            }
            // Return one tuple per line, containing a bag of word tuples.
            DataBag bag = bagFactory.newDefaultBag(tuples);
            return tupleFactory.newTuple(bag);
        } catch (InterruptedException e) {
            throw new ExecException(e);
        }
    }

    @Override
    public void prepareToRead(RecordReader reader, PigSplit split) throws IOException {
        this.reader = reader;
    }

    @Override
    public void setLocation(String location, Job job) throws IOException {
        FileInputFormat.setInputPaths(job, location);
    }
}
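These four overrides cover the whole LoadFunc contract: setLocation tells Hadoop which input paths to read, getInputFormat selects line-oriented text input, prepareToRead stores the RecordReader that Pig hands in, and getNext converts each line into a single tuple whose only field is a bag of one-word tuples.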

Export WordCountLoad.jar, then run the custom load function jar:


grunt> cat /input/a1.txt
hello world
grunt> cat /input/a2.txt
good hadoop
Register the custom load jar:
grunt> copyFromLocal /home/hadoop/workspace/WordCountLoad.jar /test
grunt> register hdfs://ddai-master:9000/test/WordCountLoad.jar;
Run the load function:
grunt> records = load 'hdfs://ddai-master:9000/input' USING WordCountLoad() as (words:bag{word: (chararray)});
grunt> dump records;
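Given the loader's getNext logic, each input line should come back as one tuple holding a bag of single-word tuples:

({(hello),(world)})
({(good),(hadoop)})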


Counting

grunt> flatten_records = foreach records generate flatten($0);
grunt> grouped_records = group flatten_records by words::w;
A bug remains to be resolved here; a possible workaround is sketched below.

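A likely cause (my reading of the schema, not verified here): in the declared schema words:bag{word: (chararray)}, the inner chararray carries no alias of its own, so the flattened field cannot be referenced as words::w. Inspecting the real alias, or grouping by position, should sidestep the naming problem:

grunt> describe flatten_records;
grunt> grouped_records = group flatten_records by $0;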

result = foreach grouped_records generate group, COUNT(flatten_records);
final_result = order result by $1 desc, $0;
dump final_result;
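Assuming the grouping issue is resolved, each of the four words in the two sample files appears exactly once, so ordering by count descending and then by word should yield (good,1), (hadoop,1), (hello,1), (world,1).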

Writing a Custom Function and a Client Program

Open Eclipse, create a new "Map/Reduce Project" named "WordPig", create the class "WordUpper", and add the jar packages (same as above).
Export WordUpper.jar, upload it to HDFS, and test it:

import java.io.IOException;
import org.apache.pig.EvalFunc;
import org.apache.pig.data.Tuple;

public class WordUpper extends EvalFunc<String> {
    @Override
    public String exec(Tuple input) throws IOException {
        if (input == null || input.size() == 0)
            return null;
        try {
            // Signal liveness to the framework before doing the work.
            reporter.progress();
            String str = (String) input.get(0);
            return str.toUpperCase();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
}
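reporter is a protected field inherited from EvalFunc; calling progress() tells the framework the task is still alive, which keeps long-running evaluations from being killed as hung. It may not be set outside the backend, so a null check before the call is a sensible precaution.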

grunt> copyFromLocal /home/hadoop/workspace/WordUpper.jar /test
grunt> register hdfs://ddai-master:9000/test/WordUpper.jar;
grunt> records = load 'hdfs://ddai-master:9000/input' USING PigStorage('\n') as (line:chararray);
grunt> dump records;


grunt> result = foreach records generate WordUpper(line);
grunt> dump result;
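Since each record is a whole input line, WordUpper should return the uppercased lines:

(HELLO WORLD)
(GOOD HADOOP)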


Writing the client program WordClient.java

hadoop@ddai-desktop:~$ vim WordClient.java
import org.apache.pig.PigServer;

public class WordClient {
    public static void main(String[] args) throws Exception {
        // Connect to the cluster in MapReduce mode.
        PigServer pigServer = new PigServer("mapreduce");
        // Register the UDF jar stored on HDFS.
        pigServer.registerJar("hdfs://ddai-master:9000/test/WordUpper.jar");
        pigServer.registerQuery("A = load 'hdfs://ddai-master:9000/input' using PigStorage('\\n') as (line:chararray);");
        pigServer.registerQuery("B = foreach A generate WordUpper(line);");
        // store() triggers execution and writes the result to HDFS.
        pigServer.store("B", "hdfs://ddai-master:9000/output/all");
    }
}
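PigServer is Pig's embedded-programming entry point: each registerQuery call adds a statement to the logical plan, and nothing executes until store() (or another action such as openIterator) forces the plan to compile and run as MapReduce jobs.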
hadoop@ddai-desktop:~$ vim w.sh
#!/bin/bash
HADOOP_HOME=/opt/hadoop-2.8.5
PIG_HOME=/opt/pig-0.17.0
CLASSPATH=.:$PIG_HOME/conf:$(hadoop classpath)
for i in ${PIG_HOME}/lib/*.jar ; do
    CLASSPATH=$CLASSPATH:$i
done
CLASSPATH=$CLASSPATH:$PIG_HOME/pig-0.17.0-core-h2.jar
java -cp $CLASSPATH WordClient                  

Run:

hadoop@ddai-desktop:~$ javac -cp /opt/pig-0.17.0/pig-0.17.0-core-h2.jar WordClient.java
hadoop@ddai-desktop:~$ sh w.sh


hadoop@ddai-desktop:~$ hdfs dfs -cat /output/all/part-m-00000
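If the job succeeded, the output file should contain the uppercased input lines:

HELLO WORLD
GOOD HADOOP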

