Hadoop中构建自定义的数据类型

       为了使用Hadoop中没有提供的数据类型,需要自己创建一个,类似于C语言中的“自定义结构体”。

       设置需要建立的成员变量,设定相应的变量类型。

        需要编写一个空的构造函数(Hadoop通过反射创建实例时需要用到无参构造函数),以及一个带参数的构造函数,使用this给变量进行赋值,还需要有一个整体的set函数。然后单独对各个变量编写对应的get与set方法。接下来是编写readFields函数和write函数,两者读写各个字段的顺序必须保持一致。到此为止,自定义数据类型部分完成。下一步需要编写对应的InputFormat类,实现自定义数据的读取。

        输入的数据是以文件形式存储的,所以就继承FileInputFormat来实现。首先继承FileInputFormat基类,然后重写isSplitable方法,决定文件是否需要分片。然后重写createRecordReader方法。这里,我们希望自定义的RecordReader的key是Text,value是之前定义的VideoPlayData,因此需要新建一个RecordReader的实现类,取名为VideoPlayRecorReader。

固定格式:

定义成员变量 均为public  

        public LineReader in;//行读取器
        public Text lineKey;//自定义key类型
        public 之前定义的数据类型 lineValue;//自定义value类型
        public Text line;//每行数据类型

关闭输入流  close()

获取当前的key,getCurrentKey()

获取当前的value,注意返回值应该是自定义的数据类型 getCurrentValue()

获取当前进度,getProgress()

初始化,initialize(),主要在这个方法里面打开输入文件,并创建行读取器以及key、value对象。

读取一行数据,并将解析出的Key和Value填写到Key,Value对中,nextKeyValue()

完成。

package com.dajiangtai.hadoop.test;


import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;


import org.apache.hadoop.io.WritableComparable;


public class VideoPlayData implements WritableComparable<Object>{
/**
* @param args
*/
private int playnumber;
private int collectnumber;
private int commentnumber;
private int againstnumber;
private int supportnumber;

public VideoPlayData(){}

public VideoPlayData(int playnumber ,int collectnumber , int commentnumber ,int againstnumber){
this.playnumber = playnumber;
this.collectnumber = collectnumber;
this.commentnumber = commentnumber;
this.againstnumber = againstnumber;
}//�����Ĺ��캯��

public void set(int playnumber, int collectnumber , int commentnumber ,int againstnumber ,
int supportnumber){
this.playnumber = playnumber;
this.collectnumber = collectnumber;
this.againstnumber = againstnumber;
this.supportnumber = supportnumber;

}

public int  getPlaynumber() {
return playnumber;

}

public void setPlaynumber(int playnumber){
this.playnumber = playnumber;

}

public int  getCollectnumber(){
return playnumber;
}

public void setCollectnumber(int collectnumber){
this.collectnumber = collectnumber;
}

public int getCommentnumber(){
return commentnumber;
}

public void setCommentnumber(int commentnumber){
this.commentnumber = commentnumber;
}

public int getAgainstnumber(){
return againstnumber;
}

public void setAgainstnumber(int againstnumber){
this.againstnumber = againstnumber;
}

public int getSupportnumber(){
return supportnumber;

}
public void setSupportnumber(int supportnumber){
this.supportnumber = supportnumber;

}

@Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
playnumber = in.readInt();
collectnumber = in.readInt();
commentnumber = in.readInt();
againstnumber = in.readInt();
supportnumber = in.readInt();

}
@Override
public void write(DataOutput out)throws IOException{
out.writeInt(playnumber);
out.writeInt(collectnumber);
out.writeInt(commentnumber);
out.writeInt(againstnumber);
out.writeInt(supportnumber);

}
@Override
public int compareTo(Object o) {
// TODO Auto-generated method stub
return 0;
}
}//自定义数据格式



package com.dajiangtai.hadoop.test;


import java.io.IOException;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.LineReader;


//import com.dajiangtai.hadoop.test.VideoPlay.VideoPlayInputformate.VideoPlayRecorReader;


public class VideoPlayInputformate extends FileInputFormat<Text, VideoPlayData>{
//�����Ƿ��з�
@Override
protected boolean isSplitable(JobContext context, Path filename){
// TODO Auto-generated method stub
return false;

}
@Override
public RecordReader<Text, VideoPlayData> createRecordReader(InputSplit input, TaskAttemptContext context)throws IOException,InterruptedException{
// TODO Auto-generated method stub
return new VideoPlayRecorReader();


}

public  class VideoPlayRecorReader extends RecordReader<Text, VideoPlayData>{
public LineReader in;
public Text line;
public Text lineKey;
public VideoPlayData lineValue;

public void close()throws IOException{
if(in!=null){
in.close();
}
}//�ر������
@Override
public Text getCurrentKey()throws IOException,InterruptedException{
return lineKey;

}
@Override
public VideoPlayData getCurrentValue()throws IOException,InterruptedException{
return lineValue;

}
@Override
public float getProgress()throws IOException,InterruptedException {
return 0;
}



@Override
public  void initialize(InputSplit input,TaskAttemptContext context) throws IOException,InterruptedException{
FileSplit split = (FileSplit)input;
Configuration job= context.getConfiguration();
Path file = split.getPath();
FileSystem fs = file.getFileSystem(job);

FSDataInputStream filein = fs.open(file);
in = new LineReader(filein, job);
line = new Text();
lineKey = new Text();
lineValue = new VideoPlayData();


}

public boolean nextKeyValue()throws IOException,InterruptedException{
int linesize = in.readLine(line);
if(linesize == 0){
return false;
}
String[] pieces = line.toString().split("\t");

if (pieces.length !=7){
throw new IOException("Invalid record received");

}
lineKey.set(pieces[0]+"\t"+pieces[1]);
lineValue.set(Integer.parseInt(pieces[2]), Integer.parseInt(pieces[3]), Integer.parseInt(pieces[4]), Integer.parseInt(pieces[5]), Integer.parseInt(pieces[6]));
return true;

}
}
}






评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值