为了使用Hadoop中没有提供的数据类型,需要自己定义一个,类似于C语言中的“自定义结构体”。
设置需要建立的成员变量,设定相应的变量类型。
需要编写一个空的构造函数和一个带参数的构造函数(使用this给变量进行赋值),还需要有一个整体的set函数。然后为各个变量分别编写对应的get与set方法。接下来是编写readFields函数和write函数(两者读写字段的顺序必须一致)。到此为止,自定义数据类型部分完成。下一步需要编写对应的InputFormat类,实现自定义数据的读取。
输入的数据是以文件形式存储的,所以就继承FileInputFormat来实现。首先继承FileInputFormat基类,然后重写isSplitable方法,用来判断是否需要分片。然后重写createRecordReader方法。这里,我们希望自定义的RecordReader的key是Text,value是之前定义的VideoPlayData,需要新建一个RecordReader实例,取名为VideoPlayRecorReader。
固定格式:
定义成员变量 均为public
public LineReader in;//行读取器
public Text lineKey;//自定义key类型
public 之前定义的数据类型 lineValue;//自定义value类型
public Text line;//每行数据类型
关闭输入流 close()
获取当前的key,getCurrentKey()
获取当前的value,注意返回值应该是自定义的数据类型 getCurrentValue()
获取当前进度,getProgress()
初始化,initialize(),主要在这个方法里面打开输入文件,并创建行读取器以及key和value对象。
读取下一行数据,将解析出的Key和Value填写到Key,Value对中,nextKeyValue()
完成。
package com.dajiangtai.hadoop.test;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;
/**
 * Custom Hadoop data type holding five integer counters of one video record:
 * play, collect, comment, against and support numbers.
 *
 * <p>{@link #write(DataOutput)} and {@link #readFields(DataInput)} serialize
 * the counters in exactly that order; the two methods must be kept in sync.
 */
public class VideoPlayData implements WritableComparable<Object>{
    private int playnumber;
    private int collectnumber;
    private int commentnumber;
    private int againstnumber;
    private int supportnumber;

    /** Creates an instance with every counter at 0; fill it via {@code set} or {@code readFields}. */
    public VideoPlayData(){}

    /**
     * Creates an instance from four counters; the support number is left at 0.
     *
     * @param playnumber    number of plays
     * @param collectnumber number of collects
     * @param commentnumber number of comments
     * @param againstnumber number of "against" votes
     */
    public VideoPlayData(int playnumber ,int collectnumber , int commentnumber ,int againstnumber){
        this(playnumber, collectnumber, commentnumber, againstnumber, 0);
    }

    /**
     * Creates an instance with all five counters set.
     *
     * @param playnumber    number of plays
     * @param collectnumber number of collects
     * @param commentnumber number of comments
     * @param againstnumber number of "against" votes
     * @param supportnumber number of "support" votes
     */
    public VideoPlayData(int playnumber, int collectnumber, int commentnumber, int againstnumber,
            int supportnumber){
        this.playnumber = playnumber;
        this.collectnumber = collectnumber;
        this.commentnumber = commentnumber;
        this.againstnumber = againstnumber;
        this.supportnumber = supportnumber;
    }

    /**
     * Replaces all five counters at once.
     *
     * @param playnumber    number of plays
     * @param collectnumber number of collects
     * @param commentnumber number of comments
     * @param againstnumber number of "against" votes
     * @param supportnumber number of "support" votes
     */
    public void set(int playnumber, int collectnumber , int commentnumber ,int againstnumber ,
            int supportnumber){
        this.playnumber = playnumber;
        this.collectnumber = collectnumber;
        // Previously missing: the comment counter was silently dropped.
        this.commentnumber = commentnumber;
        this.againstnumber = againstnumber;
        this.supportnumber = supportnumber;
    }

    /** @return the play counter */
    public int getPlaynumber() {
        return playnumber;
    }

    /** @param playnumber new play counter */
    public void setPlaynumber(int playnumber){
        this.playnumber = playnumber;
    }

    /** @return the collect counter */
    public int getCollectnumber(){
        // Previously returned playnumber by mistake.
        return collectnumber;
    }

    /** @param collectnumber new collect counter */
    public void setCollectnumber(int collectnumber){
        this.collectnumber = collectnumber;
    }

    /** @return the comment counter */
    public int getCommentnumber(){
        return commentnumber;
    }

    /** @param commentnumber new comment counter */
    public void setCommentnumber(int commentnumber){
        this.commentnumber = commentnumber;
    }

    /** @return the "against" counter */
    public int getAgainstnumber(){
        return againstnumber;
    }

    /** @param againstnumber new "against" counter */
    public void setAgainstnumber(int againstnumber){
        this.againstnumber = againstnumber;
    }

    /** @return the "support" counter */
    public int getSupportnumber(){
        return supportnumber;
    }

    /** @param supportnumber new "support" counter */
    public void setSupportnumber(int supportnumber){
        this.supportnumber = supportnumber;
    }

    /**
     * Reads the five counters from {@code in}, in the order written by {@link #write(DataOutput)}.
     *
     * @param in source to read from
     * @throws IOException if reading any of the five ints fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        playnumber = in.readInt();
        collectnumber = in.readInt();
        commentnumber = in.readInt();
        againstnumber = in.readInt();
        supportnumber = in.readInt();
    }

    /**
     * Writes the five counters to {@code out} as ints: play, collect, comment, against, support.
     *
     * @param out sink to write to
     * @throws IOException if writing any of the five ints fails
     */
    @Override
    public void write(DataOutput out)throws IOException{
        out.writeInt(playnumber);
        out.writeInt(collectnumber);
        out.writeInt(commentnumber);
        out.writeInt(againstnumber);
        out.writeInt(supportnumber);
    }

    /**
     * Orders instances by play, collect, comment, against and support counters, in that priority.
     *
     * @param o the object to compare with; must be a {@code VideoPlayData}
     * @return a negative, zero or positive value as this instance is less than, equal to or
     *         greater than {@code o}
     * @throws ClassCastException   if {@code o} is not a {@code VideoPlayData}
     * @throws NullPointerException if {@code o} is null
     */
    @Override
    public int compareTo(Object o) {
        VideoPlayData other = (VideoPlayData) o;
        // Integer.compare avoids the overflow that subtraction-based comparison can cause.
        int result = Integer.compare(playnumber, other.playnumber);
        if (result != 0) {
            return result;
        }
        result = Integer.compare(collectnumber, other.collectnumber);
        if (result != 0) {
            return result;
        }
        result = Integer.compare(commentnumber, other.commentnumber);
        if (result != 0) {
            return result;
        }
        result = Integer.compare(againstnumber, other.againstnumber);
        if (result != 0) {
            return result;
        }
        return Integer.compare(supportnumber, other.supportnumber);
    }

    /** Two instances are equal when all five counters match (consistent with {@code compareTo}). */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof VideoPlayData)) {
            return false;
        }
        VideoPlayData other = (VideoPlayData) obj;
        return playnumber == other.playnumber
                && collectnumber == other.collectnumber
                && commentnumber == other.commentnumber
                && againstnumber == other.againstnumber
                && supportnumber == other.supportnumber;
    }

    /** Hash over the same five counters used by {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        int result = playnumber;
        result = 31 * result + collectnumber;
        result = 31 * result + commentnumber;
        result = 31 * result + againstnumber;
        result = 31 * result + supportnumber;
        return result;
    }
} // custom data format
package com.dajiangtai.hadoop.test;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.util.LineReader;
//import com.dajiangtai.hadoop.test.VideoPlay.VideoPlayInputformate.VideoPlayRecorReader;
public class VideoPlayInputformate extends FileInputFormat<Text, VideoPlayData>{
//�����Ƿ��з�
@Override
protected boolean isSplitable(JobContext context, Path filename){
// TODO Auto-generated method stub
return false;
}
@Override
public RecordReader<Text, VideoPlayData> createRecordReader(InputSplit input, TaskAttemptContext context)throws IOException,InterruptedException{
// TODO Auto-generated method stub
return new VideoPlayRecorReader();
}
public class VideoPlayRecorReader extends RecordReader<Text, VideoPlayData>{
public LineReader in;
public Text line;
public Text lineKey;
public VideoPlayData lineValue;
public void close()throws IOException{
if(in!=null){
in.close();
}
}//�ر������
@Override
public Text getCurrentKey()throws IOException,InterruptedException{
return lineKey;
}
@Override
public VideoPlayData getCurrentValue()throws IOException,InterruptedException{
return lineValue;
}
@Override
public float getProgress()throws IOException,InterruptedException {
return 0;
}
@Override
public void initialize(InputSplit input,TaskAttemptContext context) throws IOException,InterruptedException{
FileSplit split = (FileSplit)input;
Configuration job= context.getConfiguration();
Path file = split.getPath();
FileSystem fs = file.getFileSystem(job);
FSDataInputStream filein = fs.open(file);
in = new LineReader(filein, job);
line = new Text();
lineKey = new Text();
lineValue = new VideoPlayData();
}
public boolean nextKeyValue()throws IOException,InterruptedException{
int linesize = in.readLine(line);
if(linesize == 0){
return false;
}
String[] pieces = line.toString().split("\t");
if (pieces.length !=7){
throw new IOException("Invalid record received");
}
lineKey.set(pieces[0]+"\t"+pieces[1]);
lineValue.set(Integer.parseInt(pieces[2]), Integer.parseInt(pieces[3]), Integer.parseInt(pieces[4]), Integer.parseInt(pieces[5]), Integer.parseInt(pieces[6]));
return true;
}
}
}