A Detailed Introduction to MapReduce (Part 3)

Join Operations

Map-Side Join - JoinMapSideMR

Problem:
Join the matching lines of two files into a single output file.
Approach:
Define two mappers, FirstStepMapper and JoinMapper. Two preparatory jobs each run FirstStepMapper over one of the input files; their identity reducers sort the records by key so that both outputs are partitioned and ordered the same way. A third, map-only job then merges the two sorted outputs with CompositeInputFormat and runs JoinMapper, which concatenates the values that share a key.
Note: map-side joins are rarely used in practice; reduce-side joins are generally preferred.
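For illustration, suppose (hypothetical data) the first file contains the lines 1,tom and 2,jack, and the second file contains 1,beijing and 2,shanghai. With "," configured as the key/value separator, the preparatory jobs sort each file by id, and the join job then emits 1 tom|beijing and 2 jack|shanghai (key and joined value separated by a tab).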

 


public class JoinMapSideMR extends Configured implements Tool {

    // First-step mapper: emits every non-empty line as the key so that the
    // identity reducer's shuffle sorts the records before the join.
    public static class FirstStepMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            if (!value.toString().equals("")) {
                context.write(value, NullWritable.get());
            }
        }
    }

    // Mapper that reads the records already merged by CompositeInputFormat
    public static class JoinMapper extends Mapper<Text, TupleWritable, Text, Text> {
        @Override
        protected void map(Text key, TupleWritable value, Context context) throws IOException, InterruptedException {
            String v = StreamSupport.stream(value.spliterator(), false)
                    .map(s -> ((Text) s).toString())
                    .collect(Collectors.joining("|"));
            context.write(key, new Text(v));
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Path inpath1 = new Path(conf.get("inpath1"));
        Path inpath2 = new Path(conf.get("inpath2"));
        Path mr1 = new Path(conf.get("mr1"));
        Path mr2 = new Path(conf.get("mr2"));
        Path outpath = new Path(conf.get("outpath"));
        // ---------- job1: sort the first input ----------
        Job job1 = Job.getInstance(conf, "first_step1_xj");
        job1.setJarByClass(this.getClass());
        job1.setMapperClass(FirstStepMapper.class);
        job1.setMapOutputKeyClass(Text.class);
        job1.setMapOutputValueClass(NullWritable.class);
        job1.setReducerClass(Reducer.class); // identity reducer, present only so the shuffle sorts the data
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(NullWritable.class);
        TextInputFormat.addInputPath(job1, inpath1);
        TextOutputFormat.setOutputPath(job1, mr1);
        FileOutputFormat.setOutputCompressorClass(job1, GzipCodec.class);
        // ---------- job2: sort the second input ----------
        Job job2 = Job.getInstance(conf, "first_step2_xj");
        job2.setJarByClass(this.getClass());
        job2.setMapperClass(FirstStepMapper.class);
        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(NullWritable.class);
        job2.setReducerClass(Reducer.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(NullWritable.class);
        TextInputFormat.addInputPath(job2, inpath2);
        TextOutputFormat.setOutputPath(job2, mr2);
        FileOutputFormat.setOutputCompressorClass(job2, GzipCodec.class);
        // ---------- job3: map-only join over the two sorted outputs ----------
        Job job3 = Job.getInstance(conf, "map_join_xj");
        job3.setJarByClass(this.getClass());
        job3.setMapperClass(JoinMapper.class);
        job3.setMapOutputKeyClass(Text.class);
        job3.setMapOutputValueClass(Text.class);
        job3.setNumReduceTasks(0); // map-only job
        job3.getConfiguration().set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ",");
        // "inner" yields an inner join of the two sorted inputs
        String expr = CompositeInputFormat.compose("inner", KeyValueTextInputFormat.class, mr1, mr2);
        job3.getConfiguration().set("mapreduce.join.expr", expr);
        job3.setInputFormatClass(CompositeInputFormat.class);
        TextOutputFormat.setOutputPath(job3, outpath);

        List<Job> list = new ArrayList<>();
        list.add(job1);
        list.add(job2);
        list.add(job3);
        for (Job job : list) {
            boolean succ = job.waitForCompletion(true);
            if (!succ) {
                System.out.println(job.getJobName() + ":" + job.getJobState().getValue());
                return 1; // stop the chain and report the failure
            }
        }
        return 0;
    }

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new JoinMapSideMR(), args);
    }
}
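Since the driver reads all five paths from the configuration, they can be passed as generic options on the command line. A hypothetical invocation (the jar name and paths are placeholders):

hadoop jar mr-demo.jar JoinMapSideMR -D inpath1=/data/a.txt -D inpath2=/data/b.txt -D mr1=/tmp/mr1 -D mr2=/tmp/mr2 -D outpath=/out/join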

Reduce-Side Join - JoinReduceSideMR

Problem:
Join the matching lines of two files into a single output file.
Approach:
Define two mappers, FirstMapper and SecondMapper, both of which emit a composite key holding the join id plus a tag that identifies the source file (0 for the first file, 1 for the second). A custom partitioner and a custom grouping comparator look at the id only, so records with the same id from both files reach the same reduce() call, while the tag acts as a secondary sort field that guarantees the first file's record arrives first. The reducer then performs the concatenation.
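For illustration, suppose (hypothetical data) the first file contains 1,tom and the second contains 1,beijing and 1,shanghai. The mappers emit the tagged keys (1,0)->tom, (1,1)->beijing and (1,1)->shanghai; grouping on the id alone puts all three values into one reduce() call, and the secondary sort on the tag guarantees tom arrives first, so the reducer emits 1 tom|beijing and 1 tom|shanghai.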

Composite key type - ArtistIDTag

 


public class ArtistIDTag implements WritableComparable<ArtistIDTag> {
    private Text ArtistID = new Text();          // join id
    private IntWritable Tag = new IntWritable(); // tag identifying the source file

    public ArtistIDTag() {
    }

    public ArtistIDTag(Text artistID, IntWritable tag) {
        this.ArtistID = new Text(artistID.toString());
        this.Tag = new IntWritable(tag.get());
    }

    public Text getArtistID() {
        return ArtistID;
    }

    public void setArtistID(Text artistID) {
        this.ArtistID = new Text(artistID.toString());
    }

    public IntWritable getTag() {
        return Tag;
    }

    public void setTag(IntWritable tag) {
        this.Tag = new IntWritable(tag.get());
    }

    @Override
    public int compareTo(ArtistIDTag o) {
        // sort by id first, then by tag (the secondary sort)
        return this.ArtistID.compareTo(o.ArtistID) == 0
                ? this.Tag.compareTo(o.Tag)
                : this.ArtistID.compareTo(o.ArtistID);
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        ArtistID.write(dataOutput);
        Tag.write(dataOutput);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        ArtistID.readFields(dataInput);
        Tag.readFields(dataInput);
    }
}

 

Custom partitioner - ArtistPartitioner

 


public class ArtistPartitioner extends Partitioner<ArtistIDTag, Text> {
    @Override
    public int getPartition(ArtistIDTag artistIDTag, Text text, int numPartitions) {
        // Partition on the id only, so matching records from both files meet in
        // the same reducer. Masking with Integer.MAX_VALUE keeps the result
        // non-negative even when the multiplication overflows (Math.abs alone
        // fails for Integer.MIN_VALUE).
        return ((artistIDTag.getArtistID().hashCode() * 127) & Integer.MAX_VALUE) % numPartitions;
    }
}

 

Custom grouping comparator - ArtistGroupComparator

 


public class ArtistGroupComparator extends WritableComparator {
    public ArtistGroupComparator() {
        // true => create key instances so compare(WritableComparable, WritableComparable)
        // can be used for grouping
        super(ArtistIDTag.class, true);
    }

    @Override
    public int compare(WritableComparable a, WritableComparable b) {
        // group on the id only; ignore the tag
        ArtistIDTag at1 = (ArtistIDTag) a;
        ArtistIDTag at2 = (ArtistIDTag) b;
        return at1.getArtistID().compareTo(at2.getArtistID());
    }
}

 

The join job - JoinReduceSideMR

 


public class JoinReduceSideMR extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new JoinReduceSideMR(), args);
    }

    // Tags records from the first file with 0
    public static class FirstMapper extends Mapper<LongWritable, Text, ArtistIDTag, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            Stream.of(value.toString()).filter(s -> s.length() > 0).forEach(ExceptionConsumer.of(s -> {
                String id = s.substring(0, s.indexOf(","));
                String info = s.substring(s.indexOf(",") + 1);
                context.write(new ArtistIDTag(new Text(id), new IntWritable(0)), new Text(info));
            }));
        }
    }

    // Tags records from the second file with 1
    public static class SecondMapper extends Mapper<LongWritable, Text, ArtistIDTag, Text> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            Stream.of(value.toString()).filter(s -> s.length() > 0).forEach(ExceptionConsumer.of(s -> {
                String id = s.substring(0, s.indexOf(","));
                String info = s.substring(s.indexOf(",") + 1);
                context.write(new ArtistIDTag(new Text(id), new IntWritable(1)), new Text(info));
            }));
        }
    }

    public static class JoinReducer extends Reducer<ArtistIDTag, Text, Text, Text> {
        @Override
        protected void reduce(ArtistIDTag key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Thanks to the secondary sort on the tag, the first value always comes
            // from the first file; every following value comes from the second.
            Iterator<Text> ite = values.iterator();
            String name = ite.next().toString();
            while (ite.hasNext()) {
                String info = name + "|" + ite.next().toString();
                context.write(key.getArtistID(), new Text(info));
            }
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf, "join_reduce_xj");
        job.setJarByClass(this.getClass());
        // Multiple inputs, each with its own mapper
        MultipleInputs.addInputPath(job, new Path(conf.get("inpath1")), TextInputFormat.class, FirstMapper.class);
        MultipleInputs.addInputPath(job, new Path(conf.get("inpath2")), TextInputFormat.class, SecondMapper.class);
        job.setMapOutputKeyClass(ArtistIDTag.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(JoinReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        TextOutputFormat.setOutputPath(job, new Path(conf.get("outpath")));
        // Partition on the id only
        job.setPartitionerClass(ArtistPartitioner.class);
        // Group on the id only
        job.setGroupingComparatorClass(ArtistGroupComparator.class);
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
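The two mappers above wrap their lambda bodies in ExceptionConsumer.of(...). That helper is defined elsewhere in this series, not in this post; a minimal sketch of what it presumably looks like (hypothetical reconstruction) is:

import java.util.function.Consumer;

// Hypothetical sketch: adapts a consumer that throws checked exceptions
// (such as context.write) to java.util.function.Consumer, which forEach expects.
@FunctionalInterface
public interface ExceptionConsumer<T> {
    void accept(T t) throws Exception;

    static <T> Consumer<T> of(ExceptionConsumer<T> consumer) {
        return t -> {
            try {
                consumer.accept(t);
            } catch (Exception e) {
                throw new RuntimeException(e); // rethrow unchecked
            }
        };
    }
}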

 

Database Interaction

Reading - DBtoHdfsMR

Problem:
Read data from a MySQL database and write it to HDFS.
Approach:

  1. Define a composite type that implements the DBWritable interface, with fields matching the table's column names.
  2. Use that composite type as the map-input value type.
  3. Make the JDBC driver class available to the cluster.
  4. Configure the connection to the database.
  5. Set the input format to DBInputFormat.
Composite type - YearStationTempDB
Note: implementing WritableComparable is only required when the type is also used for writing (as in the next example); since this job only reads, that part could be removed.
 


public class YearStationTempDB implements DBWritable, WritableComparable<YearStationTempDB> {
    private int year;          // year
    private String station;    // weather station id
    private int temperature;   // air temperature

    public YearStationTempDB() {
    }

    public YearStationTempDB(int year, String station, int temperature) {
        this.year = year;
        this.station = station;
        this.temperature = temperature;
    }

    // DBWritable: write one row to the prepared INSERT statement
    @Override
    public void write(PreparedStatement prep) throws SQLException {
        prep.setInt(1, year);
        prep.setString(2, station);
        prep.setInt(3, temperature);
    }

    // DBWritable: read one row from the query result set
    @Override
    public void readFields(ResultSet rs) throws SQLException {
        this.year = rs.getInt("year");
        this.station = rs.getString("station");
        this.temperature = rs.getInt("temperature");
    }

    public int getYear() {
        return year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public String getStation() {
        return station;
    }

    public void setStation(String station) {
        this.station = station;
    }

    public int getTemperature() {
        return temperature;
    }

    public void setTemperature(int temperature) {
        this.temperature = temperature;
    }

    @Override
    public String toString() {
        return year + "," + station + "," + temperature;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeInt(year);
        dataOutput.writeUTF(station);
        dataOutput.writeInt(temperature);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        year = dataInput.readInt();
        station = dataInput.readUTF();
        temperature = dataInput.readInt();
    }

    @Override
    public int compareTo(YearStationTempDB o) {
        // sort by year first, then by station id
        return this.year == o.year
                ? this.station.compareTo(o.station)
                : Integer.compare(this.year, o.year);
    }
}

Driver - DBtoHdfsMR

 


public class DBtoHdfsMR extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new DBtoHdfsMR(), args);
    }

    public static class DBMapper extends Mapper<LongWritable, YearStationTempDB, LongWritable, Text> {
        @Override
        protected void map(LongWritable key, YearStationTempDB value, Context context) throws IOException, InterruptedException {
            context.write(key, new Text(value.toString()));
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf, "dbtohdfs_xj");
        job.setJarByClass(this.getClass());
        job.setMapperClass(DBMapper.class);
        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(Text.class);
        // Ways to make the cluster load the JDBC driver class:
        // 1. Drop the jar into share/hadoop/yarn/; it is shipped to the cluster automatically.
        // 2. Put the jar into the cluster's lib directory and restart the cluster.
        // 3. job.addFileToClassPath(), which requires the jar to be uploaded to HDFS first.
        //job.addFileToClassPath(new Path("hdfs://172.16.0.4:9000/data/mysql-connector-java-5.1.38.jar"));
        // Configure the database connection
        DBConfiguration.configureDB(job.getConfiguration(), "com.mysql.jdbc.Driver",
                "jdbc:mysql://172.16.0.100:3306/hadoop", "hadoop", "hadoop");
        job.setInputFormatClass(DBInputFormat.class); // read input from the database
        job.setOutputFormatClass(TextOutputFormat.class);
        // "year = 2000" is a WHERE condition: only rows with year = 2000 are read
        DBInputFormat.setInput(job, YearStationTempDB.class, "station_tbl", "year = 2000", "", "year", "station", "temperature");
        TextOutputFormat.setOutputPath(job, new Path(conf.get("outpath")));
        job.setNumReduceTasks(1);
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
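As before, the output path comes from the configuration, so a hypothetical launch (the jar name is a placeholder) would be: hadoop jar mr-demo.jar DBtoHdfsMR -D outpath=/out/db, with the MySQL connector jar made available through one of the three options listed in the comments above.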

 

Writing - HdfstoDBMR

Problem:
Read data from HDFS and write it to a MySQL database.
Approach:

  1. Reuse a composite type whose fields match the table's column names; for database output it must implement DBWritable (YearStationTempDB above also implements WritableComparable and overrides compareTo()).
  2. Use that composite type as the reduce-output key type.
  3. Make the JDBC driver class available to the cluster.
  4. Configure the connection to the database.
  5. Set the output format to DBOutputFormat.
 


public class HdfstoDBMR extends Configured implements Tool {

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new HdfstoDBMR(), args);
    }

    public static class HTDMapper extends Mapper<LongWritable, Text, YearStation, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            NcdcRecordParser parser = new NcdcRecordParser();
            parser.parse(() -> value.toString()).ifPresent(ExceptionConsumer.of(p -> {
                int year = p.getYear();
                String stationId = p.getStationId();
                int temp = p.getAirTemperature();
                YearStation ys = new YearStation(year + "", stationId);
                context.write(ys, new IntWritable(temp));
            }));
        }
    }

    public static class HTDReducer extends Reducer<YearStation, IntWritable, YearStationTempDB, NullWritable> {
        @Override
        protected void reduce(YearStation key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            // Find the maximum temperature for this (year, station) key
            Stream<IntWritable> stream = StreamSupport.stream(values.spliterator(), false);
            Integer max = stream.map(s -> s.get()).reduce(0, (x, y) -> Math.max(x, y));
            int y = Integer.parseInt(key.getYear().toString());
            YearStationTempDB yst = new YearStationTempDB(y, key.getStationid().toString(), max);
            context.write(yst, NullWritable.get());
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf, "hdfstodb_xj");
        job.setJarByClass(this.getClass());
        job.setMapperClass(HTDMapper.class);
        job.setMapOutputKeyClass(YearStation.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(HTDReducer.class);
        job.setOutputKeyClass(YearStationTempDB.class);
        job.setOutputValueClass(NullWritable.class);
        // Ways to make the cluster load the JDBC driver class:
        // 1. Drop the jar into share/hadoop/yarn/; it is shipped to the cluster automatically.
        // 2. Put the jar into the cluster's lib directory and restart the cluster.
        // 3. job.addFileToClassPath(), which requires the jar to be uploaded to HDFS first.
        //job.addFileToClassPath(new Path("hdfs://172.16.0.4:9000/data/mysql-connector-java-5.1.38.jar"));
        // Configure the database connection
        DBConfiguration.configureDB(job.getConfiguration(), "com.mysql.jdbc.Driver",
                "jdbc:mysql://172.16.0.100:3306/hadoop", "hadoop", "hadoop");
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(DBOutputFormat.class);
        TextInputFormat.addInputPath(job, new Path(conf.get("inpath")));
        DBOutputFormat.setOutput(job, "max_tmp_xj", "year", "station", "temperature");
        return job.waitForCompletion(true) ? 0 : 1;
    }
}
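Note that DBOutputFormat does not create the target table: max_tmp_xj must already exist in the hadoop database with columns year, station, and temperature whose types match the composite class (presumably an integer year, a string station id, and an integer temperature). Each record the reducer emits becomes one row inserted through a prepared statement.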

Input Types - InputFormat

Common input types

  1. TextInputFormat: reads data line by line as plain strings.

     job.setInputFormatClass(TextInputFormat.class);
     TextInputFormat.addInputPath(job, new Path(conf.get("inpath")));

  2. CombineTextInputFormat: packs many small input files into a single split so that a separate map task is not started for every file.

     job.setInputFormatClass(CombineTextInputFormat.class);
     CombineFileInputFormat.addInputPath(job, new Path(conf.get("inpath")));

  3. KeyValueTextInputFormat: reads each line as a key-value pair.

     conf.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ","); // key/value separator; only the first occurrence on each line is recognized
     job1.setInputFormatClass(KeyValueTextInputFormat.class);
     KeyValueTextInputFormat.addInputPath(job1, new Path(conf.get("input")));

  4. DBInputFormat: reads data from a relational database.

     job.setInputFormatClass(DBInputFormat.class); // read input from the database
     // "year = 2000" is a WHERE condition: only rows with year = 2000 are read
     DBInputFormat.setInput(job, YearStationTempDB.class, "station_tbl", "year = 2000", "", "year", "station", "temperature");

Custom input types

Approach:

  1. Create a parser class that extends RecordReader.
  2. Implement the record-fetching logic in the parser class.
  3. Create a custom input type class that extends FileInputFormat.
  4. Override createRecordReader() in the custom input type class.
  5. In that method, create a parser object, call its initialize() method, and return it.
  6. Done; the custom input type can now be used by other jobs.

Parser class - YearStationRecordReader

 


public class YearStationRecordReader extends RecordReader<YearStation, IntWritable> {
    private LineRecordReader reader = new LineRecordReader();
    private NcdcRecordParser parser = new NcdcRecordParser();
    private YearStation ys = new YearStation();
    private IntWritable tmp = new IntWritable();

    @Override
    public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException {
        reader.initialize(inputSplit, taskAttemptContext);
    }

    @Override
    public boolean nextKeyValue() throws IOException {
        do {
            // Is there another line?
            if (!reader.nextKeyValue()) {
                return false;
            }
            // Fetch and parse the current line
            Text line = reader.getCurrentValue();
            parser.parse(line.toString());
        } while (!parser.isValidTemperature()); // skip records with an invalid temperature

        int year = parser.getYear();
        int temp = parser.getAirTemperature();
        String station = parser.getStationId();
        ys.setYear(new Text(year + ""));
        ys.setStationid(new Text(station));
        this.tmp.set(temp);
        return true;
    }

    @Override
    public YearStation getCurrentKey() throws IOException, InterruptedException {
        return this.ys;
    }

    @Override
    public IntWritable getCurrentValue() throws IOException, InterruptedException {
        return this.tmp;
    }

    @Override
    public float getProgress() throws IOException, InterruptedException {
        return reader.getProgress();
    }

    @Override
    public void close() throws IOException {
        reader.close();
    }
}

 

Custom input type class - YearStationInputFormat

 


// Reuses FileInputFormat's input-split logic to implement a custom input type
public class YearStationInputFormat extends FileInputFormat<YearStation, IntWritable> {
    @Override
    public RecordReader<YearStation, IntWritable> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
            throws IOException {
        YearStationRecordReader reader = new YearStationRecordReader();
        reader.initialize(inputSplit, taskAttemptContext);
        return reader;
    }
}
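One caveat: the MapReduce framework itself calls initialize() on the reader returned by createRecordReader(), so the explicit call above (step 5 in the analysis) means initialization runs twice. With LineRecordReader the second call simply redoes the setup, so it is harmless here, but many RecordReader implementations leave createRecordReader() to construct the object only and defer all setup to the framework's initialize() call.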

 

Composite type - YearStation

 


public class YearStation implements WritableComparable<YearStation> {
    private Text year = new Text();      // year
    private Text stationid = new Text(); // weather station id

    public YearStation() {
    }

    public YearStation(Text year, Text stationid) {
        this.year = new Text(year.toString());
        this.stationid = new Text(stationid.toString());
    }

    public YearStation(String year, String stationid) {
        this.year = new Text(year);
        this.stationid = new Text(stationid);
    }

    @Override
    public int compareTo(YearStation o) {
        // sort by year first, then by station id
        return this.year.compareTo(o.year) == 0
                ? this.stationid.compareTo(o.stationid)
                : this.year.compareTo(o.year);
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
        year.write(dataOutput);
        stationid.write(dataOutput);
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
        year.readFields(dataInput);
        stationid.readFields(dataInput);
    }

    public Text getYear() {
        return year;
    }

    public void setYear(Text year) {
        this.year = new Text(year.toString());
    }

    public Text getStationid() {
        return stationid;
    }

    public void setStationid(Text stationid) {
        this.stationid = new Text(stationid.toString());
    }

    @Override
    public String toString() {
        return year.toString() + "\t" + stationid.toString();
    }
}
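Note that the constructors and setters copy the incoming values into fresh Text objects instead of storing the references they are given. MapReduce reuses Writable instances across calls to nextKeyValue() and across the values of a reduce() call, so a stored reference would silently have its contents overwritten later; the defensive copies avoid that aliasing bug.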

 

Testing the custom input type - MaxTmpByYearStationMR

 


public class MaxTmpByYearStationMR extends Configured implements Tool {

    public static class MTBYSMapper extends Mapper<YearStation, IntWritable, YearStation, IntWritable> {
        @Override
        protected void map(YearStation key, IntWritable value, Context context) throws IOException, InterruptedException {
            context.write(key, value);
        }
    }

    public static class MTBYSReducer extends Reducer<YearStation, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(YearStation key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
            Optional<Integer> max = StreamSupport.stream(values.spliterator(), false)
                    .map(e -> e.get()).reduce((x, y) -> Math.max(x, y));
            context.write(new Text(key.toString()), new IntWritable(max.get()));
        }
    }

    @Override
    public int run(String[] strings) throws Exception {
        Configuration conf = getConf();
        Job job = Job.getInstance(conf, "MaxTmpByYS_xj");
        job.setJarByClass(this.getClass());
        job.setMapperClass(MTBYSMapper.class);
        job.setMapOutputKeyClass(YearStation.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setReducerClass(MTBYSReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        job.setInputFormatClass(YearStationInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        YearStationInputFormat.addInputPath(job, new Path(conf.get("inpath")));
        TextOutputFormat.setOutputPath(job, new Path(conf.get("outpath")));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        ToolRunner.run(new MaxTmpByYearStationMR(), args);
    }
}

 

JobControl

Overview:
When a MapReduce workflow consists of several jobs with dependencies among them, for example Job3 depends on Job2 and Job2 depends on Job1, JobControl manages running them in the correct order.

Code example:

 


Job getSDJob = Job.getInstance(conf, "get_sd_job_xj");
getSDJob.setJarByClass(GetSimilarityDegree.class);
// 3. Set the mapper class
getSDJob.setMapperClass(GetSimilarityDegree.GSDMapper.class);
getSDJob.setMapOutputKeyClass(Text.class);
getSDJob.setMapOutputValueClass(DoubleWritable.class);
// 5. Configure the input path
TextInputFormat.addInputPath(getSDJob, new Path("src/train_bin"));
// 6. Configure the output path
TextOutputFormat.setOutputPath(getSDJob, new Path("src/name_sd"));

Job sortBySDJob = Job.getInstance(conf, "sortBySDJob");
sortBySDJob.setJarByClass(SortedByDegree.class);
// 3. Set the mapper class
sortBySDJob.setMapperClass(SortedByDegree.SBDMapper.class);
sortBySDJob.setMapOutputKeyClass(TagDegree.class);
sortBySDJob.setMapOutputValueClass(NullWritable.class);
// 4. Set the reducer class
sortBySDJob.setReducerClass(SortedByDegree.SBDReducer.class);
sortBySDJob.setOutputKeyClass(Text.class);
sortBySDJob.setOutputValueClass(DoubleWritable.class);
// 5. Configure the input path
TextInputFormat.addInputPath(sortBySDJob, new Path("src/name_sd"));
// 6. Configure the output path
TextOutputFormat.setOutputPath(sortBySDJob, new Path("src/name_sd_sorted"));

Job getFKJob = Job.getInstance(conf, "getFKJob");
getFKJob.setJarByClass(GetFirstK.class);
// 3. Set the mapper class
getFKJob.setMapperClass(GetFirstK.GFKMapper.class);
getFKJob.setMapOutputKeyClass(TagDegree.class);
getFKJob.setMapOutputValueClass(IntWritable.class);
// 4. Set the reducer class
getFKJob.setReducerClass(GetFirstK.GFKReducer.class);
getFKJob.setOutputKeyClass(Text.class);
getFKJob.setOutputValueClass(Text.class);
// 5. Configure the input path
TextInputFormat.addInputPath(getFKJob, new Path("src/name_sd_sorted"));
// 6. Configure the output path
TextOutputFormat.setOutputPath(getFKJob, new Path("src/gfk_res"));
getFKJob.setGroupingComparatorClass(GFKGroupComparator.class);

Job getLRJob = Job.getInstance(conf, "getLRJob");
getLRJob.setJarByClass(GetLastResult.class);
// 3. Set the mapper class
getLRJob.setMapperClass(GetLastResult.GLRMapper.class);
getLRJob.setMapOutputKeyClass(Text.class);
getLRJob.setMapOutputValueClass(TagAvgNum.class);
// 4. Set the reducer class
getLRJob.setReducerClass(GetLastResult.GLRReducer.class);
getLRJob.setOutputKeyClass(Text.class);
getLRJob.setOutputValueClass(NullWritable.class);
// 5. Configure the input path
TextInputFormat.addInputPath(getLRJob, new Path("src/gfk_res"));
// 6. Configure the output path
TextOutputFormat.setOutputPath(getLRJob, new Path("src/last_res"));

ControlledJob getSD = new ControlledJob(getSDJob.getConfiguration());
ControlledJob sortBySD = new ControlledJob(sortBySDJob.getConfiguration());
ControlledJob getFK = new ControlledJob(getFKJob.getConfiguration());
ControlledJob getLR = new ControlledJob(getLRJob.getConfiguration());
// Declare the dependencies
getLR.addDependingJob(getFK);
getFK.addDependingJob(sortBySD);
sortBySD.addDependingJob(getSD);

JobControl con = new JobControl("test");
con.addJob(getSD);
con.addJob(sortBySD);
con.addJob(getFK);
con.addJob(getLR);
// JobControl implements Runnable, so it runs on its own thread
Thread t = new Thread(con);
t.start();
while (true) {
    if (con.allFinished()) {
        System.out.println("All jobs finished; check the results.");
        System.exit(0);
    }
    Thread.sleep(500); // poll instead of spinning at full CPU
}
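One refinement worth making in a real driver: allFinished() returns true even when some jobs have failed, so it is worth checking con.getFailedJobList() before declaring success and stopping the JobControl thread with con.stop() on the way out.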

 

Last updated: October 8, 2018, 18:25

Original link: https://www.lousenjay.top/2018/09/03/MapReduce入门详解(三)/
