1. pom configuration
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>xtr</groupId>
<artifactId>hadoopstu</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>hadoopstu</name>
<url>http://maven.apache.org</url>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<hadoop.version>3.1.3</hadoop.version>
</properties>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
</dependencies>
</project>
2. Test class
package xtr;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
/**
* Unit test for simple App.
*/
public class AppTest {
FileSystem hdfs = null;
@Before
public void init() throws IOException {
Configuration conf = new Configuration();
conf.set("fs.defaultFS","hdfs://192.168.153.141:9000");
System.setProperty("HADOOP_USER_NAME","root");
hdfs = FileSystem.get(conf);
}
@Test
public void writeToHdfs() throws IOException {
Path source = new Path("hello.txt");
Path distinct = new Path("/info.txt");
hdfs.copyFromLocalFile(source,distinct);
System.out.println("上传成功");
}
@After
public void close() throws IOException {
hdfs.close();
}
}
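As a sanity check in the other direction, a similar test method can be added to AppTest to copy the uploaded file back from HDFS. This is only an illustrative sketch (the local file name info_copy.txt is made up); it reuses the hdfs field initialized in init():

    @Test
    public void readFromHdfs() throws IOException {
        // copy /info.txt from HDFS back to a file in the local working directory
        Path source = new Path("/info.txt");
        Path local = new Path("info_copy.txt");
        hdfs.copyToLocalFile(source, local);
        System.out.println("Download succeeded");
    }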
3. StudentScore class
package xtr.demo2a;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
public class StudentScore implements WritableComparable<StudentScore> {
private int stuscore;
private String clsname;
public StudentScore() {
}
public StudentScore(int stuscore, String clsname) {
this.stuscore = stuscore;
this.clsname = clsname;
}
public int getStuscore() {
return stuscore;
}
public void setStuscore(int stuscore) {
this.stuscore = stuscore;
}
public String getClsname() {
return clsname;
}
public void setClsname(String clsname) {
this.clsname = clsname;
}
@Override
public String toString() {
return "StudentScore{" +
"stuscore=" + stuscore +
", clsname='" + clsname + '\'' +
'}';
}
@Override
public int compareTo(StudentScore o) {
// compare by score; Integer.compare returns -1/0/1 and keeps the ordering contract symmetric
return Integer.compare(this.stuscore, o.stuscore);
}
public void write(DataOutput dataOutput) throws IOException {
dataOutput.writeInt(stuscore);
dataOutput.writeUTF(clsname);
}
public void readFields(DataInput dataInput) throws IOException {
this.stuscore=dataInput.readInt();
this.clsname=dataInput.readUTF();
}
}
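The write()/readFields() pair can be sanity-checked outside of a MapReduce job by round-tripping an object through plain Java streams. The class below is only an illustrative sketch (the sample values are made up; it assumes it lives in the same package as StudentScore):

    package xtr.demo2a;

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    public class StudentScoreRoundTrip {
        public static void main(String[] args) throws IOException {
            // serialize a sample object the same way Hadoop serializes map output values
            ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            new StudentScore(90, "Math").write(new DataOutputStream(bytes));

            // deserialize into a fresh instance and print it
            StudentScore restored = new StudentScore();
            restored.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
            System.out.println(restored); // expected: StudentScore{stuscore=90, clsname='Math'}
        }
    }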
4. Mapper class
package xtr.demo2a;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class StudentMapper extends Mapper<LongWritable, Text,Text,StudentScore> {
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, StudentScore>.Context context) throws IOException, InterruptedException {
String[] split = value.toString().split(",");
Text text = new Text(split[3]);
StudentScore studentScore = new StudentScore(Integer.parseInt(split[2]), split[3]);
context.write(text,studentScore);
}
}
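The mapper assumes each line of stuscore.csv is comma-separated, with the score at index 2 and the class name at index 3. The exact file layout is not shown here, so the line below is only a hypothetical illustration of what the map method emits:

    // hypothetical input line, assumed layout: id,name,score,class
    // "1,zhangsan,85,Math"
    //   split[2] = "85"   -> stuscore
    //   split[3] = "Math" -> clsname, also used as the map output key
    // emitted pair: ("Math", StudentScore{stuscore=85, clsname='Math'})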
5. Reducer class
package xtr.demo2a;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.text.DecimalFormat;
public class StudentReduce extends Reducer<Text,StudentScore, Text, DoubleWritable> {
@Override
protected void reduce(Text key, Iterable<StudentScore> values, Reducer<Text, StudentScore, Text, DoubleWritable>.Context context) throws IOException, InterruptedException {
int sum = 0;
int count=0;
String classname="";
for (StudentScore i:
values) {
sum+=i.getStuscore();
count++;
classname=i.getClsname();
}
Text text = new Text(classname);
DecimalFormat df = new DecimalFormat("#.00");
String format = df.format((double) sum / count);
DoubleWritable doubleWritable = new DoubleWritable(Double.parseDouble(format));
context.write(text,doubleWritable);
System.out.println(text+"\t"+doubleWritable);
}
}
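Since the map output key is already the class name, the reducer does not strictly need to rebuild a Text from the looped values. A trimmed sketch of the same averaging logic (two-decimal rounding omitted for brevity) could write the incoming key directly:

    @Override
    protected void reduce(Text key, Iterable<StudentScore> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        int count = 0;
        for (StudentScore s : values) {
            sum += s.getStuscore();
            count++;
        }
        // key is the class name emitted by the mapper
        context.write(key, new DoubleWritable((double) sum / count));
    }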
6. Driver class
package xtr.demo2a;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class StudentDriver {
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Configuration configuration = new Configuration();
Job job = Job.getInstance(configuration);
job.setJarByClass(StudentDriver.class);
job.setMapperClass(StudentMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(StudentScore.class);
job.setReducerClass(StudentReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(DoubleWritable.class);
// Path inpath = new Path("hdfs://kb141:9000/kb22/stuscore.csv");
Path inpath = new Path("D:\\Java\\IDE\\workspace\\hadoopstu\\in\\stuscore.csv");
FileInputFormat.setInputPaths(job,inpath);
// Path outpath = new Path("hdfs://kb141:9000/kb22out");
Path outpath = new Path("D:\\Java\\IDE\\workspace\\hadoopstu\\out\\out2a");
// delete the output directory if it already exists
FileSystem fs = FileSystem.get(outpath.toUri(), configuration);
if (fs.exists(outpath))
fs.delete(outpath,true);
FileOutputFormat.setOutputPath(job,outpath);
boolean b = job.waitForCompletion(true);
System.out.println(b ? "success" : "failure");
System.exit(b?0:1);
}
}
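When the job is packaged and submitted to the cluster instead of being run from the IDE, the hard-coded Windows paths are usually replaced with command-line arguments. The snippet below is only a sketch (the argument order is an assumption, not part of the original code):

    // read input/output paths from the command line instead of hard-coding them
    Path inpath = new Path(args[0]);
    Path outpath = new Path(args[1]);
    // example submission (jar name follows artifactId-version from the pom):
    // hadoop jar hadoopstu-1.0-SNAPSHOT.jar xtr.demo2a.StudentDriver hdfs://kb141:9000/kb22/stuscore.csv hdfs://kb141:9000/kb22out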