清理实操

创建项目(注意java 命名规范)
创建好之后 先建立依赖dependencies
## 正常依赖


<dependencies>

    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>RELEASE</version>
    </dependency>

    <dependency>
        <groupId>log4j</groupId>
        <artifactId>log4j</artifactId>
        <version>1.2.17</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.7.2</version>
    </dependency>


    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.7.2</version>
    </dependency>

    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.7.2</version>
    </dependency>


     <dependency>

        <groupId>com.googlecode.json-simple</groupId>
        <artifactId>json-simple</artifactId>
        <version>1.1</version>
     </dependency>




</dependencies>

注意最后一个依赖(json-simple),它是处理 JSON 格式数据所必需的。
添加依赖后,在 resources 文件夹下创建日志配置文件,
文件名必须为 **log4j.properties**,日志配置内容如下:

# Root logger: INFO level, output to the console appender "stdout".
log4j.rootLogger=INFO, stdout
# Console appender with a timestamp/level/category/message pattern.
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d %p [%c] - %m%n
# File appender writing to target/spring.log with the same pattern.
# NOTE(review): "logfile" is configured but not listed on log4j.rootLogger,
# so it is never activated as written — confirm whether that is intended.
log4j.appender.logfile=org.apache.log4j.FileAppender 
log4j.appender.logfile.File=target/spring.log
log4j.appender.logfile.layout=org.apache.log4j.PatternLayout
log4j.appender.logfile.layout.ConversionPattern=%d %p [%c] - %m%n

然后在就java下面就可以创建类了;注意类名的命名规范 一般建三个类 Map , Reducer , Driver

package WeatherData;


import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.json.simple.JSONObject;


import java.io.IOException;

public class WeatherMap extends Mapper<LongWritable, Text, Text, NullWritable> {

    /** Expected CSV field count: city, date, weather, temp, wind. */
    private static final int EXPECTED_FIELDS = 5;

    /** Reused across map() calls to avoid a per-record allocation (standard Hadoop idiom). */
    private final Text outKey = new Text();

    /**
     * Converts one CSV weather record into a JSON string emitted as the map key.
     *
     * <p>Input line format (GBK-encoded), e.g.:
     * {@code 北京,2020.9.3,多云,17-28,无持续风向 3-4级}
     * i.e. city, date, weather, temperature range, wind.
     *
     * <p>Malformed lines (fewer than {@value #EXPECTED_FIELDS} fields, e.g. blank
     * or truncated records) are skipped instead of crashing the task with an
     * ArrayIndexOutOfBoundsException.
     *
     * @param key     byte offset of the line in the input split (unused)
     * @param value   the raw text of one input line
     * @param context Hadoop context used to emit the (json, NullWritable) pair
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Decode the raw bytes as GBK to avoid mojibake; Text assumes UTF-8,
        // so value.toString() would garble GBK-encoded Chinese input.
        String line = new String(value.getBytes(), 0, value.getLength(), "GBK");
        String[] fields = line.split(",");
        if (fields.length < EXPECTED_FIELDS) {
            return; // guard: skip blank or truncated lines
        }
        JSONObject obj = new JSONObject();
        obj.put("city", fields[0]);
        obj.put("date", fields[1]);
        obj.put("weather", fields[2]);
        obj.put("temp", fields[3]);
        obj.put("wind", fields[4]);
        outKey.set(obj.toString());
        context.write(outKey, NullWritable.get());
    }
}

读取的第一行里使用了解决乱码的一条代码
然后自己理解一下Map里面的东西

package WeatherData;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Identity reducer: emits each distinct JSON weather-record key exactly once
 * with a NullWritable value. The shuffle phase has already grouped duplicate
 * keys, so this also deduplicates identical records.
 */
public class WeatherReducer  extends Reducer<Text, NullWritable,Text, NullWritable> {

    /**
     * Writes the key through unchanged; the grouped values are ignored, so
     * duplicate input records collapse to a single output line.
     *
     * @param key     the JSON string produced by the mapper
     * @param values  grouped NullWritable placeholders (unused)
     * @param context Hadoop context used to emit the output pair
     */
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
        context.write(key,NullWritable.get());

    }
}

reducer代码 也跟map一样 注意继承什么的
最后 的driver的代码

package WeatherData;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * Driver for the weather-data MapReduce job: wires WeatherMap and
 * WeatherReducer together, clears any previous output directory, submits the
 * job, and exits with status 0 on success or 1 on failure.
 */
public class WeatherDriver {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        Configuration conf = new Configuration();

        // 1. Create the job.
        Job job = Job.getInstance(conf);

        // 2. Tell Hadoop which jar contains the job classes.
        job.setJarByClass(WeatherDriver.class);

        // 3. Wire up the mapper and reducer.
        job.setMapperClass(WeatherMap.class);
        job.setReducerClass(WeatherReducer.class);

        // 4. Map-phase output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);

        // 5. Final output key/value types.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // 6. Register input/output paths, then delete a pre-existing output
        //    directory so the job can be rerun (Hadoop refuses to overwrite).
        Path outputPath = setPath(job);
        outputPath.getFileSystem(conf).delete(outputPath, true);

        // 7. Submit and wait. Primitive boolean — the original boxed
        //    `Boolean result` compared with `== true` risks identity-comparison
        //    bugs and needless boxing.
        boolean result = job.waitForCompletion(true);
        System.out.println(result ? "success" : "failed");
        System.exit(result ? 0 : 1);
    }

    /**
     * Registers the input and output paths on the job and returns the output
     * path so the caller can clear it before submission.
     *
     * @param job the job to configure
     * @return the output directory path
     * @throws IOException if the paths cannot be registered
     */
    private static Path setPath(Job job) throws IOException {
        Path inpath = new Path("D:\\About_study\\idea\\workplace\\input\\weatherdata.txt");
        Path outpath = new Path("D:\\About_study\\idea\\workplace\\output2");
        FileInputFormat.addInputPath(job, inpath);
        FileOutputFormat.setOutputPath(job, outpath);
        return outpath;
    }
}

基本上都一样 需要改的有
//4设置mapper阶段输出的key和value类型
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
//5设置最终数据输出的key和value类型
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
1 输入输出的 key和value的字符串类型
2 自己文件的地址 以及output输出文件的地址
Path inpath = new Path("D:\\About_study\\idea\\workplace\\input\\weatherdata.txt");
Path outpath = new Path("D:\\About_study\\idea\\workplace\\output2");
然后就可以运行了

  • 1
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值