Debugging a hadoop2.6.0-cdh5.7.0 project running in a virtual machine from local IDEA

The software and files involved are as follows:

1. IDEA with Maven

2. hadoop.dll, winutils.exe, and winutils.pdb built for 64-bit Windows (important). These three files target Hadoop 2.7 but also work with Hadoop 2.6. Download link: click to open link. A quick sanity check for where they go is sketched below.
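Where exactly the three files go is a common stumbling block: they are expected under the bin folder of the directory that hadoop.home.dir (or HADOOP_HOME) points to. The snippet below is only a sanity-check sketch; the D:\KDR\hadoop-2.6.0-cdh5.7.0 path is the one used in the driver code later in this post, so adjust it to your own layout.

import java.io.File;

public class WinutilsCheck {
    public static void main(String[] args) {
        //point Hadoop at the unpacked distribution whose bin\ folder holds the three files
        System.setProperty("hadoop.home.dir", "D:\\KDR\\hadoop-2.6.0-cdh5.7.0");
        File winutils = new File(System.getProperty("hadoop.home.dir"), "bin\\winutils.exe");
        System.out.println(winutils.exists()
                ? "winutils.exe found at " + winutils
                : "winutils.exe missing - HDFS/MapReduce calls will fail on Windows");
    }
}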

3. Disable HDFS permission checking

Add the following to hdfs-site.xml under etc/hadoop (remember to stop the cluster first and then restart it):

  <property>
      <name>dfs.permissions</name>
      <value>false</value>
  </property>

This turns off permission checking. Never configure a production cluster this way; a permission-friendly alternative for local debugging is sketched below.
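If you would rather keep permission checking on, you can instead talk to HDFS as the user that owns the target directories, exactly as the JUnit tests further below do with FileSystem.get(uri, conf, "root"). A minimal sketch, assuming the NameNode address used throughout this post and that root owns the paths you touch:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ConnectAsRoot {
    public static void main(String[] args) throws Exception {
        //connect as the owning HDFS user instead of setting dfs.permissions=false
        Configuration configuration = new Configuration();
        FileSystem fileSystem = FileSystem.get(new URI("hdfs://192.168.91.127:8020"), configuration, "root");
        System.out.println("connected, / exists: " + fileSystem.exists(new Path("/")));
        fileSystem.close();
    }
}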

4. Start debugging

The Maven pom.xml is as follows (the Cloudera repository is declared because CDH artifacts are not published to Maven Central):


<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.baidu.hadoop</groupId>
    <artifactId>hadoop-train</artifactId>
    <version>1.0</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <hadoop.version>2.6.0-cdh5.7.0</hadoop.version>
    </properties>

    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
        </repository>
    </repositories>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.10</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

</project>
Debugging template (a WordCount MapReduce job):
package hadoop.mapreduce;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

/**
 * WordCount implemented with MapReduce.
 * Created by hello on 2018-05-15.
 */
public class WordCountApp {


    /**
     * Mapper: reads the input file line by line
     */
    public static class MyMapper extends Mapper<LongWritable,Text,Text,LongWritable>{
        LongWritable one = new LongWritable(1);
        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            //receive one line of input
            String line = value.toString();
            //split on the tab separator
            String[] words = line.split("\t");

            for(String s : words){
                //emit (word, 1) through the context
                context.write(new Text(s),one);
            }

        }
    }

    /**
     * Reducer: sums the counts for each word
     */
    public static class MyReducer extends Reducer<Text,LongWritable,Text,LongWritable>{
        @Override
        protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException {

            long sum = 0 ;
            for(LongWritable value : values){
                sum += value.get();
            }
            //emit the final count for this word
            context.write(key,new LongWritable(sum));
        }
    }

    /**
     * Driver: wires together all the information for the MR job
     */
    public static void main(String[] args) throws Exception{


        String HDFS_PATH ="hdfs://192.168.91.127:8020";

        //create the configuration
        Configuration configuration = new Configuration();

        //remote-debug the Hadoop job from Windows
        System.setProperty("hadoop.home.dir","D:\\KDR\\hadoop-2.6.0-cdh5.7.0");
        configuration.set("fs.defaultFS",HDFS_PATH);
        //the ResourceManager hostname is a host/IP, not an hdfs:// URI
        configuration.set("yarn.resourcemanager.hostname","192.168.91.127");

        //create the job
        Job job = Job.getInstance(configuration,"wordcount");
        //set the class that carries the job
        job.setJarByClass(WordCountApp.class);

        //set the job input path
        FileInputFormat.setInputPaths(job,new Path("/test/a.txt"));

        //map-side settings
        job.setMapperClass(MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(LongWritable.class);

        //reduce-side settings
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(LongWritable.class);

        //set the job output path (the directory must not already exist; see the sketch after the class)
        FileOutputFormat.setOutputPath(job,new Path("/out/"));

        System.exit(job.waitForCompletion(true)? 0 : 1);

    }
}
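One gotcha with this driver: MapReduce refuses to start if the output directory already exists, so a second run against /out/ fails with an "output directory already exists" error. A small sketch of the usual workaround, to be placed in main() right before FileOutputFormat.setOutputPath (it only uses the FileSystem API already shown in the JUnit tests below):

        //delete the output directory if it is left over from a previous run
        //(requires: import org.apache.hadoop.fs.FileSystem;)
        FileSystem fs = FileSystem.get(configuration);
        Path outputPath = new Path("/out/");
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);   //true = delete recursively
        }
        FileOutputFormat.setOutputPath(job, outputPath);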

JUnit unit tests against the HDFS API:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.net.URI;


/**
 * Created by hello on 2018-05-11.
 */
public class HDFSApp {

    public static final String HDFS_PATH ="hdfs://192.168.91.127:8020";
    FileSystem fileSystem = null;
    Configuration configuration = null;

    @Test
    public void mkdir() throws Exception{
        fileSystem.mkdirs(new Path("/hdfsapi/test"));
    }

    /**
     * Create a file
     * @throws Exception
     */
    @Test
    public void create() throws Exception{
        FSDataOutputStream fs=fileSystem.create(new Path("/hdfsapi/test/a.txt"));
        fs.write("hello hadoop".getBytes());
        fs.flush();
        fs.close();
    }

    /**
     * View file contents
     */
    @Test
    public void cat() throws  Exception{
        FSDataInputStream in = fileSystem.open(new Path("/hdfsapi/test/b.txt"));
        IOUtils.copyBytes(in,System.out,1024);
        in.close();
    }

    /**
     * Rename a file
     * @throws Exception
     */

    @Test
    public void rename() throws Exception{
        fileSystem.rename(new Path("/hdfsapi/test/a.txt"),new Path("/hdfsapi/test/b.txt"));
    }

    /**
     * List file/directory status
     * @throws Exception
     */
    @Test
    public void category() throws Exception{
        FileStatus[] fs=fileSystem.listStatus(new Path("/hdfsapi/test/dalei.json"));
        for(FileStatus fileStatus : fs){
            String isDir = fileStatus.isDirectory() ? "directory":"file";
            short replication =fileStatus.getReplication();
            long leng=fileStatus.getLen();
            Path path = fileStatus.getPath();
            System.out.println(isDir+"\t"+replication+"\t"+leng+"\t"+path);
        }
    }


    /**
     * Upload a file to HDFS
     * @throws Exception
     */
    @Test
    public void upload() throws Exception{
        Path localPath = new Path("E:\\train.json");
        Path hdfsPath = new Path("/hdfsapi/test/");
        fileSystem.copyFromLocalFile(localPath,hdfsPath);
    }

    /**
     * Upload with a progress indicator
     * @throws Exception
     */
    @Test
    public void uploadWithProgress() throws Exception{
        Path localPath = new Path("E:\\train.json");
        Path hdfsPath = new Path("/hdfsapi/test/");
        Progressable progress = new Progressable() {
            public void progress() {
                System.out.print(">");
            }
        };
        InputStream in = new BufferedInputStream(new FileInputStream(new File("E:\\train.json")));
        FSDataOutputStream output = fileSystem.create(new Path("/hdfsapi/test/dalei.json"),progress);
        //copy the bytes and close both streams so the HDFS file is finalized
        IOUtils.copyBytes(in,output,4096,true);
    }


    /**
     * Download a file from HDFS
     * @throws Exception
     */
    @Test
    public void download() throws Exception{
        Path localPath = new Path("D:\\hi.txt");
        Path hdfsFile = new Path("/hdfsapi/test/dalei.json");
        fileSystem.copyToLocalFile(hdfsFile,localPath);

    }
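    /**
     * Delete a path recursively.
     * Not part of the original post - a minimal sketch that follows the same
     * pattern as the tests above; the /hdfsapi/test path is the one they create.
     */
    @Test
    public void delete() throws Exception{
        boolean deleted = fileSystem.delete(new Path("/hdfsapi/test"), true); //true = recursive
        System.out.println("deleted: " + deleted);
    }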


    @Before
    public void setUp() throws Exception{
        System.out.println("hadoop setUp");
        configuration = new Configuration();
        fileSystem = FileSystem.get(new URI(HDFS_PATH),configuration,"root");
    }
    @After
    public void tearDown() throws Exception{
        configuration = null;
        fileSystem = null;
        System.out.println("hadoop tearDown");
    }

}

