HDFS Operations
Command-Line Operations
# List the files under the HDFS root directory
hdfs dfs -ls /
# mkdir: create a test directory under the HDFS root
hdfs dfs -mkdir /test
# put: upload 1.txt from the current directory to the /test directory on HDFS
hdfs dfs -put 1.txt /test
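A few more commonly used commands, assuming the 1.txt uploaded above:
# View the contents of the uploaded file
hdfs dfs -cat /test/1.txt
# Download a file from HDFS into the current local directory
hdfs dfs -get /test/1.txt .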
Java API Operations
The project's pom.xml file:
<properties>
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<hadoop.version>2.7.6</hadoop.version>
</properties>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.10</version>
</dependency>
</dependencies>
Java API code:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.net.URI;
/**
 * Operating HDFS via the Java API
 */
public class HDFSApp {
public static final String HDFS_PATH = "hdfs://192.168.26.111:9000";
Configuration configuration = null;
FileSystem fileSystem = null;
@Before
public void setUp() throws Exception {
System.out.println("HDFSApp.setUp()");
configuration = new Configuration();
fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration);
}
@After
public void tearDown() throws Exception {
fileSystem.close();
fileSystem = null;
configuration = null;
System.out.println("HDFSApp.tearDown()");
}
/**
 * Create a directory
 */
@Test
public void mkdir() throws Exception {
fileSystem.mkdirs(new Path("/hdfsapi/test"));
}
/**
 * Create a file
 */
@Test
public void create() throws Exception {
FSDataOutputStream output = fileSystem.create(new Path("/hdfsapi/test/a.txt"));
output.write("hello world".getBytes());
output.flush();
output.close();
}
/**
 * Rename a file
 */
@Test
public void rename() throws Exception {
Path oldPath = new Path("/hdfsapi/test/a.txt");
Path newPath = new Path("/hdfsapi/test/b.txt");
System.out.println(fileSystem.rename(oldPath, newPath));
}
/**
 * Upload a local file to HDFS
 */
@Test
public void copyFromLocalFile() throws Exception {
Path src = new Path("/home/hadoop/data/hello.txt");
Path dst = new Path("/hdfsapi/test/");
fileSystem.copyFromLocalFile(src, dst);
}
/**
 * List all files under a directory
 */
@Test
public void listFiles() throws Exception {
FileStatus[] listStatus = fileSystem.listStatus(new Path("/hdfsapi/test"));
for (FileStatus fileStatus : listStatus) {
String isDir = fileStatus.isDirectory() ? "directory" : "file"; // file or directory
String permission = fileStatus.getPermission().toString(); // permissions
short replication = fileStatus.getReplication(); // replication factor
long len = fileStatus.getLen(); // length in bytes
String path = fileStatus.getPath().toString(); // full path
System.out.println(isDir + "\t" + permission + "\t" + replication + "\t" + len + "\t" + path);
}
}
/**
 * View a file's block locations
 */
@Test
public void getFileBlockLocations() throws Exception {
FileStatus fileStatus = fileSystem.getFileStatus(new Path("/hdfsapi/test/b.txt"));
BlockLocation[] blocks = fileSystem.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
for (BlockLocation block : blocks) {
for (String host : block.getHosts()) {
System.out.println(host);
}
}
}
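/**
 * Read a file's contents and print them to stdout. A minimal sketch; assumes
 * /hdfsapi/test/b.txt exists (e.g. after running create() and rename() above).
 */
@Test
public void cat() throws Exception {
FSDataInputStream in = fileSystem.open(new Path("/hdfsapi/test/b.txt"));
org.apache.hadoop.io.IOUtils.copyBytes(in, System.out, 1024);
in.close();
}
/**
 * Delete a file or directory recursively. A minimal sketch.
 */
@Test
public void delete() throws Exception {
System.out.println(fileSystem.delete(new Path("/hdfsapi/test"), true));
}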
}
If HDFS denies access for the current user, switch the effective user to root via an environment variable.
Set the environment variable HADOOP_USER_NAME=root.
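Alternatively, the user can be passed directly when obtaining the FileSystem instance, which avoids the environment variable. A minimal sketch using the three-argument overload of FileSystem.get in setUp():
// Equivalent to setting HADOOP_USER_NAME=root
fileSystem = FileSystem.get(new URI(HDFS_PATH), configuration, "root");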
Word Count with MapReduce
Create the input directory for the word count:
hdfs dfs -mkdir /wc
# Prepare a text file to upload
vi 1.txt
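For illustration, assume 1.txt contains the following sample text:
hello world
hello hadoop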
Upload the local 1.txt file to HDFS:
hdfs dfs -put 1.txt /wc
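The MapReduce implementation, WordCountApp.java: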
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import java.io.IOException;
import java.net.URI;
import java.util.StringTokenizer;
/**
 * A MapReduce implementation of WordCount
 */
public class WordCountApp {
public static class MyMapper
extends Mapper<Object, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
public void map(Object key, Text value, Context context
) throws IOException, InterruptedException {
StringTokenizer itr = new StringTokenizer(value.toString());
while (itr.hasMoreTokens()) {
word.set(itr.nextToken());
context.write(word, one);
}
}
}
public static class MyReducer
extends Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values,
Context context) throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
String INPUT_PATH = "hdfs://192.168.26.111:9000/wc";
String OUTPUT_PATH = "hdfs://192.168.26.111:9000/outputwc";
Configuration conf = new Configuration();
final FileSystem fileSystem = FileSystem.get(new URI(INPUT_PATH), conf);
if (fileSystem.exists(new Path(OUTPUT_PATH))) {
fileSystem.delete(new Path(OUTPUT_PATH), true);
}
Job job = Job.getInstance(conf, "WordCountApp");
// locate the jar to run via this driver class
job.setJarByClass(WordCountApp.class);
// configure the map stage
job.setMapperClass(MyMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// configure the reduce stage
job.setReducerClass(MyReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
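// Optional: use the reducer as a combiner to pre-aggregate counts on the
// map side (a common WordCount optimization; valid here because summing
// is associative and commutative).
job.setCombinerClass(MyReducer.class);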
// set the input format
job.setInputFormatClass(TextInputFormat.class);
Path inputPath = new Path(INPUT_PATH);
FileInputFormat.addInputPath(job, inputPath);
// set the output format
job.setOutputFormatClass(TextOutputFormat.class);
Path outputPath = new Path(OUTPUT_PATH);
FileOutputFormat.setOutputPath(job, outputPath);
// submit the job and wait for completion
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
If the MapReduce job fails to run, this article may help: https://blog.csdn.net/whs0329/article/details/121878162
Remember to set the HADOOP_USER_NAME=root environment variable; an exit code of 0 means the word count ran successfully.
View the word count results:
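The reducer writes its output to part files under /outputwc; with a single reducer this is typically part-r-00000:
hdfs dfs -cat /outputwc/part-r-00000
With the sample 1.txt assumed above, the output would look like:
hadoop	1
hello	2
world	1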