A simple WordCount example
Write the following into wordcount.txt:
Spark Hadoop JAVA Python Spark Hadoop HIVE
WordCountMapper
package Mapreducer.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
 * <p>
 * LongWritable: corresponds to Java's long; its value is the byte offset at which the line was read
 * Text: one line of the input file, corresponding to Java's String
 * <p>
 * Text: the output key is of type Text
 * IntWritable: the output value is of type IntWritable, corresponding to Java's int
 */
public class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

    @Override
    protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException {
        // No need to call the parent implementation
        // super.map(key, value, context);

        // Split the line on spaces and emit (word, 1) for every word
        String[] words = value.toString().split(" ");
        for (String word : words) {
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
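A common, optional tweak (not part of the original example) is to reuse the output writables inside WordCountMapper instead of allocating a new Text and IntWritable for every word; MapReduce serializes the key and value on each write, so reuse is safe. A minimal sketch of the same map method with that change:

    private final Text outKey = new Text();
    private final IntWritable one = new IntWritable(1);

    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Reuse the same writable objects for every output record
        for (String word : value.toString().split(" ")) {
            outKey.set(word);
            context.write(outKey, one);
        }
    }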
WordCountReducer
package Mapreducer.wordcount;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * The concrete implementation on the Reduce side
     *
     * @param key     the key passed from the Map side
     * @param values  the collection of values sharing the same key from the Map side
     * @param context the context object used to write the output
     * @throws IOException
     * @throws InterruptedException
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException {
        // No need to call the parent implementation
        // super.reduce(key, values, context);

        // Sum the counts for this word and emit (word, total)
        int count = 0;
        for (IntWritable value : values) {
            count += value.get();
        }
        context.write(key, new IntWritable(count));
    }
}
WordCountDriver
package Mapreducer.wordcount;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

// The driver is used to submit our job
public class WordCountDriver {
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        // conf.set();
        // getInstance(Configuration conf) returns a Job instance
        Job job = Job.getInstance(conf);
        // Tell Hadoop which jar contains the job classes (needed when running on a cluster)
        job.setJarByClass(WordCountDriver.class);
        // Set the concrete Mapper and Reducer implementations
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        // Set the output types of the Map side
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        // Set the output types of the Reduce side
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        /**
         * setInputPaths(Job job, String commaSeparatedPaths)
         * setOutputPath(Job job, Path outputDir)
         */
        // Set the input and output paths
        FileInputFormat.setInputPaths(job, new Path("E:\\JAVA XMS\\Hadoop\\input"));
        FileOutputFormat.setOutputPath(job, new Path("E:\\JAVA XMS\\Hadoop\\output"));
        // Submit the job and wait for it to finish
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
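Assuming the input directory contains only the wordcount.txt shown at the top, the output directory should end up with a part-r-00000 file whose contents look roughly like this (keys are sorted by Text's byte order, so all-uppercase words come first; key and value are separated by a tab by the default TextOutputFormat):

HIVE	1
Hadoop	2
JAVA	1
Python	1
Spark	2

Note that the output directory must not exist before the job runs, otherwise FileOutputFormat refuses to start the job.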
HDFS
Connecting to HDFS with Java
HDFS_0_FileSystem
package HDFS;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

public class HDFS_0_FileSystem {
    public static void main(String[] args) throws URISyntaxException, IOException, InterruptedException {
        // Create a configuration object
        Configuration conf = new Configuration();
        /**
         * Get a filesystem instance based on the uri, the passed configuration and the user
         * Params:
         *   uri  – of the filesystem
         *   conf – the configuration to use
         *   user – to perform the get as
         * Returns:
         *   the filesystem instance
         */
        // The address can be found in core-site.xml under hadoop-2.6.7\etc\hadoop on the master node
        URI uri = new URI("hdfs://master:9000");
        FileSystem fs = FileSystem.get(uri, conf, "root");
        System.out.println(fs.getClass().getName());
        fs.close();
    }
}
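If the connection works, this should print org.apache.hadoop.hdfs.DistributedFileSystem, the concrete FileSystem implementation that backs the hdfs:// scheme.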
HDFS_1_UPFile
package HDFS;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class HDFS_1_UPFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");
        FileSystem fileSystem = FileSystem.get(conf);
        /**
         * copyFromLocalFile(boolean delSrc, boolean overwrite, Path src, Path dst)
         * Params:
         *   delSrc    – whether to delete the src
         *   overwrite – whether to overwrite an existing file
         *   src       – source path (local)
         *   dst       – destination path (HDFS)
         */
        fileSystem.copyFromLocalFile(false, true, new Path("E:\\JAVA XMS\\Hadoop\\data\\sanguo.txt"), new Path("/sanguo/"));
        fileSystem.close();
        System.out.println("upFiles has finished");
    }
}
HDFS_2_UPFileReplication
package HDFS;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

// The default replication factor can be found in hdfs-site.xml under hadoop-2.6.7\etc\hadoop on the master node
public class HDFS_2_UPFileReplication {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");
        // Replication factor for files written by this client
        conf.set("dfs.replication", "3");
        FileSystem fs = FileSystem.get(conf);
        fs.copyFromLocalFile(false, true, new Path("E:\\JAVA XMS\\Hadoop\\data\\sanguo.txt"), new Path("/sanguo/"));
        fs.close();
        System.out.println("upFiles has finished");
    }
}
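Note that dfs.replication set on the client configuration only affects files written by this client. For a file that already exists in HDFS, the replication factor can be changed with FileSystem.setReplication; a minimal sketch, reusing the file uploaded above:

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");
        FileSystem fs = FileSystem.get(conf);
        // Change the replication factor of a file that is already in HDFS to 3
        fs.setReplication(new Path("/sanguo/sanguo.txt"), (short) 3);
        fs.close();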
HDFS_3_DownFile
package HDFS;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class HDFS_3_DownFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");
        conf.set("dfs.replication", "3");
        FileSystem fs = FileSystem.get(conf);
        fs.copyToLocalFile(false, new Path("/sanguo/sanguo.txt"), new Path("E:\\JAVA XMS\\Hadoop\\data"));
        fs.close();
        System.out.println("Finished");
    }
}
HDFS_4_RenameFile
package HDFS;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class HDFS_4_RenameFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");
        conf.set("dfs.replication", "3");
        FileSystem fs = FileSystem.get(conf);
        fs.rename(new Path("/sanguo/sanguo.txt"), new Path("/sanguo/sanguo3.txt"));
        fs.close();
        System.out.println("Finished");
    }
}
HDFS_5_DeletFiles
package HDFS;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class HDFS_5_DeletFiles {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");
        conf.set("dfs.replication", "3");
        FileSystem fs = FileSystem.get(conf);
        /**
         * boolean delete(Path f, boolean recursive)
         * Params:
         *   f         – the path to delete.
         *   recursive – if path is a directory and set to true, the directory is deleted,
         *               else an exception is thrown. For a file, recursive can be either true or false.
         * Returns:
         *   true if the delete is successful, else false.
         */
        fs.delete(new Path("/test1/test2"), false);
        fs.close();
        System.out.println("Finished");
    }
}
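The call above removes /test1/test2 only if it is a file or an empty directory; as the javadoc notes, deleting a directory together with its contents requires recursive = true. A minimal sketch, using the same /test1 path purely as an example:

        // Recursively delete /test1 and everything underneath it
        boolean deleted = fs.delete(new Path("/test1"), true);
        System.out.println("deleted: " + deleted);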
HDFS_6_FileStatus
package HDFS;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

import java.io.IOException;
import java.util.Arrays;

public class HDFS_6_FileStatus {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://master:9000");
        conf.set("dfs.replication", "3");
        FileSystem fs = FileSystem.get(conf);
        /**
         * Params:
         *   f         – is the path
         *   recursive – if the subdirectories need to be traversed recursively (set to true to recurse)
         */
        RemoteIterator<LocatedFileStatus> lsr = fs.listFiles(new Path("/sanguo"), true);
        while (lsr.hasNext()) {
            LocatedFileStatus next = lsr.next();
            // Get the location information of every block
            BlockLocation[] blocks = next.getBlockLocations();
            // Block size
            System.out.println(next.getBlockSize());
            // Path
            System.out.println(next.getPath());
            // Owner
            System.out.println(next.getOwner());
            for (BlockLocation block : blocks) {
                // getHosts() and getNames() return arrays, so print them with Arrays.toString
                System.out.println("hosts:" + Arrays.toString(block.getHosts()));
                System.out.println("name:" + Arrays.toString(block.getNames()));
                System.out.println("length:" + block.getLength());
            }
        }
        // Close the filesystem after the iteration, not inside the loop
        fs.close();
        System.out.println("Finished");
    }
}
HDFS_7_isFileorDirector
package HDFS;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;

public class HDFS_7_isFileorDirector {
    public static void main(String[] args) throws IOException {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", "hdfs://master:9000");
        configuration.set("dfs.replication", "3");
        FileSystem fs = FileSystem.get(configuration);
        FileStatus[] fileStatuses = fs.listStatus(new Path("/sanguo"));
        for (FileStatus fileStatus : fileStatuses) {
            System.out.println(fileStatus.isFile());
            System.out.println(fileStatus.isDirectory());
        }
        fs.close();
        System.out.println("application has finished");
    }
}