Environment:
JDK 1.8
Maven 3.6.3
Hadoop 2.8.5
IntelliJ IDEA 2019.3
VMware 12.0
Configuring the Maven Environment for IDEA
Installing Maven and Modifying the Configuration File
Download the Maven archive and extract it to a local directory.
Local repository
Create a repository folder in the same directory as the extracted Maven folder.
Open the apache-maven-3.6.3\conf directory and open the settings.xml file.
Find the <localRepository> tag and set it to the path of the repository folder you just created.
Use your own path, for example:
<localRepository>E:\Enviro Config\maven\repository</localRepository>
Aliyun mirror
In settings.xml, find the <mirrors> tag and add the following inside it:
<!-- Aliyun repository -->
<mirror>
    <id>alimaven</id>
    <name>aliyun maven</name>
    <url>http://maven.aliyun.com/nexus/content/groups/public/</url>
    <mirrorOf>central</mirrorOf>
</mirror>
Configure the JDK version used by Maven
In settings.xml, find the <profiles> tag and add the following inside it:
<profile>
    <id>JDK-1.8</id>
    <activation>
        <activeByDefault>true</activeByDefault>
        <jdk>1.8</jdk>
    </activation>
    <properties>
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <maven.compiler.compilerVersion>1.8</maven.compiler.compilerVersion>
    </properties>
</profile>
Create a New Maven Project
Pay attention to the fields marked in red (in the screenshots); the names that follow can be anything you like.
Add the dependencies inside the <project> tag of pom.xml:
<dependencies>
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-common</artifactId>
        <version>2.8.5</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-hdfs -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-hdfs</artifactId>
        <version>2.8.5</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-mapreduce-client-core -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-mapreduce-client-core</artifactId>
        <version>2.8.5</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-client -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.8.5</version>
    </dependency>
    <!-- https://mvnrepository.com/artifact/org.apache.hadoop/hadoop-yarn-api -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-yarn-api</artifactId>
        <version>2.8.5</version>
    </dependency>
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>RELEASE</version>
    </dependency>
</dependencies>
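If any of these fail to resolve, check that the Aliyun mirror configured above is being used. Also note that the junit version RELEASE asks Maven for whatever the latest released version is; for reproducible builds you may prefer to pin a fixed version such as 4.12.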
Configuring Maven in IDEA
Open IDEA's Settings and find Maven.
Point the three fields to what you just configured: the Maven home directory, the Maven settings file, and the local repository, then adjust any other settings as needed.
HDFS Java API
Create a new package under main (src/main/java) in the project: com.itcast.hdfsdemo.
Create the Java classes below inside the package; the paths and IP addresses need to be changed to match your own environment. Don't forget to start HDFS before running:
start-all.sh
copyFromLocal.java
package com.itcast.hdfsdemo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class copyFromLocal {
    public static void main(String[] arg) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.74.133:9000");
        // Obtain the HDFS client (see the note after this class for setting the client identity explicitly)
        FileSystem fs = FileSystem.get(conf);
        // Local source file (change to your own path)
        Path src = new Path("local file path");
        // HDFS destination directory
        Path dst = new Path("hdfs://192.168.74.133:9000/mydir/");
        fs.copyFromLocalFile(src, dst);
        System.out.println("Upload succeeded!");
        fs.close();
    }
}
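If the upload fails with an AccessControlException because your local user name does not match the owner of the HDFS directory, you can obtain the FileSystem as an explicit user. A minimal sketch, assuming the HDFS user is called hadoop (both the class name and the user name are placeholders, adjust them to your cluster):

package com.itcast.hdfsdemo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.net.URI;
public class copyFromLocalAsUser {
    public static void main(String[] arg) throws Exception {
        Configuration conf = new Configuration();
        // Pass the user name explicitly so HDFS permission checks run as that user ("hadoop" is a placeholder)
        FileSystem fs = FileSystem.get(URI.create("hdfs://192.168.74.133:9000"), conf, "hadoop");
        fs.copyFromLocalFile(new Path("local file path"), new Path("/mydir/"));
        System.out.println("Upload succeeded!");
        fs.close();
    }
}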
copyToLocalFile.java
package com.itcast.hdfsdemo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class copyToLocalFile {
    public static void main(String[] arg) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.74.133:9000");
        // Obtain the HDFS client
        FileSystem fs = FileSystem.get(conf);
        // Local destination path (fill in your own)
        Path dst = new Path("");
        // HDFS source file
        Path src = new Path("hdfs://192.168.74.133:9000/page1.html");
        fs.copyToLocalFile(src, dst);
        System.out.println("Download succeeded!");
        fs.close();
    }
}
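On a Windows client without the Hadoop native utilities (winutils), copyToLocalFile can fail while writing the local .crc checksum file. The overload that writes through the raw local file system usually avoids this; a minimal sketch of the changed call, using the same paths as above (the class name is a placeholder):

package com.itcast.hdfsdemo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class copyToLocalFileRaw {
    public static void main(String[] arg) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.74.133:9000");
        FileSystem fs = FileSystem.get(conf);
        Path src = new Path("hdfs://192.168.74.133:9000/page1.html");
        // Local destination path (fill in your own)
        Path dst = new Path("");
        // delSrc=false keeps the HDFS copy; useRawLocalFileSystem=true skips the local checksum file
        fs.copyToLocalFile(false, src, dst, true);
        System.out.println("Download succeeded!");
        fs.close();
    }
}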
FileSystemCat.java
package com.itcast.hdfsdemo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import java.io.InputStream;
import java.net.URI;
public class FileSystemCat {
    public static void main(String[] arg) throws Exception {
        String uri = "hdfs://centos01:9000/page1.html";
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(uri), conf);
        // Open the HDFS file and copy its contents to standard output
        InputStream in = fs.open(new Path(uri));
        // false: do not close the streams automatically, so close the input stream explicitly below
        IOUtils.copyBytes(in, System.out, 4096, false);
        IOUtils.closeStream(in);
    }
}
getListMetaData.java
package com.itcast.hdfsdemo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import java.io.InputStream;
import java.net.URI;
public class getListMetaData {
    // Construct the configuration object
    static Configuration conf = new Configuration();
    // FileSystem handle for the HDFS to access
    private static FileSystem hdfs;
    public static void showDir(FileStatus fs) throws Exception {
        conf.set("fs.defaultFS", "hdfs://centos01:9000");
        hdfs = FileSystem.get(conf);
        Path path = fs.getPath();
        System.out.println(path.toString());
        if (fs.isDirectory()) {
            // Recurse into subdirectories
            FileStatus[] f = hdfs.listStatus(path);
            if (f.length > 0) {
                for (FileStatus file : f) {
                    showDir(file);
                }
            }
        } else {
            // For a regular file, print its contents to standard output
            String uri = fs.getPath().toString();
            Configuration conf = new Configuration();
            FileSystem hdfs = FileSystem.get(URI.create(uri), conf);
            InputStream in = hdfs.open(new Path(uri));
            IOUtils.copyBytes(in, System.out, 4096, false);
            IOUtils.closeStream(in);
        }
    }
    public static void main(String[] arg) throws Exception {
        conf.set("fs.defaultFS", "hdfs://centos01:9000");
        hdfs = FileSystem.get(conf);
        Path path = new Path("/");
        FileStatus[] list = hdfs.listStatus(path);
        if (list.length > 0) {
            for (FileStatus f : list) {
                showDir(f);
            }
        }
        hdfs.close();
    }
}
HDFSmkdir.java
package com.itcast.hdfsdemo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class HDFSmkdir {
    public static void main(String[] arg) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://centos01:9000");
        FileSystem hdfs = FileSystem.get(conf);
        // Create the /mydir directory on HDFS
        boolean isok = hdfs.mkdirs(new Path("hdfs:/mydir"));
        if (isok) {
            System.out.println("Directory created successfully!");
        } else {
            System.out.println("Failed to create the directory!");
        }
        hdfs.close();
    }
}
HDFSmkfile.java
package com.itcast.hdfsdemo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class HDFSmkfile {
    public static void main(String[] arg) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://centos01:9000");
        FileSystem hdfs = FileSystem.get(conf);
        // Create a new file on HDFS and write a line of text into it
        FSDataOutputStream outputStream = hdfs.create(new Path("hdfs:/mydir/newFile.txt"));
        outputStream.write("nice to meet you".getBytes());
        outputStream.close();
        hdfs.close();
    }
}
HDFSrm.java
package com.itcast.hdfsdemo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class HDFSrm {
    public static void main(String[] arg) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://centos01:9000");
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/mydir/newFile.txt");
        // false: do not delete recursively (not needed for a single file)
        boolean isDeleted = fs.delete(path, false);
        // Recursive delete (for non-empty directories):
        // boolean isDeleted = fs.delete(path, true);
        System.out.println("Deleted? " + isDeleted);
        fs.close();
    }
}
ListStatus.java
package com.itcast.hdfsdemo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class ListStatus {
    // Construct the configuration object
    static Configuration conf = new Configuration();
    // FileSystem handle for the HDFS to access
    private static FileSystem hdfs;
    public static void showDir(FileStatus fs) throws Exception {
        conf.set("fs.defaultFS", "hdfs://centos01:9000");
        hdfs = FileSystem.get(conf);
        Path path = fs.getPath();
        System.out.println(path.toString());
        if (fs.isDirectory()) {
            // Recurse into subdirectories
            FileStatus[] f = hdfs.listStatus(path);
            if (f.length > 0) {
                for (FileStatus file : f) {
                    showDir(file);
                }
            }
        }
    }
    public static void main(String[] arg) throws Exception {
        // Point the client at HDFS before obtaining the FileSystem (otherwise the local file system is listed)
        conf.set("fs.defaultFS", "hdfs://centos01:9000");
        hdfs = FileSystem.get(conf);
        Path path = new Path("/");
        FileStatus[] list = hdfs.listStatus(path);
        if (list.length > 0) {
            for (FileStatus f : list) {
                showDir(f);
            }
        }
        hdfs.close();
    }
}
MapReduce WordCount
Don't forget to prepare the input files first, and the output directory must not already exist, otherwise the job fails with an error.
Personally, I usually prefer to split the map, reduce, and job driver into separate classes, but here everything is kept in one file.
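Because the job fails when the output directory already exists, you can delete it with the HDFS API shown earlier before submitting the job. A minimal sketch, using the same /tmp/output path as the driver below (the class name is a placeholder; the same check can also be pasted into the driver's main before FileOutputFormat.setOutputPath):

package com.itcast.hdfsdemo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
public class ClearOutputDir {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://centos01:9000");
        FileSystem fs = FileSystem.get(conf);
        Path output = new Path("/tmp/output");
        // Recursively delete the output directory left over from a previous run
        if (fs.exists(output)) {
            fs.delete(output, true);
        }
        fs.close();
    }
}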
WordCountDemo.java
package com.itcast.hdfsdemo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
/**
 * MapReduce word count
 */
public class WordCountDemo {
    /**
     * Custom Mapper extending org.apache.hadoop.mapreduce.Mapper, implementing the map method
     */
    public static class WordCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        @Override
        protected void map(LongWritable key, Text value,
                           Mapper<LongWritable, Text, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // Split each input line into words and emit a (word, 1) pair per word
            String[] words = value.toString().split(" ");
            for (String word : words) {
                context.write(new Text(word), new IntWritable(1));
            }
        }
    }
    /**
     * Custom Reducer extending org.apache.hadoop.mapreduce.Reducer, implementing the reduce method
     */
    public static class WordCountReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                              Reducer<Text, IntWritable, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            // Sum the counts for each word
            int count = 0;
            for (IntWritable writable : values) {
                count += writable.get();
            }
            context.write(key, new IntWritable(count));
        }
    }
    /**
     * Job driver: set the parameters and submit the job
     * @param args
     * @throws Exception
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://centos01:9000");
        Job job = Job.getInstance(conf);
        job.setJarByClass(WordCountDemo.class);
        job.setMapperClass(WordCountMapper.class);
        job.setReducerClass(WordCountReducer.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.setInputPaths(job, new Path("/tmp/input"));
        FileOutputFormat.setOutputPath(job, new Path("/tmp/output"));
        boolean b = job.waitForCompletion(true);
        System.exit(b ? 0 : 1);
    }
}
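After the job finishes, the word counts are written under /tmp/output; with a single reducer the result is typically in /tmp/output/part-r-00000, which you can print with the FileSystemCat class above (after changing its uri) or with hdfs dfs -cat /tmp/output/part-r-00000.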