1、配置国内maven阿里更新库
在里面添加:
<mirror>
<id>alimaven</id>
<mirrorOf>central</mirrorOf>
<name>aliyun maven</name>
<url>https://maven.aliyun.com/repository/public</url>
</mirror>
复制代码
2、创建maven工程
启动idea
复制代码
在 pom.xml 中添加 HDFS 相关依赖配置
<properties>
<hadoop.version>2.7.3</hadoop.version>
</properties>
<dependencies>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.2</version>
</dependency>
<dependency>
<groupId>commons-logging</groupId>
<artifactId>commons-logging</artifactId>
<version>1.1.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-jobclient</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-app</artifactId>
<version>${hadoop.version}</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-hs</artifactId>
<version>${hadoop.version}</version>
</dependency>
</dependencies>
在 Windows 本地 C 盘的 hosts 文件中配置(文件位置:C:\Windows\System32\drivers\etc)
192.168.200.11 node1
192.168.200.12 node2
192.168.200.13 node3
复制代码
3、复制HDFS配置文件到项目resource根目录下
4、编写上传代码
package hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import java.io.*;
import java.net.URI;
/**
* 将本地文件系统的文件通过java-API写入到HDFS文件
*/
/**
 * Copies a file from the local file system into HDFS via the Hadoop Java API.
 *
 * <p>Usage: {@code FileCopyFromLocal <localSrcPath> <hdfsDestUri>}
 * e.g. {@code FileCopyFromLocal /tmp/a.xml hdfs://node1:9000/data/a.xml}
 */
public class FileCopyFromLocal {
    public static void main(String[] args) {
        // Fail fast with a usage message instead of an ArrayIndexOutOfBoundsException.
        if (args.length < 2) {
            System.err.println("Usage: FileCopyFromLocal <localSrcPath> <hdfsDestUri>");
            System.exit(1);
        }
        String source = args[0];      // local file path
        String destination = args[1]; // HDFS destination URI
        // try-with-resources guarantees the local stream is closed even if
        // FileSystem.get or fs.create throws before copyBytes runs
        // (the original leaked `in` on that path).
        try (InputStream in = new BufferedInputStream(new FileInputStream(source))) {
            // Configuration picks up core-site.xml / hdfs-site.xml from the classpath.
            Configuration conf = new Configuration();
            FileSystem fs = FileSystem.get(URI.create(destination), conf);
            // create() returns an FSDataOutputStream; the stream does not support
            // seeking, because HDFS only allows sequential write or append.
            OutputStream out = fs.create(new Path(destination));
            // close=true: copyBytes closes both streams when the copy finishes.
            IOUtils.copyBytes(in, out, 4096, true);
        } catch (IOException e) {
            // Single catch suffices: FileNotFoundException is a subtype of IOException.
            e.printStackTrace();
        }
    }
}
复制代码
配置路径:/opt/bigdata/hadoop-2.7.3/etc/hadoop/kms-site.xml hdfs://node1:9000/data/kms-site.xml
5、编写从hdfs下载到本地代码
package hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import java.io.BufferedOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
/**
* 从HDFS读取文件
* 打包运行jar包 [bruce@node-01 Desktop]$ hadoop jar com.kaikeba.hadoop-1.0-SNAPSHOT.jar com.kaikeba.hadoop.hdfs.FileReadFromHdfs
*/
/**
 * Downloads a file from HDFS to the local file system.
 *
 * <p>Usage: {@code FileReadFromHdfs [hdfsSrcUri] [localDestPath]}
 * Both arguments are optional; when omitted, the original tutorial defaults
 * are used, so existing invocations keep working.
 *
 * <p>Run from a jar: {@code hadoop jar xxx.jar hdfs.FileReadFromHdfs}
 */
public class FileReadFromHdfs {
    public static void main(String[] args) {
        // Optional args generalize the previously hard-coded paths
        // while staying backward compatible with zero-arg invocation.
        String srcFile = args.length > 0 ? args[0] : "hdfs://node1:9000/data/kms-site.xml";
        String destFile = args.length > 1 ? args[1] : "/home/hadoop/Templates/kms-site.xml";
        Configuration conf = new Configuration();
        try {
            FileSystem fs = FileSystem.get(URI.create(srcFile), conf);
            // try-with-resources closes the HDFS input stream even if opening
            // the local output file throws (the original leaked it there).
            try (FSDataInputStream hdfsInStream = fs.open(new Path(srcFile));
                 BufferedOutputStream outputStream =
                         new BufferedOutputStream(new FileOutputStream(destFile))) {
                // close=false: try-with-resources already owns both streams.
                IOUtils.copyBytes(hdfsInStream, outputStream, 4096, false);
            }
        } catch (IOException e) {
            // FileNotFoundException is a subtype of IOException; one catch is enough.
            e.printStackTrace();
        }
    }
}
复制代码
6、SequenceFiles代码
package sequenceFiles;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import java.io.IOException;
import java.net.URI;
public class SequenceFileWriteNewVersion {
private static final String[] DATA = {
"One, two, buckle my shoe",
"Three, four, shut the door",
"Five, six, pick up sticks",
"Seven, eight, lay them straight",
"Nine, ten, a big fat hen"
};
public static void main(String[] args) {
//输出路径
String uri = args[0];
Configuration conf = new Configuration();
try {
FileSystem fs = FileSystem.get(URI.create(uri), conf);
Path path = new Path(args[0]);
IntWritable key = new IntWritable();
Text value = new Text();
SequenceFile.Writer.Option pathOption = SequenceFile.Writer.file(path);
SequenceFile.Writer.Option keyOption = SequenceFile.Writer.keyClass(IntWritable.class);
SequenceFile.Writer.Option valueOption = SequenceFile.Writer.valueClass(Text.class);
SequenceFile.Writer.Option compressOption = SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK);
SequenceFile.Writer writer = SequenceFile.createWriter(conf, pathOption, keyOption, valueOption, compressOption);
for (int i = 0; i < 100; i++) {
key.set(100 - i);
value.set(DATA[i % DATA.length]);
System.out.printf("[%s]\t%s\t%s\n", writer.getLength(), key, value);
writer.append(key, value);
}
IOUtils.closeStream(writer);
} catch (IOException e) {
e.printStackTrace();
}
}
复制代码
}