本文使用 Jupyter Notebook 实现。
1、Maven引入Jar包
1.1 Maven导入方式(在线导入,特别慢)
%%loadFromPOM
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>3.3.3</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>3.3.3</version>
</dependency>
1.2 本地导入方式(快速但是复杂)
%jars /home/jovyan/mavenlib/hadoop3.3.3/accessors-smart-2.4.7.jar /home/jovyan/mavenlib/hadoop3.3.3/animal-sniffer-annotations-1.17.jar /home/jovyan/mavenlib/hadoop3.3.3/asm-9.1.jar /home/jovyan/mavenlib/hadoop3.3.3/audience-annotations-0.5.0.jar /home/jovyan/mavenlib/hadoop3.3.3/avro-1.7.7.jar /home/jovyan/mavenlib/hadoop3.3.3/checker-qual-2.5.2.jar /home/jovyan/mavenlib/hadoop3.3.3/commons-beanutils-1.9.4.jar /home/jovyan/mavenlib/hadoop3.3.3/commons-cli-1.2.jar /home/jovyan/mavenlib/hadoop3.3.3/commons-codec-1.15.jar /home/jovyan/mavenlib/hadoop3.3.3/commons-collections-3.2.2.jar /home/jovyan/mavenlib/hadoop3.3.3/commons-compress-1.21.jar /home/jovyan/mavenlib/hadoop3.3.3/commons-configuration2-2.1.1.jar /home/jovyan/mavenlib/hadoop3.3.3/commons-io-2.8.0.jar /home/jovyan/mavenlib/hadoop3.3.3/commons-lang3-3.12.0.jar /home/jovyan/mavenlib/hadoop3.3.3/commons-logging-1.1.3.jar /home/jovyan/mavenlib/hadoop3.3.3/commons-math3-3.1.1.jar /home/jovyan/mavenlib/hadoop3.3.3/commons-net-3.6.jar /home/jovyan/mavenlib/hadoop3.3.3/commons-text-1.4.jar /home/jovyan/mavenlib/hadoop3.3.3/curator-client-4.2.0.jar /home/jovyan/mavenlib/hadoop3.3.3/curator-framework-4.2.0.jar /home/jovyan/mavenlib/hadoop3.3.3/curator-recipes-4.2.0.jar /home/jovyan/mavenlib/hadoop3.3.3/dnsjava-2.1.7.jar /home/jovyan/mavenlib/hadoop3.3.3/failureaccess-1.0.jar /home/jovyan/mavenlib/hadoop3.3.3/gson-2.8.9.jar /home/jovyan/mavenlib/hadoop3.3.3/guava-27.0-jre.jar /home/jovyan/mavenlib/hadoop3.3.3/hadoop-annotations-3.3.3.jar /home/jovyan/mavenlib/hadoop3.3.3/hadoop-auth-3.3.3.jar /home/jovyan/mavenlib/hadoop3.3.3/hadoop-client-3.3.3.jar /home/jovyan/mavenlib/hadoop3.3.3/hadoop-common-3.3.3.jar /home/jovyan/mavenlib/hadoop3.3.3/hadoop-hdfs-client-3.3.3.jar /home/jovyan/mavenlib/hadoop3.3.3/hadoop-mapreduce-client-common-3.3.3.jar /home/jovyan/mavenlib/hadoop3.3.3/hadoop-mapreduce-client-core-3.3.3.jar /home/jovyan/mavenlib/hadoop3.3.3/hadoop-mapreduce-client-jobclient-3.3.3.jar 
/home/jovyan/mavenlib/hadoop3.3.3/hadoop-shaded-guava-1.1.1.jar /home/jovyan/mavenlib/hadoop3.3.3/hadoop-shaded-protobuf_3_7-1.1.1.jar /home/jovyan/mavenlib/hadoop3.3.3/hadoop-yarn-api-3.3.3.jar /home/jovyan/mavenlib/hadoop3.3.3/hadoop-yarn-client-3.3.3.jar /home/jovyan/mavenlib/hadoop3.3.3/hadoop-yarn-common-3.3.3.jar /home/jovyan/mavenlib/hadoop3.3.3/httpclient-4.5.13.jar /home/jovyan/mavenlib/hadoop3.3.3/httpcore-4.4.13.jar /home/jovyan/mavenlib/hadoop3.3.3/j2objc-annotations-1.1.jar /home/jovyan/mavenlib/hadoop3.3.3/jackson-annotations-2.13.2.jar /home/jovyan/mavenlib/hadoop3.3.3/jackson-core-2.13.2.jar /home/jovyan/mavenlib/hadoop3.3.3/jackson-core-asl-1.9.2.jar /home/jovyan/mavenlib/hadoop3.3.3/jackson-databind-2.13.2.2.jar /home/jovyan/mavenlib/hadoop3.3.3/jackson-jaxrs-1.9.2.jar /home/jovyan/mavenlib/hadoop3.3.3/jackson-jaxrs-base-2.13.2.jar /home/jovyan/mavenlib/hadoop3.3.3/jackson-jaxrs-json-provider-2.13.2.jar /home/jovyan/mavenlib/hadoop3.3.3/jackson-mapper-asl-1.9.2.jar /home/jovyan/mavenlib/hadoop3.3.3/jackson-module-jaxb-annotations-2.13.2.jar /home/jovyan/mavenlib/hadoop3.3.3/jackson-xc-1.9.2.jar /home/jovyan/mavenlib/hadoop3.3.3/jakarta.activation-api-1.2.1.jar /home/jovyan/mavenlib/hadoop3.3.3/jakarta.xml.bind-api-2.3.3.jar /home/jovyan/mavenlib/hadoop3.3.3/javax.servlet-api-3.1.0.jar /home/jovyan/mavenlib/hadoop3.3.3/javax.ws.rs-api-2.1.1.jar /home/jovyan/mavenlib/hadoop3.3.3/jaxb-api-2.2.11.jar /home/jovyan/mavenlib/hadoop3.3.3/jaxb-impl-2.2.3-1.jar /home/jovyan/mavenlib/hadoop3.3.3/jcip-annotations-1.0-1.jar /home/jovyan/mavenlib/hadoop3.3.3/jersey-client-1.19.jar /home/jovyan/mavenlib/hadoop3.3.3/jersey-core-1.19.jar /home/jovyan/mavenlib/hadoop3.3.3/jersey-json-1.19.jar /home/jovyan/mavenlib/hadoop3.3.3/jersey-server-1.19.jar /home/jovyan/mavenlib/hadoop3.3.3/jersey-servlet-1.19.jar /home/jovyan/mavenlib/hadoop3.3.3/jettison-1.1.jar /home/jovyan/mavenlib/hadoop3.3.3/jetty-client-9.4.43.v20210629.jar 
/home/jovyan/mavenlib/hadoop3.3.3/jetty-http-9.4.43.v20210629.jar /home/jovyan/mavenlib/hadoop3.3.3/jetty-io-9.4.43.v20210629.jar /home/jovyan/mavenlib/hadoop3.3.3/jetty-security-9.4.43.v20210629.jar /home/jovyan/mavenlib/hadoop3.3.3/jetty-server-9.4.43.v20210629.jar /home/jovyan/mavenlib/hadoop3.3.3/jetty-servlet-9.4.43.v20210629.jar /home/jovyan/mavenlib/hadoop3.3.3/jetty-util-9.4.43.v20210629.jar /home/jovyan/mavenlib/hadoop3.3.3/jetty-util-ajax-9.4.43.v20210629.jar /home/jovyan/mavenlib/hadoop3.3.3/jetty-webapp-9.4.43.v20210629.jar /home/jovyan/mavenlib/hadoop3.3.3/jetty-xml-9.4.43.v20210629.jar /home/jovyan/mavenlib/hadoop3.3.3/jline-3.9.0.jar /home/jovyan/mavenlib/hadoop3.3.3/jsch-0.1.55.jar /home/jovyan/mavenlib/hadoop3.3.3/json-smart-2.4.7.jar /home/jovyan/mavenlib/hadoop3.3.3/jsp-api-2.1.jar /home/jovyan/mavenlib/hadoop3.3.3/jsr305-3.0.2.jar /home/jovyan/mavenlib/hadoop3.3.3/jsr311-api-1.1.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerb-admin-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerb-client-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerb-common-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerb-core-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerb-crypto-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerb-identity-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerb-server-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerb-simplekdc-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerb-util-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerby-asn1-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerby-config-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerby-pkix-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerby-util-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/kerby-xdr-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar /home/jovyan/mavenlib/hadoop3.3.3/log4j-1.2.17.jar /home/jovyan/mavenlib/hadoop3.3.3/netty-buffer-4.1.42.Final.jar /home/jovyan/mavenlib/hadoop3.3.3/netty-codec-4.1.42.Final.jar 
/home/jovyan/mavenlib/hadoop3.3.3/netty-common-4.1.42.Final.jar /home/jovyan/mavenlib/hadoop3.3.3/netty-handler-4.1.42.Final.jar /home/jovyan/mavenlib/hadoop3.3.3/netty-resolver-4.1.42.Final.jar /home/jovyan/mavenlib/hadoop3.3.3/netty-transport-4.1.42.Final.jar /home/jovyan/mavenlib/hadoop3.3.3/netty-transport-native-epoll-4.1.42.Final.jar /home/jovyan/mavenlib/hadoop3.3.3/netty-transport-native-unix-common-4.1.42.Final.jar /home/jovyan/mavenlib/hadoop3.3.3/nimbus-jose-jwt-9.8.1.jar /home/jovyan/mavenlib/hadoop3.3.3/okhttp-2.7.5.jar /home/jovyan/mavenlib/hadoop3.3.3/okio-1.6.0.jar /home/jovyan/mavenlib/hadoop3.3.3/paranamer-2.3.jar /home/jovyan/mavenlib/hadoop3.3.3/protobuf-java-2.5.0.jar /home/jovyan/mavenlib/hadoop3.3.3/re2j-1.1.jar /home/jovyan/mavenlib/hadoop3.3.3/reload4j-1.2.18.3.jar /home/jovyan/mavenlib/hadoop3.3.3/slf4j-api-1.7.36.jar /home/jovyan/mavenlib/hadoop3.3.3/slf4j-log4j12-1.7.25.jar /home/jovyan/mavenlib/hadoop3.3.3/slf4j-reload4j-1.7.36.jar /home/jovyan/mavenlib/hadoop3.3.3/snappy-java-1.1.8.2.jar /home/jovyan/mavenlib/hadoop3.3.3/stax2-api-4.2.1.jar /home/jovyan/mavenlib/hadoop3.3.3/token-provider-1.0.1.jar /home/jovyan/mavenlib/hadoop3.3.3/websocket-api-9.4.43.v20210629.jar /home/jovyan/mavenlib/hadoop3.3.3/websocket-client-9.4.43.v20210629.jar /home/jovyan/mavenlib/hadoop3.3.3/websocket-common-9.4.43.v20210629.jar /home/jovyan/mavenlib/hadoop3.3.3/woodstox-core-5.3.0.jar /home/jovyan/mavenlib/hadoop3.3.3/zookeeper-3.5.6.jar /home/jovyan/mavenlib/hadoop3.3.3/zookeeper-jute-3.5.6.jar
2、导入相关的包
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
// Notebook-wide state shared by the cells below.

// Local scratch directory, read from the JVM's java.io.tmpdir property.
String localPath = System.getProperty("java.io.tmpdir");
System.out.println(localPath);

// Handle to the remote file system; null until the connection cell sets it.
FileSystem fs = null;

// Home directory on the remote file system; null until it is looked up.
Path homePath = null;
3、连接文件系统
// Open a connection to the HDFS instance at g10master:9000 as user "root".
Configuration configuration = new Configuration();
// Per the HDFS configuration reference: connect to DataNodes by hostname.
configuration.set("dfs.client.use.datanode.hostname", "true");
// Per the HDFS configuration reference: never swap in a replacement DataNode
// when one in the write pipeline fails.
configuration.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
// Assign the notebook-level `fs` declared with the other globals rather than
// declaring a second variable of the same name.
fs = FileSystem.get(new URI("hdfs://g10master:9000"), configuration, "root");
System.out.println("文件系统连接成功,当前用户的家目录为:" + fs.getHomeDirectory());
4、获取家目录
// Store the connected user's home directory in the notebook-level variable, then print it.
homePath = fs.getHomeDirectory();
System.out.println(homePath);
5、判断文件或者文件夹是否存在
/**
 * Reports whether the given path exists on the connected file system.
 *
 * @param src path of the file or directory to look up
 * @return the value returned by {@code fs.exists} for that path
 * @throws Exception if the file-system call fails
 */
public Boolean exists(String src) throws Exception {
    return fs.exists(new Path(src));
}

System.out.println(exists("/user/root/test.txt"));
6、创建文件夹
/**
 * Creates a directory (including missing parents) unless the path already exists,
 * and prints which of the two cases applied.
 *
 * @param destFile path of the directory to create
 * @throws Exception if a file-system call fails
 */
public void mkdir(String destFile) throws Exception {
    Path dir = new Path(destFile);
    if (fs.exists(dir)) {
        System.out.println("文件夹已经存在");
        return;
    }
    fs.mkdirs(dir);
    System.out.println("创建文件夹成功");
}

mkdir("/user/root/zhli");
7、创建文件
/**
 * Creates an empty file, overwriting any existing file at the same path.
 *
 * @param fileName path of the file to create
 * @throws Exception if the file-system call fails
 */
public void makeNewFile(String fileName) throws Exception {
    Path path = new Path(fileName);
    // create() returns an open output stream; close it immediately so the
    // handle is released instead of being leaked.
    fs.create(path, true).close();
    System.out.println("文件创建成功");
}

makeNewFile("/user/root/zhli2.txt");
8、创建一个新文件并写入内容
/**
 * Creates a file (overwriting any existing one) and writes the given text to it as UTF-8.
 *
 * @param fileName path of the file to create
 * @param content  text to store in the new file
 * @throws Exception if creating or writing the file fails
 */
public void makeNewFile(String fileName, String content) throws Exception {
    Path path = new Path(fileName);
    // try-with-resources closes the stream even when write() throws.
    try (FSDataOutputStream out = fs.create(path, true)) {
        out.write(content.getBytes(StandardCharsets.UTF_8));
    }
    System.out.println("文件创建成功");
}

makeNewFile("/user/root/zhli3.txt","3333");
9、查看文件内容
/**
 * Prints a remote text file to standard output, one line at a time.
 *
 * @param destFile path of the file to print
 * @throws Exception if the file cannot be opened or read
 */
public void read(String destFile) throws Exception {
    // Decode explicitly as UTF-8 (the encoding the write helpers in this notebook use)
    // instead of the platform default; try-with-resources closes the stream on any exit.
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(fs.open(new Path(destFile)), StandardCharsets.UTF_8))) {
        String line;
        while ((line = reader.readLine()) != null) {
            System.out.println(line);
        }
    }
}

read("/user/root/test.txt");
10、删除文件
/**
 * Deletes a file or directory if it exists and reports the outcome.
 *
 * @param destFile path of the file or directory to delete
 * @throws Exception if a file-system call fails
 */
public void rmFile(String destFile) throws Exception {
    Path target = new Path(destFile);
    if (!fs.exists(target)) {
        System.out.println("文件不存在,无法删除");
        return;
    }
    // Use the two-argument delete instead of the deprecated one-argument form.
    // recursive=true keeps directories deletable, as the demo call below requires.
    // Only report success when delete() actually returns true.
    if (fs.delete(target, true)) {
        System.out.println("文件删除成功");
    } else {
        System.out.println("文件删除失败");
    }
}

rmFile("/user/root/zhli");
11、上传文件
/**
 * Uploads a local file to the remote file system unless the destination already exists.
 * If the local file is missing, a sample file containing the current timestamp is
 * generated first so that there is something to upload.
 *
 * @param localFile path of the local file to upload
 * @param destFile  remote destination path
 * @throws Exception if local or remote I/O fails
 */
public void uploadFile(String localFile, String destFile) throws Exception {
    File file = new File(localFile);
    // Generate sample content only when the local file is missing; an existing
    // file is uploaded as-is rather than being overwritten with a timestamp.
    if (!file.exists()) {
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(file))) {
            writer.write(System.currentTimeMillis() + "");
        }
    }

    Path src = new Path(localFile);
    Path dst = new Path(destFile);
    if (fs.exists(dst)) {
        System.out.println(destFile+"文件存在,无法上传");
    } else {
        fs.copyFromLocalFile(src, dst);
        System.out.println("文件上传成功");
    }
}

uploadFile("/home/jovyan/hosts.txt","/user/root/host"+System.currentTimeMillis()+".txt");
12、下载文件
/**
 * Downloads a remote file to a local path and prints the downloaded content.
 * Nothing is downloaded when the remote file is missing or the local path is already taken.
 *
 * @param destFile  remote path of the file to download
 * @param localFile local path to save the file to
 * @throws Exception if remote or local I/O fails
 */
public void downloadFile(String destFile, String localFile) throws Exception {
    Path destPath = new Path(destFile);
    if (!fs.exists(destPath)) {
        System.out.println("hadoop上没有该文件,无法下载");
        return;
    }

    File file = new File(localFile);
    if (file.exists()) {
        System.out.println("本地已存在当前文件,无法下载保存");
        return;
    }

    fs.copyToLocalFile(false, destPath, new Path(localFile), true);
    System.out.println("文件下载成功");

    // Decode explicitly as UTF-8 (the encoding the write helpers in this notebook use)
    // instead of the platform default; try-with-resources closes the reader on any exit.
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
        System.out.println("下载下来的文件内容是:");
        String line;
        while ((line = reader.readLine()) != null) {
            System.out.println(line);
        }
    }
}

downloadFile("/user/root/test.txt","/home/jovyan/"+System.currentTimeMillis()+".txt");
13、文件重命名
/**
 * Renames a remote path, provided the source exists and the target does not, and reports the outcome.
 *
 * @param oldName current path
 * @param newName desired path
 * @throws Exception if a file-system call fails
 */
public void rename(String oldName, String newName) throws Exception {
    Path oldPath = new Path(oldName);
    Path newPath = new Path(newName);
    if (!fs.exists(oldPath) || fs.exists(newPath)) {
        System.out.println("源文件不存在或者新文件已经存在,无法重命名");
        return;
    }
    // rename() reports failure through its boolean result, so only report
    // success when it actually returns true.
    if (fs.rename(oldPath, newPath)) {
        System.out.println("文件重命名成功");
    } else {
        System.out.println("文件重命名失败");
    }
}

rename("/user/root/test.txt","/user/root/test123.txt");
14、向文件中追加内容
/**
 * Appends text (encoded as UTF-8) to an existing remote file.
 * Prints a message and does nothing else when the file does not exist.
 *
 * @param destFile path of the file to append to
 * @param content  text to append
 * @throws Exception if a file-system call fails
 */
public void appendToFile(String destFile, String content) throws Exception {
    Path path = new Path(destFile);
    if (!fs.exists(path)) {
        System.out.println("文件不存在,无法追加内容");
        return;
    }
    // try-with-resources closes the stream even when write() throws.
    try (FSDataOutputStream out = fs.append(path)) {
        out.write(content.getBytes(StandardCharsets.UTF_8));
    }
    System.out.println("内容追加成功");
}

appendToFile("/user/root/test.txt","我刚加入的内容"+System.currentTimeMillis());
15、文件合并
/**
 * Appends every line of one remote file to the end of another, leaving the source in place.
 * Each copied line is terminated with {@code "\n"}. Nothing is written unless both files exist.
 *
 * @param srcFile  path of the file to read from
 * @param DestFile path of the file to append to
 * @throws Exception if a file-system call fails
 */
public void concatFile(String srcFile, String DestFile) throws Exception {
    Path srcPath = new Path(srcFile);
    Path destPath = new Path(DestFile);
    if (!fs.exists(srcPath) || !fs.exists(destPath)) {
        System.out.println("文件不存在");
        return;
    }

    // Read with the same charset that is used for writing (UTF-8) so that text is
    // not re-encoded through the platform default; both streams are closed on any exit.
    try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(fs.open(srcPath), StandardCharsets.UTF_8));
         FSDataOutputStream out = fs.append(destPath)) {
        String line;
        while ((line = reader.readLine()) != null) {
            out.write((line+"\n").getBytes(StandardCharsets.UTF_8));
        }
    }
    System.out.println("文件合并成功");
}

concatFile("/user/root/test.txt","/user/root/test123.txt");
16、列举文件夹下的所有文件
/**
 * Recursively lists the files under a remote directory, printing each file name.
 * With {@code showDetails} set, also prints block size, permission, length and,
 * for every block, its length, offset and hosts, followed by a separator line.
 *
 * @param destPath    directory to list
 * @param showDetails whether to print the per-file details
 * @throws Exception if a file-system call fails
 */
public void listFile(String destPath, Boolean showDetails) throws Exception {
    RemoteIterator<LocatedFileStatus> entries = fs.listFiles(new Path(destPath), true);
    while (entries.hasNext()) {
        LocatedFileStatus status = entries.next();
        System.out.println(status.getPath().getName());
        if (!showDetails) {
            continue;
        }

        System.out.println(status.getBlockSize());
        System.out.println(status.getPermission());
        System.out.println(status.getLen());
        for (BlockLocation block : status.getBlockLocations()) {
            System.out.println("block-len:" + block.getLength() + "--" + "block-offset:" + block.getOffset());
            for (String host : block.getHosts()) {
                System.out.println(host);
            }
        }
        System.out.println("------------------");
    }
}

listFile("/user/root/",false);
17、合并多个文件
注意:合并后,源文件将被删除
/**
 * Merges several remote files into one via {@code fs.concat}, creating an empty
 * destination first when it does not exist yet. As the notebook notes, the source
 * files are removed by this kind of merge.
 *
 * @param dest path of the file that receives the merged content
 * @param srcs paths of the files to merge into {@code dest}
 * @throws Exception if a file-system call fails
 */
public void appendToFile(String dest, String... srcs) throws Exception {
    Path target = new Path(dest);

    // Convert the source names straight into the Path[] that concat() expects.
    Path[] sources = new Path[srcs.length];
    for (int i = 0; i < srcs.length; i++) {
        sources[i] = new Path(srcs[i]);
    }

    if (!fs.exists(target)) {
        makeNewFile(dest,"");
    }
    fs.concat(target, sources);
    System.out.println("文件合并成功");
}

appendToFile("/user/root/test.txt","/user/root/host1696937069683.txt","/user/root/test1696935703640.txt","test1696935228413.txt");
18、改良后的多个文件合并
/**
 * Merges several remote files into {@code dest}.
 * With {@code del} set, the merge is delegated to {@code fs.concat}. Otherwise the
 * sources are copied line by line onto the end of {@code dest} (which is created when
 * missing), each copied line preceded by a line separator.
 *
 * @param dest file that receives the merged content
 * @param srcs files to merge into {@code dest}
 * @param del  {@code true} to merge with {@code fs.concat}, {@code false} to copy the lines
 * @throws Exception if a file-system call fails
 */
private void appendToFile(Path dest, Path[] srcs, boolean del) throws Exception {
    if (del) {
        fs.concat(dest, srcs);
        return;
    }

    if (!fs.exists(dest)) {
        fs.createNewFile(dest);
    }
    // Decode and encode explicitly as UTF-8 so the copy does not depend on the platform
    // default charset; try-with-resources closes every stream even if a copy fails midway.
    try (BufferedWriter writer = new BufferedWriter(
            new OutputStreamWriter(fs.append(dest), StandardCharsets.UTF_8))) {
        for (Path src : srcs) {
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(fs.open(src), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    writer.newLine();
                    writer.write(line);
                }
            }
        }
    }
}

appendToFile(new Path("/user/root/test.txt"),new Path[]{new Path("/user/root/test1696935228413.txt"),new Path("/user/root/test1696935703640.txt")},false);
19、关闭文件系统
// Close the file-system handle if one was opened; a failure while closing is
// printed as a stack trace rather than propagated.
try {
    if (fs != null) {
        fs.close();
    }
} catch (IOException closeFailure) {
    closeFailure.printStackTrace();
}
Java操作Hadoop文件大作业
作业描述
- 1. 在 g10master 主机的 `/user/` 目录下创建一个以组号为名的文件夹,例如第10组就是 `/user/g10`。
- 2. 在 `/user/组号/` 文件夹下创建以小组成员学号为名称的文本文件,内容为学号+姓名,有几个学生创建几个文件。例如文件名 `12006242601.txt`,内容为:`12006242601 张亮`。
- 3. 将所有学生文件合并到 `/user/组号/组号.txt` 文件中,要求保留原来的文件。
- 4. 查看 `/user/组号/组号.txt` 文件的内容。
- 5. 查看 `/user/组号/` 下所有文件的信息。
- 6. 将 `/user/组号/组号.txt` 文件下载到 `/home/jovyan/组号.txt`。
参考代码