Common HDFS Commands
Log in to master as a regular user:
cd ~, ls /
List the HDFS root directory:
hadoop fs -ls hdfs://master:9999/ (no files yet)
Equivalent to: hadoop fs -ls /
Also equivalent to: hdfs dfs -ls /
Create a directory under the root:
hadoop fs -mkdir /user
Equivalent to: hadoop fs -mkdir hdfs://master:9999/user
Refresh the WebUI and you will see the new user directory.
Create a nested directory: hadoop fs -mkdir /user/hadoop-twq/cmd (fails: the parent directory does not exist)
The correct way: hadoop fs -mkdir -p /user/hadoop-twq/cmd (-p creates parent directories recursively)
Upload the local file word.txt to HDFS:
hadoop fs -copyFromLocal word.txt /user/hadoop-twq/cmd
Verify: hadoop fs -ls /user/hadoop-twq/cmd (the txt file is there)
Uploading the same file again fails because it already exists; add -f to overwrite:
hadoop fs -copyFromLocal -f word.txt /user/hadoop-twq/cmd
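The same overwrite switch exists in the Java API used later in these notes. A minimal sketch (the class name and the local path are illustrative, not from the original):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.net.URI;

public class OverwriteUpload {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new URI("hdfs://master:9999"), new Configuration());
        // delSrc = false keeps the local file; overwrite = true mirrors the -f flag
        fs.copyFromLocalFile(false, true,
                new Path("file:///home/hadoop-twq/word.txt"),  // hypothetical local path
                new Path("/user/hadoop-twq/cmd/word.txt"));
        fs.close();
    }
}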
View the file contents:
hadoop fs -cat /user/hadoop-twq/cmd/word.txt
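For comparison, the Java equivalent of -cat is to open the file and stream it to stdout. A sketch (class name illustrative):

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.net.URI;

public class CatFile {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new URI("hdfs://master:9999"), new Configuration());
        // open() returns an FSDataInputStream; copy it to stdout like `hadoop fs -cat`
        FSDataInputStream in = fs.open(new Path("/user/hadoop-twq/cmd/word.txt"));
        IOUtils.copy(in, System.out);
        IOUtils.closeQuietly(in);
        fs.close();
    }
}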
Upload with put instead of copyFromLocal:
hadoop fs -put -f word.txt /user/hadoop-twq/cmd
Upload two files at once:
hadoop fs -put test.sh word2.txt /user/hadoop-twq/cmd
put can also write a stream from stdin into HDFS:
hadoop fs -put - /user/hadoop-twq/cmd/put.txt (type the content, finish with Ctrl+D), then view the result
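Under the hood this is just a streaming write. A Java sketch of the same idea (class name illustrative):

import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.net.URI;

public class StdinPut {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new URI("hdfs://master:9999"), new Configuration());
        // create() returns an FSDataOutputStream; copy stdin into it, like `hadoop fs -put -`
        FSDataOutputStream out = fs.create(new Path("/user/hadoop-twq/cmd/put.txt"));
        IOUtils.copy(System.in, out);
        IOUtils.closeQuietly(out);
        fs.close();
    }
}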
Download put.txt:
hadoop fs -get /user/hadoop-twq/cmd/put.txt
Create an empty file:
hadoop fs -touchz /user/hadoop-twq/cmd/flag.txt
Move flag.txt into the /user/hadoop-twq directory:
hadoop fs -mv /user/hadoop-twq/cmd/flag.txt /user/hadoop-twq
Change the permissions of put.txt to 744:
hadoop fs -chmod 744 /user/hadoop-twq/cmd/put.txt
Change the permissions of the cmd directory recursively to 777:
hadoop fs -chmod -R 777 /user/hadoop-twq/cmd
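The Java counterpart is FileSystem.setPermission. A sketch (class name illustrative); note that FsPermission takes an octal short, and that unlike -chmod -R the call is not recursive:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import java.net.URI;

public class SetPermission {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new URI("hdfs://master:9999"), new Configuration());
        // 0744 is an octal literal, matching `hadoop fs -chmod 744`
        fs.setPermission(new Path("/user/hadoop-twq/cmd/put.txt"), new FsPermission((short) 0744));
        // setPermission applies to one path only; walk the tree yourself to emulate -R
        fs.close();
    }
}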
Check the size of big_file.txt locally:
ll -h (shows 153M)
Upload big_file.txt to HDFS:
hadoop fs -put big_file.txt /user/hadoop-twq/cmd
Compare the variants of the ls command:
hadoop fs -ls /user/hadoop-twq/cmd
hadoop fs -ls -h /user/hadoop-twq/cmd (human-readable sizes)
hadoop fs -ls -d /user/hadoop-twq/cmd (shows the directory entry itself, not its contents)
hadoop fs -ls -R /user/hadoop-twq/cmd (recursive)
hadoop fs -ls -R /user/hadoop-twq
hadoop fs -ls -R /user
Check file sizes:
hadoop fs -du /user/hadoop-twq/cmd
hadoop fs -du -h /user/hadoop-twq/cmd (human-readable)
hadoop fs -du -s -h /user/hadoop-twq/cmd (total size of the whole directory)
Check how much space HDFS has overall:
hadoop fs -df
hadoop fs -df -h (human-readable)
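Both numbers are also exposed by the Java API: getContentSummary corresponds to du -s, and getStatus to df. A sketch (class name illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.net.URI;

public class SpaceUsage {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new URI("hdfs://master:9999"), new Configuration());
        // like `hadoop fs -du -s`: total logical size of everything under the path
        ContentSummary summary = fs.getContentSummary(new Path("/user/hadoop-twq/cmd"));
        System.out.println("bytes used: " + summary.getLength());
        // like `hadoop fs -df`: capacity and remaining space of the whole filesystem
        FsStatus status = fs.getStatus();
        System.out.println("capacity: " + status.getCapacity()
                + ", remaining: " + status.getRemaining());
        fs.close();
    }
}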
In short, HDFS commands closely mirror their Linux counterparts, and the filesystem is organized as the same kind of directory tree.
Version: CDH Hadoop 2.6
Import the required jars (Maven):
<repositories>
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
</repositories>
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.6.0-mr1-cdh5.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.6.0-cdh5.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.6.0-cdh5.14.0</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>2.6.0-cdh5.14.0</version>
</dependency>
<!-- https://mvnrepository.com/artifact/junit/junit -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.11</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>RELEASE</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
<encoding>UTF-8</encoding>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>2.4.3</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<minimizeJar>true</minimizeJar>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
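The shade plugin bundles every dependency into a single runnable jar during the package phase; minimizeJar=true additionally strips classes that are never referenced, which keeps the artifact smaller.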
Example code
import org.apache.commons.io.IOUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.testng.annotations.Test;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
* @program: hdfs
* @description:
* @author: wenglei
* @create: 2020-05-25 15:10
**/
/* Accessing HDFS data through the FileSystem API */
public class Demo01 {
/* Obtaining a FileSystem */
/* Method 1 */
@Test
public void copyToLocalFile01() throws Exception {
FileSystem system = FileSystem.get(new URI("hdfs://node01:8020"), new Configuration());
System.out.println(system.toString());
}
/* Method 2 */
@Test
public void copyToLocalFile02() throws Exception{
Configuration entries = new Configuration();
entries.set("fs.defaultFS","hdfs://node01:8020");
FileSystem fileSystem = FileSystem.get(new URI("/"), entries);
System.out.println(fileSystem);
}
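/* Method 3 (an added sketch, not in the original): read fs.defaultFS from a
   core-site.xml on the classpath instead of hard-coding the NameNode URI */
@Test
public void copyToLocalFile03() throws Exception {
    // falls back to the local filesystem (file:///) if no configuration is found
    FileSystem fileSystem = FileSystem.get(new Configuration());
    System.out.println(fileSystem);
}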
/* Download a file from HDFS to the local filesystem, connecting as a specified user */
@Test
public void getConfig() throws URISyntaxException, IOException, InterruptedException {
FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), new Configuration(), "root");
fileSystem.copyToLocalFile(new Path("/wordcount"),new Path("file:///e:/wordcount"));
fileSystem.close();
}
/* Upload a local file to HDFS */
@Test
public void putData() throws Exception{
FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), new Configuration());
fileSystem.copyFromLocalFile(new Path("file:///d:/企业微信截图_158225458022.png"),new Path("/hello"));
fileSystem.close();
}
/* Recursively traverse all files in HDFS */
@Test
public void getFiles() throws Exception{
FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), new Configuration());
FileStatus[] status = fileSystem.listStatus(new Path("/"));
for (FileStatus fileStatus : status) {
// check whether the entry is a directory
if (fileStatus.isDirectory()){
// it is a directory: get its path and recurse into it
Path path = fileStatus.getPath();
listAllFiles(fileSystem,path);
}else{
System.out.println("文件路劲是:"+fileStatus.getPath().toString());
}
}
}
public void listAllFiles(FileSystem fileSystem,Path path) throws Exception{
FileStatus[] fileStatuses = fileSystem.listStatus(path);
for (FileStatus fileStatus : fileStatuses) {
if(fileStatus.isDirectory()){
listAllFiles(fileSystem,fileStatus.getPath());
}else{
Path path1 = fileStatus.getPath();
System.out.println("文件路径为"+path1);
}
}
}
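/* An alternative sketch (not in the original): listFiles(path, true) walks the
   tree itself and returns a RemoteIterator, avoiding the manual recursion above */
@Test
public void getFilesWithIterator() throws Exception {
    FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), new Configuration());
    RemoteIterator<LocatedFileStatus> iterator = fileSystem.listFiles(new Path("/"), true);
    while (iterator.hasNext()) {
        System.out.println("File path: " + iterator.next().getPath());
    }
    fileSystem.close();
}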
/* Create a directory in HDFS */
@Test
public void mkdirs() throws Exception{
FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), new Configuration());
boolean mkdirs = fileSystem.mkdirs(new Path("/hello/mkdir"));
if (mkdirs) {
    System.out.println("Directory created");
} else {
    System.out.println("Directory creation failed");
}
}
/* Delete a file or directory in HDFS */
@Test
public void deletes() throws Exception{
FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), new Configuration());
// the boolean argument requests recursive deletion (the single-argument delete is deprecated)
boolean delete = fileSystem.delete(new Path("/hello"), true);
if (delete) {
    System.out.println("Deleted successfully");
} else {
    System.out.println("Delete failed");
}
}
/* Upload multiple local files, merging them into one large HDFS file */
@Test
public void mergeFile() throws Exception{
// get the distributed filesystem (connecting as user administrator)
FileSystem fileSystem = FileSystem.get(new URI("hdfs://node01:8020"), new Configuration(), "administrator");
FSDataOutputStream outputStream = fileSystem.create(new Path("/hello/hellos"));
// get the local filesystem
LocalFileSystem local = FileSystem.getLocal(new Configuration());
FileStatus[] listStatus = local.listStatus(new Path("file:///e:/wordcount"));
for (FileStatus status : listStatus) {
// open an input stream for each local file
FSDataInputStream inputStream = local.open(status.getPath());
// copy the local contents into the HDFS output stream
IOUtils.copy(inputStream,outputStream);
// close this input stream
IOUtils.closeQuietly(inputStream);
}
// close the output stream
IOUtils.closeQuietly(outputStream);
local.close();
fileSystem.close();
}
}
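Note that the shell covers the opposite direction out of the box: hadoop fs -getmerge <hdfs-dir> <local-file> concatenates every file under an HDFS directory into a single local file.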