1. HDFS Shell Operations (key point for development)
1.1 Basic Syntax
$ bin/hadoop fs <command>
or
$ bin/hdfs dfs <command>
dfs is an implementation class of fs that works specifically against HDFS.
1.2 Full Command List
[dwjf321@hadoop102 hadoop-2.7.2]$ bin/hadoop fs
[-appendToFile <localsrc> ... <dst>]
[-cat [-ignoreCrc] <src> ...]
[-checksum <src> ...]
[-chgrp [-R] GROUP PATH...]
[-chmod [-R] <MODE[,MODE]... | OCTALMODE> PATH...]
[-chown [-R] [OWNER][:[GROUP]] PATH...]
[-copyFromLocal [-f] [-p] <localsrc> ... <dst>]
[-copyToLocal [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-count [-q] <path> ...]
[-cp [-f] [-p] <src> ... <dst>]
[-createSnapshot <snapshotDir> [<snapshotName>]]
[-deleteSnapshot <snapshotDir> <snapshotName>]
[-df [-h] [<path> ...]]
[-du [-s] [-h] <path> ...]
[-expunge]
[-get [-p] [-ignoreCrc] [-crc] <src> ... <localdst>]
[-getfacl [-R] <path>]
[-getmerge [-nl] <src> <localdst>]
[-help [cmd ...]]
[-ls [-d] [-h] [-R] [<path> ...]]
[-mkdir [-p] <path> ...]
[-moveFromLocal <localsrc> ... <dst>]
[-moveToLocal <src> <localdst>]
[-mv <src> ... <dst>]
[-put [-f] [-p] <localsrc> ... <dst>]
[-renameSnapshot <snapshotDir> <oldName> <newName>]
[-rm [-f] [-r|-R] [-skipTrash] <src> ...]
[-rmdir [--ignore-fail-on-non-empty] <dir> ...]
[-setfacl [-R] [{-b|-k} {-m|-x <acl_spec>} <path>]|[--set <acl_spec> <path>]]
[-setrep [-R] [-w] <rep> <path> ...]
[-stat [format] <path> ...]
[-tail [-f] <file>]
[-test -[defsz] <path>]
[-text [-ignoreCrc] <src> ...]
[-touchz <path> ...]
[-usage [cmd ...]]
1.3 Common Commands in Practice
- -help: print usage information for a command
$ hadoop fs -help rm
- -ls: list directory contents
$ hadoop fs -ls /
- -mkdir: create a directory on HDFS
$ hadoop fs -mkdir -p /dwjf/test
- -moveFromLocal: cut a file from the local file system and paste it into HDFS
$ hadoop fs -moveFromLocal ./kongming.txt /dwjf/test
- -appendToFile: append a file to the end of a file that already exists
$ hadoop fs -appendToFile liubei.txt /sanguo/shuguo/kongming.txt
- -cat: display the contents of a file
$ hadoop fs -cat /sanguo/shuguo/kongming.txt
- -chgrp, -chmod, -chown: same usage as in the Linux file system; change a file's group, permissions, or owner
$ hadoop fs -chmod 666 /sanguo/shuguo/kongming.txt
$ hadoop fs -chown dwjf:dwjf /sanguo/shuguo/kongming.txt
- -copyFromLocal: copy a file from the local file system to an HDFS path
$ hadoop fs -copyFromLocal README.txt /
- -copyToLocal: copy from HDFS to the local file system
$ hadoop fs -copyToLocal /sanguo/shuguo/kongming.txt ./
- -cp: copy from one HDFS path to another HDFS path
$ hadoop fs -cp /sanguo/shuguo/kongming.txt /zhuge.txt
- -mv: move a file within HDFS
$ hadoop fs -mv /zhuge.txt /sanguo/shuguo/
- -get: same as copyToLocal; download a file from HDFS to the local file system
$ hadoop fs -get /sanguo/shuguo/kongming.txt ./
- -getmerge: merge and download multiple files; for example, the HDFS directory /user/dwjf/test contains the files test1.txt, test2.txt, test3.txt, …
$ hadoop fs -getmerge /user/atguigu/test/* ./zaiyiqi.txt
- -put: same as copyFromLocal
$ hadoop fs -put ./zaiyiqi.txt /user/atguigu/test/
- -tail: display the end of a file
$ hadoop fs -tail /sanguo/shuguo/kongming.txt
- -rm: delete a file or directory
$ hadoop fs -rm /user/dwjf/test/jinlian2.txt
- -rmdir: delete an empty directory
$ hadoop fs -mkdir /test
$ hadoop fs -rmdir /test
- -du: show the size of a directory
$ hadoop fs -du -s -h /user/atguigu/test
2.7 K  /user/atguigu/test
$ hadoop fs -du -h /user/atguigu/test
1.3 K  /user/atguigu/test/README.txt
15     /user/atguigu/test/jinlian.txt
1.4 K  /user/atguigu/test/zaiyiqi.txt
- -setrep: set the replication factor of a file in HDFS
$ hadoop fs -setrep 10 /kongming.txt
The replication factor set here is only recorded in the NameNode's metadata; whether that many replicas actually exist depends on the number of DataNodes. Since there are currently only 3 machines, there can be at most 3 replicas; the file only reaches 10 replicas once the cluster grows to 10 nodes. (See the sketch below for one way to check this from a client.)
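To see the difference between the requested replication factor and the replicas that actually exist, you can compare what the NameNode records with the DataNodes that currently hold each block. Below is a minimal sketch using the Java client covered in the next section; the address hdfs://hadoop102:9000 and the file /kongming.txt come from the examples in this document, while the class name ReplicationCheck is just for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class ReplicationCheck {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(URI.create("hdfs://hadoop102:9000"), new Configuration());
        FileStatus status = fs.getFileStatus(new Path("/kongming.txt"));

        // Replication factor recorded in the NameNode metadata (10 after -setrep 10).
        System.out.println("requested replication: " + status.getReplication());

        // DataNodes that actually hold each block (at most 3 on a 3-node cluster).
        for (BlockLocation block : fs.getFileBlockLocations(status, 0, status.getLen())) {
            System.out.println("block hosts: " + String.join(", ", block.getHosts()));
        }
        fs.close();
    }
}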
2. HDFS Java Client Operations (key point for development)
- Add the pom dependency:
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.2</version>
</dependency>
- Copy hdfs-site.xml into the project's resources directory (the root of the classpath):
<configuration>
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <!-- Host and port of the Hadoop secondary NameNode -->
    <property>
        <name>dfs.namenode.secondary.http-address</name>
        <value>hadoop104:50090</value>
    </property>
</configuration>
- Configuration parameter priority (see the sketch below)
Priority order: (1) values set in the client code > (2) user-defined configuration files on the classpath > (3) the server-side default configuration
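As an illustration of priority (1), the minimal sketch below sets dfs.replication in client code before writing a file; that value then takes precedence over hdfs-site.xml on the classpath and over the server-side defaults. The class name, the path /priority-demo.txt, and the user dwjf321 are assumptions made for the example.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.net.URI;

public class ReplicationPriorityDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // (1) A value set in client code overrides (2) hdfs-site.xml on the
        // classpath and (3) the server-side default configuration.
        conf.set("dfs.replication", "2");

        FileSystem fs = FileSystem.get(URI.create("hdfs://hadoop102:9000"), conf, "dwjf321");
        Path file = new Path("/priority-demo.txt");
        FSDataOutputStream out = fs.create(file);
        out.write("written with dfs.replication = 2".getBytes());
        out.close();

        // Prints 2: the value from the client code, not the 3 from hdfs-site.xml.
        System.out.println("replication = " + fs.getFileStatus(file).getReplication());
        fs.close();
    }
}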
2.1 Operating HDFS with the Java Client
package com.hadoop.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.JobConf;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
/**
* HDFS operations
*/
public class HDFSDao {
private static final Logger logger = LoggerFactory.getLogger(HDFSDao.class);
public static final String HDFS = "hdfs://hadoop102:9000/";
private String hdfsPath;
private Configuration conf;
public HDFSDao() {
conf = config();
this.hdfsPath = HDFS;
}
public HDFSDao(Configuration conf){
this(HDFS,conf);
}
public HDFSDao(String hdfs, Configuration conf){
this.hdfsPath = hdfs;
this.conf = conf;
}
public JobConf config(){
JobConf conf = new JobConf(HDFSDao.class);
conf.setJobName(HDFSDao.class.getSimpleName());
conf.addResource("classpath:/hadoop/core-site.xml");
conf.addResource("classpath:/hadoop/hdfs-site.xml");
conf.addResource("classpath:/hadoop/mapred-site.xml");
return conf;
}
/**
* Create a directory
* @param folder
* @throws IOException
*/
public void mkdirs(String folder) throws IOException {
Path path = new Path(folder);
FileSystem fs = FileSystem.get(URI.create(hdfsPath), conf);
if (!fs.exists(path)){
fs.mkdirs(path);
logger.info(folder+" create success");
}
fs.close();
}
/**
* Delete a directory
* @param folder
* @throws IOException
*/
public void rmr(String folder) throws IOException{
Path path = new Path(folder);
FileSystem fs = FileSystem.get(URI.create(hdfsPath),conf);
fs.delete(path, true);
logger.info(folder+" delete success");
fs.close();
}
/**
* List a directory
* @param folder
* @throws IOException
*/
public void ls(String folder) throws IOException{
Path path = new Path(folder);
FileSystem fs = FileSystem.get(URI.create(hdfsPath),conf);
FileStatus[] list = fs.listStatus(path);
logger.info("ls:"+folder);
logger.info("=======================================");
for (FileStatus status : list) {
logger.info("name: {}, folder: {}, size: {}", status.getPath(), status.isDirectory(), status.getLen());
}
logger.info("=======================================");
fs.close();
}
/**
* Write a file
* @param file
* @param content
* @throws IOException
*/
public void createFile(String file, String content) throws IOException{
FileSystem fs = null;
byte[] bytes = content.getBytes();
FSDataOutputStream stream = null;
try {
fs = FileSystem.get(URI.create(hdfsPath),conf);
stream = fs.create(new Path(file));
stream.write(bytes);
stream.flush();
// Make sure the data is persisted to disk on the DataNodes.
// Using hflush or hsync reduces throughput, so an application has to trade
// throughput against data durability when deciding to call them.
// Also, while a file is being written, the block currently being written is
// not visible to other readers.
stream.hsync();
}finally {
if (stream != null){
stream.close();
}
if (fs != null){
fs.close();
}
}
}
/**
* Upload a file
* @param local
* @param remote
* @throws IOException
*/
public void upload(String local, String remote) throws IOException{
FileSystem fs = FileSystem.get(URI.create(hdfsPath),conf);
fs.copyFromLocalFile(new Path(local), new Path(remote));
logger.info("upload "+local+" success");
fs.close();
}
/**
* Download a file
* @param remote
* @param local
* @throws IOException
*/
public void download(String remote, String local) throws IOException{
FileSystem fs = FileSystem.get(URI.create(hdfsPath),conf);
fs.copyToLocalFile(new Path(remote),new Path(local));
logger.info("download "+remote+" success");
fs.close();
}
/**
* Read the contents of a file
* @param remoteFile
* @return
* @throws IOException
*/
public String cat(String remoteFile) throws IOException{
Path path = new Path(remoteFile);
FileSystem fs= null;
FSDataInputStream stream = null;
try {
fs = FileSystem.get(URI.create(hdfsPath),conf);
stream = fs.open(path);
OutputStream out = new ByteArrayOutputStream();
IOUtils.copyBytes(stream,out,4096,false);
String s = out.toString();
return s;
}finally {
if (stream != null){
stream.close();
}
if (fs != null){
fs.close();
}
}
}
}
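A short usage sketch of HDFSDao, assuming it sits in the same package; the paths are made up for the example, and the no-argument constructor connects to hdfs://hadoop102:9000 through the HDFS constant.

public class HDFSDaoDemo {
    public static void main(String[] args) throws Exception {
        HDFSDao dao = new HDFSDao();

        dao.mkdirs("/dwjf/test");                              // create a directory
        dao.createFile("/dwjf/test/hello.txt", "hello hdfs");  // write a small file
        System.out.println(dao.cat("/dwjf/test/hello.txt"));   // read it back
        dao.ls("/dwjf/test");                                  // list the directory
        dao.rmr("/dwjf/test");                                 // clean up
    }
}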