Getting down to business
The locally installed Hadoop version is 3.1.3 (note that the pom below pins hadoop.version to 3.1.1; a minor client/server mismatch like this generally works, but matching the installed version is safer).
pom.xml
<properties>
    <java.version>1.8</java.version>
    <spark.version>3.0.0</spark.version>
    <scala.version>2.12.10</scala.version>
    <scala.binary.version>2.12</scala.binary.version>
    <hive.version>2.3.7</hive.version>
    <hadoop.version>3.1.1</hadoop.version>
    <kafka.version>2.4.1</kafka.version>
    <zookeeper.version>3.4.14</zookeeper.version>
    <guava.version>14.0.1</guava.version>
</properties>

<!-- Spark framework: start -->
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-core_${scala.binary.version}</artifactId>
    <version>${spark.version}</version>
    <exclusions>
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-sql_${scala.binary.version}</artifactId>
    <version>${spark.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-hive_${scala.binary.version}</artifactId>
    <version>${spark.version}</version>
</dependency>
<!--
    Add joda-time to ensure that anything downstream which doesn't pull in spark-hive
    gets the correct joda-time artifact and doesn't hit auth failures on later Java 8 JVMs.
    (No <version> element here: the version is assumed to be managed elsewhere,
    e.g. by the Spring Boot parent.)
-->
<dependency>
    <groupId>joda-time</groupId>
    <artifactId>joda-time</artifactId>
    <scope>compile</scope>
</dependency>
<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming_${scala.binary.version}</artifactId>
    <version>${spark.version}</version>
</dependency>
<!-- Spark framework: end -->

<!-- Hadoop: start -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>${hadoop.version}</version>
    <exclusions>
        <exclusion>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
        </exclusion>
    </exclusions>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>${hadoop.version}</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>${hadoop.version}</version>
</dependency>
<!-- Hadoop: end -->
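The fragment above omits the rest of the pom. Since the controller below is a Spring MVC @RestController, the project presumably also declares the web starter; a minimal sketch, assuming the project inherits from spring-boot-starter-parent (which manages the version):

<dependency>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-web</artifactId>
</dependency>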
HDFSController
package com.example.controller.hdfs;

import com.example.utils.hdfs.HDFSUtils;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

/**
 * @ClassName HDFSController
 * @Date 2020/8/26 11:41
 */
@RestController
@RequestMapping("/spark/hdfs")
public class HDFSController {

    @GetMapping("/read")
    public String readFile(@RequestParam("fileName") String fileName) throws Exception {
        return HDFSUtils.readFile(fileName);
    }

    @PostMapping("/upload")
    public void upload(@RequestParam("file") MultipartFile file) throws Exception {
        // store under the "ypp" directory (a relative path resolves against the
        // HDFS home directory of the configured user)
        HDFSUtils.createFile("ypp", file);
    }
}
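With the application running (assuming Spring Boot's default port 8080 and the 111.txt file used later in this post; both are assumptions), the two endpoints can be exercised from the command line:

curl "http://localhost:8080/spark/hdfs/read?fileName=ypp/111.txt"
curl -F "file=@111.txt" "http://localhost:8080/spark/hdfs/upload"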
Utility class HDFSUtils
package com.example.utils.hdfs;
import com.alibaba.fastjson.JSONObject;
import com.example.utils.properties.HDFSPropertiesUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
import org.springframework.web.multipart.MultipartFile;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* @ClassName HDFSUtils
* @Date 2020/8/26 15:25
*/
public class HDFSUtils {

    private static String hdfsPath;
    private static String hdfsName;
    // buffer size used by IOUtils.copyBytes (64 MB)
    private static final int bufferSize = 1024 * 1024 * 64;

    static {
        // set these to your own HDFS address and user name (see HDFSPropertiesUtils below)
        hdfsPath = HDFSPropertiesUtils.getPath();
        hdfsName = HDFSPropertiesUtils.getUserName();
    }
    /**
     * Build the HDFS configuration.
     * Note: setting "HADOOP_USER_NAME" as a Configuration key has no effect on the
     * client identity; instead, the user is passed to FileSystem.get(...) below.
     * @return
     */
    private static Configuration getConfiguration() {
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", hdfsPath);
        return configuration;
    }
    /**
     * Get the HDFS FileSystem object.
     * @return
     * @throws Exception
     */
    public static FileSystem getFileSystem() throws Exception {
        /*
        // Another way to set the identity the Java client uses against HDFS:
        // it would access paths under /user/ypp as the user "ypp".
        System.setProperty("HADOOP_USER_NAME", "ypp");
        Configuration configuration = new Configuration();
        configuration.set("fs.defaultFS", "hdfs://ypp:9090");
        FileSystem fileSystem = FileSystem.get(configuration);
        */
        // The HDFS client always acts under some user identity. By default it derives
        // one from the JVM; passing it explicitly as the third argument avoids the
        // permission problems mentioned at the end of this post.
        return FileSystem.get(new URI(hdfsPath), getConfiguration(), hdfsName);
    }
    /**
     * Create a directory on HDFS.
     * @param path
     * @return
     * @throws Exception
     */
    public static boolean mkdir(String path) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return false;
        }
        if (existFile(path)) {
            return true;
        }
        FileSystem fs = getFileSystem();
        // target path
        Path srcPath = new Path(path);
        boolean isOk = fs.mkdirs(srcPath);
        fs.close();
        return isOk;
    }
    /**
     * Check whether a file exists on HDFS.
     * @param path
     * @return
     * @throws Exception
     */
    public static boolean existFile(String path) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return false;
        }
        FileSystem fs = getFileSystem();
        Path srcPath = new Path(path);
        boolean isExists = fs.exists(srcPath);
        // close the handle here too, so this method doesn't leak FileSystem instances
        fs.close();
        return isExists;
    }
    /**
     * Read directory information from HDFS.
     * @param path
     * @return
     * @throws Exception
     */
    public static List<Map<String, Object>> readPathInfo(String path) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return null;
        }
        if (!existFile(path)) {
            return null;
        }
        FileSystem fs = getFileSystem();
        // target path
        Path newPath = new Path(path);
        FileStatus[] statusList = fs.listStatus(newPath);
        fs.close();
        List<Map<String, Object>> list = new ArrayList<>();
        if (null != statusList && statusList.length > 0) {
            for (FileStatus fileStatus : statusList) {
                Map<String, Object> map = new HashMap<>();
                map.put("filePath", fileStatus.getPath());
                map.put("fileStatus", fileStatus.toString());
                list.add(map);
            }
            return list;
        } else {
            return null;
        }
    }
    /**
     * Create a file on HDFS from an uploaded MultipartFile.
     * @param path
     * @param file
     * @throws Exception
     */
    public static void createFile(String path, MultipartFile file) throws Exception {
        if (StringUtils.isEmpty(path) || null == file.getBytes()) {
            return;
        }
        String fileName = file.getOriginalFilename();
        FileSystem fs = getFileSystem();
        // a relative path resolves against the user's HDFS home directory;
        // the original file name is appended to it
        Path newPath = new Path(path + "/" + fileName);
        // open an output stream and write the uploaded bytes
        FSDataOutputStream outputStream = fs.create(newPath);
        outputStream.write(file.getBytes());
        outputStream.close();
        fs.close();
    }
    /**
     * Read the contents of an HDFS file as a string.
     * @param path
     * @return
     * @throws Exception
     */
    public static String readFile(String path) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return null;
        }
        if (!existFile(path)) {
            return null;
        }
        FileSystem fs = getFileSystem();
        // target path
        Path srcPath = new Path(path);
        // read as UTF-8 to avoid garbled Chinese characters; try-with-resources
        // closes the streams even if fs.open() or a read throws
        try (FSDataInputStream inputStream = fs.open(srcPath);
             BufferedReader reader = new BufferedReader(
                     new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
            String lineTxt;
            StringBuilder sb = new StringBuilder();
            while ((lineTxt = reader.readLine()) != null) {
                sb.append(lineTxt).append('\n');
            }
            return sb.toString();
        } finally {
            fs.close();
        }
    }
    /**
     * List the files under an HDFS path.
     * @param path
     * @return
     * @throws Exception
     */
    public static List<Map<String, String>> listFile(String path) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return null;
        }
        if (!existFile(path)) {
            return null;
        }
        FileSystem fs = getFileSystem();
        // target path
        Path srcPath = new Path(path);
        // recursively walk the tree and collect every file
        RemoteIterator<LocatedFileStatus> filesList = fs.listFiles(srcPath, true);
        List<Map<String, String>> returnList = new ArrayList<>();
        while (filesList.hasNext()) {
            LocatedFileStatus next = filesList.next();
            String fileName = next.getPath().getName();
            Path filePath = next.getPath();
            Map<String, String> map = new HashMap<>();
            map.put("fileName", fileName);
            map.put("filePath", filePath.toString());
            returnList.add(map);
        }
        fs.close();
        return returnList;
    }
    /**
     * Rename a file on HDFS.
     * @param oldName
     * @param newName
     * @return
     * @throws Exception
     */
    public static boolean renameFile(String oldName, String newName) throws Exception {
        if (StringUtils.isEmpty(oldName) || StringUtils.isEmpty(newName)) {
            return false;
        }
        FileSystem fs = getFileSystem();
        // path of the existing file
        Path oldPath = new Path(oldName);
        // new path after the rename
        Path newPath = new Path(newName);
        boolean isOk = fs.rename(oldPath, newPath);
        fs.close();
        return isOk;
    }
    /**
     * Delete a file on HDFS.
     * @param path
     * @return
     * @throws Exception
     */
    public static boolean deleteFile(String path) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return false;
        }
        if (!existFile(path)) {
            return false;
        }
        FileSystem fs = getFileSystem();
        Path srcPath = new Path(path);
        // delete immediately (recursively); deleteOnExit would only mark the path
        // for deletion when the FileSystem is closed
        boolean isOk = fs.delete(srcPath, true);
        fs.close();
        return isOk;
    }
    /**
     * Upload a local file to HDFS.
     * @param path
     * @param uploadPath
     * @throws Exception
     */
    public static void uploadFile(String path, String uploadPath) throws Exception {
        if (StringUtils.isEmpty(path) || StringUtils.isEmpty(uploadPath)) {
            return;
        }
        FileSystem fs = getFileSystem();
        // local source path
        Path clientPath = new Path(path);
        // target path on HDFS
        Path serverPath = new Path(uploadPath);
        // copy the file; the first argument controls whether the source is deleted
        // (true = delete, default false)
        fs.copyFromLocalFile(false, clientPath, serverPath);
        fs.close();
    }
    /**
     * Download an HDFS file to the local filesystem.
     * @param path
     * @param downloadPath
     * @throws Exception
     */
    public static void downloadFile(String path, String downloadPath) throws Exception {
        if (StringUtils.isEmpty(path) || StringUtils.isEmpty(downloadPath)) {
            return;
        }
        FileSystem fs = getFileSystem();
        // source path on HDFS
        Path clientPath = new Path(path);
        // local target path
        Path serverPath = new Path(downloadPath);
        // copy the file; the first argument controls whether the source is deleted
        // (true = delete, default false)
        fs.copyToLocalFile(false, clientPath, serverPath);
        fs.close();
    }
    /**
     * Copy a file within HDFS.
     * @param sourcePath
     * @param targetPath
     * @throws Exception
     */
    public static void copyFile(String sourcePath, String targetPath) throws Exception {
        if (StringUtils.isEmpty(sourcePath) || StringUtils.isEmpty(targetPath)) {
            return;
        }
        FileSystem fs = getFileSystem();
        // source path
        Path oldPath = new Path(sourcePath);
        // target path
        Path newPath = new Path(targetPath);
        FSDataInputStream inputStream = null;
        FSDataOutputStream outputStream = null;
        try {
            inputStream = fs.open(oldPath);
            outputStream = fs.create(newPath);
            IOUtils.copyBytes(inputStream, outputStream, bufferSize, false);
        } finally {
            // closeStream is null-safe, so no NPE here if open/create failed
            IOUtils.closeStream(inputStream);
            IOUtils.closeStream(outputStream);
            fs.close();
        }
    }
    /**
     * Open a file on HDFS and return its contents as a byte array.
     * @param path
     * @return
     * @throws Exception
     */
    public static byte[] openFileToBytes(String path) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return null;
        }
        if (!existFile(path)) {
            return null;
        }
        FileSystem fs = getFileSystem();
        // target path
        Path srcPath = new Path(path);
        // try-with-resources closes the input stream before fs is closed
        try (FSDataInputStream inputStream = fs.open(srcPath)) {
            return IOUtils.readFullyToByteArray(inputStream);
        } finally {
            fs.close();
        }
    }
    /**
     * Open a file on HDFS and deserialize it into a Java object (assumes JSON content).
     * @param path
     * @return
     * @throws Exception
     */
    public static <T> T openFileToObject(String path, Class<T> clazz) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return null;
        }
        if (!existFile(path)) {
            return null;
        }
        String jsonStr = readFile(path);
        return JSONObject.parseObject(jsonStr, clazz);
    }
    /**
     * Get the cluster block locations of a file on HDFS.
     * @param path
     * @return
     * @throws Exception
     */
    public static BlockLocation[] getFileBlockLocations(String path) throws Exception {
        if (StringUtils.isEmpty(path)) {
            return null;
        }
        if (!existFile(path)) {
            return null;
        }
        FileSystem fs = getFileSystem();
        // target path
        Path srcPath = new Path(path);
        try {
            FileStatus fileStatus = fs.getFileStatus(srcPath);
            return fs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
        } finally {
            fs.close();
        }
    }
}
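HDFSUtils reads the HDFS address and user name through HDFSPropertiesUtils, which the post does not show. A minimal sketch of what such a class could look like, assuming a hdfs.properties file on the classpath with the keys hdfs.path and hdfs.username (file name, keys, and defaults are all assumptions):

package com.example.utils.properties;

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

public class HDFSPropertiesUtils {

    private static final Properties PROPS = new Properties();

    static {
        // load hdfs.properties from the classpath once, at class-load time
        try (InputStream in = HDFSPropertiesUtils.class.getClassLoader()
                .getResourceAsStream("hdfs.properties")) {
            if (in != null) {
                PROPS.load(in);
            }
        } catch (IOException e) {
            throw new ExceptionInInitializerError(e);
        }
    }

    public static String getPath() {
        // e.g. hdfs://localhost:9000 (the NameNode address; adjust to your cluster)
        return PROPS.getProperty("hdfs.path", "hdfs://localhost:9000");
    }

    public static String getUserName() {
        // the identity used against HDFS, e.g. administrator on a local Windows setup
        return PROPS.getProperty("hdfs.username", "administrator");
    }
}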
Watch out for permission problems here. During development everything ran against my own Windows-installed Hadoop, so HADOOP_USER_NAME was simply set to administrator.
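If you would rather not pass the user name into FileSystem.get(...), the identity can also be set JVM-wide before the first HDFS call, which is what the commented-out block in getFileSystem() hints at:

// must run before the first FileSystem.get(...) call;
// equivalent to exporting the HADOOP_USER_NAME environment variable
System.setProperty("HADOOP_USER_NAME", "administrator");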
(Screenshots omitted: the file contents, the upload request, and the Hadoop NameNode log during the upload.)
Viewing files on HDFS
hadoop fs -ls /user/ypp/ypp   # list the files and directories under this path; /user/ypp/ypp is an HDFS path, not a local one, and the command is the same on every OS
Viewing file contents
hadoop fs -cat /user/ypp/ypp/111.txt   # print the file contents; Chinese text may come out garbled here (a fix to follow later); again, this is an HDFS path, not a local one, and the command is the same on every OS