1. Reading a single file
// Build yesterday's date string and the corresponding HDFS path
Date date = DateUtil.getSpecifiedDayBefore();
String yesterday = DateUtil.dateToStr(date, "yyyy-MM-dd");
String path = "hdfs://ip:9000/output_log/output_log_click" + yesterday;

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create(path), conf);

// Open the file and read it line by line as UTF-8 text
FSDataInputStream hdfsInStream = fs.open(new Path(path));
InputStreamReader isr = new InputStreamReader(hdfsInStream, "utf-8");
BufferedReader br = new BufferedReader(isr);
String line;
while ((line = br.readLine()) != null) {
    System.out.println(line);
}
br.close();
fs.close();
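The snippet depends on a project-specific DateUtil helper that is not shown here. A minimal sketch of what it might look like, assuming getSpecifiedDayBefore() returns yesterday's date and dateToStr() formats it with the given pattern (both are assumptions, since the original helper class is not included):

import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

// Hypothetical stand-in for the DateUtil used above.
public class DateUtil {

    // Returns the Date one day before now (the behavior assumed for getSpecifiedDayBefore()).
    public static Date getSpecifiedDayBefore() {
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_MONTH, -1);
        return cal.getTime();
    }

    // Formats a Date with the given pattern, e.g. "yyyy-MM-dd".
    public static String dateToStr(Date date, String pattern) {
        return new SimpleDateFormat(pattern).format(date);
    }
}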
2. Reading a directory
// Build yesterday's date string and the corresponding HDFS directory path
Date date = DateUtil.getSpecifiedDayBefore();
String yesterday = DateUtil.dateToStr(date, "yyyy-MM-dd");
String path = "hdfs://ip:9000/output_log/output_log_click" + yesterday;

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create(path), conf);

// List every entry in the directory and read only the files whose names start with "newsMap"
FileStatus[] status = fs.listStatus(new Path(path));
for (FileStatus file : status) {
    if (!file.getPath().getName().startsWith("newsMap")) {
        continue;
    }
    FSDataInputStream hdfsInStream = fs.open(file.getPath());
    InputStreamReader isr = new InputStreamReader(hdfsInStream, "utf-8");
    BufferedReader br = new BufferedReader(isr);
    String line;
    while ((line = br.readLine()) != null) {
        System.out.println(line);
    }
    br.close();
}
fs.close();
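The snippet filters files manually inside the loop. Hadoop's FileSystem.listStatus(Path, PathFilter) overload can do the filtering for you; the following is a minimal sketch (the class name ListWithFilter, the placeholder path, and the Java 8 lambda form of the single-method PathFilter interface are my own choices, not from the original):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListWithFilter {
    public static void main(String[] args) throws Exception {
        String path = "hdfs://ip:9000/output_log/";  // placeholder path, as in the snippets above
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create(path), conf);
        // listStatus(Path, PathFilter) returns only the entries the filter accepts,
        // so the startsWith check no longer has to live inside the loop
        FileStatus[] status = fs.listStatus(new Path(path),
                p -> p.getName().startsWith("newsMap"));
        for (FileStatus file : status) {
            System.out.println(file.getPath() + "\t" + file.getLen() + " bytes");
        }
        fs.close();
    }
}

The two snippets above are fragments that assume a reachable cluster and the DateUtil helper; the complete class below gathers upload, download, and directory-listing helpers into a single utility.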
package archy.com;

import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class MyHdfs {

    // Hadoop configuration; true loads the default resources (core-site.xml, hdfs-site.xml) from the classpath
    static Configuration conf = new Configuration(true);

    static {
        // Point the client at the HDFS NameNode (fs.default.name is the legacy key; fs.defaultFS is its current name)
        conf.set("fs.default.name", "hdfs://bigdata01.com:8020");
    }

    /**
     * Uploads a local file (filePath) to the given path (dst) on the HDFS server.
     *
     * @author 冯琪
     */
    public static void uploadFileToHDFS(String filePath, String dst)
            throws Exception {
        // Get a FileSystem handle for the configured cluster
        FileSystem fs = FileSystem.get(conf);
        Path srcPath = new Path(filePath);
        Path dstPath = new Path(dst);
        long start = System.currentTimeMillis();
        // false = keep the local source file after copying
        fs.copyFromLocalFile(false, srcPath, dstPath);
        System.out.println("Time:" + (System.currentTimeMillis() - start));
        System.out.println("________Uploaded to " + conf.get("fs.default.name")
                + dst + "____________");
        fs.close();
        // List the target directory to confirm the upload
        getDirectoryFromHdfs(dst);
    }

    /**
     * Downloads a file (src) from HDFS and writes it to standard output.
     *
     * @author 冯琪
     */
    public static void downLoadFileFromHDFS(String src) throws Exception {
        FileSystem fs = FileSystem.get(conf);
        Path srcPath = new Path(src);
        InputStream in = fs.open(srcPath);
        try {
            // Copy the file contents to standard output (the console)
            IOUtils.copyBytes(in, System.out, 4096, false);
        } finally {
            IOUtils.closeStream(in);
            fs.close();
        }
    }

    /**
     * Lists all files under the given directory (direPath).
     *
     * @author 冯琪
     */
    public static void getDirectoryFromHdfs(String direPath) throws Exception {
        FileSystem fs = FileSystem.get(URI.create(direPath), conf);
        FileStatus[] filelist = fs.listStatus(new Path(direPath));
        for (int i = 0; i < filelist.length; i++) {
            System.out.println("_________________File #" + i
                    + "____________________");
            FileStatus fileStatus = filelist[i];
            System.out.println("Name:" + fileStatus.getPath().getName());
            System.out.println("size:" + fileStatus.getLen());
            System.out.println("_________________File #" + i
                    + "____________________");
        }
        fs.close();
    }

    /**
     * Test entry point.
     *
     * @author 冯琪
     */
    public static void main(String[] args) {
        try {
            // List all files under an HDFS directory
            getDirectoryFromHdfs("/fengqi/1104/");
            // Upload a file: first argument is the local file path, second is the HDFS destination path
            // uploadFileToHDFS("D:/456.txt", "/fengqi/1104/");
            // Download a file: the argument is the HDFS path of the file to download
            // downLoadFileFromHDFS("/fengqi/1104/456.txt");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
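The class covers uploading, downloading, and listing. If you also need to remove files, a deletion helper in the same style could be added to MyHdfs; the method below is a hypothetical addition (the name deleteFromHDFS is mine, not from the original), relying only on the standard FileSystem.exists() and FileSystem.delete() calls:

    /**
     * Deletes a file or directory on HDFS (hypothetical addition, same style as the
     * helpers above). The second argument to delete() enables recursive deletion so
     * that non-empty directories can also be removed.
     */
    public static void deleteFromHDFS(String dst) throws Exception {
        FileSystem fs = FileSystem.get(conf);
        Path dstPath = new Path(dst);
        if (fs.exists(dstPath)) {
            boolean ok = fs.delete(dstPath, true);
            System.out.println("Deleted " + dst + ": " + ok);
        } else {
            System.out.println(dst + " does not exist");
        }
        fs.close();
    }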