// hadoop  (stray page title from the scraped blog post — commented out so the file compiles)

package com.sinosoft.splatform.util;
import java.io.*;
import java.util.Properties;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;
public class DateMove {

    /** Hadoop configuration; rebuilt from dc.properties in the static initializer. */
    static Configuration conf = new Configuration(true);

    /**
     * Shared FileSystem handle. {@link FileSystem#get} returns a JVM-wide cached
     * instance, so the individual operations below must NOT close it — the original
     * code called {@code fs.close()} after each operation, which made every
     * subsequent call fail with "Filesystem closed".
     */
    static FileSystem fs = null;

    // HDFS directory to mirror (dc.properties key "fileStoragePath").
    static String fileStoragePath = "/testfile";
    // NameNode URL, e.g. hdfs://localhost:9000 (dc.properties key "fs.defaultFS").
    static String hdfsUrl = "";
    // Local directory downloads are written into (dc.properties key "savePath").
    static String savePath = "";

    static {
        // Operator hint: a dc.properties file is expected next to the jar.
        System.out.println("注意:在此jar的同级目录新建 dc.properties 配置信息如下: \n" +
                "#代表只读取这个路径的hadoop文件,只能是一级目录 如 /zjk或者 /   \n" +
                "fileStoragePath =/   \n" +
                "fs.defaultFS =hdfs://localhost:9000   \n" +
                "dfs.replication=2   \n" +
                "savePath=C:\\\\Users\\\\15733\\\\Desktop\\\\hadoop\\\\   " +
                "");
        // Locate the directory this class was loaded from (jar or classes dir).
        String path = DateMove.class.getProtectionDomain().getCodeSource().getLocation().getPath();
        try {
            path = java.net.URLDecoder.decode(path, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        Properties p = new Properties();
        System.out.println("获取到的path:"+path);
        path = path.replaceAll("\\\\", "/");
        // When running from a jar, dc.properties sits next to the jar file.
        if (path.indexOf(".jar") > -1) {
            path = path.substring(0, path.lastIndexOf("/"));
        }
        System.out.println("文件路径 :"+path+"/dc.properties");
        // try-with-resources: the original leaked the FileInputStream on every path.
        try (InputStream is = new FileInputStream(path + "/dc.properties")) {
            p.load(is);
            // Defaults + trim() guard against missing keys (NPE) and the trailing
            // spaces shown in the sample properties above.
            fileStoragePath = p.getProperty("fileStoragePath", fileStoragePath).trim();
            hdfsUrl = p.getProperty("fs.defaultFS", hdfsUrl).trim();
            String replication = p.getProperty("dfs.replication", "3").trim();
            savePath = p.getProperty("savePath", savePath).trim();
            conf = new Configuration();
            conf.set("fs.defaultFS", hdfsUrl);
            conf.set("dfs.replication", replication); // HDFS default is 3
            // Without this: java.io.IOException: No FileSystem for scheme: hdfs
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            // Without this: java.io.IOException: No FileSystem for scheme: file
            conf.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
            fs = FileSystem.get(conf);
            if (!fs.exists(new Path(fileStoragePath))) {
                fs.mkdirs(new Path(fileStoragePath));
            }
        } catch (Exception e) {
            // Best-effort init: later calls will NPE on fs if this failed,
            // matching the original behavior.
            e.printStackTrace();
        }
    }

    /**
     * Checks whether the given HDFS path exists.
     * (Method name kept as "exits" — a historical typo callers already depend on.)
     *
     * @param path HDFS path to test
     * @return true if the path exists
     * @throws IOException on communication failure with HDFS
     */
    public static boolean exits(String path) throws IOException {
        return fs.exists(new Path(path));
    }

    /**
     * Creates (or overwrites) an HDFS file with the given bytes.
     *
     * @param filePath target HDFS path
     * @param contents raw file contents
     * @throws IOException if the file cannot be created or written
     */
    public static void createFile(String filePath, byte[] contents)
            throws IOException {
        Path path = new Path(filePath);
        // try-with-resources closes the stream even on a write failure;
        // the shared fs is deliberately left open (cached instance).
        try (FSDataOutputStream outputStream = fs.create(path)) {
            outputStream.write(contents);
        }
    }

    /**
     * Creates (or overwrites) an HDFS file with the given text.
     *
     * @param filePath    target HDFS path
     * @param fileContent text written as UTF-8 (explicit charset instead of the
     *                    platform default the original relied on)
     * @throws IOException if the file cannot be created or written
     */
    public static void createFile(String filePath, String fileContent)
            throws IOException {
        createFile(filePath, fileContent.getBytes("UTF-8"));
    }

    /**
     * Uploads a local file to HDFS (source kept, destination overwritten).
     *
     * @param localFilePath  local source file
     * @param remoteFilePath HDFS destination path
     * @throws IOException if the copy fails
     */
    public static void copyFromLocalFile(String localFilePath,
                                         String remoteFilePath) throws IOException {
        Path localPath = new Path(localFilePath);
        Path remotePath = new Path(remoteFilePath);
        // delSrc=false keeps the local file; overwrite=true replaces the remote one.
        fs.copyFromLocalFile(false, true, localPath, remotePath);
    }

    /**
     * Deletes an HDFS file or directory.
     *
     * @param remoteFilePath HDFS path to delete
     * @param recursive      whether to delete directory contents recursively
     * @return true if the deletion succeeded
     * @throws IOException if the delete fails
     */
    public static boolean deleteFile(String remoteFilePath, boolean recursive)
            throws IOException {
        return fs.delete(new Path(remoteFilePath), recursive);
    }

    /**
     * Deletes an HDFS file or directory, cascading into subdirectories.
     *
     * @param remoteFilePath HDFS path to delete
     * @return true if the deletion succeeded
     * @throws IOException if the delete fails
     */
    public static boolean deleteFile(String remoteFilePath) throws IOException {
        return deleteFile(remoteFilePath, true);
    }

    /**
     * Renames (moves) an HDFS file or directory.
     *
     * @param oldFileName current HDFS path
     * @param newFileName new HDFS path
     * @return true if the rename succeeded
     * @throws IOException if the rename fails
     */
    public static boolean renameFile(String oldFileName, String newFileName)
            throws IOException {
        Path oldPath = new Path(oldFileName);
        Path newPath = new Path(newFileName);
        return fs.rename(oldPath, newPath);
    }

    /**
     * Creates an HDFS directory if it does not already exist.
     *
     * @param dirName HDFS directory path
     * @return true if the directory was created; false if it already existed
     *         or creation failed
     * @throws IOException if the existence check or mkdir fails
     */
    public static boolean createDirectory(String dirName) throws IOException {
        Path dir = new Path(dirName);
        boolean result = false;
        if (!fs.exists(dir)) {
            result = fs.mkdirs(dir);
        }
        return result;
    }

    /**
     * Lists all files (not directories) under the given HDFS path.
     *
     * @param basePath  HDFS path to list
     * @param recursive whether to descend into subdirectories
     * @return iterator over the located file statuses
     * @throws IOException if the listing fails
     */
    public static RemoteIterator<LocatedFileStatus> listFiles(String basePath,
                                                              boolean recursive) throws IOException {
        return fs.listFiles(new Path(basePath), recursive);
    }

    /**
     * Lists files and subdirectories directly under the given HDFS directory
     * (non-recursive).
     *
     * @param dirPath HDFS directory path
     * @return statuses of the directory's immediate children
     * @throws IOException if the listing fails
     */
    public static FileStatus[] listStatus(String dirPath) throws IOException {
        return fs.listStatus(new Path(dirPath));
    }

    /**
     * Reads the full contents of an HDFS file into memory.
     *
     * @param filePath HDFS file path
     * @return the file's bytes, or null when the file does not exist
     * @throws IOException if the read fails
     */
    public static byte[] readFile(String filePath) throws IOException {
        byte[] fileContent = null;
        Path path = new Path(filePath);
        if (fs.exists(path)) {
            InputStream inputStream = null;
            ByteArrayOutputStream outputStream = null;
            try {
                inputStream = fs.open(path);
                // available() is only a sizing hint for the buffer, not a limit.
                outputStream = new ByteArrayOutputStream(
                        inputStream.available());
                IOUtils.copyBytes(inputStream, outputStream, conf);
                fileContent = outputStream.toByteArray();
            } finally {
                IOUtils.closeStream(inputStream);
                IOUtils.closeStream(outputStream);
            }
        }
        return fileContent;
    }

    /**
     * Downloads a single HDFS file to the local filesystem.
     *
     * @param remote HDFS source path
     * @param local  local destination path
     * @throws IOException if the copy fails
     */
    public static void download(String remote, String local) throws IOException {
        Path path = new Path(remote);
        fs.copyToLocalFile(path, new Path(local));
        System.out.println("download: from" + remote + " to " + local);
    }

    /**
     * Mirrors HDFS files under {@link #fileStoragePath} to the local
     * {@link #savePath} directory, recreating the HDFS directory structure.
     * Files that already exist locally are skipped.
     *
     * @throws IOException if listing or copying fails
     */
    public void hadoopToLocal() throws IOException {
        long startMillis = System.currentTimeMillis();
        RemoteIterator<LocatedFileStatus> listIterator = DateMove.listFiles("/", true);
        File saveDir = new File(savePath);
        if (!saveDir.exists()) {
            saveDir.mkdirs();
        }
        if (StringUtils.isNotBlank(fileStoragePath) && fileStoragePath.length() > 2) {
            System.out.println("要获取的hadoop根路径为:"+fileStoragePath);
        }
        while (listIterator.hasNext()) {
            String localPath = savePath;
            LocatedFileStatus status = listIterator.next();
            // File name, e.g. 509F44BF-1E43-6281-3F4F-A957FC42312E.pdf
            String hdfsfileName = status.getPath().getName();
            // Full HDFS URI, e.g. hdfs://localhost:9000/zlk/20201210/xxx.doc
            String hdfsPath = status.getPath().toString();
            // Only mirror paths under fileStoragePath (or everything when it is "/").
            if ((StringUtils.isNotBlank(fileStoragePath) && fileStoragePath.length() > 2
                    && hdfsPath.indexOf(fileStoragePath) > -1) || fileStoragePath.equals("/")) {
                // Strip the scheme/authority prefix and the file name to get the
                // relative directory part. substring() replaces the original
                // split(hdfsUrl)/split(fileName) calls, which treated the URL and
                // the file name as regular expressions.
                int urlIdx = hdfsPath.indexOf(hdfsUrl);
                String relative = urlIdx > -1
                        ? hdfsPath.substring(urlIdx + hdfsUrl.length())
                        : hdfsPath;
                String dirPart = relative.substring(0, relative.length() - hdfsfileName.length());
                for (String segment : dirPart.split("/")) {
                    File dir = new File(localPath + segment);
                    if (!dir.exists()) {
                        dir.mkdirs();
                    }
                    // File.separator instead of the hard-coded "\\" so the mirror
                    // also works on non-Windows hosts.
                    localPath = localPath + segment + File.separator;
                }
                File target = new File(localPath + File.separator + hdfsfileName);
                if (!target.exists()) {
                    Path path = new Path(hdfsPath);
                    System.out.println("download: from" + hdfsPath + " to " + localPath);
                    fs.copyToLocalFile(path, new Path(localPath));
                } else {
                    System.out.println("~~have exist:"+hdfsPath);
                }
            }
        }
        long endMillis = System.currentTimeMillis();
        System.out.println( "time: "+(endMillis-startMillis)/1000);
        // The shared fs stays open (unlike the original) so this utility
        // remains usable after a mirror run.
    }


//  Hadoop shell quick reference (from <hadoop>/bin):
//  1: hadoop namenode -format          format/start the NameNode (HDFS)
//  2: hadoop fs -ls                    list files
//     (if "ls: `.': No such file or directory", use: hadoop fs -ls /)
//  3: hadoop fs -mkdir /user/input     create a directory
//  4: hadoop fs -put /home/file.txt /user/input     upload a file
//  5: hadoop fs -cat /user/output/outfile           show file contents
//  6: hadoop fs -get /user/output/ /home/hadoop_tp/ download files
//  7: stop-dfs.sh                      shut down

    public static void main(String[] args) throws Exception {
        //  System.out.println(DateMove.exits("/testfile"));
        //  DateMove.createFile("eee.txt","23132123123123213".getBytes());
        // System.out.println(fs.getHomeDirectory());
        FileStatus[] dFileStatus = DateMove.listStatus("/");
        System.out.println(dFileStatus.length);
    }
}
/* Blog-page boilerplate captured by the scrape (like/favorite/reward widgets) —
   not part of the source. Preserved below inside a comment so the file compiles.

  • 0 点赞
  • 0 收藏  觉得还不错? 一键收藏
  • 打赏
  • 0 评论

“相关推荐”对你有帮助么?
  • 非常没帮助 / 没帮助 / 一般 / 有帮助 / 非常有帮助
提交 / 评论 / 添加红包

请填写红包祝福语或标题
红包个数最小为10个 / 红包金额最低5元
当前余额3.43前往充值 > / 需支付:10.00
成就一亿技术人! 领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom 发出的红包

打赏作者
大鑫不列迭
你的鼓励将是我创作的最大动力
¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1 / 获取中 / 扫码支付
您的余额不足,请更换扫码支付或充值

打赏作者
实付 / 使用余额支付 / 点击重新获取 / 扫码支付 / 钱包余额 0

抵扣说明:
1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。
余额充值
*/