package com.sinosoft.splatform.util;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.Properties;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.io.IOUtils;

/**
 * HDFS helper for basic file operations and for mirroring an HDFS tree to the
 * local disk ({@link #hadoopToLocal()}).
 *
 * <p>Configuration is read from a {@code dc.properties} file located next to the
 * jar (keys: {@code fileStoragePath}, {@code fs.defaultFS}, {@code dfs.replication},
 * {@code savePath}); defaults are used if loading fails.
 *
 * <p>NOTE(review): this class is not thread-safe — all state is static and
 * initialized once in the static block.
 */
public class DateMove {

    static Configuration conf = new Configuration(true);
    // Shared FileSystem handle. FileSystem.get(conf) returns a JVM-wide CACHED
    // instance, so individual operations must never close it: one close()
    // invalidates every subsequent call with "java.io.IOException: Filesystem closed".
    // (The original code closed it after almost every operation — fixed here.)
    static FileSystem fs = null;
    static String fileStoragePath = "/testfile";
    static String hdfsUrl = "";
    static String savePath = "";

    static {
        System.out.println("注意:在此jar的同级目录新建 dc.properties 配置信息如下: \n" +
                "#代表只读取这个路径的hadoop文件,只能是一级目录 如 /zjk或者 / \n" +
                "fileStoragePath =/ \n" +
                "fs.defaultFS =hdfs://localhost:9000 \n" +
                "dfs.replication=2 \n" +
                "savePath=C:\\\\Users\\\\15733\\\\Desktop\\\\hadoop\\\\ " +
                "");
        // Locate the directory the jar (or classes dir) lives in, so dc.properties
        // can be found next to it.
        String path = DateMove.class.getProtectionDomain().getCodeSource().getLocation().getPath();
        try {
            path = java.net.URLDecoder.decode(path, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        Properties p = new Properties();
        System.out.println("获取到的path:"+path);
        path = path.replaceAll("\\\\", "/");
        if (path.indexOf(".jar") > -1) {
            // Running from a jar: strip the jar file name, keep its directory.
            path = path.substring(0, path.lastIndexOf("/"));
        }
        System.out.println("文件路径 :"+path+"/dc.properties");
        // try-with-resources: the original never closed this stream.
        try (InputStream is = new FileInputStream(path + "/dc.properties")) {
            p.load(is);
            fileStoragePath = p.getProperty("fileStoragePath");
            hdfsUrl = p.getProperty("fs.defaultFS");
            String replication = p.getProperty("dfs.replication");
            savePath = p.getProperty("savePath");
            conf = new Configuration();
            conf.set("fs.defaultFS", hdfsUrl);
            conf.set("dfs.replication", replication); // HDFS default would be 3
            // Without these two settings Hadoop throws
            // "java.io.IOException: No FileSystem for scheme: hdfs" (resp. "file").
            conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
            conf.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
            fs = FileSystem.get(conf);
            if (!fs.exists(new Path(fileStoragePath))) {
                fs.mkdirs(new Path(fileStoragePath));
            }
        } catch (Exception e) {
            // Best-effort: fall back to the default field values declared above.
            e.printStackTrace();
        }
    }

    /**
     * Returns whether the given HDFS path exists.
     *
     * @param path HDFS path to test
     * @throws IOException on HDFS communication failure
     */
    public static boolean exits(String path) throws IOException {
        return fs.exists(new Path(path));
    }

    /**
     * Creates (or overwrites) an HDFS file with the given raw content.
     *
     * @param filePath HDFS path of the file to create
     * @param contents bytes to write
     * @throws IOException on HDFS communication failure
     */
    public static void createFile(String filePath, byte[] contents) throws IOException {
        Path path = new Path(filePath);
        // try-with-resources closes the stream even if write() fails.
        // Bug fix: the shared fs is intentionally NOT closed here anymore.
        try (FSDataOutputStream outputStream = fs.create(path)) {
            outputStream.write(contents);
        }
    }

    /**
     * Creates (or overwrites) an HDFS file with the given text content (UTF-8).
     *
     * @param filePath    HDFS path of the file to create
     * @param fileContent text to write
     * @throws IOException on HDFS communication failure
     */
    public static void createFile(String filePath, String fileContent) throws IOException {
        // Explicit UTF-8 instead of the platform default charset.
        createFile(filePath, fileContent.getBytes(StandardCharsets.UTF_8));
    }

    /**
     * Uploads a local file to HDFS, overwriting any existing remote file.
     *
     * @param localFilePath  local source path
     * @param remoteFilePath HDFS destination path
     * @throws IOException on HDFS communication failure
     */
    public static void copyFromLocalFile(String localFilePath, String remoteFilePath)
            throws IOException {
        // Uses the shared fs; FileSystem.get(conf) would return the same cached
        // instance anyway, and closing it (as the original did) broke later calls.
        fs.copyFromLocalFile(false, true, new Path(localFilePath), new Path(remoteFilePath));
    }

    /**
     * Deletes an HDFS file or directory.
     *
     * @param remoteFilePath HDFS path to delete
     * @param recursive      whether to delete directory contents recursively
     * @return true if the deletion succeeded
     * @throws IOException on HDFS communication failure
     */
    public static boolean deleteFile(String remoteFilePath, boolean recursive) throws IOException {
        return fs.delete(new Path(remoteFilePath), recursive);
    }

    /**
     * Deletes an HDFS file or directory, recursing into subdirectories.
     *
     * @param remoteFilePath HDFS path to delete
     * @return true if the deletion succeeded
     * @throws IOException on HDFS communication failure
     */
    public static boolean deleteFile(String remoteFilePath) throws IOException {
        return deleteFile(remoteFilePath, true);
    }

    /**
     * Renames (moves) an HDFS file or directory.
     *
     * @param oldFileName current HDFS path
     * @param newFileName new HDFS path
     * @return true if the rename succeeded
     * @throws IOException on HDFS communication failure
     */
    public static boolean renameFile(String oldFileName, String newFileName) throws IOException {
        return fs.rename(new Path(oldFileName), new Path(newFileName));
    }

    /**
     * Creates an HDFS directory if it does not already exist.
     *
     * @param dirName HDFS directory path
     * @return true if the directory was created; false if it already existed
     * @throws IOException on HDFS communication failure
     */
    public static boolean createDirectory(String dirName) throws IOException {
        Path dir = new Path(dirName);
        if (fs.exists(dir)) {
            return false;
        }
        return fs.mkdirs(dir);
    }

    /**
     * Lists all files (not directories) under the given HDFS path.
     *
     * @param basePath  HDFS path to list
     * @param recursive whether to descend into subdirectories
     * @throws IOException on HDFS communication failure
     */
    public static RemoteIterator<LocatedFileStatus> listFiles(String basePath, boolean recursive)
            throws IOException {
        return fs.listFiles(new Path(basePath), recursive);
    }

    /**
     * Lists the immediate children (files and subdirectories) of an HDFS directory.
     *
     * @param dirPath HDFS directory path
     * @throws IOException on HDFS communication failure
     */
    public static FileStatus[] listStatus(String dirPath) throws IOException {
        return fs.listStatus(new Path(dirPath));
    }

    /**
     * Reads an entire HDFS file into memory.
     *
     * @param filePath HDFS path of the file to read
     * @return the file content, or null if the path does not exist
     * @throws IOException on HDFS communication failure
     */
    public static byte[] readFile(String filePath) throws IOException {
        byte[] fileContent = null;
        Path path = new Path(filePath);
        if (fs.exists(path)) {
            InputStream inputStream = null;
            ByteArrayOutputStream outputStream = null;
            try {
                inputStream = fs.open(path);
                // available() is only a hint; used here just to presize the buffer.
                outputStream = new ByteArrayOutputStream(inputStream.available());
                IOUtils.copyBytes(inputStream, outputStream, conf);
                fileContent = outputStream.toByteArray();
            } finally {
                IOUtils.closeStream(inputStream);
                IOUtils.closeStream(outputStream);
            }
        }
        return fileContent;
    }

    /**
     * Downloads a single HDFS file to the local filesystem.
     *
     * @param remote HDFS source path
     * @param local  local destination path
     * @throws IOException on HDFS communication failure
     */
    public static void download(String remote, String local) throws IOException {
        fs.copyToLocalFile(new Path(remote), new Path(local));
        System.out.println("download: from" + remote + " to " + local);
        // Bug fix: the shared fs is no longer closed here, so repeated
        // downloads in one JVM keep working.
    }

    /**
     * Mirrors the HDFS tree to {@code savePath} on the local disk, recreating the
     * directory structure and skipping files that already exist locally.
     *
     * <p>If {@code fileStoragePath} names a first-level directory (e.g. "/zjk"),
     * only files under it are copied; if it is "/", everything is copied.
     *
     * @throws IOException on HDFS communication failure
     */
    public void hadoopToLocal() throws IOException {
        long startMillis = System.currentTimeMillis();
        RemoteIterator<LocatedFileStatus> listIterator = DateMove.listFiles("/", true);
        File fileLocal = new File(savePath);
        if (!fileLocal.exists()) {
            fileLocal.mkdirs();
        }
        if (StringUtils.isNotBlank(fileStoragePath) && fileStoragePath.length() > 2) {
            System.out.println("要获取的hadoop根路径为:"+fileStoragePath);
        }
        while (listIterator.hasNext()) {
            String localPath = savePath;
            LocatedFileStatus te = listIterator.next();
            // File name, e.g. 509F44BF-1E43-6281-3F4F-A957FC42312E.pdf
            String hdfsfileName = te.getPath().getName();
            // Full URI, e.g. hdfs://localhost:9000/zlk/20201210/CE88....doc
            String hdfsPath = te.getPath().toString();
            if ((StringUtils.isNotBlank(fileStoragePath) && fileStoragePath.length() > 2
                    && hdfsPath.indexOf(fileStoragePath) > -1) || fileStoragePath.equals("/")) {
                // Bug fix: the original used String.split(hdfsUrl) / split(hdfsfileName),
                // which interprets both arguments as REGEX — a '.' in a name like
                // "a.pdf" matches any character and corrupts the directory path.
                // Plain substring arithmetic is used instead.
                String relative = hdfsPath.startsWith(hdfsUrl)
                        ? hdfsPath.substring(hdfsUrl.length())
                        : hdfsPath;
                // Directory part including the trailing '/', e.g. "/zlk/20201210/"
                String dirPart = relative.substring(0, relative.lastIndexOf('/') + 1);
                String[] paths = dirPart.split("/");
                for (int i = 0; i < paths.length; i++) {
                    File dir = new File(localPath + paths[i]);
                    if (!dir.exists()) {
                        dir.mkdirs();
                    }
                    // Portable separator instead of the hard-coded "\\".
                    localPath = localPath + paths[i] + File.separator;
                }
                File target = new File(localPath, hdfsfileName);
                if (!target.exists()) {
                    Path path = new Path(hdfsPath);
                    System.out.println("download: from" + hdfsPath + " to " + localPath);
                    fs.copyToLocalFile(path, new Path(localPath));
                } else {
                    System.out.println("~~have exist:"+hdfsPath);
                }
            }
        }
        long endMillis = System.currentTimeMillis();
        System.out.println( "time: "+(endMillis-startMillis)/1000);
        // Terminal batch operation: the shared handle may be closed here, but is
        // left open so other static helpers remain usable in the same JVM.
    }

    // Quick HDFS CLI reference (run from hadoop/bin):
    //  1. hadoop namenode -format                       format the NameNode (HDFS server)
    //  2. hadoop fs -ls                                 list files
    //     (if "ls: `.': No such file or directory", use: hadoop fs -ls /)
    //  3. hadoop fs -mkdir /user/input                  create a directory
    //  4. hadoop fs -put /home/file.txt /user/input     upload a file
    //  5. hadoop fs -cat /user/output/outfile           print file contents
    //  6. hadoop fs -get /user/output/ /home/hadoop_tp/ download files
    //  7. stop-dfs.sh                                   stop HDFS
    public static void main(String[] args) throws Exception {
        // System.out.println(DateMove.exits("/testfile"));
        // DateMove.createFile("eee.txt","23132123123123213".getBytes());
        // System.out.println(fs.getHomeDirectory());
        FileStatus[] dFileStatus = DateMove.listStatus("/");
        System.out.println(dFileStatus.length);
    }
}
// (non-code page residue from the original source, preserved as a comment so the file compiles)
// hadoop
// 最新推荐文章于 2024-04-07 11:44:10 发布