hadoop fs -ls /              list the files under the root directory
hadoop fs -mkdir -p /aaa/bbb create directories recursively
hadoop fs -cp /xxx /xxxx     copy a file
hadoop fs -cp /xxx/ /xxxxx   copy a directory recursively
hadoop fs -put /xxx /        upload a local file to the root of the file system
hadoop fs -get /xxxx /xxxx   download a file to the specified local directory
hadoop fs -rm /xxx.txt       delete a file
hadoop fs -rm -r /xxx        delete a directory and its contents recursively
hadoop fs -mv /xxx /xxx      move a file
hadoop fs -cat /xxx          print a file's contents
hadoop fs -tail /xxxx.log    print the tail of a file
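The same operations are also available programmatically through the Java FileSystem API, which the exercise below builds on. A minimal sketch (assuming the NameNode address hdfs://centos01:9000 and the user root that appear later in this post):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FsShellEquivalents {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new URI("hdfs://centos01:9000/"), new Configuration(), "root");
        fs.mkdirs(new Path("/aaa/bbb"));                         // hadoop fs -mkdir -p /aaa/bbb
        for (FileStatus status : fs.listStatus(new Path("/"))) { // hadoop fs -ls /
            System.out.println(status.getPath());
        }
        fs.delete(new Path("/aaa"), true);                       // hadoop fs -rm -r /aaa
        fs.close();
    }
}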
1 Hadoop practice (development environment: JDK 1.8, Eclipse)
(1) Simulate log generation with a TimerTask scheduled task
a log4j configuration file
log4j.rootLogger=ALL,logRollingFile
log4j.appender.logRollingFile=org.apache.log4j.RollingFileAppender
log4j.appender.logRollingFile.layout=org.apache.log4j.PatternLayout
log4j.appender.logRollingFile.layout.ConversionPattern=%m%n
log4j.appender.logRollingFile.Threshold=INFO
log4j.appender.logRollingFile.ImmediateFlush=TRUE
log4j.appender.logRollingFile.Append=TRUE
log4j.appender.logRollingFile.File=d://logs//access.log
log4j.appender.logRollingFile.MaxFileSize=5MB
log4j.appender.logRollingFile.MaxBackupIndex=50
log4j.appender.logRollingFile.Encoding=UTF-8
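If this file is saved as log4j.properties on the classpath, log4j 1.x picks it up automatically; otherwise it can be loaded by hand. A minimal sketch (the file location below is a hypothetical example):

import org.apache.log4j.PropertyConfigurator;

public class Log4jBootstrap {
    public static void main(String[] args) {
        // hypothetical path; point this at wherever the properties file actually lives
        PropertyConfigurator.configure("conf/log4j.properties");
    }
}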
b Scheduled task that simulates log production
package com.gc.hadoop.logProduct;

import java.util.Random;
import java.util.TimerTask;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * 1 Simulate the generation of a log file:
 *   a define a word array
 *   b pick words by random index and log them
 * @author guochao
 */
public class LogProduct extends TimerTask {
    Logger log = LoggerFactory.getLogger(LogProduct.class);
    String[] str = {"ab","cd","ef","gg","hi","word","count","java",
            "html","css","jsp","jquery","js","servlet","easyUI",
            "springmvc","spring","springboot","springcloud","dubbo","redis","solr","rabbitmq","hadoop",
            "mysql","thread","list","set","map","hashmap","hashTable","hello"};

    @Override
    public void run() {
        Random r = new Random(System.currentTimeMillis());
        StringBuilder builder = null;
        for (int i = 0; i < 10000; i++) {
            builder = new StringBuilder();
            // nextInt(bound) gives a non-negative index directly and avoids the
            // Math.abs(Integer.MIN_VALUE) corner case of abs(nextInt()) % length
            int random = r.nextInt(str.length);
            builder.append(str[random] + " " + str[i % str.length]);
            log.info(builder.toString());
        }
    }
}
(2) Write the log-collection scheduled task
package com.gc.hadoop.logProduct;

import java.io.File;
import java.io.FilenameFilter;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.TimerTask;
import java.util.UUID;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Log-collection scheduled task:
 * a read the finished log files from the log directory
 * b move them to the pending-upload directory
 * c after uploading, move them to the backUp directory
 * @author guochao
 */
public class DataCollection extends TimerTask {
    @Override
    public void run() {
        // log root directory
        File logDir = new File("D:\\logs");
        if (!logDir.exists()) {
            System.out.println("Directory does not exist!");
            return;
        }
        File[] logFiles = logDir.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                // finished logs end with a numeric rolling suffix (access.log.1, access.log.2, ...),
                // so accept everything that does NOT end with ".log"
                return !name.endsWith(".log");
            }
        });
        // move the finished logs to the upload directory d://upload//
        SimpleDateFormat simple = new SimpleDateFormat("yyyyMMddHHmmss"); // HH: 24-hour clock
        String format = simple.format(new Date());
        String uploadDir = "d://upload//";
        File upLoad = new File(uploadDir);
        if (!upLoad.exists()) {
            upLoad.mkdirs(); // create the directory
        }
        for (File file : logFiles) {
            // strip the 2-character rolling suffix (e.g. ".1") and append a timestamp
            file.renameTo(new File(uploadDir + file.getName().substring(0, file.getName().length() - 2) + format + ".log"));
        }
        // read the files waiting in the upload directory
        File[] uploadLogFiles = upLoad.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File dir, String name) {
                return name.startsWith("access");
            }
        });
        // build the backup directory
        simple = new SimpleDateFormat("yyyy-MM-dd");
        String backUpDir = "d://backup//" + simple.format(new Date());
        File backup = new File(backUpDir);
        if (!backup.exists()) {
            backup.mkdirs();
        }
        if (uploadLogFiles != null && uploadLogFiles.length > 0) {
            Configuration conf = new Configuration();
            try {
                FileSystem fs = FileSystem.get(new URI("hdfs://centos01:9000/"), conf, "root");
                Path logPath = new Path("/logs/" + simple.format(new Date()));
                if (!fs.exists(logPath)) {
                    fs.mkdirs(logPath);
                }
                for (File upload : uploadLogFiles) {
                    // upload the log file to HDFS; the appended UUID keeps names unique
                    fs.copyFromLocalFile(new Path(upload.getPath()), new Path(logPath.toString() + "/" + upload.getName() + UUID.randomUUID()));
                    upload.renameTo(new File(backUpDir + "\\" + upload.getName()));
                }
                fs.close();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }
}
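One caveat with the class above: File.renameTo returns false instead of throwing when a move fails (for example across drives), and the code ignores that return value. A more robust variant of the backup step could use java.nio.file.Files.move; a minimal sketch (the file names are hypothetical):

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;

public class MoveToBackup {
    public static void main(String[] args) throws IOException {
        Path src = Paths.get("d:/upload/access.log20190316011839.log"); // hypothetical file
        Path dst = Paths.get("d:/backup/2019-03-16/" + src.getFileName());
        Files.createDirectories(dst.getParent());                   // ensure the backup directory exists
        Files.move(src, dst, StandardCopyOption.REPLACE_EXISTING);  // throws IOException on failure
    }
}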
(3) Write the launcher class
package com.gc.hadoop.dataCollection;

import java.util.Timer;

import com.gc.hadoop.logProduct.CleanBackUpFile;
import com.gc.hadoop.logProduct.DataCollection;
import com.gc.hadoop.logProduct.LogProduct;

/**
 * a simulate log file generation
 * b collect the files
 * c clean up the files
 * @author guochao
 */
public class logDataCollection {
    public static void main(String[] args) {
        Timer timer = new Timer();
        // scheduled task: generate logs every second
        LogProduct task = new LogProduct();
        timer.schedule(task, 0, 1 * 1000);
        // scheduled task: collect files every 2 minutes
        timer.schedule(new DataCollection(), 0, 2 * 60 * 1000);
        // scheduled task: clean backup files, first run after 1 minute, then every 5 minutes
        timer.schedule(new CleanBackUpFile(), 60 * 1000, 5 * 60 * 1000);
    }
}
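The launcher imports a CleanBackUpFile task whose source is not shown in this post. A minimal sketch of what such a cleanup task might look like (an assumption on my part: it deletes backup files older than one day):

package com.gc.hadoop.logProduct;

import java.io.File;
import java.util.TimerTask;

/**
 * Hypothetical reconstruction: the original post does not include this class.
 * Deletes backup files older than one day from d://backup//.
 */
public class CleanBackUpFile extends TimerTask {
    @Override
    public void run() {
        File backupRoot = new File("d:\\backup");
        File[] dayDirs = backupRoot.listFiles();
        if (dayDirs == null) {
            return;
        }
        long cutoff = System.currentTimeMillis() - 24L * 60 * 60 * 1000;
        for (File dir : dayDirs) {
            File[] files = dir.listFiles();
            if (files == null) continue;
            for (File f : files) {
                if (f.lastModified() < cutoff) {
                    f.delete();
                }
            }
        }
    }
}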
(4) Write a simple API test to query the files
package com.gc.hadoop.api;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Test;

/**
 * Query the uploaded log files.
 * @author guochao
 */
public class HadoopTest {
    /**
     * List all files recursively.
     * @throws Exception
     */
    @Test
    public void listFsFile() throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://centos01:9000/"), conf, "root");
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("hdfs://centos01:9000/"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus file = listFiles.next();
            // getLen() is the file size; getBlockSize() would return the HDFS block size instead
            System.out.println("File size: " + file.getLen());
            System.out.println("File path: " + file.getPath().toString());
        }
    }

    /**
     * Delete the files under the given directory.
     */
    @Test
    public void deleteFile() throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(new URI("hdfs://centos01:9000/"), conf, "root");
        boolean delete = fs.delete(new Path("hdfs://centos01:9000/logs"), true);
        System.out.println(delete);
    }
}
Sample output:
File path: hdfs://centos01:9000/logs/2019-03-16/access.log.120190316011625.log0f37d549-9903-4e8a-8092-3a675fc54f30
File path: hdfs://centos01:9000/logs/2019-03-16/access.log.320190316011625.loge969e645-c7b6-43b3-b616-40087f947b20
File path: hdfs://centos01:9000/logs/2019-03-16/access.log20190316011839.log138106ab-c8c0-4bef-a82a-9ff0fe6075f8
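To double-check an upload, the file can be read back through the same API. A minimal sketch, using one of the paths from the listing output above:

import java.io.InputStream;
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class ReadBackTest {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new URI("hdfs://centos01:9000/"), new Configuration(), "root");
        // path taken from the listing output above
        InputStream in = fs.open(new Path("/logs/2019-03-16/access.log20190316011839.log138106ab-c8c0-4bef-a82a-9ff0fe6075f8"));
        IOUtils.copyBytes(in, System.out, 4096, false); // 4 KB buffer; keep System.out open
        in.close();
        fs.close();
    }
}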
That wraps up the small exercise for day one.