1. Client environment setup
(1) Extract the Hadoop 2.9.2 distribution to a path that contains no Chinese characters
(for example: E:\hadoop-2.9.2)
(2) Configure the HADOOP_HOME environment variable
(3) Create a Maven project and add the following dependencies (a sample logging configuration follows the dependency list)
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>RELEASE</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>2.8.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>2.9.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.9.2</version>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs</artifactId>
<version>2.9.2</version>
</dependency>
</dependencies>
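The log4j-core dependency above expects a Log4j 2 configuration file on the classpath; without one, Log4j prints a "no configuration file found" warning and falls back to console-only error logging. The snippet below is a minimal sketch of a log4j2.xml that could be placed under src/main/resources; this file is not part of the original setup, just one reasonable default (console output at INFO level).
<?xml version="1.0" encoding="UTF-8"?>
<Configuration status="WARN">
<Appenders>
<Console name="Console" target="SYSTEM_OUT">
<PatternLayout pattern="%d{HH:mm:ss.SSS} [%t] %-5level %logger{36} - %msg%n"/>
</Console>
</Appenders>
<Loggers>
<Root level="info">
<AppenderRef ref="Console"/>
</Root>
</Loggers>
</Configuration>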
2. Coding (creating directories, uploading, downloading, etc. on HDFS)
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;

public class HdfsClient {
FileSystem fs;
Configuration configuration;
@Before
public void init() throws Exception {
// 1. Get the file system handle (connect to the NameNode as user root)
configuration = new Configuration();
// Values set in code take precedence over config files on the client classpath
configuration.set("dfs.replication", "2");
fs = FileSystem.get(new URI("hdfs://linux126:9000"), configuration, "root");
}
@After
public void destroy() throws IOException {
// 3. Release resources
fs.close();
}
/**
* Create a directory on HDFS
*
* @throws Exception
*/
@Test
public void testMkdirs() throws Exception {
Path path = new Path("/java_test");
// 2. Create the directory only if it does not already exist
boolean exists = fs.exists(path);
if (!exists) {
fs.mkdirs(path);
}
}
// Upload a local file to HDFS
@Test
public void copyFromLocalToHdfs() throws IOException {
fs.copyFromLocalFile(new Path("D://lagou.txt"), new Path("/java01/lagou.txt"));
}
// Download an HDFS file to the local file system
@Test
public void copyFromHdfsToLocal() throws IOException {
fs.copyToLocalFile(new Path("/java01/lagou.txt"), new Path("D://lagou02.txt"));
}
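// Note (not in the original code): on Windows, the two-argument copyToLocalFile above
// usually leaves an extra .crc checksum file next to the local copy. The four-argument
// overload can copy through the raw local file system to avoid that; the test below is
// a hypothetical sketch added for illustration, with a made-up target path.
@Test
public void copyFromHdfsToLocalRaw() throws IOException {
// delSrc = false (keep the HDFS copy), useRawLocalFileSystem = true (no local .crc file)
fs.copyToLocalFile(false, new Path("/java01/lagou.txt"), new Path("D://lagou03.txt"), true);
}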
// Delete a file or directory (recursive = true)
@Test
public void delete() throws IOException {
fs.delete(new Path("/logCollect"), true);
}
// Recursively list the files under an HDFS directory, with details
@Test
public void listFile() throws IOException {
RemoteIterator<LocatedFileStatus> locatedFileStatusRemoteIterator = fs.listFiles(new Path("/java01"), true);
while (locatedFileStatusRemoteIterator.hasNext()) {
LocatedFileStatus next = locatedFileStatusRemoteIterator.next();
String name = next.getPath().getName();// file name
long len = next.getLen();// length in bytes
FsPermission permission = next.getPermission();// permissions
String group = next.getGroup();// group
String owner = next.getOwner();// owner
System.out.println(name + "\t" + len + "\t" + permission + "\t" + group + "\t" + owner);
// Print the block locations
BlockLocation[] blockLocations = next.getBlockLocations();
for (BlockLocation blockLocation : blockLocations) {
String[] hosts = blockLocation.getHosts();
for (String host : hosts) {
System.out.println("主机名称" + host);
}
}
}
}
// List the entries (files and directories) directly under an HDFS path
@Test
public void listFiles() throws IOException {
FileStatus[] fileStatuses = fs.listStatus(new Path("/"));
for (FileStatus fileStatus : fileStatuses) {
boolean directory = fileStatus.isDirectory();
if (directory) {
System.out.println("文件夹" + fileStatus.getPath().getName());
} else {
System.out.println("文件" + fileStatus.getPath().getName());
}
}
}
// Upload using IO streams
@Test
public void ioToHdfs() throws IOException {
FileInputStream input = new FileInputStream(new File("d://lagou.txt"));
FSDataOutputStream output = fs.create(new Path("/io/test.txt"));
IOUtils.copyBytes(input, output, configuration);
}
// Download an HDFS file using IO streams
@Test
public void toLocal() throws IOException {
FSDataInputStream input = fs.open(new Path("/io/test.txt"));
FileOutputStream out = new FileOutputStream(new File("d://lagou_io_tolocal.txt"));
IOUtils.copyBytes(input, out, configuration);
}
// Use seek() to re-read part of a file
@Test
public void seekReadFile() throws IOException {
final FSDataInputStream in = fs.open(new Path("/io/test.txt"));
IOUtils.copyBytes(in, System.out, 1024, false);// print the whole file
System.out.println();
in.seek(12);// move the read position back to byte offset 12
IOUtils.copyBytes(in, System.out, 1024, false);// print the file again, starting at offset 12
IOUtils.closeStream(in);
}
// Verify that data is uploaded in 64 KB packets
@Test
public void ToHdfsPacket() throws IOException {
FileInputStream input = new FileInputStream(new File("d://hgli2.txt"));
FSDataOutputStream output = fs.create(new Path("/io/Packet/test.txt"), new Progressable() {
public void progress() {
System.out.println("&");//这个progress方法就是每传输64KB(packet)就会执行一次,
}
});
IOUtils.copyBytes(input, output, configuration);
}
}
3. Scheduled task: watch for log files and upload them to HDFS
3.1 Collecting logs with a scheduled task
import java.util.Timer;

public class LogCollector {
public static void main(String[] args) {
Timer timer = new Timer();
// Run the collection task immediately, then once every hour (3600 * 1000 ms)
timer.schedule(new LogCollectTask(), 0, 3600 * 1000);
}
}
3.2 Storing the collected log files in HDFS, with a local backup
# log.properties (comments must be on their own line; trailing text after a value would become part of the value)
# directory to scan for log files
LOG.DIR=d://bigdata/log/
# prefix of the files to collect
LOG.PRE=log.
# temporary directory
LOG.TMP.DIR=d://bigdata/tmp/
# local backup directory
LOG.BAK.DIR=d://bigdata/bak/
# target directory on HDFS
LOG.HDFS.DIR=/logCollect/
The configuration file utility class:
import java.io.IOException;
import java.util.Properties;

public class PropTool {
// Eagerly initialized: the properties file is loaded once when the class is loaded
private static Properties prop = null;
static {
prop = new Properties();
try {
prop.load(LogCollectTask.class.getClassLoader().getResourceAsStream("log.properties"));
} catch (IOException e) {
e.printStackTrace();
}
}
public static Properties getProp() {
return prop;
}
}
The constants class:
public class Constant {
public static final String LOG_DIR = "LOG.DIR";// directory containing the log files
public static final String LOG_PRE = "LOG.PRE";// prefix of the log files to collect
public static final String LOG_TMP_DIR = "LOG.TMP.DIR";// temporary directory
public static final String LOG_BAK_DIR = "LOG.BAK.DIR";// local backup directory
public static final String LOG_HDFS_DIR = "LOG.HDFS.DIR";// target directory on HDFS
}
The scheduled task class:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;
import java.util.TimerTask;
import java.util.logging.Logger;

public class LogCollectTask extends TimerTask {
private static Logger logger = Logger.getLogger(LogCollectTask.class.getName());
// Date string used for the HDFS and backup sub-directories; set at the start of each run
// rather than once at class-load time, so the task keeps working after midnight
private String strDate;
// Read the configuration once
static Properties prop = PropTool.getProp();
public void run() {
strDate = new SimpleDateFormat("yyyy-MM-dd").format(new Date());
// 1. Record the date the files are being collected for
logger.info("=== collecting log files for date === " + strDate);
// 2. Scan the source directory for files matching the configured prefix
File[] list = getSourceFiles();
// 3. Move the matched files into the temporary directory
File tmp = mvFilesToTargetDir(prop.get(Constant.LOG_TMP_DIR), list);
File[] files = tmp.listFiles();
try {
// 4. Upload the files from the temporary directory to HDFS
toHdfs(files);
// 5. Move the uploaded files into the dated backup directory
mvFilesToTargetDir(prop.get(Constant.LOG_BAK_DIR) + strDate, files);
} catch (IOException e) {
e.printStackTrace();
}
}
/**
* Upload the given files to HDFS
* @param files files to upload
* @throws IOException
*/
private void toHdfs(File[] files) throws IOException {
// Upload the files from the temporary directory to HDFS
Configuration config = new Configuration();
FileSystem fs = null;
try {
Path path = new Path(prop.get(Constant.LOG_HDFS_DIR) + strDate);
fs = FileSystem.get(new URI("hdfs://linux126:9000"), config, "root");
if (!fs.exists(path)) {
fs.mkdirs(path);
}
// 4.1 Iterate over the files in the temporary directory and upload each one
for (File tmpFile : files) {
// Upload to HDFS under the dated directory
fs.copyFromLocalFile(new Path(tmpFile.getPath()), new Path(prop.get(Constant.LOG_HDFS_DIR) + strDate + "/" + tmpFile.getName()));
}
} catch (Exception e) {
e.printStackTrace();
} finally {
if (fs != null) {
try {
fs.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
/**
* Move the matched log files into the target directory
* @param filePath target directory
* @param list matched log files
* @return the target directory
*/
private File mvFilesToTargetDir(Object filePath, File[] list) {
File tmp = new File(filePath.toString());
if (!tmp.exists()) {
tmp.mkdirs();
}
for (File fileToDo : list) {
logger.info("==="+tmp.getPath() + File.separator + fileToDo.getName());
// Note: File.renameTo can fail silently, e.g. when moving across file systems
fileToDo.renameTo(new File(tmp.getPath() + File.separator + fileToDo.getName()));
}
logger.info("===临时文件创建完毕===");
return tmp;
}
/**
* Find the log files that match the configured prefix
* @return the matching files from the source directory
*/
private File[] getSourceFiles() {
File file = new File(prop.getProperty(Constant.LOG_DIR));
final String logPre = prop.getProperty(Constant.LOG_PRE);
return file.listFiles(new FilenameFilter() {
public boolean accept(File dir, String name) {
return name.startsWith(logPre);
}
});
}
}
Note: the above is just my own small summary. If it has been of any help to you, please give it a like below; if you spot any mistakes, I would be grateful if you pointed them out. Thank you!