最近需要对HDFS进行读写操作,参考hdfs.h头文件里面的注释,编写了一个例子。详细的说明在代码的注释中,如发现问题欢迎批评指正~
#include <iostream>
#include <stdio.h>
#include "hdfs.h"
#include <string.h>
#include <stdlib.h>
// Log a message prefixed with the source file, enclosing function and line.
// `args` must be a C string; it is passed as a "%s" argument, never used as
// the format itself. The macro expands to one expression with no trailing
// semicolon, so `PRINT(x);` is exactly one statement and stays valid in an
// unbraced if/else.
#define PRINT(args) printf("[%s] - %s,%d,%s", __FILE__, __func__, __LINE__, (args))
// Evaluates to non-zero when pointer `p` is NULL. Fully parenthesized so it
// composes correctly with `!`, `?:` and other operators at the call site.
#define INVALID_POINT(p) ((p) == NULL)
// desc: 读文件
// param: 参数一/文件系统的句柄 参数二/hdfs上的路径
// return: 读取成功返回true,读取失败返回false
bool hdfs_read(const hdfsFS *pfsdist, const char *hadoop_Path)
{
//获取hdfs上的文件信息(文件大小)
hdfsFileInfo *hdfs_info = (hdfsFileInfo *)malloc(sizeof(hdfsFileInfo));
hdfs_info = hdfsGetPathInfo(*pfsdist, hadoop_Path);
if (INVALID_POINT(hdfs_info))
{
PRINT("file does not exist\n");
return false;
}
long int file_size = (hdfs_info->mSize) + 1;
free(hdfs_info);
hdfs_info = NULL;
//分配内存,准备读取文件
void *buffer = malloc(file_size);
if (INVALID_POINT(buffer))
{
PRINT("malloc rror\n");
return false;
}
else
{
memset(buffer, (int)'\0', file_size);
//打开文件
hdfsFile handle_hdfsFile_w = hdfsOpenFile(*pfsdist, hadoop_Path, O_RDONLY, 0, 0, 0);
if (INVALID_POINT(handle_hdfsFile_w))
{
PRINT("Failed to open file!\n");
free(buffer);
buffer = NULL;
return false;
}
else
{
//读取文件
tSize num_read_bytes = hdfsRead(*pfsdist, handle_hdfsFile_w, buffer, file_size);
if ((file_size - 1) == num_read_bytes){
PRINT("read file successful\n");
}else{
PRINT("read file failure\n");
}
printf("data -> %s\n", (char *)buffer);
free(buffer);
buffer = NULL;
hdfsCloseFile(*pfsdist, handle_hdfsFile_w);
return true;
}
}
}
// desc: 写文件
// param: 参数一/文件系统的句柄 参数二/hdfs上的路径 参数三/要写入文件的内容
// return: 写入成功返回true,写入失败返回false
bool hdfs_write(const hdfsFS *pfsdist, const char *hadoop_Path, const char *buffer)
{
//打开文件
hdfsFile handle_hdfsFile = hdfsOpenFile(*pfsdist, hadoop_Path, O_WRONLY | O_CREAT, 0, 0, 0);
if (INVALID_POINT(handle_hdfsFile))
{
PRINT("Failed to open file!\n");
return false;
}
//读取文件
tSize num_written_bytes = hdfsWrite(*pfsdist, handle_hdfsFile, (void *)buffer, strlen(buffer));
if (num_written_bytes == (strlen(buffer))){
PRINT("write file successful\n");
}else{
PRINT("write file failure\n");
}
hdfsCloseFile(*pfsdist, handle_hdfsFile);
return true;
}
// desc:   Demo entry point: connect to HDFS, write a small file, read it
//         back, then disconnect.
// return: EXIT_SUCCESS when every step succeeded, EXIT_FAILURE otherwise
int main()
{
    PRINT("start the application\n");
    hdfsFS pfsdist = hdfsConnect("hdfs://mycluster", 8020);
    if (INVALID_POINT(pfsdist))
    {
        // Every later call needs a valid handle, so stop here.
        PRINT("hdfsConnect error\n");
        return EXIT_FAILURE;
    }
    PRINT("connect successful\n");

    const char *hadoop_Path = "/data/bigdata/input/xsy_t.txt";
    // Only read the file back if the write was reported as successful.
    bool all_ok = hdfs_write(&pfsdist, hadoop_Path, "xsy_t -> xsy_t");
    if (all_ok)
    {
        all_ok = hdfs_read(&pfsdist, hadoop_Path);
    }

    // hdfsDisconnect returns 0 on success.
    if (hdfsDisconnect(pfsdist) == 0)
    {
        PRINT("disconnect successful\n");
    }
    else
    {
        PRINT("disconnect failed\n");
        all_ok = false;
    }
    return all_ok ? EXIT_SUCCESS : EXIT_FAILURE;
}
参考:
/hadoop-2.7.2/include/hdfs.h