1. Using the C API to HDFS provided by Hadoop to write files into HDFS. Most of the time went into setting up the environment.
Official documentation: http://hadoop.apache.org/common/docs/r0.20.0/libhdfs.html
The available functions are declared in hdfs.h, which can be found in the extracted Hadoop distribution.
The beginner's example in any language is usually "Hello, World", so here is the code for writing a file to HDFS:
#include "hdfs.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main(int argc, char **argv) {
    hdfsFS fs = hdfsConnect("192.168.1.8", 9000); // pass NULL as the host to connect to the local filesystem instead
    if (!fs) {
        fprintf(stderr, "Failed to connect to hdfs!\n");
        exit(-1);
    }
    const char* writePath = "/tmp/testfile.txt";
    hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY|O_CREAT, 0, 0, 0);
    if (!writeFile) {
        fprintf(stderr, "Failed to open %s for writing!\n", writePath);
        exit(-1);
    }
    char* buffer = "Hello, World!";
    tSize num_written_bytes = hdfsWrite(fs, writeFile, (void*)buffer, strlen(buffer)+1);
    if (hdfsFlush(fs, writeFile)) {
        fprintf(stderr, "Failed to 'flush' %s\n", writePath);
        exit(-1);
    }
    hdfsCloseFile(fs, writeFile);
    hdfsDisconnect(fs);
    return 0;
}
Then ssh to the 192.168.1.8 machine and check whether the file was written: bin/hadoop fs -ls /tmp, or copy it to the local filesystem for inspection: bin/hadoop fs -get /tmp/testfile.txt /home/test
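The same check can also be done from C. A minimal sketch, assuming an open hdfsFS handle named fs like the one in the examples here (hdfsListDirectory and hdfsFreeFileInfo are part of libhdfs):
// List /tmp through libhdfs to confirm the file landed there
int numEntries = 0;
hdfsFileInfo* entries = hdfsListDirectory(fs, "/tmp", &numEntries);
if (entries) {
    for (int i = 0; i < numEntries; ++i) {
        printf("%s\t%ld bytes\n", entries[i].mName, (long)entries[i].mSize);
    }
    hdfsFreeFileInfo(entries, numEntries);
}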
Below is the code for reading the HDFS file back:
#include "hdfs.h"
#include <stdio.h>
#include <stdlib.h>

int main(int argc, char **argv) {
    hdfsFS fs = hdfsConnect("192.168.1.8", 9000);
    if (!fs) {
        printf("Failed to connect to hdfs!\n");
        exit(-1);
    }
    const char* rfile = "/tmp/testfile.txt";
    // stat the file first so we know how large a buffer to allocate
    hdfsFileInfo* info = hdfsGetPathInfo(fs, rfile);
    if (!info) {
        printf("Failed to stat %s!\n", rfile);
        exit(-2);
    }
    tOffset fileSize = info->mSize;
    hdfsFreeFileInfo(info, 1);
    hdfsFile readFile = hdfsOpenFile(fs, rfile, O_RDONLY, 0, 0, 0);
    if (!readFile) {
        printf("Failed to open %s for reading!\n", rfile);
        exit(-2);
    }
    // allocate a buffer the size of the file, plus room for a terminating NUL
    char* buffer = malloc(fileSize + 1);
    if (buffer == NULL) {
        return -2;
    }
    // read the whole file into the buffer
    tSize curSize = hdfsRead(fs, readFile, (void*)buffer, (tSize)fileSize);
    if (curSize < 0) {
        printf("Failed to read %s!\n", rfile);
        exit(-2);
    }
    buffer[curSize] = '\0';
    printf("%s\n", buffer);
    free(buffer);
    hdfsCloseFile(fs, readFile);
    hdfsDisconnect(fs);
    return 0;
}
References:
http://www.itpub.net/thread-1423369-1-1.html#
2. Using the Java API to read HDFS files.
For details, see the post 利用JavaAPI访问HDFS的文件 (accessing files on HDFS with the Java API).
Either approach can read and write HDFS files; what follows is the requirement from the company where I interned.
After each write the reader is notified to go read the file. The contents only become readable after the file has been closed, so the file is opened in append mode for each batch and closed again afterwards.
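The code below relies on globals and helpers defined elsewhere in the project (fs, iucsWritePath, iucsWriteFile, BLOCKSIZE, requestqueue, PutRequestIntoQueue, IUCSCDRTYPE). A minimal sketch of what the notification record might look like, with hypothetical field names chosen only for illustration:
/* Hypothetical shape of a read request; the real project defines its own.
 * PutRequestIntoQueue(queue, path, offsetEnd, structLength, dataType, recordCount)
 * is assumed to fill in one of these and push it onto a thread-safe queue
 * that the reader thread polls. */
typedef struct {
    char filePath[256];  /* HDFS file that was just written */
    long offsetEnd;      /* size of the file after this write; read up to here */
    int  structLength;   /* size of one binary record */
    int  dataType;       /* e.g. IUCSCDRTYPE */
    int  recordCount;    /* number of records in this batch */
} ReadRequest;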
void WriteToHDFS(void* buffer, int readLength, int structLength)
{
    long dataSize = readLength * structLength; // number of structs taken from the queue times the struct size
    printf("WriteToHDFS : dataSize is %ld, structLength is %d\n", dataSize, structLength);
    iucsWriteFile = hdfsOpenFile(fs, iucsWritePath, O_WRONLY|O_APPEND, 0, 0, 0);
    int fileSize = hdfsTell(fs, iucsWriteFile);
    int num = (BLOCKSIZE - fileSize) / structLength; // how many more structs the currently open file can hold; BLOCKSIZE = 64 MB
    if (dataSize < BLOCKSIZE) {
        if (num > readLength) { // the current file can hold everything: append it all to the existing file
            hdfsWrite(fs, iucsWriteFile, (void*)buffer, dataSize);
            if (hdfsFlush(fs, iucsWriteFile))
            {
                printf("Failed to 'flush' %s\n", iucsWritePath);
                return;
            }
            hdfsCloseFile(fs, iucsWriteFile);
            PutRequestIntoQueue(requestqueue, iucsWritePath, fileSize + dataSize, structLength, IUCSCDRTYPE, readLength); // notify the read queue
        } else { // the current file cannot hold everything: first fill it with as much as fits
            // write old file
            hdfsWrite(fs, iucsWriteFile, (void*)buffer, num * structLength);
            if (hdfsFlush(fs, iucsWriteFile))
            {
                printf("Failed to 'flush' %s\n", iucsWritePath);
                return;
            }
            hdfsCloseFile(fs, iucsWriteFile);
            PutRequestIntoQueue(requestqueue, iucsWritePath, fileSize + num * structLength, structLength, IUCSCDRTYPE, num);
            // write new file
            if ((readLength - num) * structLength > BLOCKSIZE) {
                printf("--------iucs big data come--------\n");
            }
            memset(iucsWritePath, 0, sizeof(iucsWritePath) / sizeof(char));
            strcat(iucsWritePath, IUCS_FILEPATH);
            getCurrTime();
            strcat(iucsWritePath, currTime);
            strcat(iucsWritePath, ".dat");
            iucsWriteFile = hdfsOpenFile(fs, iucsWritePath, O_WRONLY|O_CREAT, 0, 0, 0);
            if (!iucsWriteFile)
            {
                printf("Failed to open %s for writing!\n", iucsWritePath);
                return;
            }
            // then write the remaining records into the new file
            hdfsWrite(fs, iucsWriteFile, (void*)((char*)buffer + num * structLength), (readLength - num) * structLength);
            if (hdfsFlush(fs, iucsWriteFile))
            {
                printf("Failed to 'flush' %s\n", iucsWritePath);
                return;
            }
            hdfsCloseFile(fs, iucsWriteFile);
            PutRequestIntoQueue(requestqueue, iucsWritePath, (readLength - num) * structLength, structLength, IUCSCDRTYPE, (readLength - num));
        }
    }
    else
    { // if the dequeued data itself exceeds 64 MB, only a warning is printed here; it could also be split recursively across smaller files
        printf("iucs big data come\n");
        //WriteBlockHDFS(buffer, readLength, structLength);
    }
}
For large payloads, the content is written out recursively, one block-sized file at a time:
// dataSize > BLOCKSIZE
void WriteBlockHDFS(void* buffer, int readLength, int structLength) {
    long dataSize = readLength * structLength;
    biccWriteFile = hdfsOpenFile(fs, biccWritePath, O_WRONLY|O_APPEND, 0, 0, 0);
    int fileSize = hdfsTell(fs, biccWriteFile);
    int num = (BLOCKSIZE - fileSize) / structLength;
    // write old file
    hdfsWrite(fs, biccWriteFile, (void*)buffer, num * structLength);
    if (hdfsFlush(fs, biccWriteFile))
    {
        printf("Failed to 'flush' %s\n", biccWritePath);
        return;
    }
    hdfsCloseFile(fs, biccWriteFile);
    // write new file
    cntFile++;
    memset(biccWritePath, 0, sizeof(biccWritePath) / sizeof(char));
    strcat(biccWritePath, BICC_FILEPATH);
    char str[5] = {0};
    sprintf(str, "%d", cntFile);
    strcat(biccWritePath, str);
    strcat(biccWritePath, ".txt");
    biccWriteFile = hdfsOpenFile(fs, biccWritePath, O_WRONLY|O_CREAT, 0, 0, 0);
    if (!biccWriteFile)
    {
        printf("Failed to open %s for writing!\n", biccWritePath);
        return;
    }
    if ((dataSize - num * structLength) > BLOCKSIZE) {
        // the remainder is still larger than one block: recurse on the unwritten part
        hdfsCloseFile(fs, biccWriteFile);
        WriteBlockHDFS((void*)((char*)buffer + num * structLength), readLength - num, structLength);
    }
    else
    {
        // the remainder fits: write it into the freshly created file
        hdfsWrite(fs, biccWriteFile, (void*)((char*)buffer + num * structLength), dataSize - num * structLength);
        if (hdfsFlush(fs, biccWriteFile))
        {
            printf("Failed to 'flush' %s\n", biccWritePath);
            return;
        }
        hdfsCloseFile(fs, biccWriteFile);
    }
}
Note: the struct data is stored in binary form, so it must also be read back as whole structs; otherwise the data read out will be wrong.
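As an illustration, here is a minimal sketch of the reader side. The record type IucsRecord and its fields are hypothetical placeholders whose size must match the structLength used when writing; hdfsPread is a real libhdfs call that reads from an absolute offset.
#include "hdfs.h"
#include <stdio.h>

/* Hypothetical record layout; must match the struct that was written. */
typedef struct {
    int  id;
    char payload[60];
} IucsRecord;

/* Read 'recordCount' records that end at byte offset 'offsetEnd' in 'path'
 * and return how many were actually read, or -1 on error. */
int ReadRecords(hdfsFS fs, const char* path, long offsetEnd,
                int recordCount, IucsRecord* out)
{
    hdfsFile readFile = hdfsOpenFile(fs, path, O_RDONLY, 0, 0, 0);
    if (!readFile) {
        printf("Failed to open %s for reading!\n", path);
        return -1;
    }
    long start = offsetEnd - (long)recordCount * sizeof(IucsRecord);
    /* hdfsPread reads from an absolute offset without moving the file position */
    tSize n = hdfsPread(fs, readFile, start, (void*)out,
                        recordCount * sizeof(IucsRecord));
    hdfsCloseFile(fs, readFile);
    return (n < 0) ? -1 : (int)(n / sizeof(IucsRecord));
}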
Files are named by the current time; here is the code that builds the timestamp string:
#include <stdio.h>
#include <string.h>
#include <time.h>

char currTime[20] = {0};

// append the decimal form of n to currTime
void intTochar(int n) {
    char tempTime[8] = {0}; // large enough for the 4-digit year plus the terminating NUL
    sprintf(tempTime, "%d", n);
    strcat(currTime, tempTime);
}

void getCurrTime() {
    time_t nowTime = time(NULL);
    struct tm* timeinfo = localtime(&nowTime);
    memset(currTime, 0, sizeof(currTime) / sizeof(char));
    intTochar(1900 + timeinfo->tm_year); // tm_year counts from 1900
    intTochar(1 + timeinfo->tm_mon);     // tm_mon counts from 0
    intTochar(timeinfo->tm_mday);
    intTochar(timeinfo->tm_hour);
    intTochar(timeinfo->tm_min);
    intTochar(timeinfo->tm_sec);
}
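A tiny test driver (not part of the original code) shows the resulting names; note that fields below 10 are not zero-padded, so 2012-06-05 09:03:07 produces 201265937:
int main(void) {
    getCurrTime();
    printf("%s.dat\n", currTime); /* e.g. 201265937.dat for 2012-06-05 09:03:07 */
    return 0;
}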