HDFS文件读写

1.用Hadoop提供的C API(libhdfs)实现将文件写入HDFS。整个过程中主要是在配置环境上花了点时间。

参考官网:http://hadoop.apache.org/common/docs/r0.20.0/libhdfs.html

API主要可以去hadoop软件包解压目录中查看hdfs.h定义的一些已实现的函数

一门语言的初学入门例子,一般都是“hello,world”,下面看写hdfs文件代码:

#include "hdfs.h" 

int main(int argc, char **argv) {

    hdfsFS fs = hdfsConnect("192.168.1.8", 9000);//default是连本地文件
    const char* writePath = "/tmp/testfile.txt";
    hdfsFile writeFile = hdfsOpenFile(fs, writePath, O_WRONLY|O_CREAT, 0, 0, 0);
    if(!writeFile) {
          fprintf(stderr, "Failed to open %s for writing!\n", writePath);
          exit(-1);
    }
    char* buffer = "Hello, World!";
    tSize num_written_bytes = hdfsWrite(fs, writeFile, (void*)buffer, strlen(buffer)+1);
    if (hdfsFlush(fs, writeFile)) {
           fprintf(stderr, "Failed to 'flush' %s\n", writePath); 
          exit(-1);
    }
   hdfsCloseFile(fs, writeFile);
}

然后可以ssh登录到192.168.1.8这台机器上查看文件是否已写入:bin/hadoop fs -ls /tmp,或直接把文件拷到本地查看:bin/hadoop fs -get /tmp/testfile.txt /home/test


下面是读hdfs文件代码:

#include "hdfs.h"

int main(int argc, char **argv) {

	hdfsFS fs = hdfsConnect("192.168.1.8", 9000);
if (!fs) {
printf("Failed to connect to hdfs!\n");
exit(-1);
} 

const char* rfile = "/tmp/testfile.txt";

hdfsFile readFile = hdfsOpenFile(fs, rfile, O_RDONLY, 0, 0, 0);
if (!readFile) {
printf("Failed to open %s for writing!\n", rfile);
exit(-2);
}

char* buffer = "Hello, World!";
// data to be written to the file
char* buffer = malloc(sizeof(char) * (strlen(buffer)+1));
if(buffer == NULL) {
return -2;
}

// read from the file
tSize curSize = fileSize;
for (; curSize == fileSize;) {
curSize = hdfsRead(fs, readFile, (void*)buffer, curSize);
}

printf("%s\n",buffer);

free(buffer);
hdfsCloseFile(fs, readFile);
hdfsDisconnect(fs);

return 0;
}

 

参考资料:

http://www.itpub.net/thread-1423369-1-1.html#

 

2.用Java API实现对HDFS文件的读取。

可以具体参考 利用JavaAPI访问HDFS的文件
其实不管用哪种实现方式,都可以实现对HDFS文件的读写,下面是实习公司的需求。

每次写完文件后,就通知读取方去读文件。由于文件必须关闭之后才能读到新写入的内容,所以每次都以追加方式打开文件,写完后立即关闭。

void WriteToHDFS(void* buffer,int readLength,int structLength)
{

  long dataSize = readLength*structLength;//从队列中取出的结构体个数和结构体大小
  printf("WriteIucsToHDFS : dataSize is %d,structLength is %d\n",dataSize,structLength);

  iucsWriteFile = hdfsOpenFile(fs, iucsWritePath, O_WRONLY|O_APPEND, 0, 0, 0);
  int fileSize = hdfsTell(fs,iucsWriteFile);
  int num = (BLOCKSIZE-fileSize)/structLength;//看当前打开文件还能容纳几个结构体,BLOCKSIZE=64N
  
  if(dataSize<BLOCKSIZE){
      if(num>readLength){//当容纳个数大于取出的结构体个数时,直接将取出来的内容写进原文件中
        hdfsWrite(fs, iucsWriteFile, (void*)buffer, dataSize);
         if (hdfsFlush(fs, iucsWriteFile)) 
				      {
				    	     printf("Failed to 'flush' %s\n", iucsWritePath);
				    	     return;
				    	}
    		hdfsCloseFile(fs, iucsWriteFile);	
    		PutRequestIntoQueue(requestqueue,iucsWritePath,fileSize+dataSize,structLength,IUCSCDRTYPE,readLength);//通知读队列
      }else{//当容纳个数小于取出来的结构体个数时,先把能容纳的内容写进原文件
        //write old file
      	hdfsWrite(fs, iucsWriteFile, (void*)buffer, num*structLength);
      	if (hdfsFlush(fs, iucsWriteFile)) 
				      {
				    	     printf("Failed to 'flush' %s\n", iucsWritePath);
				    	     return;
				    	}
    		hdfsCloseFile(fs, iucsWriteFile);	
    		PutRequestIntoQueue(requestqueue,iucsWritePath,fileSize+num*structLength,structLength,IUCSCDRTYPE,num);
    		
    		//write new file
    		if((readLength-num)*structLength>BLOCKSIZE){
    			   printf("--------icus big data come--------\n");
    			}
    		memset(iucsWritePath,0,sizeof(iucsWritePath)/sizeof(char));
    		strcat(iucsWritePath,IUCS_FILEPATH);
    		getCurrTime();
      	strcat(iucsWritePath,currTime);
      	strcat(iucsWritePath,".dat");
    		iucsWriteFile = hdfsOpenFile(fs, iucsWritePath, O_WRONLY|O_CREAT, 0, 0, 0);
      	if(!iucsWriteFile) 
      		    {
          			printf( "Failed to open %s for writing!\n", iucsWritePath);
          			return;
       		    }//再把剩下的文件写到一个新文件中
       hdfsWrite(fs, iucsWriteFile, (void*)((char*)buffer+num*structLength), (readLength-num)*structLength);
       if (hdfsFlush(fs, iucsWriteFile)) 
				      {
				    	     printf("Failed to 'flush' %s\n", iucsWritePath);
				    	     return;
				    	}
    		hdfsCloseFile(fs, iucsWriteFile);	
    		PutRequestIntoQueue(requestqueue,iucsWritePath,fileSize+(readLength-num)*structLength,structLength,IUCSCDRTYPE,(readLength-num));
      }
  }
  else
    {//如果取出来的结构体大小超过64M,是直接报提示,其实也可以用一个递归来实现将大文件分解存放在小文件中
        printf("icus big data come\n");
       	//WriteBlockHDFS(buffer,readLength,structLength); 
  	}
    
}


如果是大文件,则递归将内容逐一写到文件中

//datasize>blocksize
void WriteBlockHDFS(void* buffer,int readLength,int structLength){

    long dataSize = readLength*structLength;
	  biccWriteFile = hdfsOpenFile(fs, biccWritePath, O_WRONLY|O_APPEND, 0, 0, 0);
    int fileSize = hdfsTell(fs,biccWriteFile);
    num = (BLOCKSIZE-fileSize)/structLength;
    
    //write old file
    hdfsWrite(fs, biccWriteFile, (void*)buffer, num*structLength);
		if (hdfsFlush(fs, biccWriteFile)) 
				      {
				    	     printf("Failed to 'flush' %s\n", biccWritePath);
				    	     return;
				    	}
    hdfsCloseFile(fs, biccWriteFile);	
    
    //write new file
    cntFile++;
    memset(biccWritePath,0,sizeof(biccWritePath)/sizeof(char));
    strcat(biccWritePath,BICC_FILEPATH);
    char str[5] = {0};
    sprintf(str,"%d",cntFile);
    strcat(biccWritePath,str);
    strcat(biccWritePath,".txt");
		biccWriteFile = hdfsOpenFile(fs, biccWritePath, O_WRONLY|O_CREAT, 0, 0, 0);
    if(!biccWriteFile) 
      				{
          			printf( "Failed to open %s for writing!\n", biccWritePath);
          			return;
       				}
    
    if((datasize-num*structLength)>BLOCKSIZE){
      hdfsCloseFile(fs, biccWriteFile);		
    	WriteBlockHDFS((char*)(buffer+num*structLength),readLength-num,structLength);
    }	
    else
    {
         hdfsWrite(fs, biccWriteFile, (void*)buffer, num*m);
				 if (hdfsFlush(fs, biccWriteFile)) 
				      {
				    	     printf("Failed to 'flush' %s\n", biccWritePath);
				    	     return;
				    	}
         hdfsCloseFile(fs, biccWriteFile);	
    }
}


注意:结构体数据是以二进制存放,所以在读的时候,也要以结构体的形式读出来,否则读出来的数据就会出错

文件以时间命名,下面是取得当前时间的代码:

/* Timestamp text built up by getCurrTime(); used as part of file names. */
char currTime[20] = {0};

/*
 * Append the decimal representation of n to currTime.
 *
 * The text is formatted directly into the free tail of currTime, so the
 * write can never run past the end of the array: output that does not fit
 * is truncated and currTime always stays NUL-terminated.
 */
void intTochar(int n){
    size_t used = strlen(currTime);

    // No room left for even one more character plus the terminator.
    if (used + 1 >= sizeof(currTime)) {
        return;
    }
    snprintf(currTime + used, sizeof(currTime) - used, "%d", n);
}

/*
 * Store the current local time in currTime as YYYYMMDDHHMMSS.
 *
 * Every field is zero-padded to a fixed width so that two different times
 * can never produce the same string (without padding, 2012-1-11 and
 * 2012-11-1 at 1:01:01 would both become "2012111111").
 * If the local time cannot be determined, currTime is left empty.
 */
void getCurrTime(){
    time_t nowTime = time(NULL);
    struct tm * timeinfo = localtime(&nowTime);

    memset(currTime,0,sizeof(currTime));
    if (timeinfo == NULL) {
        return;
    }

    // 14 characters plus the terminator; strftime returns 0 if that did not fit.
    if (strftime(currTime, sizeof(currTime), "%Y%m%d%H%M%S", timeinfo) == 0) {
        currTime[0] = '\0';
    }
}


 

 

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值