概要:
在hadoop上以streaming方式运行map任务,map任务作为socket client端,与运行在外部系统上的server交互
代码:
server.cpp
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/wait.h>
#include <iostream>
#define SERVPORT 9899 /*服务器监听端口号 */
#define BACKLOG 1000 /* 最大同时连接请求数 */
#define MAXDATASIZE 100 /*每次最大数据传输量 */
using namespace std;
/*
 * Forking TCP greeting server.
 *
 * Listens on SERVPORT on all local interfaces. For every accepted connection
 * the parent logs the peer address and forks a child; the child sends a fixed
 * greeting, reads one message (at most MAXDATASIZE - 1 bytes), prints it to
 * stdout and exits. The parent loops forever and never returns normally.
 *
 * Exits with status 1 if the listening socket cannot be created, bound or
 * put into listening mode.
 */
int main()
{
    int sock_fd;                    /* listening socket */
    int client_fd;                  /* per-connection data socket */
    socklen_t sin_size;             /* accept() expects a socklen_t, not an int */
    char buf[MAXDATASIZE];
    struct sockaddr_in my_addr;     /* local address */
    struct sockaddr_in remote_addr; /* peer address */

    if ((sock_fd = socket(AF_INET, SOCK_STREAM, 0)) == -1)
    {
        perror("socket创建出错!");
        exit(1);
    }

    memset(&my_addr, 0, sizeof(my_addr));
    my_addr.sin_family = AF_INET;
    my_addr.sin_port = htons(SERVPORT);
    my_addr.sin_addr.s_addr = htonl(INADDR_ANY);

    if (bind(sock_fd, (struct sockaddr *)&my_addr, sizeof(my_addr)) == -1)
    {
        perror("bind出错!");
        close(sock_fd);
        exit(1);
    }
    if (listen(sock_fd, BACKLOG) == -1)
    {
        perror("listen出错!");
        close(sock_fd);
        exit(1);
    }

    while (1)
    {
        /* Reap children that have already finished so they do not pile up
         * as zombies; WNOHANG keeps this from blocking the accept loop. */
        while (waitpid(-1, NULL, WNOHANG) > 0)
        {
        }

        sin_size = sizeof(remote_addr);
        client_fd = accept(sock_fd, (struct sockaddr *)&remote_addr, &sin_size);
        if (client_fd == -1)
        {
            perror("accept出错");
            continue;
        }
        printf("received a connection from %s\n", inet_ntoa(remote_addr.sin_addr));

        /* Flush before fork(): otherwise text still sitting in the stdio
         * buffer is duplicated into the child and written a second time. */
        fflush(stdout);

        pid_t pid = fork();
        if (pid == -1)
        {
            perror("fork出错!");
            close(client_fd);
            continue;
        }
        if (pid == 0)
        {
            /* Child: handles exactly one connection. */
            close(sock_fd); /* the child never accepts, so drop the listener */

            if (send(client_fd, "Hello, you are connected!\n", 26, 0) == -1)
            {
                perror("send出错!");
            }

            /* Leave room for the terminator: the peer is not required to
             * send a NUL-terminated string. */
            ssize_t recvbytes = recv(client_fd, buf, MAXDATASIZE - 1, 0);
            if (recvbytes == -1)
            {
                perror("recv出错!");
                close(client_fd);
                exit(1);
            }
            buf[recvbytes] = '\0';
            std::cout << buf << std::endl;

            close(client_fd);
            exit(0);
        }

        /* Parent: the child owns the connection now. */
        close(client_fd);
    }
}
client.cpp
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <netdb.h>
#include <unistd.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <iostream>
#define SERVPORT 9899
#define MAXDATASIZE 100 /*每次最大数据传输量 */
using namespace std;
/*
 * Streaming-mapper socket client.
 *
 * Echoes every whitespace-separated token read from stdin to stdout, then
 * connects to the server named by argv[1] on SERVPORT, receives one message,
 * sends "hello server" (including its terminating NUL) and prints the
 * received message prefixed with "Received: ".
 *
 * argv[1]: host name of the server.
 * Returns 0 on success; exits with status 1 if the host argument is missing
 * or if name resolution, socket creation, connect or recv fails.
 */
int main(int argc, char *argv[])
{
    int sock_fd;
    ssize_t recvbytes;
    char buf[MAXDATASIZE];
    struct hostent *host;
    struct sockaddr_in serv_addr;

    /* Pass the input through to stdout, one token per line. */
    std::string input;
    while (std::cin >> input)
    {
        std::cout << input << '\n';
    }
    std::cout.flush();

    if (argc < 2)
    {
        fprintf(stderr, "Please enter the server's hostname!\n");
        exit(1);
    }
    if ((host = gethostbyname(argv[1])) == NULL)
    {
        herror("gethostbyname出错!");
        exit(1);
    }

    /* Create the socket. */
    if ((sock_fd = socket(AF_INET, SOCK_STREAM, 0)) == -1)
    {
        perror("socket创建出错!");
        exit(1);
    }

    memset(&serv_addr, 0, sizeof(serv_addr));
    serv_addr.sin_family = AF_INET;
    serv_addr.sin_port = htons(SERVPORT);
    /* Copy the first resolved address byte-wise instead of dereferencing a
     * cast pointer, which would depend on the alignment of the resolver's
     * buffer. */
    memcpy(&serv_addr.sin_addr, host->h_addr_list[0], sizeof(serv_addr.sin_addr));

    /* Connect. */
    if (connect(sock_fd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) == -1)
    {
        perror("connect出错!");
        close(sock_fd);
        exit(1);
    }

    /* Receive the greeting. One byte is reserved for the terminator so that
     * a completely filled buffer cannot cause a write past the end of buf. */
    recvbytes = recv(sock_fd, buf, MAXDATASIZE - 1, 0);
    if (recvbytes == -1)
    {
        perror("recv出错!");
        close(sock_fd);
        exit(1);
    }
    buf[recvbytes] = '\0';

    /* Send the reply; length 13 includes the terminating NUL. */
    if (send(sock_fd, "hello server", 13, 0) == -1)
    {
        perror("send出错!");
    }

    printf("Received: %s", buf);
    close(sock_fd);
    return 0;
}
编译程序:
$ g++ -o client client.cpp
$ g++ -o server server.cpp
测试:
测试机上运行server
$./server
hadoop上运行任务:
$ hadoop streaming -input /user/test.txt -output /user/result -mapper "./client **domain**" -reducer "cat" -file client -jobconf mapred.reduce.tasks=1 -jobconf mapred.map.tasks=5 -jobconf mapred.job.name="socket_test"
输入文件 /user/test.txt 的内容为 1324
-jobconf mapred.reduce.tasks=1 -jobconf mapred.map.tasks=5 :mapper任务5个,reduce任务1个
./server输出:
received a connection from 10.*.*.*
hello server
received a connection from 10.*.*.*
hello server
received a connection from 10.*.*.*
hello server
received a connection from 10.*.*.*
hello server
received a connection from 10.*.*.*
hello server
运行结果:
$ hadoop fs -cat /user/result/part-00000
1324
Received: Hello, you are connected!
Received: Hello, you are connected!
Received: Hello, you are connected!
Received: Hello, you are connected!
Received: Hello, you are connected!