场景
嵌入式设备经常用到tcp通信,设备端内部负责维护一个长连接的socket,来与服务端通信,但是有时候网络环境会发生波动导致网络异常,这时候就需要设备有一个断线重连机制。有时候客户端和服务端之间未做应用层心跳保活,无法修改服务端内容,这时候就需要设备自己检测网络是否正常了。
方案
当服务端关闭服务或者服务端异常退出时,我们通过recv和send的返回值能判断连接已经关闭,这时候就可以进入重连流程了。但是如果连接过程中网络断开(不一定是设备端网络断了,可能是路由器,也可能是远端服务器),recv和send在很长一段时间内依旧会正常返回(因为没有收到相应的关闭握手数据,协议栈依旧发包,重传多次后才会进入wait),这是不能忍受的。
于是我们可以建立一个独立的监控线程来确保服务正常。有两种方法:
- 客户端对原来的服务创建测试套接字。
- 优点:方法简单,可控探测间隔,服务不可用可迅速检测到
- 缺点:比较频繁的创建关闭套接字,加大了服务端的压力
- 通过popen调用ping指令测试服务器是否可用
- 优点:对服务器影响较小,ping指令发送icmp报文,造成的流量不大
- 缺点:超时时间设置不灵敏,会通过管道调用ping,引入额外的进程。
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/prctl.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/types.h>
/* Seconds to wait between attempts to (re)open the main connection. */
#define RECONNECT_INTERVAL 5
/* Seconds between link probes: sleep time of the socket-based monitor and
 * the value given to ping's -i option by the ping-based monitor. */
#define PROBE_INTERVAL 5
/* Value given to ping's -W option by the ping-based monitor. */
#define PING_TIMEOUT 10
/* Capacity in bytes of the global receive buffer below. */
#define RECV_BUFFER_SIZE 512
/* Server IPv4 address in dotted-decimal text form. */
char ip[32] = "192.168.6.127";
/* Server TCP port as a decimal string. */
char port[8] = "777";
/* Descriptor of the long-lived connection to the server; -1 means there is
 * no usable descriptor. */
int sok_fd = -1;
/* Link state flag: 0 = link considered down, 1 = connected.
 * NOTE(review): read and written from the connection, monitor and main
 * threads without any locking or atomic type - confirm this is acceptable. */
int status = 0;
/* Receive buffer filled by recv() in connect_to_server_loop(). */
char buffer[RECV_BUFFER_SIZE];
/* Initialized to the buffer capacity; not referenced by any other code
 * visible in this file. */
int buf_useable_len = RECV_BUFFER_SIZE;
/**
 * Open a TCP connection to an IPv4 server, waiting at most 300 ms for the
 * connection to be established.
 *
 * The socket is created with SO_KEEPALIVE enabled and is switched to
 * non-blocking mode; it is still non-blocking when it is returned.
 *
 * @param serveraddr  Server IPv4 address in dotted-decimal form.
 * @param rport       Server port as a decimal string in the range 1..65535.
 * @return A connected (non-blocking) socket descriptor on success, or -1 on
 *         failure with errno describing the cause. Malformed arguments are
 *         rejected with EINVAL before any socket is created.
 */
int open_tcp_client_socket_fd(char* serveraddr, char* rport)
{
    const long connect_timeout_usec = 300000;
    struct sockaddr_in srv_addr;
    struct timeval tv_out;
    fd_set write_fds;
    socklen_t err_len;
    char *end = NULL;
    long port_num;
    int keep_alive = 1;
    int sock_err = 0;
    int flags;
    int fd;
    int rc;

    if (serveraddr == NULL || rport == NULL)
    {
        errno = EINVAL;
        return -1;
    }

    /* strtol + range check: atoi() silently accepts garbage and htons()
     * would truncate values above 65535 to an unrelated port. */
    errno = 0;
    port_num = strtol(rport, &end, 10);
    if (errno != 0 || end == rport || *end != '\0' ||
        port_num < 1 || port_num > 65535)
    {
        errno = EINVAL;
        return -1;
    }

    memset(&srv_addr, 0, sizeof(srv_addr));
    srv_addr.sin_family = AF_INET;
    srv_addr.sin_port = htons((unsigned short)port_num);
    /* inet_pton reports malformed addresses; inet_addr() would turn them
     * into 255.255.255.255. */
    if (inet_pton(AF_INET, serveraddr, &srv_addr.sin_addr) != 1)
    {
        errno = EINVAL;
        return -1;
    }

    fd = socket(AF_INET, SOCK_STREAM, 0);
    if (fd < 0)
    {
        return -1;
    }

    /* FD_SET() on a descriptor >= FD_SETSIZE is undefined behavior. */
    if (fd >= FD_SETSIZE)
    {
        close(fd);
        errno = EMFILE;
        return -1;
    }

    /* Enable TCP keep-alive. Best effort only: the system default probe
     * interval is very long, so this alone does not detect a dead link
     * quickly. */
    (void)setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &keep_alive, sizeof(keep_alive));

    /* Switch to non-blocking mode so that connect() can be time-limited. */
    flags = fcntl(fd, F_GETFL, 0);
    if (flags < 0 || fcntl(fd, F_SETFL, flags | O_NONBLOCK) < 0)
    {
        sock_err = errno;
        close(fd);
        errno = sock_err;
        return -1;
    }

    rc = connect(fd, (struct sockaddr *)&srv_addr, sizeof(srv_addr));
    if (rc == 0 || errno == EISCONN)
    {
        return fd;
    }

    if (errno == EINPROGRESS)
    {
        tv_out.tv_sec = 0;
        tv_out.tv_usec = connect_timeout_usec;

        /* Wait until the socket becomes writable, i.e. the connection
         * attempt has finished one way or the other. */
        do
        {
            FD_ZERO(&write_fds);
            FD_SET(fd, &write_fds);
            rc = select(fd + 1, NULL, &write_fds, NULL, &tv_out);
        } while (rc < 0 && errno == EINTR);

        if (rc == 0)
        {
            sock_err = ETIMEDOUT;
        }
        else if (rc < 0)
        {
            sock_err = errno;
        }
        else
        {
            /* Writable does not imply connected (e.g. a firewall reject):
             * the real outcome must be read from SO_ERROR. */
            err_len = sizeof(sock_err);
            if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &sock_err, &err_len) < 0)
            {
                sock_err = errno;
            }
        }

        if (sock_err == 0)
        {
            return fd;
        }
    }
    else
    {
        sock_err = errno;
    }

    /* Report the actual failure cause rather than a stale EINPROGRESS. */
    printf("errno:%d\n", sock_err);
    close(fd);
    errno = sock_err;
    return -1;
}
/**
 * Read the global link state flag.
 *
 * @return The current value of `status` (0 is treated by callers as
 *         "link down").
 */
int get_socket_link_status(void)
{
    const int link_state = status;

    return link_state;
}
/**
 * Overwrite the global link state flag.
 *
 * @param sta  New value to store in `status`.
 * @return Always 0.
 */
int set_socket_link_status(int sta)
{
    const int result = 0;

    status = sta;

    return result;
}
/**
 * Link monitor thread (socket probe variant).
 *
 * Every PROBE_INTERVAL seconds, while the link is marked up, opens a
 * throw-away TCP connection to the server. If that probe connection cannot
 * be opened the link is marked down so that the connection thread
 * reconnects.
 *
 * @param arg  Unused.
 * @return Never returns in practice (endless loop); NULL formally.
 */
void *net_cable_disconnected_monitor1(void *arg)
{
    int probe_fd;

    (void)arg;

    for (;;)
    {
        sleep(PROBE_INTERVAL);

        /* Skip probing while the link is already down: the connection
         * thread is busy reconnecting. The status function must be CALLED
         * here; comparing its address to 0 is always false. */
        if (get_socket_link_status() == 0)
        {
            continue;
        }

        /* A failed probe means the server is unreachable even though no
         * close handshake was received on the main connection. */
        probe_fd = open_tcp_client_socket_fd(ip, port);
        if (probe_fd == -1)
        {
            set_socket_link_status(0);
        }
        else
        {
            close(probe_fd);
        }
    }

    return NULL;
}
/**
 * Link monitor thread (ping variant).
 *
 * Runs `ping -i PROBE_INTERVAL -W PING_TIMEOUT <ip>` through popen() and
 * marks the link down whenever a line of its output contains
 * "Destination Host Unreachable". If ping cannot be started, or exits, it
 * is started again after PROBE_INTERVAL seconds.
 *
 * No extra delay is inserted between reads: fgets() blocks until ping
 * prints its next line.
 *
 * NOTE(review): the address is pasted into a shell command line; this is
 * only safe while `ip` does not come from untrusted input.
 *
 * @param arg  Unused.
 * @return NULL if the ping command line does not fit its buffer; otherwise
 *         the function loops forever.
 */
void *net_cable_disconnected_monitor2(void *arg)
{
    static const char needle[] = "Destination Host Unreachable";
    char cmd[128];
    char line[128];
    FILE *fp;
    int len;

    (void)arg;

    len = snprintf(cmd, sizeof(cmd), "ping -i %d -W %d %s", PROBE_INTERVAL, PING_TIMEOUT, ip);
    if (len < 0 || (size_t)len >= sizeof(cmd))
    {
        printf("ping command too long!\n");
        return NULL;
    }

    for (;;)
    {
        fp = popen(cmd, "r");
        if (fp == NULL)
        {
            /* Passing a NULL stream to fgets() would crash; retry later. */
            printf("start ping failed, errno is %d\n", errno);
            sleep(PROBE_INTERVAL);
            continue;
        }

        /* fgets() returns NULL once ping has exited (EOF) or on a read
         * error; leave the read loop then instead of spinning on it. */
        while (fgets(line, sizeof(line), fp) != NULL)
        {
            if (strstr(line, needle) != NULL)
            {
                set_socket_link_status(0);
            }
        }

        pclose(fp);
        sleep(PROBE_INTERVAL);
    }

    return NULL;
}
void *connect_to_server_loop(void *arg)
{
int ret;
pthread_t tid;
pthread_create(&tid, NULL, net_cable_disconnected_monitor1, NULL);
pthread_detach(tid);
RE_CONNECT:
do{
sok_fd = open_tcp_client_socket_fd(ip, port);
if(sok_fd == -1)
{
sleep(RECONNECT_INTERVAL);
printf("connected error, sleep %d second try to reconnected!\n", RECONNECT_INTERVAL);
}
else
{
set_socket_link_status(1);
printf("connected success!\n");
}
}while(sok_fd == -1);
while(1)
{
ret = recv(sok_fd, buffer, RECV_BUFFER_SIZE, MSG_DONTWAIT);
if(ret == -1)
{
// socket nonblocking normal returned EAGAIN error
if(errno != EAGAIN)
{
printf("error occured, errno is %d/n", errno);
}
}
else if (ret == 0)
{
set_socket_link_status(0);
printf("connectecd stream shutdown or received zero byte request\n!");
}
else
{
printf("get buffer:\n[%s]\n", buffer);
memset(buffer, 0, RECV_BUFFER_SIZE);
}
if(status == 0)
{
printf("link down, try to reconnect!\n");
close(sok_fd);
goto RE_CONNECT;
}
sleep(1);
}
printf("can't get here!\n");
return NULL;
}
/**
 * Send the fixed test payload "Hello!" over the main connection.
 *
 * Nothing is sent while the link is marked down. Any send() failure marks
 * the link down so that the connection thread reconnects.
 *
 * @return Number of bytes sent, or -1 if the link is down or send() failed.
 */
int send_data(void)
{
    static const char data[] = "Hello!";
    ssize_t sent;

    /* Link is down: do not send. */
    if (get_socket_link_status() == 0)
    {
        return -1;
    }

    /* MSG_NOSIGNAL: without it, writing to a connection the peer has closed
     * raises SIGPIPE, whose default action terminates the whole process
     * before the -1 return could ever be handled. */
    sent = send(sok_fd, data, sizeof(data) - 1, MSG_NOSIGNAL);
    if (sent == -1)
    {
        set_socket_link_status(0);
        printf("send data failed, set connect down!\n");
    }

    return (int)sent;
}
/**
 * Program entry point.
 *
 * Starts the connection thread and then tries to send the test payload once
 * per second, logging the length and time of every successful send.
 *
 * @param argc  Unused.
 * @param argv  Unused.
 * @return EXIT_FAILURE if the connection thread cannot be created;
 *         otherwise the send loop never ends.
 */
int main(int argc, char *argv[])
{
    pthread_t tid;
    time_t now;
    int ret;

    (void)argc;
    (void)argv;

    ret = pthread_create(&tid, NULL, connect_to_server_loop, NULL);
    if (ret != 0)
    {
        /* Without the connection thread the link can never come up, so
         * looping on send_data() would do nothing forever. */
        printf("thread create failed!\n");
        return EXIT_FAILURE;
    }
    /* The thread is never joined; detach it so its resources are released
     * automatically. */
    pthread_detach(tid);

    for (;;)
    {
        ret = send_data();
        if (ret != -1)
        {
            now = time(NULL);
            printf("send data len %d, %s", ret, ctime(&now));
        }
        sleep(1);
    }
}