最近在工作上遇到一个问题:两台PC机建立了TCP链路进行通信,假设其中一端PC出现非正常断开TCP链路的情况,比如PC被拔网线或者死机等,另一端是不会被通知的,除非对端PC机正常关闭端口。
解决方法:
1、自己做心跳,但是心跳需要两端都有心跳处理,比较麻烦!
2、TCP自带的keepalive。
这里讲第二种方法:
keepalive的处理机制:keepalive在Linux的TCP应用中是默认关闭,因为Keepalive会在链路上发包,会产生网络负载,虽然影响不大,但是不用keepalive的时候就不用更好。
如果在TCP链路上,当两端没有数据传送的时候,将keepalive打开的那一端会每隔一段时间探测链路是否断开。这里有几个系统参数:
/proc/sys/net/ipv4/tcp_keepalive_time //链路空闲多长时间后开始发送探测包(单位:秒)
/proc/sys/net/ipv4/tcp_keepalive_intvl //探测包没有得到响应时的重发间隔(单位:秒)
/proc/sys/net/ipv4/tcp_keepalive_probes //探测包没有得到响应时最多重发的次数,超过后认为链路已断开
本人在应用中设置的值是:
echo 60 > /proc/sys/net/ipv4/tcp_keepalive_time
echo 2 > /proc/sys/net/ipv4/tcp_keepalive_intvl
echo 3 > /proc/sys/net/ipv4/tcp_keepalive_probes
该设置在系统重启后会消失,所以要将以上设置添加到系统启动脚本中。
表示链路空闲60秒后发起一次探测,每次探测最多发3次包,每发一次包,间隔是2秒;但是如果TCP链路是通的,只要探测发1次包,或者2次包(抓包测试的时候,有时候两次,有时候一次)。
在已建立连接的socket上增添keepalive的属性(下面的完整示例是对accept返回的socket进行设置)。
/* Turn keepalive probing on for sockfd: SO_KEEPALIVE takes an int flag, and a
 * non-zero value enables the option. The return value of setsockopt() is not
 * checked in this short snippet. */
int iKeepAlive = 1;
setsockopt(sockfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&iKeepAlive, sizeof(iKeepAlive));
然后用select轮询该socket是否可读,如果网络出现故障,即TCP链路断开,select会返回大于0,证明socket可读,然后对socket进行recv,recv会发生错误,返回-1,其errno是ETIMEDOUT。(如果对端是正常关闭连接,recv则返回0。)
具体代码:
server.c
#include <netinet/in.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <signal.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/ip.h>
#include <errno.h>
#define SERVER_PORT 6666
/*
 * Parse a decimal TCP port number.
 *
 * text: NUL-terminated string holding the port in base 10.
 * Returns the port in the range 1..65535, or -1 when text is empty, carries
 * trailing characters, or is out of range.
 */
static int parse_port(const char *text)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (errno != 0 || end == text || *end != '\0' || value < 1 || value > 65535)
    {
        return -1;
    }
    return (int)value;
}

/*
 * TCP keepalive demo server.
 *
 * Usage: server <port>
 *
 * Listens on <port>, accepts exactly one client, enables SO_KEEPALIVE on the
 * accepted socket and then polls it with select() using a 3 second timeout.
 * Each received message is printed and answered with "Hello, Client!".
 *
 * The loop ends when the peer goes away: recv() returning 0 means an orderly
 * shutdown, recv() failing with ETIMEDOUT means the keepalive probes went
 * unanswered.
 *
 * Returns 0 after a detected disconnect and -1 on a usage or socket error.
 * The descriptors are not closed explicitly: every exit path leaves main(),
 * so they are released when the process exits.
 */
int main(int argc, char *argv[])
{
    /* tcpi_state values this demo reports, using the Linux TCP state
     * numbering (TCP_ESTABLISHED is 1, TCP_CLOSE is 7). */
    enum
    {
        STATE_ESTABLISHED = 1,
        STATE_CLOSE = 7
    };
    static const char reply[] = "Hello, Client!";
    char buf[32];
    struct sockaddr_in serverAddr;
    struct sockaddr_in clientAddr;
    socklen_t clientLen = sizeof(clientAddr);
    int reuse = 1;
    int iKeepAlive = 1;
    int port;
    int nSock;
    int new_nSock;

    if (argc < 2 || (port = parse_port(argv[1])) < 0)
    {
        printf("Usage: %s <port>\n", argc > 0 ? argv[0] : "server");
        return -1;
    }

    /* A send() on a broken connection must fail with an error code instead
     * of killing the process with SIGPIPE. */
    signal(SIGPIPE, SIG_IGN);

    nSock = socket(AF_INET, SOCK_STREAM, 0);
    if (nSock < 0)
    {
        printf("Creat socket failed!\n");
        return -1;
    }

    /* Best effort: allows a quick restart on the same port. */
    if (setsockopt(nSock, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)) < 0)
    {
        printf("setsockopt(SO_REUSEADDR) failed: %s\n", strerror(errno));
    }

    memset(&serverAddr, 0, sizeof(serverAddr));
    serverAddr.sin_family = AF_INET;
    serverAddr.sin_addr.s_addr = htonl(INADDR_ANY);
    serverAddr.sin_port = htons((unsigned short)port);

    if (bind(nSock, (struct sockaddr *)&serverAddr, sizeof(serverAddr)) < 0)
    {
        printf("Bind error!\n");
        return -1;
    }
    if (listen(nSock, 5) < 0)
    {
        printf("Listen failed!\n");
        return -1;
    }

    memset(&clientAddr, 0, sizeof(clientAddr));
    new_nSock = accept(nSock, (struct sockaddr *)&clientAddr, &clientLen);
    if (new_nSock < 0)
    {
        printf("Accept error!\n");
        return -1;
    }

    /* Probe timing comes from the system-wide tcp_keepalive_* settings; the
     * per-socket TCP_KEEPIDLE / TCP_KEEPINTVL / TCP_KEEPCNT options are not
     * set here. */
    if (setsockopt(new_nSock, SOL_SOCKET, SO_KEEPALIVE, &iKeepAlive, sizeof(iKeepAlive)) < 0)
    {
        printf("setsockopt error!\n");
        return -1;
    }

    for (;;)
    {
        fd_set readFds;
        struct timeval tm;
        struct tcp_info info;
        socklen_t infoLen = sizeof(info);
        ssize_t received;
        ssize_t sent;
        int ret;

        FD_ZERO(&readFds);
        FD_SET(new_nSock, &readFds);
        /* select() may modify the timeout, so it is re-armed every round. */
        tm.tv_sec = 3;
        tm.tv_usec = 0;

        ret = select(new_nSock + 1, &readFds, NULL, NULL, &tm);
        if (ret < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            printf("select error: %s\n", strerror(errno));
            return -1;
        }
        if (ret == 0)
        {
            printf("time out!\n");
            continue;
        }

        /* Report the kernel's view of the connection state. */
        memset(&info, 0, sizeof(info));
        if (getsockopt(new_nSock, IPPROTO_TCP, TCP_INFO, &info, &infoLen) == 0)
        {
            if (info.tcpi_state == STATE_ESTABLISHED)
            {
                printf("TCP connect\n");
            }
            else if (info.tcpi_state == STATE_CLOSE)
            {
                printf("TCP disconnect\n");
            }
            printf("getsockopt:%d\n", info.tcpi_state);
        }
        else
        {
            printf("getsockopt(TCP_INFO) failed: %s\n", strerror(errno));
        }
        printf("Select return value:%d\n", ret);

        if (!FD_ISSET(new_nSock, &readFds))
        {
            continue;
        }

        /* Leave room for the terminator so the data can be printed with %s. */
        received = recv(new_nSock, buf, sizeof(buf) - 1, 0);
        if (received < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            if (errno == ETIMEDOUT)
            {
                /* Keepalive probes were not answered: the link is dead. */
                printf("close!\n");
                return 0;
            }
            printf("recv error: %s\n", strerror(errno));
            return -1;
        }
        if (received == 0)
        {
            /* Orderly shutdown by the peer; without this check the socket
             * stays readable and the loop would spin forever. */
            printf("peer closed the connection!\n");
            return 0;
        }
        buf[received] = '\0';
        printf("Message:%s, size=%zd\n", buf, received);

        sent = send(new_nSock, reply, sizeof(reply) - 1, 0);
        if (sent < 0)
        {
            printf("send error: %s\n", strerror(errno));
            return -1;
        }
        printf("Send message:%zd\n", sent);
    }
}
client.c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <sys/select.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <linux/tcp.h>
#include <linux/ip.h>
#include <linux/sockios.h>
#define Client_PORT 6666
/*int IsNetworkLinked(const char * interfaceName)
{
#define LINKTEST_GLINK 0x0000000a
struct linktest_value
{
unsigned int cmd;
unsigned int data;
};
int sd = -1;
struct ifreq ifr;
struct linktest_value edata;
memset(&edata,0,sizeof(struct linktest_value));
sd = socket(AF_INET, SOCK_STREAM, 0);
if (sd < 0)
{
perror("get network status: open socket error\n");
return -1;
}
edata.cmd = LINKTEST_GLINK;
strncpy(ifr.ifr_name, interfaceName, sizeof(ifr.ifr_name)-1);
ifr.ifr_data = (char *) &edata;
if (ioctl(sd, SIOCETHTOOL, &ifr) == -1)
{
printf("ETHTOOL_GLINK failed: %s\n", strerror(errno));
close(sd);
return -1;
}
if(edata.data)
{
close(sd);
return 0;
}
close(sd);
return -1;
} */
/*
 * Parse a decimal TCP port number.
 *
 * text: NUL-terminated string holding the port in base 10.
 * Returns the port in the range 1..65535, or -1 when text is empty, carries
 * trailing characters, or is out of range.
 */
static int parse_port(const char *text)
{
    char *end = NULL;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (errno != 0 || end == text || *end != '\0' || value < 1 || value > 65535)
    {
        return -1;
    }
    return (int)value;
}

/*
 * TCP keepalive demo client.
 *
 * Usage: client <server-ipv4> <port>
 *
 * Connects to the server, sends "Hello, Server!" once and then polls the
 * socket with select() using a 3 second timeout, printing whatever arrives.
 * SO_KEEPALIVE is not enabled on this side.
 *
 * The loop ends when the server goes away: recv() returning 0 means an
 * orderly shutdown, recv() failing with ETIMEDOUT means the connection timed
 * out.
 *
 * Returns 0 after a detected server disconnect and -1 on a usage or socket
 * error.
 */
int main(int argc, char *argv[])
{
    static const char greeting[] = "Hello, Server!";
    char buf[32];
    struct sockaddr_in serverAddr;
    ssize_t sent;
    int port;
    int nSock;
    int rc = -1;

    if (argc < 3 || (port = parse_port(argv[2])) < 0)
    {
        printf("Usage: %s <server-ipv4> <port>\n", argc > 0 ? argv[0] : "client");
        return -1;
    }

    memset(&serverAddr, 0, sizeof(serverAddr));
    serverAddr.sin_family = AF_INET;
    serverAddr.sin_port = htons((unsigned short)port);
    /* inet_pton() returns 1 only when the text is a valid IPv4 address. */
    if (inet_pton(AF_INET, argv[1], &serverAddr.sin_addr) != 1)
    {
        printf("Invalid IPv4 address: %s\n", argv[1]);
        return -1;
    }

    nSock = socket(AF_INET, SOCK_STREAM, 0);
    if (nSock < 0)
    {
        printf("Creat socket failed!\n");
        return -1;
    }

    if (connect(nSock, (struct sockaddr *)&serverAddr, sizeof(serverAddr)) < 0)
    {
        printf("connect error!\n");
        goto out;
    }

    sent = send(nSock, greeting, sizeof(greeting) - 1, 0);
    printf("Send message:%zd\n", sent);
    if (sent < 0)
    {
        goto out;
    }

    for (;;)
    {
        fd_set readFds;
        struct timeval tm;
        ssize_t received;
        int ret;

        FD_ZERO(&readFds);
        FD_SET(nSock, &readFds);
        /* select() may modify the timeout, so it is re-armed every round. */
        tm.tv_sec = 3;
        tm.tv_usec = 0;

        printf("liu ahai test reback!\n");
        ret = select(nSock + 1, &readFds, NULL, NULL, &tm);
        if (ret < 0)
        {
            if (errno == EINTR)
            {
                continue;
            }
            printf("select error!\n");
            break;
        }
        if (ret == 0)
        {
            printf("time out!\n");
            continue;
        }

        printf("liu ahai test ret > 0!\n");
        if (!FD_ISSET(nSock, &readFds))
        {
            continue;
        }

        /* Leave room for the terminator so the data can be printed with %s. */
        received = recv(nSock, buf, sizeof(buf) - 1, 0);
        if (received > 0)
        {
            buf[received] = '\0';
            printf("Message:%s, size=%zd\n", buf, received);
            continue;
        }
        if (received < 0 && errno == EINTR)
        {
            continue;
        }

        /* received == 0 is an orderly shutdown by the server. Without leaving
         * the loop here the socket stays readable and select() would return
         * immediately forever. */
        if (received == 0 || errno == ETIMEDOUT)
        {
            printf("server close!\n");
            rc = 0;
        }
        else
        {
            printf("recv error: %s\n", strerror(errno));
        }
        break;
    }

out:
    close(nSock);
    return rc;
}
以上有不对的地方,可以和我交流,我的联系方式:liuahai515@163.com。