#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>

#include <arpa/inet.h>
#include <netdb.h>
#include <netinet/in.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <sys/types.h>
#include <unistd.h>
/* TCP port the crawler connects to; 80 is the default port for plain HTTP. */
#define PORT 80
/* Buffer size, in bytes, for page data. */
#define BUFSIZE 8184
/* Output stream for discovered links; main() opens it on "frontier.txt". */
static FILE *frontier;

/*
 * Extract hyperlinks from a chunk of fetched HTML and append them to the
 * frontier file, one link per line.
 *
 * Only anchors written exactly as `a href="http://...` are recognised. The
 * text between the "http://" prefix and the closing double quote is written
 * out, so the scheme itself is not stored.
 *
 * buf: NUL-terminated text to scan; it is not modified.
 *
 * Nothing is written when buf or frontier is NULL. Anchors with an empty
 * target are skipped, and an anchor with no closing quote ends the scan.
 */
void parse(char *buf)
{
    static const char anchor[] = "a href=\"http://";
    const size_t anchor_len = sizeof anchor - 1;
    char *cursor = buf;

    if (buf == NULL || frontier == NULL) {
        return;
    }

    while ((cursor = strstr(cursor, anchor)) != NULL) {
        char *target = cursor + anchor_len;
        char *end_quote = strchr(target, '"');
        size_t target_len;

        if (end_quote == NULL) {
            /* Unterminated attribute: no later anchor can be complete either. */
            break;
        }

        /* end_quote is at or after target, so this difference cannot be negative. */
        target_len = (size_t)(end_quote - target);
        if (target_len > 0) {
            if (fwrite(target, 1, target_len, frontier) != target_len
                || fputc('\n', frontier) == EOF) {
                break; /* the stream is in error; further writes would fail too */
            }
            /* Flush per link so the file stays usable if the crawl is interrupted. */
            fflush(frontier);
        }

        cursor = end_quote + 1;
    }
}
/*
 * Fetch the root page ("/") of a host over plain HTTP and hand every received
 * chunk to parse().
 *
 * url: bare host name (for example "www.example.com"), not a full URL. It is
 *      resolved with gethostbyname() and also sent as the Host header.
 *
 * Returns 0 once the server has closed the connection after its response,
 * or -1 on any resolution, socket, send or receive failure. The socket is
 * closed on every path.
 *
 * Known limitation: each chunk is parsed on its own, so a link that straddles
 * two recv() calls is not detected.
 */
int httpget(char *url)
{
    enum { REQUEST_MAX = 4096, CHUNK_MAX = 8184 };
    char request[REQUEST_MAX];
    char chunk[CHUNK_MAX];
    struct sockaddr_in servaddr;
    struct hostent *host;
    int request_len;
    size_t sent = 0;
    int sockfd;
    int rc = -1;

    host = (url != NULL) ? gethostbyname(url) : NULL;
    if (host == NULL || host->h_addrtype != AF_INET
        || host->h_addr_list[0] == NULL) {
        printf("gethostbyname error\n");
        return -1;
    }

    /* Copy the resolved IPv4 address directly; no text round-trip is needed. */
    memset(&servaddr, 0, sizeof servaddr);
    servaddr.sin_family = AF_INET;
    servaddr.sin_port = htons(PORT);
    memcpy(&servaddr.sin_addr, host->h_addr_list[0], sizeof servaddr.sin_addr);
    printf("ip adress %s\n", inet_ntoa(servaddr.sin_addr));

    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd < 0) {
        printf("socket error!\n");
        return -1;
    }

    if (connect(sockfd, (struct sockaddr *)&servaddr, sizeof servaddr) < 0) {
        printf("connect error!\n");
        goto out;
    }
    printf("connect success \n");

    /*
     * Build the whole request in one bounded call. "Connection: close" makes
     * the server end the stream after its response, which is how the receive
     * loop below detects completion.
     */
    request_len = snprintf(request, sizeof request,
                           "GET / HTTP/1.0\r\n"
                           "Host: %s\r\n"
                           "Accept: */*\r\n"
                           "Accept-Language: zh-CN\r\n"
                           "User-Agent: Mozilla/4.0\r\n"
                           "Connection: close\r\n"
                           "\r\n",
                           url);
    if (request_len < 0 || (size_t)request_len >= sizeof request) {
        printf("request too long\n");
        goto out;
    }
    printf("%s\n", request);

    /* send() may accept fewer bytes than asked; loop until all are written. */
    while (sent < (size_t)request_len) {
        ssize_t n = send(sockfd, request + sent, (size_t)request_len - sent, 0);

        if (n < 0) {
            if (errno == EINTR) {
                continue;
            }
            printf("send error %d,Error message'%s'\n", errno, strerror(errno));
            goto out;
        }
        sent += (size_t)n;
    }
    printf("send success ,total send %d \n", request_len);

    for (;;) {
        fd_set readable;
        struct timeval tv;
        int ready;
        ssize_t got;

        FD_ZERO(&readable);
        FD_SET(sockfd, &readable);
        /* Block in select() for up to two seconds instead of sleeping and polling. */
        tv.tv_sec = 2;
        tv.tv_usec = 0;

        ready = select(sockfd + 1, &readable, NULL, NULL, &tv);
        if (ready == 0) {
            continue; /* timed out with nothing to read yet */
        }
        if (ready < 0) {
            if (errno == EINTR) {
                continue;
            }
            printf("some thing read error!\n");
            goto out;
        }

        /* Leave one byte free so the chunk can always be NUL-terminated. */
        got = recv(sockfd, chunk, sizeof chunk - 1, 0);
        printf("i = %d\n", (int)got);
        if (got < 0) {
            if (errno == EINTR) {
                continue;
            }
            printf("read message find error,stop!\n");
            goto out;
        }
        if (got == 0) {
            /* Orderly shutdown by the peer: the response is complete. */
            rc = 0;
            break;
        }

        chunk[got] = '\0';
        parse(chunk);
        printf("%s\n", chunk);
    }

out:
    close(sockfd);
    return rc;
}
/*
 * Entry point. Expects exactly one argument: the host name to fetch.
 *
 * Opens "frontier.txt" (where parse() appends discovered links) and
 * "111.txt", runs httpget() on the given host, then closes both files.
 *
 * Returns 0 when httpget() succeeds and 1 when it fails or a file cannot be
 * opened; a wrong argument count exits with status -1.
 */
int main(int argc, char *argv[])
{
    FILE *fp;
    int res;
    int status;

    if (argc != 2) {
        fprintf(stderr, "input domain name");
        exit(-1);
    }

    frontier = fopen("frontier.txt", "a+");
    if (frontier == NULL) {
        printf("open error");
        return 1;
    }

    /*
     * fopen() reports failure with NULL, not a negative value.
     * NOTE(review): nothing in this file writes to 111.txt at present; it is
     * still opened so the file is created as before.
     */
    fp = fopen("111.txt", "a+");
    if (fp == NULL) {
        printf("fopen error");
        fclose(frontier);
        return 1;
    }

    res = httpget(argv[1]);
    if (res == 0) {
        printf("httpget success\n");
    }
    status = (res == 0) ? 0 : 1;

    fclose(fp);
    /* fclose() flushes; a failure here means recorded links may be lost. */
    if (fclose(frontier) != 0) {
        status = 1;
    }
    frontier = NULL;

    return status;
}