浏览器怎么抓取html源码,模拟浏览器获取web服务器网页源代码的程序

//---------------------------------------------------------------------------------------------

//--name: iesource.c                                                  -------------------------------

//-- fetch the HTML source of a web page over HTTP    -------------------------------

//----------------------------------------------------------------------------------------------

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>

/* TCP port of the web server to connect to (80 is the standard HTTP port). */
int port =80;

/* Size, in bytes, of the buffer used to receive the server's response. */
#define BUF_SIZE   65536

/*
 * Split "host[/path]" into its host part and its path part.
 * When the URL has no '/', the path defaults to "/".
 * Returns 0 on success, -1 if either part does not fit in its buffer.
 */
static int split_url(const char *url, char *host, size_t host_size,
                     char *path, size_t path_size)
{
    const char *slash = strchr(url, '/');
    const char *rel = slash ? slash : "/";
    size_t host_len = slash ? (size_t)(slash - url) : strlen(url);
    size_t rel_len = strlen(rel);

    if (host_len >= host_size || rel_len >= path_size)
        return -1;

    memcpy(host, url, host_len);
    host[host_len] = '\0';
    memcpy(path, rel, rel_len + 1);
    return 0;
}

/*
 * Send exactly len bytes; send() may accept fewer bytes than requested,
 * so keep going until everything has been handed to the kernel.
 * Returns 0 on success, -1 on error.
 */
static int send_all(int sd, const char *data, size_t len)
{
    while (len > 0) {
        ssize_t sent = send(sd, data, len, 0);

        if (sent <= 0)
            return -1;
        data += sent;
        len -= (size_t)sent;
    }
    return 0;
}

/*
 * Fetch a web page over plain HTTP and write the raw response (status line,
 * headers and body) to stdout.
 *
 * url: "host[/path]" without the "http://" scheme, e.g. "example.com/index.html".
 *
 * The connection is made to the global `port`.  On any setup failure (URL too
 * long, host lookup, socket, connect, send) a message is printed and the
 * process exits with status 1.  A receive error is reported and ends the
 * transfer without exiting.
 */
void getip(char *url)
{
    struct sockaddr_in pin;
    struct hostent *nlp_host;
    char host[256];
    char path[256];
    char header[1024];
    char buf[BUF_SIZE];
    int header_len;
    int sd;

    /* Get the host name and the relative address from the URL. */
    if (split_url(url, host, sizeof host, path, sizeof path) != 0) {
        printf("error url too long\n");
        exit(1);
    }
    printf("%s \n  %s  \n", host, path);

    /* Resolve the host; only IPv4 results can be stored in sockaddr_in. */
    nlp_host = gethostbyname(host);
    if (nlp_host == NULL || nlp_host->h_addrtype != AF_INET ||
        nlp_host->h_addr_list[0] == NULL) {
        printf("error get host\n");
        exit(1);
    }

    memset(&pin, 0, sizeof pin);
    pin.sin_family = AF_INET;
    /* memcpy avoids alignment assumptions about the resolver's buffer. */
    memcpy(&pin.sin_addr, nlp_host->h_addr_list[0], sizeof pin.sin_addr);
    pin.sin_port = htons((unsigned short)port);

    /*
     * Build the request.  The header section must end with exactly one
     * empty line (CRLF CRLF); "Connection: close" makes the server close
     * the socket after the response, which is how the receive loop below
     * detects the end of the data.
     */
    header_len = snprintf(header, sizeof header,
                          "GET %s HTTP/1.1\r\n"
                          "Host: %s\r\n"
                          "Accept: */*\r\n"
                          "Connection: close\r\n"
                          "\r\n",
                          path, host);
    if (header_len < 0 || (size_t)header_len >= sizeof header) {
        printf("error building request\n");
        exit(1);
    }

    sd = socket(AF_INET, SOCK_STREAM, 0);
    if (sd == -1) {
        printf("Error opening socket!!!\n");
        exit(1);
    }

    /* Connect to the web server and send the request. */
    if (connect(sd, (struct sockaddr *)&pin, sizeof pin) == -1) {
        printf("error connect to socket\n");
        close(sd);
        exit(1);
    }

    if (send_all(sd, header, (size_t)header_len) == -1) {
        printf("error in send \n");
        close(sd);
        exit(1);
    }

    /*
     * A response normally arrives in several segments, so read until the
     * server closes the connection (recv returns 0).  fwrite is used with
     * the exact byte count because the data is not NUL-terminated and may
     * contain NUL bytes.
     */
    for (;;) {
        ssize_t len = recv(sd, buf, sizeof buf, 0);

        if (len < 0) {
            printf("receive data error!!!\n");
            break;
        }
        if (len == 0)
            break;
        fwrite(buf, 1, (size_t)len, stdout);
    }

    close(sd);
}

/*
 * Program entry point: prompt with "http://", read the rest of the URL
 * (host and optional path) from standard input, and pass it to getip().
 * Returns 0 after getip() returns, or 1 if no URL could be read.
 */
int main(void)
{
    char url[256];

    printf("http://");
    fflush(stdout);   /* the prompt has no newline, so push it out explicitly */

    /* The field width keeps scanf from writing past the end of url. */
    if (scanf("%255s", url) != 1) {
        printf("error reading url\n");
        return 1;
    }

    getip(url);

    return 0;
}

///NOTE: tested by dotafox on RedHat 9.0 with make 3.79 & gcc-2.95.3

///use: gcc -o iesource iesource.c

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值