先上一个抓取百度首页的代码:
#include <stdio.h>
#include <winsock2.h>
#include <string.h>
#pragma comment(lib, "ws2_32.lib")
/*
 * Fetch the Baidu home page over plain HTTP (port 80) and store the raw
 * response (status line, headers and body) in e://test.txt while echoing
 * it to stdout.
 *
 * Returns 0 on success and 1 if Winsock initialisation, the output file,
 * or any socket operation fails.
 */
int main(){
    /*
     * An HTTP/1.1 request must carry a Host header, and "Connection: close"
     * asks the server to close the connection once the response is complete,
     * which is what lets the receive loop below terminate promptly.
     */
    static const char request[] =
        "GET / HTTP/1.1\r\n"
        "Host: www.baidu.com\r\n"
        "Connection: close\r\n"
        "\r\n";
    static char webcon[BUFSIZ];
    WSADATA wsaData;
    struct sockaddr_in srv_addr;
    SOCKET soc = INVALID_SOCKET;
    FILE *fp = NULL;
    int status = 1;
    int total = (int)(sizeof(request) - 1);
    int sent = 0;
    int n = 0;

    if (0 != WSAStartup(MAKEWORD(2, 0), &wsaData)) {
        printf("Socket2.0初始化失败,Exit!");
        return 1;
    }

    /* Binary mode: the response already contains CRLF line endings and may
     * contain arbitrary bytes, so no newline translation must take place. */
    fp = fopen("e://test.txt", "wb");
    if (fp == NULL) {
        perror("fopen e://test.txt");
        goto cleanup;
    }

    soc = socket(AF_INET, SOCK_STREAM, 0);
    if (soc == INVALID_SOCKET) {
        fprintf(stderr, "socket failed: %d\n", WSAGetLastError());
        goto cleanup;
    }

    memset(&srv_addr, 0, sizeof(srv_addr));
    srv_addr.sin_family = AF_INET;
    srv_addr.sin_port = htons(80);
    srv_addr.sin_addr.s_addr = inet_addr("61.135.169.125"); /* Baidu home page */

    if (connect(soc, (LPSOCKADDR)&srv_addr, sizeof(srv_addr)) == SOCKET_ERROR) {
        fprintf(stderr, "connect failed: %d\n", WSAGetLastError());
        goto cleanup;
    }

    /* send() may accept fewer bytes than requested; loop until all are sent. */
    while (sent < total) {
        n = send(soc, request + sent, total - sent, 0);
        if (n == SOCKET_ERROR) {
            fprintf(stderr, "send failed: %d\n", WSAGetLastError());
            goto cleanup;
        }
        sent += n;
    }

    /*
     * recv() does not NUL-terminate what it stores, so the data is written
     * by explicit length instead of being treated as a C string.
     */
    while ((n = recv(soc, webcon, (int)sizeof(webcon), 0)) > 0) {
        fwrite(webcon, 1, (size_t)n, stdout);
        if (fwrite(webcon, 1, (size_t)n, fp) != (size_t)n) {
            perror("fwrite e://test.txt");
            goto cleanup;
        }
    }
    if (n == SOCKET_ERROR) {
        fprintf(stderr, "recv failed: %d\n", WSAGetLastError());
        goto cleanup;
    }

    status = 0;

cleanup:
    if (soc != INVALID_SOCKET) {
        closesocket(soc);
    }
    /* fclose() flushes buffered data, so a failure here means a bad file. */
    if (fp != NULL && fclose(fp) != 0) {
        perror("fclose e://test.txt");
        status = 1;
    }
    WSACleanup();
    return status;
}
下面是通用版本:从标准输入读取 URL(不含 http:// 前缀),拆分出主机名和路径,再向该主机的 80 端口发送 HTTP GET 请求并打印响应:
#include <stdio.h>
#include <winsock.h>
#include <string.h>
#pragma comment(lib, "ws2_32.lib")
/*
 * Fetch a page over plain HTTP (port 80) and print the raw response
 * (status line, headers and body) to stdout.
 *
 * url: "host" or "host/path", without the "http://" scheme prefix.
 *      Everything before the first '/' is taken as the host name; the rest
 *      (including that '/') is the request path, defaulting to "/".
 *
 * Errors (over-long URL, DNS failure, socket failures) are reported on
 * stderr and the function returns without fetching anything.
 */
void geturl(char *url)
{
    WSADATA WSAData = {0};
    SOCKET sockfd = INVALID_SOCKET;
    struct sockaddr_in addr;
    struct hostent *pURL;
    char host[BUFSIZ], GET[BUFSIZ];
    char header[BUFSIZ] = "";
    static char text[BUFSIZ];
    const char *pPath;
    size_t hostLen;
    int headerLen;
    int sent = 0;
    int n;

    if (url == NULL) {
        fprintf(stderr, "geturl: url is NULL\n");
        return;
    }

    /*
     * On Windows, WSAStartup must succeed before any other socket call.
     */
    if (WSAStartup(MAKEWORD(2,2), &WSAData))
    {
        printf("WSA failed\n");
        return;
    }

    /*
     * Split the url into host name and request path, refusing anything that
     * would not fit in the fixed-size buffers.
     */
    pPath = strchr(url, '/');
    hostLen = (pPath != NULL) ? (size_t)(pPath - url) : strlen(url);
    if (pPath == NULL) {
        pPath = "/";
    }
    if (hostLen == 0 || hostLen >= sizeof(host) || strlen(pPath) >= sizeof(GET)) {
        fprintf(stderr, "geturl: empty host or url too long\n");
        goto cleanup;
    }
    memcpy(host, url, hostLen);
    host[hostLen] = '\0';
    strcpy(GET, pPath); /* length verified above */
    printf("%s\n%s\n", host, GET);

    /*
     * Resolve the host and fill in the server address.
     */
    pURL = gethostbyname(host);
    if (pURL == NULL || pURL->h_addrtype != AF_INET || pURL->h_addr == NULL) {
        fprintf(stderr, "geturl: cannot resolve host %s\n", host);
        goto cleanup;
    }
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    /* memcpy instead of a pointer cast: h_addr is a plain char buffer. */
    memcpy(&addr.sin_addr, pURL->h_addr, sizeof(addr.sin_addr));
    addr.sin_port = htons(80);

    /*
     * Build the request header sent to the web server (see the HTTP
     * specification for the meaning of each line). snprintf bounds the
     * write; a result >= sizeof(header) means it did not fit.
     */
    headerLen = snprintf(header, sizeof(header),
                         "GET %s HTTP/1.1\r\nHOST: %s\r\nConnection:Close\r\n\r\n",
                         GET, host);
    if (headerLen < 0 || (size_t)headerLen >= sizeof(header)) {
        fprintf(stderr, "geturl: request header too long\n");
        goto cleanup;
    }
    printf("|||||%s|||||||\n", header);

    /*
     * Connect to the server, send the request header, and receive the
     * response (i.e. the page source).
     */
    sockfd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (sockfd == INVALID_SOCKET) {
        fprintf(stderr, "geturl: socket failed: %d\n", WSAGetLastError());
        goto cleanup;
    }
    if (connect(sockfd, (SOCKADDR *)&addr, sizeof(addr)) == SOCKET_ERROR) {
        fprintf(stderr, "geturl: connect failed: %d\n", WSAGetLastError());
        goto cleanup;
    }
    /* send() may accept fewer bytes than requested; loop until all are sent. */
    while (sent < headerLen) {
        n = send(sockfd, header + sent, headerLen - sent, 0);
        if (n == SOCKET_ERROR) {
            fprintf(stderr, "geturl: send failed: %d\n", WSAGetLastError());
            goto cleanup;
        }
        sent += n;
    }
    /*
     * recv() does not NUL-terminate what it stores, so print by explicit
     * length rather than with "%s".
     */
    while ((n = recv(sockfd, text, (int)sizeof(text), 0)) > 0)
    {
        fwrite(text, 1, (size_t)n, stdout);
    }
    if (n == SOCKET_ERROR) {
        fprintf(stderr, "geturl: recv failed: %d\n", WSAGetLastError());
    }

cleanup:
    if (sockfd != INVALID_SOCKET) {
        closesocket(sockfd);
    }
    WSACleanup();
}
/*
 * Prompt for a URL (the "http://" prefix is printed as the prompt, so the
 * user types only what follows it) and fetch it with geturl().
 *
 * Returns 0 after a fetch attempt, 1 if no URL could be read.
 */
int main()
{
    char url[256];
    printf("http://");
    fflush(stdout); /* the prompt has no newline, so flush it explicitly */
    /* The field width keeps scanf from writing past url[255] + NUL. */
    if (scanf("%255s", url) != 1) {
        fprintf(stderr, "no URL entered\n");
        return 1;
    }
    geturl(url);
    return 0;
}
HTTP消息头的理解:http://www.cnblogs.com/jacktu/archive/2008/01/16/1041710.html