// 转载自 http://blog.csdn.net/xiongyangg/article/details/50767482
// g++ download_demo.cpp -lpthread
/* NOTE(review): the header names were missing from this listing; the list below
 * is reconstructed from the identifiers the code uses -- confirm against the
 * original file. */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <arpa/inet.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <pthread.h>
#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
// Values parsed from an HTTP response header, plus the local output file name.
struct resp_header// response header information
{
int status_code;// status code from the status line, e.g. the 200 in "HTTP/1.1 200 OK"
char content_type[128];// Content-Type value, e.g. "application/gzip"
long content_length;// Content-Length value in bytes, e.g. 11683079
char file_name[256];// path that download() opens for writing
};
// Single shared instance: main() stores the parsed header and file name here, download() reads them.
struct resp_header resp;
/*
 * Split a URL into host name, TCP port and a local file name.
 *
 * url        NUL-terminated URL; a leading "http://" or "https://" is skipped.
 * domain     out: host name with any ":port" suffix removed.
 * port       out: the number after ':' in the host part when it lies in
 *            1..65535, otherwise 80 (for both schemes).
 * file_name  out: the last path segment, ignoring one trailing '/' and any
 *            "?query" / "#fragment". When the URL has no path at all this is
 *            the host part exactly as written (including ":port").
 *
 * The capacity of the output buffers is not known here; each of them must be
 * able to hold strlen(url) + 1 bytes.
 */
void parse_url(const char *url, char *domain, int *port, char *file_name){
    /* const-qualified: string literals must not be reachable through char *. */
    static const char *const schemes[] = {"http://", "https://", NULL};
    size_t start = 0;

    *port = 80;
    for (size_t k = 0; schemes[k] != NULL; k++){
        size_t scheme_len = strlen(schemes[k]);
        if (strncmp(url, schemes[k], scheme_len) == 0){
            start = scheme_len;
            break;
        }
    }

    /* Everything from the first '?' or '#' on is query/fragment, which is
     * neither part of the host nor of the file name. */
    size_t end = start + strcspn(url + start, "?#");

    /* Host part: up to the first '/', still carrying any ":port". */
    size_t host_len = 0;
    while (start + host_len < end && url[start + host_len] != '/'){
        domain[host_len] = url[start + host_len];
        host_len++;
    }
    domain[host_len] = '\0';

    /* Extract the port (if it is a sane number) and cut it off the host. */
    char *colon = strchr(domain, ':');
    if (colon != NULL){
        char *digits_end;
        long value = strtol(colon + 1, &digits_end, 10);
        if (digits_end != colon + 1 && value >= 1 && value <= 65535){
            *port = (int) value;
        }
        *colon = '\0';
    }

    /* File name: restart the copy after every '/', except a '/' that is the
     * very last character, so "http://host/dir/" yields "dir". */
    size_t j = 0;
    for (size_t i = start; i < end; i++){
        if (url[i] != '/'){
            file_name[j++] = url[i];
        } else if (i != end - 1){
            j = 0;
        }
    }
    file_name[j] = '\0';
}
/*获取响应头的信息*/
struct resp_header get_resp_header(const char *response){
struct resp_header resp;
char *pos = (char*)strstr(response, "HTTP/");
if (pos)
sscanf(pos, "%*s %d", &resp.status_code);//返回状态码
pos = (char*)strstr(response, "Content-Type:");//返回内容类型
if (pos)
sscanf(pos, "%*s %s", resp.content_type);
pos = (char*)strstr(response, "Content-Length:");//内容的长度(字节)
if (pos)
sscanf(pos, "%*s %ld", &resp.content_length);
return resp;
}
/*
 * Resolve `domain` to its first IPv4 address in dotted-quad text form.
 *
 * domain   host name (or dotted-quad string) to look up.
 * ip_addr  out: buffer of at least 16 bytes (15 characters plus NUL).
 *          Set to the empty string when the lookup fails or yields no IPv4
 *          address, so callers can test strlen(ip_addr) == 0 whether or not
 *          they cleared the buffer beforehand.
 */
void get_ip_addr(char *domain, char *ip_addr){
    /* Assigning NULL to the parameter would only change the local copy;
     * failure has to be reported through the buffer itself. */
    ip_addr[0] = '\0';

    struct hostent *host = gethostbyname(domain);
    if (host == NULL
            || host->h_addrtype != AF_INET
            || host->h_length != (int) sizeof(struct in_addr)
            || host->h_addr_list[0] == NULL){
        return;
    }

    /* Copy instead of dereferencing a cast pointer: the char * entry is not
     * guaranteed to be aligned for struct in_addr. */
    struct in_addr first;
    memcpy(&first, host->h_addr_list[0], sizeof(first));
    strcpy(ip_addr, inet_ntoa(first));
}
/*
 * Redraw a one-line progress bar on stdout.
 *
 * cur_size    bytes received so far.
 * total_size  expected total in bytes; when it is not positive the ratio is
 *             shown as 0% instead of dividing by zero.
 *
 * The bar is 50 characters wide and always shows at least one '='. The line
 * starts with '\r' so successive calls overwrite each other.
 */
void progressBar(long cur_size, long total_size){
    enum { BAR_WIDTH = 50 };

    float percent = 0.0f;
    if (total_size > 0){
        percent = (float) cur_size / total_size;
    }

    /* Decide the fill from the ratio before converting to int, so an
     * out-of-range value is never cast. */
    int numShow;
    if (percent >= 1.0f){
        numShow = BAR_WIDTH;
    } else if (percent <= 0.0f){
        numShow = 0;
    } else {
        numShow = (int)(BAR_WIDTH * percent);
    }
    if (numShow == 0){
        numShow = 1;
    }

    char sign[BAR_WIDTH + 1] = {0};
    memset(sign, '=', BAR_WIDTH);

    /* %-*.*s: field width BAR_WIDTH, left aligned, printing numShow '=' */
    printf("\r%.2f%%\t[%-*.*s] %.2f/%.2fMB", percent * 100, BAR_WIDTH, numShow,
           sign, cur_size / 1024.0 / 1024.0, total_size / 1024.0 / 1024.0);
    fflush(stdout);
}
/*
 * Thread entry point: copy the response body from the socket into the file
 * named by resp.file_name.
 *
 * socket_d  pointer to an int holding the connected socket; the response
 *           header must already have been consumed from it.
 *
 * Reads at most resp.content_length bytes, redrawing the progress bar after
 * every chunk. The output file is created with mode 0644 (before umask) and
 * truncated if it already exists. Exits the process if the file or the buffer
 * cannot be created. Always returns NULL.
 */
void * download(void * socket_d){
    enum { CHUNK_SIZE = 4096 };                 /* bytes requested per read() */
    int client_socket = *(int *) socket_d;
    const long total = resp.content_length;
    long received = 0;                          /* long, like content_length */
    int failed = 0;

    /* O_TRUNC: an older, longer file must not leave stale bytes at the end. */
    int fd = open(resp.file_name, O_CREAT | O_WRONLY | O_TRUNC,
                  S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
    if (fd < 0){
        printf("Create file failed\n");
        exit(-1);
    }
    char *buf = (char *) malloc(CHUNK_SIZE);
    if (buf == NULL){
        printf("malloc failed\n");
        close(fd);
        exit(-1);
    }

    while (received < total){
        /* Never ask for more than the body still owes us. */
        long remaining = total - received;
        size_t want = remaining < CHUNK_SIZE ? (size_t) remaining : (size_t) CHUNK_SIZE;

        ssize_t got = read(client_socket, buf, want);
        if (got <= 0){                          /* 0: peer closed early, <0: error */
            failed = 1;
            break;
        }

        /* write() may accept fewer bytes than requested; loop until done. */
        ssize_t done = 0;
        while (done < got){
            ssize_t written = write(fd, buf + done, (size_t)(got - done));
            if (written <= 0){
                failed = 1;
                break;
            }
            done += written;
        }
        if (failed){
            break;
        }

        received += got;
        progressBar(received, total);
    }

    free(buf);
    if (close(fd) != 0){
        failed = 1;
    }

    if (!failed && received == total){
        printf("\nDownload successful ^_^\n\n");
    } else {
        printf("\nDownload failed\n");
    }
    return NULL;
}
int main(int argc, char const *argv[]){
char url[2048] = "http://seopic.699pic.com/photo/50010/8515.jpg_wh1200.jpg"; // 0.53M
char domain[64] = {0};
char ip_addr[16] = {0};
int port = 80;
char file_name[256] = {0};
char header[2048] = {0};
puts("1: Parsing url...");
parse_url(url, domain, &port, file_name);
if (argc == 3)
strcpy(file_name, argv[2]);
puts("2: Get ip address...");
get_ip_addr(domain, ip_addr);
if (strlen(ip_addr) == 0){
printf("can not get ip address\n");
return 0;
}
puts("\n>>>>Detail<<<
printf("URL: %s\n", url);
printf("DOMAIN: %s\n", domain);
printf("IP: %s\n", ip_addr);
printf("PORT: %d\n", port);
printf("FILENAME: %s\n\n", file_name);
sprintf(header, \
"GET %s HTTP/1.1\r\n"\
"User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\r\n"\
"Accept: */*\r\n"\
"Host:%s\r\n"\
"\r\n"\
,url, domain);
printf("---------------------------\n%s", header);
printf("---------------------------\n\n\n");
//创建套接字
int client_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (client_socket < 0) {
printf("invalid socket descriptor: %d\n", client_socket);
exit(-1);
}
//创建地址结构体
struct sockaddr_in addr;
memset(&addr, 0, sizeof(addr));
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = inet_addr(ip_addr);
addr.sin_port = htons(port);
//连接服务器
puts("3: Connect server...");
int res = connect(client_socket, (struct sockaddr *) &addr, sizeof(addr));
if (res == -1){
printf("connect failed, return: %d\n", res);
exit(-1);
}
puts("4: Send request...");//向服务器发送下载请求
write(client_socket, header, strlen(header));
int mem_size = 4096;
int length = 0;
int len;
char *buf = (char *) malloc(mem_size * sizeof(char));
char *response = (char *) malloc(mem_size * sizeof(char));
//每次单个字符读取响应头信息, 仅仅读取的是响应部分的头部, 后面单独开线程下载
while ((len = read(client_socket, buf, 1)) != 0){
if (length + len > mem_size){
//动态内存申请, 因为无法确定响应头内容长度
mem_size *= 2;
char * temp = (char *) realloc(response, sizeof(char) * mem_size);
if (temp == NULL){
printf("realloc failed\n");
exit(-1);
}
response = temp;
}
buf[len] = '\0';
strcat(response, buf);
//找到响应头的头部信息, 两个"\n\r"为分割点
int flag = 0;
for (int i = strlen(response) - 1; response[i] == '\n' || response[i] == '\r'; i--, flag++){
// nothing todo
}
if (flag == 4){
break;
}
length += len;
}
printf("---------------------------\n%s", response);
printf("---------------------------\n\n\n");
resp = get_resp_header(response);
printf("(%d)status_code=%d\n", __LINE__, resp.status_code);
strcpy(resp.file_name, file_name);
printf("5: Start thread to download...\n");
/*开新的线程下载文件*/
pthread_t download_thread;
pthread_create(&download_thread, NULL, download, (void *) &client_socket);
pthread_join(download_thread, NULL);
return 0;
}
运行结果
$ ./a.out
1: Parsing url...
2: Get ip address...
>>>>Detail<<<<
URL: http://seopic.699pic.com/photo/50010/8515.jpg_wh1200.jpg
DOMAIN: seopic.699pic.com
IP: 119.38.141.130
PORT: 80
FILENAME: 8515.jpg_wh1200.jpg
---------------------------
GET http://seopic.699pic.com/photo/50010/8515.jpg_wh1200.jpg HTTP/1.1
User-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)
Accept: */*
Host:seopic.699pic.com
---------------------------
3: Connect server...
4: Send request...
---------------------------
HTTP/1.1 200 OK
Server: marco/1.6
Date: Thu, 17 Aug 2017 08:51:00 GMT
Content-Type: image/jpeg
Content-Length: 560437
Connection: keep-alive
X-Request-Id: 96aaeffc8c0ece839ba5495988d22dc5; 1f6cb296b097a0c7ec2195a20b484587
X-Source: U/304
ETag: "82e871eb8d245fef907c9e5ef8cd8809"
X-Slice-Complete-Length: 560437
Last-Modified: Thu, 06 Apr 2017 12:59:05 GMT
X-Slice-Size: 65536
Expires: Wed, 23 Aug 2017 17:02:27 GMT
Cache-Control: max-age=691200
Accept-Ranges: bytes
Age: 393184
Via: T.2424.H.1, V.mix-gd-can-008, T.141134.R.1, M.cun-gd-zhs-134
---------------------------
(242)status_code=200
5: Start thread to download...
100.00% [==================================================] 0.53/0.53MB
Download successful ^_^
主要注意两点：
1. 按照HTTP协议组织应用层请求报文并发起请求。
2. 服务器返回的数据中，头部以连续两次\r\n（即一个空行）结束；据此可以分离出头部信息（包含文件大小）和其后的文件原始数据（字节流）。