使用libcurl多线程下载大文件的基本思想:
首先打开文件,将文件等分为指定的片段,使用http range下载,一个线程下载一个片段,当线程下载片段时,它们将数据写到打开文件的指定位置,类似BT文件下载的方式(这样片段下载完成后不用再合并),当所有的子线程下载完成后,这个大文件也就随之下载完成了。
下面是相关源码:
基本上,每个线程都应该有自己的easy handle用于数据通信(如果需要的话)。千万不要在多线程之间共享同一个easy handle。
待解决的问题:
怎样通过预生成的线程池来下载呢?
参考文献
[1].http://blog.csdn.net/zmy12007/article/details/37675331
首先打开文件,将文件等分为指定的片段,使用http range下载,一个线程下载一个片段,当线程下载片段时,它们将数据写到打开文件的指定位置,类似BT文件下载的方式(这样片段下载完成后不用再合并),当所有的子线程下载完成后,这个大文件也就随之下载完成了。
下面是相关源码:
//g++ -g curl_multithread_demo1.cpp -o curl_multithread_demo1 -lcurl -lpthread
//./curl_multithread_demo1
//说明: 该程序使用指定的线程数N来下载一个大文件,将该大文件等分为N个分片(如果文件大小不能被N整除,则再多出一个存放余数的分片),每个线程一个分片, 使用range请求.下载完成,子线程退出, 线程数减一
//主线程等到所有的子线程都退出后, 意味着文件下载完成,就关闭文件等待退出.每个线程下载的数据存放在文件的指定位置, 1个master线程, N个(有余数分片时为N+1个)work线程
//为啥使用互斥锁?因为它包含线程的计数, 文件的写入.这里对文件的读写比较赞, 虽然是分开下载,但是对文件没有单独存放,省去了最后的合并过程.
//
#include <cstdio>
#include <iostream>
#include <string>
#include <pthread.h>
#include <unistd.h>
#include <curl/curl.h>
using namespace std;
// Per-segment download state. downLoad allocates one instance per byte range
// and hands it to a worker thread; it is also the CURLOPT_WRITEDATA pointer
// received by writeFunc.
struct tNode
{
FILE *fp;       // output file; downLoad stores the same FILE* in every node
long startPos;  // file offset of the next byte to write; advanced by writeFunc
long endPos;    // file offset of the last byte of this segment (inclusive)
void *curl;     // libcurl easy handle that performs this segment's transfer
pthread_t tid;  // thread id, filled in by pthread_create in downLoad
};
// Number of worker threads that have been started and have not yet finished.
// downLoad increments it and workThread decrements it, both under g_mutex.
int threadCnt = 0;
// Protects threadCnt and serializes the seek+write pairs that writeFunc
// performs on the shared output file.
static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;
// libcurl write callback (CURLOPT_WRITEFUNCTION).
//
// Stores a chunk of the response body at the segment's current offset in the
// shared output file and advances that offset.
//
//   ptr      - received data, size * nmemb bytes long
//   size     - size of one element
//   nmemb    - number of elements
//   userdata - the tNode registered through CURLOPT_WRITEDATA
//
// Returns the number of bytes stored. Data that would land beyond
// node->endPos is not written; the resulting short return value makes
// libcurl abort the transfer with a write error instead of letting one
// segment overwrite bytes that belong to another one.
static size_t writeFunc (void *ptr, size_t size, size_t nmemb, void *userdata)
{
  tNode *node = (tNode *) userdata;
  const size_t incoming = size * nmemb;
  size_t stored = 0;

  // The FILE* is shared by all segments, so the seek and the write must not
  // be interleaved with another thread's seek/write.
  pthread_mutex_lock (&g_mutex);
  if (node->startPos <= node->endPos)
    {
      // endPos is inclusive, hence the +1.
      const size_t room = (size_t) (node->endPos - node->startPos) + 1;
      const size_t toStore = incoming < room ? incoming : room;

      if (toStore > 0 && fseek (node->fp, node->startPos, SEEK_SET) == 0)
        {
          stored = fwrite (ptr, 1, toStore, node->fp);
          // Advance by what really reached the file, so that a completed
          // segment ends with startPos == endPos + 1 and later calls write
          // nothing.
          node->startPos += (long) stored;
        }
    }
  pthread_mutex_unlock (&g_mutex);

  return stored;
}
// libcurl progress callback (CURLOPT_PROGRESSFUNCTION).
//
// Computes the download percentage of the calling transfer and prints it
// whenever that percentage is a multiple of 20. While the total size is not
// known (totalToDownload <= 0) the percentage is treated as 0. The upload
// arguments and ptr are not used. Always returns 0.
int progressFunc (void *ptr, double totalToDownload, double nowDownloaded, double totalToUpLoad, double nowUpLoaded)
{
  const int donePercent =
    (totalToDownload > 0) ? (int) (nowDownloaded / totalToDownload * 100) : 0;

  if (donePercent % 20 != 0)
    {
      return 0;
    }

  printf ("下载进度%0d%%\n", donePercent);
  return 0;
}
/************************************************************************/
/* 获取要下载的远程文件的大小 */
/************************************************************************/
long getDownloadFileLenth (const char *url)
{
double downloadFileLenth = 0;
CURL *handle = curl_easy_init ();
curl_easy_setopt (handle, CURLOPT_URL, url);
curl_easy_setopt (handle, CURLOPT_HEADER, 1); //只需要header头
curl_easy_setopt (handle, CURLOPT_NOBODY, 1); //不需要body
if (curl_easy_perform (handle) == CURLE_OK)
{
curl_easy_getinfo (handle, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &downloadFileLenth);
}
else
{
downloadFileLenth = -1;
}
return downloadFileLenth;
}
void *workThread (void *pData)
{
tNode *pNode = (tNode *) pData;
int res = curl_easy_perform (pNode->curl);
if (res != 0)
{
}
curl_easy_cleanup (pNode->curl);
pthread_mutex_lock (&g_mutex);
threadCnt--;
printf ("thred %ld exit\n", pNode->tid);
pthread_mutex_unlock (&g_mutex);
delete pNode;
pthread_exit (0);
return NULL;
}
bool downLoad (int threadNum, string Url, string Path, string fileName)
{
long fileLength = getDownloadFileLenth (Url.c_str ());
if (fileLength <= 0)
{
printf ("get the file length error...");
return false;
}
// Create a file to save package.
const string outFileName = Path + fileName;
FILE *fp = fopen (outFileName.c_str (), "wb");
if (!fp)
{
return false;
}
long partSize = fileLength / threadNum;
for (int i = 0; i <= threadNum; i++)
{
tNode *pNode = new tNode ();
if (i < threadNum)
{
pNode->startPos = i * partSize;
pNode->endPos = (i + 1) * partSize - 1;
}
else
{
if (fileLength % threadNum != 0)
{
pNode->startPos = i * partSize;
pNode->endPos = fileLength - 1;
}
else
break;
}
CURL *curl = curl_easy_init ();
pNode->curl = curl;
pNode->fp = fp;
char range[64] = { 0 };
snprintf (range, sizeof (range), "%ld-%ld", pNode->startPos, pNode->endPos);
// Download pacakge
curl_easy_setopt (curl, CURLOPT_URL, Url.c_str ());
curl_easy_setopt (curl, CURLOPT_WRITEFUNCTION, writeFunc);
curl_easy_setopt (curl, CURLOPT_WRITEDATA, (void *) pNode);
curl_easy_setopt (curl, CURLOPT_NOPROGRESS, 0L);
curl_easy_setopt (curl, CURLOPT_PROGRESSFUNCTION, progressFunc);
curl_easy_setopt (curl, CURLOPT_NOSIGNAL, 1L);
curl_easy_setopt (curl, CURLOPT_LOW_SPEED_LIMIT, 1L);
curl_easy_setopt (curl, CURLOPT_LOW_SPEED_TIME, 5L);
curl_easy_setopt (curl, CURLOPT_RANGE, range);
pthread_mutex_lock (&g_mutex);
threadCnt++;
pthread_mutex_unlock (&g_mutex);
int rc = pthread_create (&pNode->tid, NULL, workThread, pNode);
}
while (threadCnt > 0)
{
usleep (1000000L);
}
fclose (fp);
printf ("download succed......\n");
return true;
}
int main (int argc, char *argv[])
{
// downLoad (10,
// "http://101.26.37.79/ws.cdn.baidupcs.com/file/2c72878c8a5731a27f6d0a6018173520?xcode=ccef659b8500cc28f5ca86d0cbb8d4c6e6c008630559e8680b2977702d3e6764&fid=335809860-250528-463118344000947&time=1410621706&sign=FDTAXER-DCb740ccc5511e5e8fedcff06b081203-zXbBKRSs5knf%2BKll6uykeWpQoTY%3D&to=cb&fm=Nin,B,U,nc&sta_dx=105&sta_cs=98&sta_ft=mp4&sta_ct=3&newver=1&newfm=1&flow_ver=3&expires=8h&rt=pr&r=487142437&mlogid=3863405498&vuk=335809860&vbdid=140272377&fn=opclass.com-%E4%BA%92%E8%81%94%E7%BD%91%E6%97%B6%E4%BB%A3%E7%AC%AC1%E9%9B%86%EF%BC%9A%E6%97%B6%E4%BB%A3.mp4&wshc_tag=0&wsts_tag=5414610a&wsid_tag=72f52ad0&wsiphost=ipdbm",
// "./", "Network_Age_1.mp4");
downLoad (10,
"http://139.209.90.30/ws.cdn.baidupcs.com/file/03f85133cb241c57cc17f5baf66b9820?xcode=e38bc1881ff679abc91893c2710fc81ac7f100988f506460837047dfb5e85c39&fid=335809860-250528-571147930138020&time=1410622082&sign=FDTAXER-DCb740ccc5511e5e8fedcff06b081203-cJtuWcK6QQghdq9RC%2F%2F4eJQ39gU%3D&to=cb&fm=Nin,B,U,nc&sta_dx=105&sta_cs=89&sta_ft=mp4&sta_ct=3&newver=1&newfm=1&flow_ver=3&expires=8h&rt=pr&r=588575374&mlogid=1816906087&vuk=335809860&vbdid=140272377&fn=opclass.com-%E4%BA%92%E8%81%94%E7%BD%91%E6%97%B6%E4%BB%A3%E7%AC%AC2%E9%9B%86%EF%BC%9A%E6%B5%AA%E6%BD%AE.mp4&wshc_tag=0&wsts_tag=54146282&wsid_tag=72f52ad0&wsiphost=ipdbm",
"./", "Network_Age_2.mp4");
// downLoad (10,
// "http://121.18.230.69/ws.cdn.baidupcs.com/file/e2b36423e8f1cc4019d77598e32870f5?xcode=d60f60ab1a1746111db642272c0f9726fcfce98aadfaf01d0b2977702d3e6764&fid=335809860-250528-544759395965909&time=1410617631&sign=FDTAXER-DCb740ccc5511e5e8fedcff06b081203-eirgEMQqOKx5ssfzMt%2Ft0JEbvM0%3D&to=cb&fm=Nin,B,U,nc&sta_dx=173&sta_cs=74&sta_ft=mp4&sta_ct=3&newver=1&newfm=1&flow_ver=3&expires=8h&rt=pr&r=943427300&mlogid=3977494833&vuk=335809860&vbdid=140272377&fn=opclass.com-%E4%BA%92%E8%81%94%E7%BD%91%E6%97%B6%E4%BB%A3%E7%AC%AC10%E9%9B%86%EF%BC%9A%E7%9C%BA%E6%9C%9B.mp4&wshc_tag=0&wsts_tag=54145120&wsid_tag=72f52ad0&wsiphost=ipdbm",
// "./", "Network_Age_10.mp4");
//downLoad(10, "http://ardownload.adobe.com/pub/adobe/reader/win/11.x/11.0.01/en_US/AdbeRdr11001_en_US.exe", "./", "AdbeRdr11001_en_US.exe");
getchar ();
return 0;
}
使用方法:
在main函数中, 指定要下载的线程数, 下载大文件的url, 存放在本地的文件目录(末尾需要加/), 待存放的文件名, 这几个都写死在代码, 然后编译并运行
g++ -g curl_multithread_demo1.cpp -o curl_multithread_demo1 -lcurl -lpthread
./curl_multithread_demo1
下载后的文件目录
文件完整性验证:
为了验证代码的准确性,我选用百度云盘上的视频文件《互联网时代》纪录片来进行下载,下载完成后,我使用vlc播放器来播放,发现播放是正常的,这说明程序没有问题。
我们也可以下载Ubuntu 14.04.1官网的iso,下载完成后,使用md5check来检查下载文件的md5值与官网提供的是否相同。
要注意的问题:基本上,每个线程都应该有自己的easy handle用于数据通信(如果需要的话)。千万不要在多线程之间共享同一个easy handle。
待解决的问题:
怎样通过预生成的线程池来下载呢?
参考文献
[1].http://blog.csdn.net/zmy12007/article/details/37675331