libcurl使用多线程下载大文件源码示例!

使用libcurl多线程下载大文件的基本思想:
首先打开文件,将文件等分为指定的片段,使用http range下载,一个线程下载一个片段,当线程下载片段时,它们将数据写到打开文件的指定位置,类似BT文件下载的方式(这样片段下载完成后不用再合并),当所有的子线程下载完成后,这个大文件也就随之下载完成了。
下面是相关源码:
//g++ -g curl_multithread_demo1.cpp -o curl_multithread_demo1 -lcurl -lpthread
//./curl_multithread_demo1
//说明: 该程序使用指定的线程数N来下载一个大文件,将该大文件等分为N个分片(若文件大小不能被N整除,余下的字节作为第N+1个分片),每个线程一个分片, 使用range请求.下载完成,子线程退出, 线程数减一
//主线程等到所有的子线程都退出后, 意味着文件下载完成,就关闭文件等待退出.每个线程下载的数据存放在文件的指定位置, 1个master线程, N个或N+1个work线程
//为啥使用互斥锁?因为它包含线程的计数, 文件的写入.这里对文件的读写比较赞, 虽然是分开下载,但是对文件没有单独存放,省去了最后的合并过程.
//

#include <iostream>
#include <string>
#include <unistd.h>
#include <pthread.h>
#include <curl/curl.h>

using namespace std;

// Per-chunk download state. One instance is allocated per range request in
// downLoad, passed to the worker thread, and handed to libcurl as the
// CURLOPT_WRITEDATA pointer so writeFunc can find it.
struct tNode
{
	// Output file; downLoad assigns the same FILE* to every chunk.
	FILE *fp;
	// File offset where the next received bytes are written (advanced by writeFunc).
	long startPos;
	// Last byte offset of this chunk, inclusive (it is the upper bound of the HTTP range).
	long endPos;
	// libcurl easy handle that performs this chunk's transfer (stored as void *).
	void *curl;
	// Thread id filled in by pthread_create in downLoad.
	pthread_t tid;
};

// Count of worker threads launched and not yet finished: incremented in
// downLoad, decremented in workThread.
int threadCnt = 0;
// Single lock taken around threadCnt updates and around the seek+write pair
// on the shared output file in writeFunc.
static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;

/*
 * libcurl write callback (CURLOPT_WRITEFUNCTION).
 *
 * Stores the received bytes into the shared output file at the chunk's
 * current offset and advances that offset.
 *
 * ptr      - received data
 * size     - element size (libcurl always passes 1)
 * nmemb    - number of elements, i.e. number of bytes received
 * userdata - the tNode describing this chunk (set via CURLOPT_WRITEDATA)
 *
 * Returns the number of BYTES consumed.  Anything other than size * nmemb
 * makes libcurl abort the transfer with a write error; that happens on
 * purpose when the server sends more data than the requested range, and
 * on seek/write failures.
 */
static size_t writeFunc (void *ptr, size_t size, size_t nmemb, void *userdata)
{
	tNode *node = (tNode *) userdata;
	const size_t total = size * nmemb;
	size_t written = 0;

	// The FILE* is shared by all workers, so the seek+write pair must be atomic.
	pthread_mutex_lock (&g_mutex);

	// endPos is inclusive: bytes startPos..endPos are still owed to this chunk.
	const long remaining = node->endPos - node->startPos + 1;
	if (remaining > 0)
	{
		// Never write past the end of this chunk into a neighbour's region.
		size_t toWrite = total;
		if ((unsigned long) remaining < total)
		{
			toWrite = (size_t) remaining;
		}

		if (fseek (node->fp, node->startPos, SEEK_SET) == 0)
		{
			written = fwrite (ptr, 1, toWrite, node->fp);
			// Advance by what actually reached the file so a short write
			// cannot leave a hole that is later skipped over.
			node->startPos += (long) written;
		}
	}

	pthread_mutex_unlock (&g_mutex);
	return written;
}

/*
 * libcurl progress callback (CURLOPT_PROGRESSFUNCTION).
 *
 * Turns the downloaded/total byte counts into a whole-number percentage and
 * prints it whenever that percentage is a multiple of 20.  While the total is
 * not yet known (<= 0) the percentage is treated as 0.
 *
 * Always returns 0, which tells libcurl to keep the transfer going.
 * ptr and the two upload counters are unused.
 */
int progressFunc (void *ptr, double totalToDownload, double nowDownloaded, double totalToUpLoad, double nowUpLoaded)
{
	const int pct = (totalToDownload > 0)
		? (int) (nowDownloaded / totalToDownload * 100)
		: 0;

	if (pct % 20 != 0)
	{
		return 0;
	}

	printf ("下载进度%0d%%\n", pct);
	return 0;
}

/************************************************************************/
/* 获取要下载的远程文件的大小 											*/
/************************************************************************/
long getDownloadFileLenth (const char *url)
{
	double downloadFileLenth = 0;
	CURL *handle = curl_easy_init ();
	curl_easy_setopt (handle, CURLOPT_URL, url);
	curl_easy_setopt (handle, CURLOPT_HEADER, 1);	//只需要header头
	curl_easy_setopt (handle, CURLOPT_NOBODY, 1);	//不需要body
	if (curl_easy_perform (handle) == CURLE_OK)
	{
		curl_easy_getinfo (handle, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &downloadFileLenth);
	}
	else
	{
		downloadFileLenth = -1;
	}
	return downloadFileLenth;
}

void *workThread (void *pData)
{
	tNode *pNode = (tNode *) pData;

	int res = curl_easy_perform (pNode->curl);

	if (res != 0)
	{

	}

	curl_easy_cleanup (pNode->curl);

	pthread_mutex_lock (&g_mutex);
	threadCnt--;
	printf ("thred %ld exit\n", pNode->tid);
	pthread_mutex_unlock (&g_mutex);
	delete pNode;
	pthread_exit (0);

	return NULL;
}

bool downLoad (int threadNum, string Url, string Path, string fileName)
{
	long fileLength = getDownloadFileLenth (Url.c_str ());

	if (fileLength <= 0)
	{
		printf ("get the file length error...");
		return false;
	}

	// Create a file to save package.
	const string outFileName = Path + fileName;
	FILE *fp = fopen (outFileName.c_str (), "wb");
	if (!fp)
	{
		return false;
	}

	long partSize = fileLength / threadNum;

	for (int i = 0; i <= threadNum; i++)
	{
		tNode *pNode = new tNode ();

		if (i < threadNum)
		{
			pNode->startPos = i * partSize;
			pNode->endPos = (i + 1) * partSize - 1;
		}
		else
		{
			if (fileLength % threadNum != 0)
			{
				pNode->startPos = i * partSize;
				pNode->endPos = fileLength - 1;
			}
			else
				break;
		}

		CURL *curl = curl_easy_init ();

		pNode->curl = curl;
		pNode->fp = fp;

		char range[64] = { 0 };
		snprintf (range, sizeof (range), "%ld-%ld", pNode->startPos, pNode->endPos);

		// Download pacakge
		curl_easy_setopt (curl, CURLOPT_URL, Url.c_str ());
		curl_easy_setopt (curl, CURLOPT_WRITEFUNCTION, writeFunc);
		curl_easy_setopt (curl, CURLOPT_WRITEDATA, (void *) pNode);
		curl_easy_setopt (curl, CURLOPT_NOPROGRESS, 0L);
		curl_easy_setopt (curl, CURLOPT_PROGRESSFUNCTION, progressFunc);
		curl_easy_setopt (curl, CURLOPT_NOSIGNAL, 1L);
		curl_easy_setopt (curl, CURLOPT_LOW_SPEED_LIMIT, 1L);
		curl_easy_setopt (curl, CURLOPT_LOW_SPEED_TIME, 5L);
		curl_easy_setopt (curl, CURLOPT_RANGE, range);

		pthread_mutex_lock (&g_mutex);
		threadCnt++;
		pthread_mutex_unlock (&g_mutex);
		int rc = pthread_create (&pNode->tid, NULL, workThread, pNode);
	}

	while (threadCnt > 0)
	{
		usleep (1000000L);
	}

	fclose (fp);

	printf ("download succed......\n");
	return true;
}

int main (int argc, char *argv[])
{
//	downLoad (10,
//		"http://101.26.37.79/ws.cdn.baidupcs.com/file/2c72878c8a5731a27f6d0a6018173520?xcode=ccef659b8500cc28f5ca86d0cbb8d4c6e6c008630559e8680b2977702d3e6764&fid=335809860-250528-463118344000947&time=1410621706&sign=FDTAXER-DCb740ccc5511e5e8fedcff06b081203-zXbBKRSs5knf%2BKll6uykeWpQoTY%3D&to=cb&fm=Nin,B,U,nc&sta_dx=105&sta_cs=98&sta_ft=mp4&sta_ct=3&newver=1&newfm=1&flow_ver=3&expires=8h&rt=pr&r=487142437&mlogid=3863405498&vuk=335809860&vbdid=140272377&fn=opclass.com-%E4%BA%92%E8%81%94%E7%BD%91%E6%97%B6%E4%BB%A3%E7%AC%AC1%E9%9B%86%EF%BC%9A%E6%97%B6%E4%BB%A3.mp4&wshc_tag=0&wsts_tag=5414610a&wsid_tag=72f52ad0&wsiphost=ipdbm",
//		"./", "Network_Age_1.mp4");
	downLoad (10,
		"http://139.209.90.30/ws.cdn.baidupcs.com/file/03f85133cb241c57cc17f5baf66b9820?xcode=e38bc1881ff679abc91893c2710fc81ac7f100988f506460837047dfb5e85c39&fid=335809860-250528-571147930138020&time=1410622082&sign=FDTAXER-DCb740ccc5511e5e8fedcff06b081203-cJtuWcK6QQghdq9RC%2F%2F4eJQ39gU%3D&to=cb&fm=Nin,B,U,nc&sta_dx=105&sta_cs=89&sta_ft=mp4&sta_ct=3&newver=1&newfm=1&flow_ver=3&expires=8h&rt=pr&r=588575374&mlogid=1816906087&vuk=335809860&vbdid=140272377&fn=opclass.com-%E4%BA%92%E8%81%94%E7%BD%91%E6%97%B6%E4%BB%A3%E7%AC%AC2%E9%9B%86%EF%BC%9A%E6%B5%AA%E6%BD%AE.mp4&wshc_tag=0&wsts_tag=54146282&wsid_tag=72f52ad0&wsiphost=ipdbm",
		"./", "Network_Age_2.mp4");
//	downLoad (10,
//		"http://121.18.230.69/ws.cdn.baidupcs.com/file/e2b36423e8f1cc4019d77598e32870f5?xcode=d60f60ab1a1746111db642272c0f9726fcfce98aadfaf01d0b2977702d3e6764&fid=335809860-250528-544759395965909&time=1410617631&sign=FDTAXER-DCb740ccc5511e5e8fedcff06b081203-eirgEMQqOKx5ssfzMt%2Ft0JEbvM0%3D&to=cb&fm=Nin,B,U,nc&sta_dx=173&sta_cs=74&sta_ft=mp4&sta_ct=3&newver=1&newfm=1&flow_ver=3&expires=8h&rt=pr&r=943427300&mlogid=3977494833&vuk=335809860&vbdid=140272377&fn=opclass.com-%E4%BA%92%E8%81%94%E7%BD%91%E6%97%B6%E4%BB%A3%E7%AC%AC10%E9%9B%86%EF%BC%9A%E7%9C%BA%E6%9C%9B.mp4&wshc_tag=0&wsts_tag=54145120&wsid_tag=72f52ad0&wsiphost=ipdbm",
//		"./", "Network_Age_10.mp4");
	//downLoad(10, "http://ardownload.adobe.com/pub/adobe/reader/win/11.x/11.0.01/en_US/AdbeRdr11001_en_US.exe", "./", "AdbeRdr11001_en_US.exe");

	getchar ();
	return 0;
}

使用方法:

在main函数中, 指定要下载的线程数, 下载大文件的url, 存放在本地的文件目录(末尾需要加/), 待存放的文件名, 这几个都写死在代码, 然后编译并运行

g++ -g curl_multithread_demo1.cpp -o curl_multithread_demo1 -lcurl -lpthread
./curl_multithread_demo1

下载后的文件目录


文件完整性验证:

为了验证代码的准确性,我选用百度云盘上的视频文件《互联网时代》纪录片来进行下载,下载完成后,我使用vlc播放器来播放,发现播放是正常的,这说明程序没有问题。

我们也可以下载Ubuntu 14.04.1官网的iso,下载完成后,使用md5sum计算下载文件的md5值,并与官网提供的md5值比较;两者相同才能说明文件的每个字节都下载正确(视频能正常播放只是一个粗略的验证)。

要注意的问题
基本上,每个线程都应该有自己的easy handle用于数据通信(如果需要的话)。千万不要在多线程之间共享同一个easy handle。
待解决的问题:
怎样通过预生成的线程池来下载呢?

参考文献
[1].http://blog.csdn.net/zmy12007/article/details/37675331
  • 4
    点赞
  • 37
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值