关于使用curl下载网页源码中文乱码问题!

3 篇文章 0 订阅

 关于使用libcurl下载网页源码中文乱码问题!

参考了这位兄弟的文章:http://blog.csdn.net/malihong1/article/details/50480420,不过他可能没有继续找到解决方法。

借用了http://www.cnblogs.com/iRoad/p/4105172.html的函数

直接修改官网 demo(https://curl.haxx.se/libcurl/c/example.html)中的 https.c,代码如下:

  1. /*************************************************************************** 
  2.  *                                  _   _ ____  _ 
  3.  *  Project                     ___| | | |  _ \| | 
  4.  *                             / __| | | | |_) | | 
  5.  *                            | (__| |_| |  _ <| |___ 
  6.  *                             \___|\___/|_| \_\_____| 
  7.  * 
  8.  * Copyright (C) 1998 - 2015, Daniel Stenberg, <daniel@haxx.se>, et al. 
  9.  * 
  10.  * This software is licensed as described in the file COPYING, which 
  11.  * you should have received as part of this distribution. The terms 
  12.  * are also available at https://curl.haxx.se/docs/copyright.html. 
  13.  * 
  14.  * You may opt to use, copy, modify, merge, publish, distribute and/or sell 
  15.  * copies of the Software, and permit persons to whom the Software is 
  16.  * furnished to do so, under the terms of the COPYING file. 
  17.  * 
  18.  * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 
  19.  * KIND, either express or implied. 
  20.  * 
  21.  ***************************************************************************/  
  22. /* <DESC> 
  23.  * Simple HTTPS GET 
  24.  * </DESC> 
  25.  */  
  26. // C 运行时头文件  
  27. #include <atlstr.h>  
  28. #include <WinINet.h>  
  29. #include <string>  
  30.   
  31. #include <stdio.h>  
  32. #include <curl/curl.h>  
  33. using namespace std;  
  34.   
  35.   
  36.   
  37. string UTF8ToGBK(const std::string& strUTF8)    // Converts UTF-8 text to the system ANSI code page (CP_ACP); the result is GBK only where that code page is GBK.
  38. {    
  39.     int len = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);    // Size query: no output buffer; -1 means the input is read up to its NUL terminator.
  40.     WCHAR* wszGBK = new WCHAR[len+1];  // Intermediate wide-character buffer (despite the name it holds wide chars, not GBK bytes).
  41.     memset(wszGBK, 0, len * 2 + 2);    // Zero-fills len + 1 elements, assuming a 2-byte WCHAR.
  42.     MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)(LPCTSTR)strUTF8.c_str(), -1, wszGBK, len);    // UTF-8 -> wide chars; the return value is not checked.
  43.   
  44.     len = WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, NULL, 0, NULL, NULL);    // Size query for the ANSI output; len is reused.
  45.     char *szGBK = new char[len + 1];    
  46.     memset(szGBK, 0, len + 1);    
  47.     WideCharToMultiByte(CP_ACP,0, wszGBK, -1, szGBK, len, NULL, NULL);     // wide chars -> ANSI code page bytes; the return value is not checked.
  48.     std::string strTemp(szGBK);    // Copies up to the first NUL byte of szGBK.
  49.     delete[]szGBK;    // Both temporary buffers are released before returning.
  50.     delete[]wszGBK;    
  51.     return strTemp;    
  52. }   
  53.   
  54.   
  55. /** 
  56. @brief  Receive callback for char* data; only valid when stream actually points to a std::string 
  57. */  
  58. static size_t DataReceiveCallback(void *ptr, size_t size, size_t nmemb, void *stream) {  
  59.     size_t len = size * nmemb;  // Byte count of the chunk handed to this call.
  60.   
  61.     std::string* buffer = reinterpret_cast<std::string*>(stream);  // stream is the pointer registered with CURLOPT_WRITEDATA in main.
  62.     if (buffer)  
  63.         buffer->append((char*)ptr, len);  // Appends exactly len bytes; the chunk is not treated as NUL-terminated.
  64.   
  65.     return len;  // Returned even when stream is null, in which case the chunk is not stored.
  66. }  
  67.   
  68. int main(void)  
  69. {  
  70.   CURL *curl;  
  71.   CURLcode res;  
  72.   
  73.   std::string data;  
  74.   curl_global_init(CURL_GLOBAL_DEFAULT);  
  75.   
  76.   curl = curl_easy_init();  
  77.   if(curl) {  
  78.     curl_easy_setopt(curl, CURLOPT_URL, ”https://v.qq.com/x/cover/jwplwx9ootoigud.html”);  
  79.   
  80. #ifdef SKIP_PEER_VERIFICATION  
  81.     /* 
  82.      * If you want to connect to a site who isn’t using a certificate that is 
  83.      * signed by one of the certs in the CA bundle you have, you can skip the 
  84.      * verification of the server’s certificate. This makes the connection 
  85.      * A LOT LESS SECURE. 
  86.      * 
  87.      * If you have a CA cert for the server stored someplace else than in the 
  88.      * default bundle, then the CURLOPT_CAPATH option might come handy for 
  89.      * you. 
  90.      */  
  91.     curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);  
  92. #endif  
  93.   
  94. #ifdef SKIP_HOSTNAME_VERIFICATION  
  95.     /* 
  96.      * If the site you’re connecting to uses a different host name that what 
  97.      * they have mentioned in their server certificate’s commonName (or 
  98.      * subjectAltName) fields, libcurl will refuse to connect. You can skip 
  99.      * this check, but this will make the connection less secure. 
  100.      */  
  101.     curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);  
  102. #endif  
  103.   
  104.     // 响应数据回调  
  105.     curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, DataReceiveCallback);  
  106.     curl_easy_setopt(curl, CURLOPT_WRITEDATA, &data);  
  107.       
  108.   
  109.     /* Perform the request, res will get the return code */  
  110.     res = curl_easy_perform(curl);  
  111.     /* Check for errors */  
  112.     if(res != CURLE_OK)  
  113.       fprintf(stderr, ”curl_easy_perform() failed: %s\n”,  
  114.               curl_easy_strerror(res));  
  115.   
  116.     string strGBK = UTF8ToGBK(data);  
  117.     /* always cleanup */  
  118.     curl_easy_cleanup(curl);  
  119.   }  
  120.   
  121.   curl_global_cleanup();  
  122.   
  123.   return 0;  
  124. }  
/***************************************************************************
 *                                  _   _ ____  _
 *  Project                     ___| | | |  _ \| |
 *                             / __| | | | |_) | |
 *                            | (__| |_| |  _ <| |___
 *                             \___|\___/|_| \_\_____|
 *
 * Copyright (C) 1998 - 2015, Daniel Stenberg, <daniel@haxx.se>, et al.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution. The terms
 * are also available at https://curl.haxx.se/docs/copyright.html.
 *
 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
 * copies of the Software, and permit persons to whom the Software is
 * furnished to do so, under the terms of the COPYING file.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ***************************************************************************/
/* <DESC>
 * Simple HTTPS GET
 * </DESC>
 */
// C 运行时头文件




#include <atlstr.h>

#include <WinINet.h>

#include <string>

#include <stdio.h>

#include <curl/curl.h>

using namespace std;

string UTF8ToGBK(const std::string& strUTF8)
{
int len = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);
WCHAR* wszGBK = new WCHAR[len+1];
memset(wszGBK, 0, len * 2 + 2);
MultiByteToWideChar(CP_UTF8, 0, (LPCSTR)(LPCTSTR)strUTF8.c_str(), -1, wszGBK, len);

len = WideCharToMultiByte(CP_ACP, 0, wszGBK, -1, NULL, 0, NULL, NULL);  
char *szGBK = new char[len + 1];  
memset(szGBK, 0, len + 1);  
WideCharToMultiByte(CP_ACP,0, wszGBK, -1, szGBK, len, NULL, NULL);   
std::string strTemp(szGBK);  
delete[]szGBK;  
delete[]wszGBK;  
return strTemp;  

}

/**
 * @brief libcurl write callback that appends each received chunk to a std::string.
 *
 * Only valid when @p stream (the pointer registered with CURLOPT_WRITEDATA)
 * actually points to a std::string.
 *
 * @param ptr    Start of the received bytes; not NUL-terminated.
 * @param size   Size of one element.
 * @param nmemb  Number of elements; the chunk is size * nmemb bytes long.
 * @param stream Destination std::string, or null to discard the chunk.
 * @return size * nmemb when the chunk was stored or discarded; 0 when
 *         appending failed. A value different from the chunk size makes
 *         libcurl abort the transfer with a write error.
 */
static size_t DataReceiveCallback(void *ptr, size_t size, size_t nmemb, void *stream) {
    const size_t len = size * nmemb;

    std::string* buffer = static_cast<std::string*>(stream);
    if (!buffer || len == 0)
        return len;

    try {
        buffer->append(static_cast<const char*>(ptr), len);
    } catch (...) {
        // This function is called from libcurl's C code; an exception (for
        // example std::bad_alloc) must not unwind through those frames, so
        // it is turned into an error return instead.
        return 0;
    }

    return len;
}

int main(void)
{
CURL *curl;
CURLcode res;

std::string data;
curl_global_init(CURL_GLOBAL_DEFAULT);

curl = curl_easy_init();
if(curl) {
curl_easy_setopt(curl, CURLOPT_URL, “https://v.qq.com/x/cover/jwplwx9ootoigud.html“);

ifdef SKIP_PEER_VERIFICATION

/*
 * If you want to connect to a site who isn't using a certificate that is
 * signed by one of the certs in the CA bundle you have, you can skip the
 * verification of the server's certificate. This makes the connection
 * A LOT LESS SECURE.
 *
 * If you have a CA cert for the server stored someplace else than in the
 * default bundle, then the CURLOPT_CAPATH option might come handy for
 * you.
 */
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, 0L);

endif

ifdef SKIP_HOSTNAME_VERIFICATION

/*
 * If the site you're connecting to uses a different host name that what
 * they have mentioned in their server certificate's commonName (or
 * subjectAltName) fields, libcurl will refuse to connect. You can skip
 * this check, but this will make the connection less secure.
 */
curl_easy_setopt(curl, CURLOPT_SSL_VERIFYHOST, 0L);

endif

// 响应数据回调
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, DataReceiveCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &amp;data);


/* Perform the request, res will get the return code */
res = curl_easy_perform(curl);
/* Check for errors */
if(res != CURLE_OK)
  fprintf(stderr, "curl_easy_perform() failed: %s\n",
          curl_easy_strerror(res));

string strGBK = UTF8ToGBK(data);
/* always cleanup */
curl_easy_cleanup(curl);

}

curl_global_cleanup();

return 0;
}


另外,因为使用了 ATL,如果源文件是 .c 后缀,编译时会报错:error C1189: #error : ATL requires C++ compilation (use a .cpp suffix)(文件 c:\program files (x86)\microsoft visual studio 10.0\vc\atlmfc\include\atlstr.h,第 16 行)。

只需要将 .c 后缀改为 .cpp 后缀就可以了。

我下载的curl:


官网下载curl,编译后(curl\curl-7.52.1\projects\Windows\下有win项目的解决方案),在这个目录下curl\curl-7.52.1\builds\libcurl-vc10-x86-release-dll-ipv6-sspi-winssl\ 存在三个文件夹

bin

–curl.exe

–libcurl.dll(需要拷贝到运行目录)

include

–curl(头文件目录)

lib

–libcurl.lib

其他没有做什么特别的配置,只需要在项目中配置 curl 的头文件(include)目录和库(lib)目录。




                </div>
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值