数据传输神器cURL
Client URL Library Functions 简称cURL,是一种在客户端和服务器之间传输数据的工具,使用URL规则进行交互。一般用于以下几个方向:
- 网页资源(编写网页爬虫,获取网页信息)
- webservice数据接口资源(动态获取接口数据,比如天气、号码归属地)
- FTP服务器里面文件资源(FTP服务器上传或下载)
- 其它资源(所有网络资源可以通过CURL下载和访问)
注意:使用cURL时要确保系统的PHP环境支持该库(已启用curl扩展)。
一、cURL操作步骤
- 初始化。curl_init();
- 向服务器发送数据和接收数据(发送前可用curl_setopt()设置请求参数)。curl_exec();
- 关闭cURL。curl_close();
二、cURL常见实例
1. 简单网页爬虫(获取网页信息)
// Minimal page fetch: CURLOPT_RETURNTRANSFER is not set, so curl_exec()
// writes the response body straight to the output.
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, 'http://www.baidu.com');
curl_exec($curl);
curl_close($curl);
2. 抓取网页信息部分替换
关键参数CURLOPT_RETURNTRANSFER:设置为true后,curl_exec()不再直接输出结果,而是将响应内容作为字符串返回
// Fetch a page and replace part of its content before printing it.
$curl = curl_init();
curl_setopt($curl, CURLOPT_URL, 'http://www.baidu.com');
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); // return the response as a string instead of printing it
$output = curl_exec($curl);
if ($output === false) {
    // curl_exec() returns false on failure; report the error instead of
    // silently passing false to str_replace().
    echo 'Curl error: ' . curl_error($curl);
} else {
    echo str_replace('百度', '度娘', $output);
}
curl_close($curl); // release the handle
3. webservice获取天气
第一种
// Method 1: call the weather web service with a form-encoded HTTP POST.
// http_build_query() URL-encodes the non-ASCII city name so the body is a
// valid application/x-www-form-urlencoded payload.
$data = http_build_query(array('theCityName' => '北京'));
$curlobj = curl_init();
curl_setopt($curlobj, CURLOPT_URL, "http://www.webxml.com.cn/WebServices/WeatherWebService.asmx/getWeatherbyCityName");
curl_setopt($curlobj, CURLOPT_HEADER, 0);          // do not include response headers in the output
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, 1);  // return the response instead of printing it
curl_setopt($curlobj, CURLOPT_POST, 1);
curl_setopt($curlobj, CURLOPT_POSTFIELDS, $data);
// Each entry must be a complete "Name: value" header line; the header name
// "Content-Type:" is required. Content-Length is computed by cURL from
// CURLOPT_POSTFIELDS, so it is not set by hand.
curl_setopt($curlobj, CURLOPT_HTTPHEADER, array(
    "Content-Type: application/x-www-form-urlencoded; charset=utf-8"
));
$rtn = curl_exec($curlobj);
if (!curl_errno($curlobj)) {
    // $info = curl_getinfo($curlobj);
    // print_r($info);
    echo $rtn;
} else {
    echo 'Curl error: ' . curl_error($curlobj);
}
curl_close($curlobj);
第二种方式
// Method 2: call the same weather operation through a SOAP 1.1 request.
// The envelope below uses the SOAP 1.1 namespace
// (http://schemas.xmlsoap.org/soap/envelope/) together with a SOAPAction header.
$data = sprintf('<?xml version="1.0" encoding="utf-8"?>
<soap:Envelope
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:xsd="http://www.w3.org/2001/XMLSchema"
xmlns:soap="http://schemas.xmlsoap.org/soap/envelope/">
<soap:Body>
<getWeatherbyCityName xmlns="http://WebXml.com.cn/">
<theCityName>%s</theCityName>
</getWeatherbyCityName>
</soap:Body>
</soap:Envelope>','北京');
$curlobj = curl_init();
curl_setopt($curlobj, CURLOPT_URL, "http://www.webxml.com.cn/WebServices/WeatherWebService.asmx");
curl_setopt($curlobj, CURLOPT_POST, 1);
curl_setopt($curlobj, CURLOPT_HEADER, 0);          // do not include response headers in the output
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, 1);  // return the response instead of printing it
curl_setopt($curlobj, CURLOPT_POSTFIELDS, $data);
// SOAP 1.1 over HTTP is sent as text/xml with a SOAPAction header.
// application/soap+xml is the SOAP 1.2 media type and does not match the
// SOAP 1.1 envelope namespace used above.
curl_setopt($curlobj, CURLOPT_HTTPHEADER, array(
    "Content-Type: text/xml; charset=utf-8",
    "Content-Length: " . strlen($data),  // strlen() counts bytes, which is what the header needs
    "SOAPAction: \"http://WebXml.com.cn/getWeatherbyCityName\""
));
$rtn = curl_exec($curlobj);
if (!curl_errno($curlobj)) {
    // Dump the transfer details, then the raw SOAP response.
    $info = curl_getinfo($curlobj);
    print_r($info);
    echo "RETURN: " . $rtn;
} else {
    echo 'Curl error: ' . curl_error($curlobj);
}
curl_close($curlobj);
4. cURL实现登录抓取
// Log in with a POST request, then reuse the same handle (and its cookies)
// to fetch a page that requires the login session.
//
// The credentials are built once with http_build_query(), which URL-encodes
// them (e.g. the "@" in the user name). A string body is sent as
// application/x-www-form-urlencoded; passing the array itself to
// CURLOPT_POSTFIELDS would send multipart/form-data instead.
$data = http_build_query(array(
    'username' => 'zjzhoufy@126.com',
    'password' => '1q2w3e',
    'remember' => 1,
));
$curlobj = curl_init();
curl_setopt($curlobj, CURLOPT_URL, "http://www.imooc.com/user/login");
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, true); // return responses instead of printing them
// Cookie settings; these must be in place before the first request.
date_default_timezone_set('PRC'); // kept from the original tutorial, which sets the timezone before using cookies
curl_setopt($curlobj, CURLOPT_COOKIESESSION, TRUE);
// An empty CURLOPT_COOKIEFILE loads no cookies but switches on cURL's cookie
// engine, so the session cookie from the login response is sent with the
// second request. Without it the second request is anonymous.
curl_setopt($curlobj, CURLOPT_COOKIEFILE, '');
curl_setopt($curlobj, CURLOPT_HEADER, 0);
curl_setopt($curlobj, CURLOPT_FOLLOWLOCATION, 1); // follow HTTP redirects
curl_setopt($curlobj, CURLOPT_POST, 1);
curl_setopt($curlobj, CURLOPT_POSTFIELDS, $data);
// Header entries must be complete "Name: value" lines. Content-Length is
// computed by cURL from CURLOPT_POSTFIELDS.
curl_setopt($curlobj, CURLOPT_HTTPHEADER, array(
    "Content-Type: application/x-www-form-urlencoded; charset=utf-8"
));
if (curl_exec($curlobj) === false) {
    // Login request failed at the transport level; nothing more to fetch.
    echo 'Curl error: ' . curl_error($curlobj);
} else {
    // Second request: plain GET of the protected page on the same handle.
    curl_setopt($curlobj, CURLOPT_URL, "http://www.imooc.com/space/index");
    curl_setopt($curlobj, CURLOPT_HTTPGET, true);        // explicitly switch the handle back to GET
    curl_setopt($curlobj, CURLOPT_HTTPHEADER, array());  // drop the POST-specific headers
    $output = curl_exec($curlobj);
    if ($output === false) {
        echo 'Curl error: ' . curl_error($curlobj);
    } else {
        echo $output;
    }
}
curl_close($curlobj);
5. FTP上传
// Upload a local file to an FTP server.
$localfile = 'ftp01.php';
// Open in binary mode so the bytes sent match filesize() on every platform.
$fp = fopen($localfile, 'rb');
if ($fp === false) {
    // Without a readable source file there is nothing to upload.
    echo 'Cannot open local file: ' . $localfile;
} else {
    $curlobj = curl_init();
    curl_setopt($curlobj, CURLOPT_URL, "ftp://192.168.1.100/ftp01_uploaded.php");
    curl_setopt($curlobj, CURLOPT_HEADER, 0);
    curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($curlobj, CURLOPT_TIMEOUT, 300); // times out after 300s
    curl_setopt($curlobj, CURLOPT_USERPWD, "peter.zhou:123456"); // FTP "user:password"
    curl_setopt($curlobj, CURLOPT_UPLOAD, 1);
    curl_setopt($curlobj, CURLOPT_INFILE, $fp);  // stream the upload from this file handle
    curl_setopt($curlobj, CURLOPT_INFILESIZE, filesize($localfile));
    $rtn = curl_exec($curlobj);
    fclose($fp);
    if (!curl_errno($curlobj)) {
        echo "Uploaded successfully.";
    } else {
        echo 'Curl error: ' . curl_error($curlobj);
    }
    curl_close($curlobj);
}
6. FTP下载
// Download a file from an FTP server and save it locally.
$outfile = fopen('dest.txt', 'wb'); // local file the download is written to
if ($outfile === false) {
    // Without a writable destination the transfer has nowhere to go.
    echo 'Cannot open local file: dest.txt';
} else {
    $curlobj = curl_init();
    curl_setopt($curlobj, CURLOPT_URL, "ftp://192.168.1.100/downloaddemo.txt");
    curl_setopt($curlobj, CURLOPT_HEADER, 0);
    curl_setopt($curlobj, CURLOPT_TIMEOUT, 300); // times out after 300s
    curl_setopt($curlobj, CURLOPT_USERPWD, "peter.zhou:123456"); // FTP "user:password"
    // The body is written to the file handle, so CURLOPT_RETURNTRANSFER is
    // not used here; curl_exec() only reports success or failure.
    curl_setopt($curlobj, CURLOPT_FILE, $outfile);
    $rtn = curl_exec($curlobj);
    fclose($outfile);
    if (!curl_errno($curlobj)) {
        // $info = curl_getinfo($curlobj);
        // print_r($info);
        // $rtn is just a boolean here, so print a status message rather than its value.
        echo "Downloaded successfully.";
    } else {
        echo 'Curl error: ' . curl_error($curlobj);
    }
    curl_close($curlobj);
}
7. cURL访问https资源
// Fetch a resource over HTTPS with certificate verification enabled.
$curlobj = curl_init();
curl_setopt($curlobj, CURLOPT_URL, "https://ajax.aspnetcdn.com/ajax/jquery.validate/1.12.0/jquery.validate.js");
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, true); // return the response instead of printing it
date_default_timezone_set('PRC'); // kept from the original snippet; it is not an HTTPS setting
// HTTPS settings. Verify that the server certificate chains to a trusted CA
// and that it was issued for the host being contacted. Turning VERIFYPEER off
// would accept any certificate and allow man-in-the-middle interception.
// If the system has no CA bundle configured, point CURLOPT_CAINFO at one
// instead of disabling verification.
curl_setopt($curlobj, CURLOPT_SSL_VERIFYPEER, true);
curl_setopt($curlobj, CURLOPT_SSL_VERIFYHOST, 2);
$output = curl_exec($curlobj);
if ($output === false) {
    // Includes certificate verification failures.
    echo 'Curl error: ' . curl_error($curlobj);
} else {
    echo $output;
}
curl_close($curlobj);