C实现PHP扩展《Fetch_Url》类数据抓取

48 篇文章 0 订阅
该扩展基于 libcurl 实现,用于抓取网页数据。

一、类文档说明

class FetchUrl{
	function __construct();

	// Returns the response body stored by the last fetch() call.
	function body();

	// Resets the per-request state so that one FetchUrl object can be reused for several requests.
	function clean();

	// Returns the last error message.
	function errmsg();

	// Returns the last error code; a value > 0 means an error occurred.
	function errcode();

	/**
	* Sends the request.
	* $url string request URL
	* $callback callable optional; accepted by the extension but not used by the current implementation
	*/
	function fetch(string $url, callable $callback = null);

	// Returns the HTTP status code of the response.
	function httpCode();

	// Returns the response cookies as an array.
	function responseCookies();

	// Returns the response header lines as an array.
	function responseHeaders();

	// Whether HTTP redirects are allowed.
	// NOTE: the extension declares the underlying allow_redirect property with a default of 1.
	function setAllowRedirect(bool $allow=false);

	// Sets the connect timeout in seconds.
	function setConnectTimeout(int $seconds=5);

	// Adds one cookie to the outgoing request.
	function setCookie(string $name, string $value);

	// Adds several cookies (name => value) to the outgoing request.
	function setCookies(array $cookies);

	// Adds request headers: either one name/value pair, or an array of name => value.
	function setHeader($name, $value = null);

	// Sets the request method (POST/GET).
	function setMethod(string $method="get");

	// Sets the data sent with a POST request.
	function setPostData(array $data);

	// Uploads in-memory data as a file: form field name, file name reported to the server, raw bytes.
	function setBinary(string $field, string $filename, string $data);

	// Uploads a file from disk: form field name, path of the file.
	function setFile(string $field, string $path);

	// Sets the read (total transfer) timeout in seconds.
	function setReadTimeout(int $seconds=60);

	function __destruct();
}

二、使用案例

<?php
/* GET request to http://www.baidu.com */
/*
$fetch_url = new FetchUrl();
$fetch_url->setAllowRedirect(true);
$fetch_url->fetch('http://www.baidu.com');
*/

$cookies = array(
	'wei_xin_wb_session'=>'value',
	'wei_xin_wxblog_authcoder'=>'value');

/* POST request with form data */
/*
$fetch_url = new FetchUrl();
$fetch_url->setMethod('post');
$data  = array(
	'step'=>2,
	'pays[1]'=>0,
	'pays[2]'=>0,
	'pays[3]'=>0
);
$fetch_url->setCookies($cookies);
$fetch_url->setPostData($data);
$fetch_url->fetch('http://test.wx.pp.cc/wb_advs/manage?inajax=1');
*/

// POST request with form data and a file upload
$fetch_url = new FetchUrl();
$fetch_url->setAllowRedirect(true);
$fetch_url->setMethod('post');
$data = array(
	'nickname'=>'挺好a',
	'wxnickname'=>'good',
	'wxusername'=>'good',
	'intro'=>'good'
);

$fetch_url->setCookies($cookies);
$fetch_url->setPostData($data);
$binary = file_get_contents("http://www.baidu.com/img/shouye_b5486898c692066bd2cbaeda86d74448.gif");
if($binary === false){
	// setBinary() only accepts strings, so a failed download would otherwise
	// silently drop the upload from the request.
	echo "failed to download the demo image, sending the request without it\n";
}else{
	$fetch_url->setBinary("picfile", "demo.jpg", $binary);// upload in-memory binary data
}
// $fetch_url->setFile("picfile", "C:/Users/Administrator/Desktop/123.jpg");// upload a file from disk

if($fetch_url->errcode() == 0){
	$fetch_url->fetch('http://wx.pp.cc/wb_ajax/addwxuser/0');
	// httpCode() returns the status as a string, hence the loose comparison.
	if($fetch_url->httpCode() == 200){
		$html = $fetch_url->body();
		echo $html;
	}
}else{
	echo "errmsg:".$fetch_url->errmsg().", errcode:".$fetch_url->errcode();
}

// Print the response header lines
print_r($fetch_url->responseHeaders());

// Clear the previous request settings so $fetch_url can be reused.
$fetch_url->clean();


$fetch_url->fetch("http://www.baidu.com");
print_r($fetch_url->responseHeaders());

三、扩展实现

1.php_fetch_url.h
/*
  +----------------------------------------------------------------------+
  | PHP Version 5                                                        |
  +----------------------------------------------------------------------+
  | Copyright (c) 1997-2012 The PHP Group                                |
  +----------------------------------------------------------------------+
  | This source file is subject to version 3.01 of the PHP license,      |
  | that is bundled with this package in the file LICENSE, and is        |
  | available through the world-wide-web at the following url:           |
  | http://www.php.net/license/3_01.txt                                  |
  | If you did not receive a copy of the PHP license and are unable to   |
  | obtain it through the world-wide-web, please send a note to          |
  | license@php.net so we can mail you a copy immediately.               |
  +----------------------------------------------------------------------+
  | Author:                                                              |
  +----------------------------------------------------------------------+
*/

/* $Id$ */

#ifndef PHP_FETCH_URL_H
#define PHP_FETCH_URL_H

extern zend_module_entry fetch_url_module_entry;
#define phpext_fetch_url_ptr &fetch_url_module_entry

/* Symbol export decoration for the current compiler/platform. */
#ifdef PHP_WIN32
#	define PHP_FETCH_URL_API __declspec(dllexport)
#elif defined(__GNUC__) && __GNUC__ >= 4
#	define PHP_FETCH_URL_API __attribute__ ((visibility("default")))
#else
#	define PHP_FETCH_URL_API
#endif

/* Flags handed to curl_global_init(): CURL_GLOBAL_WIN32 on Windows builds, CURL_GLOBAL_ALL elsewhere. */
#ifdef PHP_WIN32
  #define FETCH_CURL_MODE CURL_GLOBAL_WIN32
#else
  #define FETCH_CURL_MODE CURL_GLOBAL_ALL
#endif

#ifdef ZTS
#include "TSRM.h"
#endif

#define FETCH_CLASS_NAME  "FetchUrl"
#define FETCH_CLASS_CE    g_fetch_ce
/* Expands to the "scope, object" argument pair expected by zend_read_property()/zend_update_property*(). */
#define FETCH_THIS        Z_OBJCE_P(getThis()), getThis()
/*
 * Records an error on the current object: stores `msg` in the "errmsg"
 * property and `code` in the "errno" property.
 *
 * `msg` must be a string literal (its length is taken with sizeof).
 * The body is wrapped in do { } while(0) so the macro expands to exactly one
 * statement and stays correct inside an unbraced if/else. The second parameter
 * is deliberately not called `errno`, which <errno.h> defines as a macro.
 */
#define FETCH_ERROR(msg, code) do { \
		zend_update_property_stringl(FETCH_THIS, ZEND_STRL("errmsg"), msg, sizeof(msg)-1 TSRMLS_CC); \
		zend_update_property_long(FETCH_THIS, ZEND_STRL("errno"), code TSRMLS_CC); \
	} while(0)

PHP_MINIT_FUNCTION(fetch_url);
PHP_MSHUTDOWN_FUNCTION(fetch_url);
PHP_RINIT_FUNCTION(fetch_url);
PHP_RSHUTDOWN_FUNCTION(fetch_url);
PHP_MINFO_FUNCTION(fetch_url);

/*
 * Accessor for module globals.
 * NOTE(review): no module globals appear to be declared by the extension
 * source, so this macro looks unused - confirm before relying on it.
 */
#ifdef ZTS
#define FETCH_URL_G(v) TSRMG(fetch_url_globals_id, zend_fetch_url_globals *, v)
#else
#define FETCH_URL_G(v) (fetch_url_globals.v)
#endif

#endif	/* PHP_FETCH_URL_H */


2.fetch_url.c
/*
  +----------------------------------------------------------------------+
  | PHP Version 5                                                        |
  +----------------------------------------------------------------------+
  | Copyright (c) 1997-2012 The PHP Group                                |
  +----------------------------------------------------------------------+
  | This source file is subject to version 3.01 of the PHP license,      |
  | that is bundled with this package in the file LICENSE, and is        |
  | available through the world-wide-web at the following url:           |
  | http://www.php.net/license/3_01.txt                                  |
  | If you did not receive a copy of the PHP license and are unable to   |
  | obtain it through the world-wide-web, please send a note to          |
  | license@php.net so we can mail you a copy immediately.               |
  +----------------------------------------------------------------------+
  | Author:                                                              |
  +----------------------------------------------------------------------+
*/

/* $Id$ */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "php.h"
#include "php_ini.h"
#include "main/SAPI.h"
#include "Zend/zend_interfaces.h"
#include "ext/standard/info.h"
#include "ext/standard/php_var.h"
#include "ext/standard/php_string.h"
#include "ext/standard/php_smart_str.h"
#include "ext/standard/url.h"
#include "ext/pcre/php_pcre.h"
#include "php_fetch_url.h"
#include <curl/curl.h>

zend_class_entry *g_fetch_ce;

/*
 * Argument metadata for the FetchUrl methods. The last argument of
 * ZEND_BEGIN_ARG_INFO_EX is the number of required parameters; it is kept in
 * step with what each method's zend_parse_parameters() call demands.
 */

/* Methods that take no arguments. */
ZEND_BEGIN_ARG_INFO_EX(void_arginfo, 0, 0, 0)
ZEND_END_ARG_INFO()

/* fetch(url [, callback]) */
ZEND_BEGIN_ARG_INFO_EX(fetch_arginfo, 0, 0, 1)
	ZEND_ARG_INFO(0, url)
	ZEND_ARG_INFO(0, callback)
ZEND_END_ARG_INFO()

/* responseCookies(): the declared argument is not read by the method. */
ZEND_BEGIN_ARG_INFO_EX(responseCookies_arginfo, 0, 0, 0)
	ZEND_ARG_INFO(0, all)
ZEND_END_ARG_INFO()

/* responseHeaders(): the declared argument is not read by the method. */
ZEND_BEGIN_ARG_INFO_EX(responseHeaders_arginfo, 0, 0, 0)
	ZEND_ARG_INFO(0, parse)
ZEND_END_ARG_INFO()

/* setAllowRedirect(allow): the method parses exactly one argument. */
ZEND_BEGIN_ARG_INFO_EX(setAllowRedirect_arginfo, 0, 0, 1)
	ZEND_ARG_INFO(0, allow)
ZEND_END_ARG_INFO()

/* setConnectTimeout(seconds): the value is handed to CURLOPT_CONNECTTIMEOUT, which is in seconds. */
ZEND_BEGIN_ARG_INFO_EX(setConnectTimeout_arginfo, 0, 0, 1)
	ZEND_ARG_INFO(0, seconds)
ZEND_END_ARG_INFO()

/* setCookie(name, value) */
ZEND_BEGIN_ARG_INFO_EX(setCookie_arginfo, 0, 0, 2)
	ZEND_ARG_INFO(0, name)
	ZEND_ARG_INFO(0, value)
ZEND_END_ARG_INFO()

/* setCookies(cookies) */
ZEND_BEGIN_ARG_INFO_EX(setCookies_arginfo, 0, 0, 1)
	ZEND_ARG_INFO(0, cookies)
ZEND_END_ARG_INFO()

/* setHeader(name [, value]): the value is optional because `name` may be an array of name => value. */
ZEND_BEGIN_ARG_INFO_EX(setHeader_arginfo, 0, 0, 1)
	ZEND_ARG_INFO(0, name)
	ZEND_ARG_INFO(0, value)
ZEND_END_ARG_INFO()

/* setMethod(method) */
ZEND_BEGIN_ARG_INFO_EX(setMethod_arginfo, 0, 0, 1)
	ZEND_ARG_INFO(0, method)
ZEND_END_ARG_INFO()

/*
 * setPostData(post_data). The identifier lacks the _arginfo suffix used by the
 * other tables; it is kept because the method table refers to it by this name.
 * The second declared argument is not parsed by the method.
 */
ZEND_BEGIN_ARG_INFO_EX(setPostData, 0, 0, 1)
	ZEND_ARG_INFO(0, post_data)
	ZEND_ARG_INFO(0, multil)
ZEND_END_ARG_INFO()

/* setReadTimeout(seconds): the value is handed to CURLOPT_TIMEOUT, which is in seconds. */
ZEND_BEGIN_ARG_INFO_EX(setReadTimeout_arginfo, 0, 0, 1)
	ZEND_ARG_INFO(0, seconds)
ZEND_END_ARG_INFO()

/* setBinary(post_filed, uploadfile_name, binary_data) */
ZEND_BEGIN_ARG_INFO_EX(setBinary_arginfo, 0, 0, 3)
	ZEND_ARG_INFO(0, post_filed)
	ZEND_ARG_INFO(0, uploadfile_name)
	ZEND_ARG_INFO(0, binary_data)
ZEND_END_ARG_INFO()

/* setFile(post_filed, path) */
ZEND_BEGIN_ARG_INFO_EX(setFile_arginfo, 0, 0, 2)
	ZEND_ARG_INFO(0, post_filed)
	ZEND_ARG_INFO(0, path)
ZEND_END_ARG_INFO()

/* FetchUrl::__construct(): intentionally empty; it performs no initialisation of its own. */
ZEND_METHOD(fetch_url, __construct){

}

ZEND_METHOD(fetch_url, setBinary){
	zval *input_filed_name, *binary_data, *uploadfile_name;
	zval *g_binary_data, *item_data;

	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zzz", &input_filed_name, &uploadfile_name, &binary_data) == FAILURE){
		RETURN_FALSE;
	}

	if(Z_TYPE_P(input_filed_name) != IS_STRING || Z_TYPE_P(uploadfile_name) != IS_STRING || Z_TYPE_P(binary_data) != IS_STRING){
		RETURN_FALSE;
	}

	g_binary_data = zend_read_property(FETCH_THIS, ZEND_STRL("binary_data"), 0 TSRMLS_CC);

	if(Z_TYPE_P(g_binary_data) == IS_NULL){
		MAKE_STD_ZVAL(g_binary_data);
		array_init(g_binary_data);
	}

	MAKE_STD_ZVAL(item_data);
	array_init(item_data);

	add_index_stringl(item_data, 0, Z_STRVAL_P(uploadfile_name), Z_STRLEN_P(uploadfile_name), 1);
	add_index_stringl(item_data, 1, Z_STRVAL_P(binary_data), Z_STRLEN_P(binary_data), 1);

	add_assoc_zval(g_binary_data, Z_STRVAL_P(input_filed_name), item_data);

	zend_update_property(FETCH_THIS, ZEND_STRL("binary_data"), g_binary_data TSRMLS_CC);
}

ZEND_METHOD(fetch_url, setFile){
	zval *file_path, *input_filed_name;
	zval *upload_filepaths;

	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zz", &input_filed_name, &file_path) == FAILURE){
		RETURN_FALSE;
	}

	if(Z_TYPE_P(file_path) != IS_STRING || Z_TYPE_P(input_filed_name) != IS_STRING){
		RETURN_FALSE;
	}

	upload_filepaths = zend_read_property(FETCH_THIS, ZEND_STRL("upload_filepaths"), 0 TSRMLS_CC);

	if(Z_TYPE_P(upload_filepaths) == IS_NULL){
		MAKE_STD_ZVAL(upload_filepaths);
		array_init(upload_filepaths);
	}

	add_assoc_stringl(upload_filepaths, Z_STRVAL_P(input_filed_name), Z_STRVAL_P(file_path), Z_STRLEN_P(file_path), 1);

	zend_update_property(FETCH_THIS, ZEND_STRL("upload_filepaths"), upload_filepaths TSRMLS_CC);
}

/* FetchUrl::body(): returns a copy of the string held in the "body" property. */
ZEND_METHOD(fetch_url, body){
	zval *stored_body = zend_read_property(FETCH_THIS, ZEND_STRL("body"), 0 TSRMLS_CC);

	/* The final 1 asks the engine to duplicate the bytes for the return value. */
	RETVAL_STRINGL(Z_STRVAL_P(stored_body), Z_STRLEN_P(stored_body), 1);
	return;
}

/*
 * FetchUrl::clean()
 *
 * Resets the per-request state (response data, error state, cookies, request
 * headers, POST data, queued uploads, method) so the object can be reused.
 * The timeout and redirect settings are left untouched.
 */
ZEND_METHOD(fetch_url, clean){
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("body"), ZEND_STRL("") TSRMLS_CC);
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("errmsg"), ZEND_STRL("") TSRMLS_CC);
	zend_update_property_long(FETCH_THIS, ZEND_STRL("errno"), 0 TSRMLS_CC);
	zend_update_property_null(FETCH_THIS, ZEND_STRL("httpCode") TSRMLS_CC);
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("cookies"), ZEND_STRL("") TSRMLS_CC);
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("headers"), ZEND_STRL("") TSRMLS_CC);
	/*
	 * send_headers is an array-or-NULL property: setHeader() only creates the
	 * array when it finds NULL. Resetting it to an empty string would make the
	 * next setHeader() call treat a string zval as an array.
	 */
	zend_update_property_null(FETCH_THIS, ZEND_STRL("send_headers") TSRMLS_CC);
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("data"), ZEND_STRL("") TSRMLS_CC);
	zend_update_property_null(FETCH_THIS, ZEND_STRL("binary_data") TSRMLS_CC);
	zend_update_property_null(FETCH_THIS, ZEND_STRL("upload_filepaths") TSRMLS_CC);
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("method"), ZEND_STRL("get") TSRMLS_CC);
}

ZEND_METHOD(fetch_url, errmsg){
	zval *errmsg = zend_read_property(FETCH_THIS, ZEND_STRL("errmsg"), 0 TSRMLS_CC);

	RETURN_STRINGL(Z_STRVAL_P(errmsg), Z_STRLEN_P(errmsg), 1);
}

/* FetchUrl::errcode(): returns the integer held in the "errno" property. */
ZEND_METHOD(fetch_url, errcode){
	zval *stored_code;

	stored_code = zend_read_property(FETCH_THIS, ZEND_STRL("errno"), 0 TSRMLS_CC);

	RETVAL_LONG(Z_LVAL_P(stored_code));
	return;
}

/*
 * libcurl write/header callback: appends the received chunk to the smart_str
 * passed as user data and reports the whole chunk as consumed.
 */
static size_t read_data(void *buffer, size_t size, size_t nmemb, void *data){
	const size_t chunk_len = size*nmemb;
	smart_str *sink = (smart_str*)data;

	smart_str_appendl(sink, buffer, chunk_len);

	return chunk_len;
}

ZEND_METHOD(fetch_url, fetch){
	CURLcode return_code;
	CURL *curl_handler;
	struct curl_slist *http_headers = NULL;
	zval *url, *callback, *cookies, *connect_timeout, *allow_redirect, *method, *post_data, *read_timeout,
		 *send_headers, *err_no, *errmsg, *binary_data, *upload_filepaths;
	smart_str body_str = {0}, header_str = {0};
	struct curl_httppost *post = NULL;
	struct curl_httppost *last = NULL;

	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z|z", &url, &callback) == FAILURE){
		RETURN_FALSE;
	}

	err_no = zend_read_property(FETCH_THIS, ZEND_STRL("errno"), 0 TSRMLS_CC);
	errmsg  = zend_read_property(FETCH_THIS, ZEND_STRL("errmsg"), 0 TSRMLS_CC);

	if(Z_LVAL_P(err_no) > 0){
		php_printf("errno:%d, errmsg:%s", Z_LVAL_P(err_no), Z_STRVAL_P(errmsg));
		RETURN_FALSE;
	}

	if(Z_TYPE_P(url) != IS_STRING){
		FETCH_ERROR("fetch url must be string.", 500);
		RETURN_FALSE;
	}

	return_code = curl_global_init(FETCH_CURL_MODE);

	if(return_code != CURLE_OK){
		curl_global_cleanup();
		FETCH_ERROR("curl init failed.", 500);
		RETURN_FALSE;
	}

	curl_handler = curl_easy_init();

	if(NULL == curl_handler){
		curl_easy_cleanup(curl_handler);
		curl_global_cleanup();
		FETCH_ERROR("get curl handler failed.", 500);
		RETURN_FALSE;
	}

	cookies = zend_read_property(FETCH_THIS, ZEND_STRL("cookies"), 0 TSRMLS_CC);
	connect_timeout = zend_read_property(FETCH_THIS, ZEND_STRL("connect_timeout"), 0 TSRMLS_CC);
	read_timeout = zend_read_property(FETCH_THIS, ZEND_STRL("read_timeout"), 0 TSRMLS_CC);
	allow_redirect = zend_read_property(FETCH_THIS, ZEND_STRL("allow_redirect"), 0 TSRMLS_CC);
	method = zend_read_property(FETCH_THIS, ZEND_STRL("method"), 0 TSRMLS_CC);
	post_data = zend_read_property(FETCH_THIS, ZEND_STRL("data"), 0 TSRMLS_CC);
	send_headers = zend_read_property(FETCH_THIS, ZEND_STRL("send_headers"), 0 TSRMLS_CC);
	binary_data = zend_read_property(FETCH_THIS, ZEND_STRL("binary_data"), 0 TSRMLS_CC);
	upload_filepaths = zend_read_property(FETCH_THIS, ZEND_STRL("upload_filepaths"), 0 TSRMLS_CC);


	curl_easy_setopt(curl_handler, CURLOPT_URL, Z_STRVAL_P(url));
	curl_easy_setopt(curl_handler, CURLOPT_COOKIE, Z_STRVAL_P(cookies));
	curl_easy_setopt(curl_handler, CURLOPT_WRITEFUNCTION, &read_data);
	curl_easy_setopt(curl_handler, CURLOPT_WRITEDATA, &body_str);
	curl_easy_setopt(curl_handler, CURLOPT_HEADERDATA, &header_str);
	curl_easy_setopt(curl_handler, CURLOPT_HEADERFUNCTION, &read_data);
	curl_easy_setopt(curl_handler, CURLOPT_TIMEOUT, Z_LVAL_P(read_timeout));
	curl_easy_setopt(curl_handler, CURLOPT_CONNECTTIMEOUT, Z_LVAL_P(connect_timeout));
	curl_easy_setopt(curl_handler, CURLOPT_AUTOREFERER, Z_LVAL_P(allow_redirect));
	curl_easy_setopt(curl_handler, CURLOPT_MAXREDIRS, 5);

	if(strcmp(Z_STRVAL_P(method), "get") == 0){
		curl_easy_setopt(curl_handler, CURLOPT_HTTPGET, 1);
	}else{
		if(Z_TYPE_P(binary_data) != IS_NULL || Z_TYPE_P(upload_filepaths) != IS_NULL){
			zval *delim, *post_arr, *delim_equal;

			MAKE_STD_ZVAL(delim_equal);
			MAKE_STD_ZVAL(delim);
			MAKE_STD_ZVAL(post_arr);

			ZVAL_STRING(delim, "&", 1);
			ZVAL_STRING(delim_equal, "=", 1);
			array_init(post_arr);
			php_explode(delim, post_data, post_arr, LONG_MAX);

			for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(post_arr));
				zend_hash_has_more_elements(Z_ARRVAL_P(post_arr)) == SUCCESS;
				zend_hash_move_forward(Z_ARRVAL_P(post_arr))){
				zval **data_str;
				zval *temp_data, **post_data_name, **post_data_value, *temp_zval;
				if(zend_hash_get_current_data(Z_ARRVAL_P(post_arr), (void**)&data_str) == FAILURE){
					continue;
				}

				if(Z_STRLEN_PP(data_str) > 0){
					MAKE_STD_ZVAL(temp_data);
					array_init(temp_data);
					temp_zval = *data_str;

					php_explode(delim_equal, temp_zval, temp_data, LONG_MAX);
					
					zend_hash_index_find(Z_ARRVAL_P(temp_data), 0, (void**)&post_data_name);
					zend_hash_index_find(Z_ARRVAL_P(temp_data), 1, (void**)&post_data_value);

					curl_formadd(&post, &last, CURLFORM_COPYNAME, Z_STRVAL_PP(post_data_name), CURLFORM_COPYCONTENTS, Z_STRVAL_PP(post_data_value), CURLFORM_END);

					zval_dtor(temp_data);
				}
			}

			zval_dtor(post_arr);
			zval_dtor(delim);
			zval_dtor(delim_equal);

			if(Z_TYPE_P(binary_data) != IS_NULL)
			for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(binary_data));
				zend_hash_has_more_elements(Z_ARRVAL_P(binary_data)) == SUCCESS;
				zend_hash_move_forward(Z_ARRVAL_P(binary_data))){
				char *input_file_name;
				uint input_file_name_len;
				ulong idx;
				zval **item_data;
				zval **upload_binary_data;
				zval **uploadfile_name;

				if(zend_hash_get_current_key_ex(Z_ARRVAL_P(binary_data), &input_file_name, &input_file_name_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
					continue;
				}

				if(zend_hash_get_current_data(Z_ARRVAL_P(binary_data), (void**)&item_data) == FAILURE){
					continue;
				} 

				zend_hash_index_find(Z_ARRVAL_PP(item_data), 0, (void**)&uploadfile_name);
				zend_hash_index_find(Z_ARRVAL_PP(item_data), 1, (void**)&upload_binary_data);

				curl_formadd(&post, 
							&last, 
							CURLFORM_COPYNAME, 
							input_file_name, 
							CURLFORM_BUFFER, 
							Z_STRVAL_PP(uploadfile_name), //todo:setBinary需要传递文件名参数
							CURLFORM_BUFFERPTR,
							Z_STRVAL_PP(upload_binary_data), 
							CURLFORM_BUFFERLENGTH, 
							Z_STRLEN_PP(upload_binary_data), 
							CURLFORM_END
				);
			}

			if(Z_TYPE_P(upload_filepaths) != IS_NULL)
			for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(upload_filepaths));
				zend_hash_has_more_elements(Z_ARRVAL_P(upload_filepaths)) == SUCCESS;
				zend_hash_move_forward(Z_ARRVAL_P(upload_filepaths))){
				char *input_filed_name;
				uint input_file_name_len;
				ulong idx;
				zval **file_path;

				if(zend_hash_get_current_key_ex(Z_ARRVAL_P(upload_filepaths), &input_filed_name, &input_file_name_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
					continue;
				}

				if(zend_hash_get_current_data(Z_ARRVAL_P(upload_filepaths), (void**)&file_path) == FAILURE){
					continue;
				}

				curl_formadd(&post, &last, CURLFORM_COPYNAME, input_filed_name, CURLFORM_FILE, Z_STRVAL_PP(file_path), CURLFORM_END);
			}

			curl_easy_setopt(curl_handler, CURLOPT_HTTPPOST, post);
			http_headers = curl_slist_append(http_headers, estrdup("Expect:"));//防止出现HTTP 100跳转
		}else{
			curl_easy_setopt(curl_handler, CURLOPT_POSTFIELDS, Z_STRVAL_P(post_data));
			curl_easy_setopt(curl_handler, CURLOPT_POST, 1);
		}
	}

	if(Z_TYPE_P(send_headers) == IS_ARRAY && zend_hash_num_elements(Z_ARRVAL_P(send_headers)) > 0){

		for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(send_headers));
			zend_hash_has_more_elements(Z_ARRVAL_P(send_headers)) == SUCCESS;
			zend_hash_move_forward(Z_ARRVAL_P(send_headers))){
			char *header_key;
			uint header_keylen;
			ulong idx;
			zval **header_val;
			smart_str impl_headers = {0};

			if(zend_hash_get_current_key_ex(Z_ARRVAL_P(send_headers), &header_key, &header_keylen, &idx, 0, NULL) != HASH_KEY_IS_STRING){
				continue;
			}

			if(zend_hash_get_current_data(Z_ARRVAL_P(send_headers), (void**)&header_val) == FAILURE){
				continue;
			}

			smart_str_appendl(&impl_headers, header_key, header_keylen);
			smart_str_appendl(&impl_headers, ": ", 2);
			smart_str_appendl(&impl_headers, Z_STRVAL_PP(header_val), Z_STRLEN_PP(header_val));
			http_headers = curl_slist_append(http_headers, impl_headers.c);
		}
	}

	curl_easy_setopt(curl_handler, CURLOPT_HTTPHEADER, http_headers);

	curl_easy_perform(curl_handler);

	curl_slist_free_all(http_headers);
	curl_formfree(post);
	curl_easy_cleanup(curl_handler);
	curl_global_cleanup();

	smart_str_0(&body_str);
	smart_str_0(&header_str);

	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("headers"), header_str.c, header_str.len TSRMLS_CC);
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("body"), body_str.c, body_str.len TSRMLS_CC);
}

ZEND_METHOD(fetch_url, httpCode){
	pcre_cache_entry *pce;
	zval *headers;
	zval *result_match, *match_long, **http_code;
	char *regex = estrdup("/^HTTP\\/1\\.1\\s(.*)\\sOK/");

	if((pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC)) == NULL){
		RETURN_FALSE;
	}

	MAKE_STD_ZVAL(result_match);
	MAKE_STD_ZVAL(match_long);

	headers = zend_read_property(FETCH_THIS, ZEND_STRL("headers"), 0 TSRMLS_CC);

	php_pcre_match_impl(pce, Z_STRVAL_P(headers), Z_STRLEN_P(headers), match_long, result_match, 0, 0, 0, 0 TSRMLS_CC);

	if(Z_LVAL_P(match_long) > 0){
		if(zend_hash_index_find(Z_ARRVAL_P(result_match), 1, (void**)&http_code) == FAILURE){
			RETURN_FALSE;
		}else{
			RETURN_STRINGL(Z_STRVAL_PP(http_code), Z_STRLEN_PP(http_code), 0);
		}
	}else{
		RETURN_FALSE;
	}
}

ZEND_METHOD(fetch_url, responseCookies){
	pcre_cache_entry *pce;
	zval *headers, *result_match, *match_long;
	char *regex = estrdup("/Set-Cookie:\\s(.*?);/");

	headers = zend_read_property(FETCH_THIS, ZEND_STRL("headers"), 0 TSRMLS_CC);

	array_init(return_value);

	if(Z_STRLEN_P(headers) > 0){
		if((pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC)) == NULL){
			RETURN_NULL();
		}

		MAKE_STD_ZVAL(result_match);
		MAKE_STD_ZVAL(match_long);

		//void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
		php_pcre_match_impl(pce, Z_STRVAL_P(headers), Z_STRLEN_P(headers), match_long, result_match, 1, 0, 0, 0 TSRMLS_CC);
		
		if(Z_LVAL_P(match_long) > 0){
			zval **result;
			HashTable *result_ht;
			char *found = NULL;
			long found_offset;
			char *cookie_name;
			char *cookie_value;

			if(zend_hash_index_find(Z_ARRVAL_P(result_match), 1, (void**)&result) != FAILURE){
				result_ht = Z_ARRVAL_PP(result);

				for(zend_hash_internal_pointer_reset(result_ht);
					zend_hash_has_more_elements(result_ht) == SUCCESS;
					zend_hash_move_forward(result_ht)){
					zval **tmpzval;

					if(zend_hash_get_current_data(result_ht, (void**)&tmpzval) == FAILURE){
						continue;
					}

					found = php_memnstr(Z_STRVAL_PP(tmpzval), "=", 1, Z_STRVAL_PP(tmpzval) + Z_STRLEN_PP(tmpzval));
					found_offset = found - Z_STRVAL_PP(tmpzval);

					cookie_name = estrndup(Z_STRVAL_PP(tmpzval), found_offset);
					cookie_value= estrndup(found+1, strlen(found)-1);

					add_assoc_stringl(return_value, cookie_name, cookie_value, strlen(cookie_value), 1);

					efree(cookie_name);
					efree(cookie_value);
				}
			}
		}
	}else{
		RETURN_NULL();
	}
}

ZEND_METHOD(fetch_url, responseHeaders){
	zval *headers, *delim;
	uint idx;

	headers = zend_read_property(FETCH_THIS, ZEND_STRL("headers"), 0 TSRMLS_CC);

	MAKE_STD_ZVAL(delim);

	array_init(return_value);

	ZVAL_STRING(delim, "\r\n", 1);

	php_explode(delim, headers, return_value, LONG_MAX);

	idx = zend_hash_num_elements(Z_ARRVAL_P(return_value));

	zend_hash_index_del(Z_ARRVAL_P(return_value), idx-1);

	zend_hash_index_del(Z_ARRVAL_P(return_value), idx-2);

	zval_dtor(delim);
}

ZEND_METHOD(fetch_url, setAllowRedirect){
	zval *allow;

	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &allow) == FAILURE){
		RETURN_FALSE;
	}

	convert_to_long(allow);

	zend_update_property_long(FETCH_THIS, ZEND_STRL("allow_redirect"), Z_LVAL_P(allow) TSRMLS_CC);
}

/*
 * FetchUrl::setConnectTimeout(int seconds)
 *
 * Stores the connect timeout in the "connect_timeout" property.
 * Returns FALSE when the argument is missing or is not numeric.
 */
ZEND_METHOD(fetch_url, setConnectTimeout){
	long connect_timeout;

	/* "l" converts into a local; converting the zval in place would change the caller's variable. */
	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &connect_timeout) == FAILURE){
		RETURN_FALSE;
	}

	zend_update_property_long(FETCH_THIS, ZEND_STRL("connect_timeout"), connect_timeout TSRMLS_CC);
}

/*
 * FetchUrl::setCookie(string name, string value)
 *
 * Appends "name=value;" to the cookie string held in the "cookies" property.
 * Returns FALSE when an argument is missing or is not a string.
 */
ZEND_METHOD(fetch_url, setCookie){
	zval *zval_cookies;
	zval *cookie_name, *cookie_value;
	smart_str impl_cookies = {0};

	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zz", &cookie_name, &cookie_value) == FAILURE){
		RETURN_FALSE;
	}

	if(Z_TYPE_P(cookie_name) != IS_STRING || Z_TYPE_P(cookie_value) != IS_STRING){
		RETURN_FALSE;
	}

	zval_cookies = zend_read_property(FETCH_THIS, ZEND_STRL("cookies"), 0 TSRMLS_CC);

	/* Start from the cookies that are already queued. */
	if(Z_TYPE_P(zval_cookies) == IS_STRING){
		smart_str_appendl(&impl_cookies, Z_STRVAL_P(zval_cookies), Z_STRLEN_P(zval_cookies));
	}
	smart_str_appendl(&impl_cookies, Z_STRVAL_P(cookie_name), Z_STRLEN_P(cookie_name));
	smart_str_appendc(&impl_cookies, '=');
	smart_str_appendl(&impl_cookies, Z_STRVAL_P(cookie_value), Z_STRLEN_P(cookie_value));
	smart_str_appendc(&impl_cookies, ';');
	smart_str_0(&impl_cookies);

	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("cookies"), impl_cookies.c, impl_cookies.len TSRMLS_CC);

	/* The property stores its own copy of the bytes. */
	smart_str_free(&impl_cookies);
}

ZEND_METHOD(fetch_url, setCookies){
	zval *zval_cookies;
	zval *cookie_array;
	smart_str impl_cookies = {0};
	HashTable *cookies_ht;

	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &cookie_array) == FAILURE){
		RETURN_FALSE;
	}

	if(Z_TYPE_P(cookie_array) != IS_ARRAY){
		RETURN_FALSE;
	}

	zval_cookies = zend_read_property(FETCH_THIS, ZEND_STRL("cookies"), 0 TSRMLS_CC);
	cookies_ht = Z_ARRVAL_P(cookie_array);

	smart_str_appendl(&impl_cookies, Z_STRVAL_P(zval_cookies), Z_STRLEN_P(zval_cookies));

	for(zend_hash_internal_pointer_reset(cookies_ht);
		zend_hash_has_more_elements(cookies_ht) == SUCCESS;
		zend_hash_move_forward(cookies_ht))
	{
		zval **value;
		char *key;
		uint key_len;
		ulong idx;

		if(zend_hash_get_current_key_ex(cookies_ht, &key, &key_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
			continue;
		}

		if(zend_hash_get_current_data(cookies_ht, (void**)&value) == FAILURE){
			continue;
		}

		convert_to_string(*value);

		if(Z_TYPE_PP(value) != IS_STRING){
			continue;
		}

		smart_str_appendl(&impl_cookies, key, key_len-1);
		smart_str_appendl(&impl_cookies, "=", 1);
		smart_str_appendl(&impl_cookies, Z_STRVAL_PP(value), Z_STRLEN_PP(value));
		smart_str_appendl(&impl_cookies, ";", 1);
	}

	php_url_decode(impl_cookies.c, impl_cookies.len);
	smart_str_0(&impl_cookies);
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("cookies"), impl_cookies.c, impl_cookies.len TSRMLS_CC);
}

/*
 * FetchUrl::setHeader(mixed name [, string value])
 *
 * Queues request headers in the "send_headers" property. Two call forms:
 *   setHeader(array headers)  - every string-keyed entry is added as name => value
 *   setHeader(string name, string value)
 * Any other combination raises an E_WARNING ("param error.").
 *
 * Returns FALSE when the arguments cannot be parsed.
 */
ZEND_METHOD(fetch_url, setHeader){
	zval *headers, *value=NULL, *send_headers;
	int created = 0;

	/* "z|z" needs one pointer per possible argument. */
	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z|z", &headers, &value) == FAILURE){
		RETURN_FALSE;
	}

	send_headers = zend_read_property(FETCH_THIS, ZEND_STRL("send_headers"), 0 TSRMLS_CC);

	/* Anything that is not already an array is replaced by a fresh array. */
	if(Z_TYPE_P(send_headers) != IS_ARRAY){
		MAKE_STD_ZVAL(send_headers);
		array_init(send_headers);
		created = 1;
	}

	if(Z_TYPE_P(headers) == IS_ARRAY){
		for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(headers));
			zend_hash_has_more_elements(Z_ARRVAL_P(headers)) == SUCCESS;
			zend_hash_move_forward(Z_ARRVAL_P(headers))){
			char* key;
			uint key_len;
			ulong idx;
			zval **tmpzval;
			zval value_copy;

			if(zend_hash_get_current_key_ex(Z_ARRVAL_P(headers), &key, &key_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
				continue;
			}

			if(zend_hash_get_current_data(Z_ARRVAL_P(headers), (void**)&tmpzval) == FAILURE){
				continue;
			}

			/* Values may be non-strings (e.g. integers); stringify a private copy instead of the caller's element. */
			value_copy = **tmpzval;
			zval_copy_ctor(&value_copy);
			INIT_PZVAL(&value_copy);
			convert_to_string(&value_copy);

			add_assoc_stringl(send_headers, key, Z_STRVAL(value_copy), Z_STRLEN(value_copy), 1);

			zval_dtor(&value_copy);
		}
	}else if(Z_TYPE_P(headers) == IS_STRING && value != NULL && Z_TYPE_P(value) == IS_STRING){
		add_assoc_stringl(send_headers, Z_STRVAL_P(headers), Z_STRVAL_P(value), Z_STRLEN_P(value), 1);
	}else{
		zend_error(E_WARNING, "param error.");
	}

	zend_update_property(FETCH_THIS, ZEND_STRL("send_headers"), send_headers TSRMLS_CC);

	/* The property now holds its own reference; drop ours so a newly created array is not leaked. */
	if(created){
		zval_ptr_dtor(&send_headers);
	}
}

/*
 * FetchUrl::setMethod(string method)
 *
 * Selects the request method. "get" and "post" are accepted in any letter
 * case and stored in lower case in the "method" property.
 *
 * Returns TRUE on success. Returns FALSE when the argument is missing or not a
 * string, or - after recording error 404 "Not support method." - when the
 * method is neither GET nor POST.
 */
ZEND_METHOD(fetch_url, setMethod){
	zval *zval_method;
	const char *method;

	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &zval_method) == FAILURE){
		RETURN_FALSE;
	}

	if(Z_TYPE_P(zval_method) != IS_STRING){
		RETURN_FALSE;
	}

	/* Compare case-insensitively: lower-casing the buffer in place would rewrite the caller's string. */
	method = Z_STRVAL_P(zval_method);

	if(strcasecmp(method, "get") == 0){
		zend_update_property_stringl(FETCH_THIS, ZEND_STRL("method"), ZEND_STRL("get") TSRMLS_CC);
	}else if(strcasecmp(method, "post") == 0){
		zend_update_property_stringl(FETCH_THIS, ZEND_STRL("method"), ZEND_STRL("post") TSRMLS_CC);
	}else{
		FETCH_ERROR("Not support method.", 404);
		RETURN_FALSE;
	}

	RETURN_TRUE;
}

ZEND_METHOD(fetch_url, setPostData){
	zval *data, *post_data;
	HashTable *post_data_ht;
	smart_str temp = {0};

	if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &post_data) == FAILURE){
		RETURN_FALSE;
	}

	data = zend_read_property(FETCH_THIS, ZEND_STRL("data"), 0 TSRMLS_CC);

	if(Z_TYPE_P(post_data) != IS_ARRAY){
		zend_error(E_WARNING, "post data must be array.");
		RETURN_FALSE;
	}

	post_data_ht = Z_ARRVAL_P(post_data);
	smart_str_appendl(&temp, Z_STRVAL_P(data), Z_STRLEN_P(data));

	for(zend_hash_internal_pointer_reset(post_data_ht);
		zend_hash_has_more_elements(post_data_ht) == SUCCESS;
		zend_hash_move_forward(post_data_ht)){
		zval **current_data;
		char *key;
		uint key_len;
		ulong idx;

		if(zend_hash_get_current_key_ex(post_data_ht, &key, &key_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
			continue;
		}

		if(zend_hash_get_current_data(post_data_ht, (void**)¤t_data) == FAILURE){
			continue;
		}

		convert_to_string(*current_data);

		smart_str_appendl(&temp, key, key_len-1);
		smart_str_appendc(&temp, '=');
		smart_str_appendl(&temp, Z_STRVAL_PP(current_data), Z_STRLEN_PP(current_data));
		smart_str_appendc(&temp, '&');
	}

	smart_str_0(&temp);
	zend_update_property_stringl(FETCH_THIS, ZEND_STRL("data"), temp.c, temp.len TSRMLS_CC);
}

/*
 * FetchUrl::setReadTimeout(int seconds)
 *
 * Stores the value in the "read_timeout" property. Only a genuine integer is
 * accepted: any other type raises an E_WARNING and yields FALSE.
 */
ZEND_METHOD(fetch_url, setReadTimeout){
	zval *timeout_arg;

	if(FAILURE == zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &timeout_arg)){
		RETURN_FALSE;
	}

	if(IS_LONG == Z_TYPE_P(timeout_arg)){
		zend_update_property_long(FETCH_THIS, ZEND_STRL("read_timeout"), Z_LVAL_P(timeout_arg) TSRMLS_CC);
		return;
	}

	zend_error(E_WARNING, "readtimeout must be integer.");
	RETURN_FALSE;
}

/* FetchUrl::__destruct(): intentionally empty. */
ZEND_METHOD(fetch_url, __destruct){}

/*
 * Method table of the FetchUrl class: one entry per method, pairing the
 * ZEND_METHOD implementation with its argument metadata and visibility.
 * The all-NULL entry terminates the table.
 */
static zend_function_entry fetch_url_method[] = {
	ZEND_ME(fetch_url, __construct, void_arginfo, ZEND_ACC_CTOR|ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setBinary, setBinary_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setFile, setFile_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, body, void_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, clean, void_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, errmsg, void_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, errcode, void_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, fetch, fetch_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, httpCode, void_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, responseCookies, responseCookies_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, responseHeaders, responseHeaders_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setAllowRedirect, setAllowRedirect_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setConnectTimeout, setConnectTimeout_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setCookie, setCookie_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setCookies, setCookies_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setHeader, setHeader_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setMethod, setMethod_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setPostData, setPostData, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, setReadTimeout, setReadTimeout_arginfo, ZEND_ACC_PUBLIC)
	ZEND_ME(fetch_url, __destruct, void_arginfo, ZEND_ACC_DTOR|ZEND_ACC_PUBLIC)
	{NULL, NULL, NULL}
};
/* If you declare any globals in php_fetch_url.h uncomment this:
ZEND_DECLARE_MODULE_GLOBALS(fetch_url)
*/

/* True global resources - no need for thread safety here */
/* NOTE(review): le_fetch_url is not referenced anywhere in the visible source - confirm it can be removed. */
static int le_fetch_url;

/* {{{ fetch_url_functions[]
 *
 * Every user visible function must have an entry in fetch_url_functions[].
 * The extension exposes no plain functions (only the FetchUrl class), so the
 * table holds nothing but its terminator.
 */
const zend_function_entry fetch_url_functions[] = {
	PHP_FE_END	/* Must be the last line in fetch_url_functions[] */
};
/* }}} */

/* {{{ fetch_url_module_entry
 *
 * Module descriptor: extension name, function table, lifecycle hooks
 * (module init/shutdown, request init/shutdown, info) and version string.
 */
zend_module_entry fetch_url_module_entry = {
#if ZEND_MODULE_API_NO >= 20010901
	STANDARD_MODULE_HEADER,
#endif
	"fetch_url",
	fetch_url_functions,
	PHP_MINIT(fetch_url),
	PHP_MSHUTDOWN(fetch_url),
	PHP_RINIT(fetch_url),		/* Replace with NULL if there's nothing to do at request start */
	PHP_RSHUTDOWN(fetch_url),	/* Replace with NULL if there's nothing to do at request end */
	PHP_MINFO(fetch_url),
#if ZEND_MODULE_API_NO >= 20010901
	"0.1", /* Replace with version number for your extension */
#endif
	STANDARD_MODULE_PROPERTIES
};
/* }}} */

/* Emits the get_module() entry point when the extension is built as a loadable module. */
#ifdef COMPILE_DL_FETCH_URL
ZEND_GET_MODULE(fetch_url)
#endif

/* {{{ PHP_INI
 */
/* Remove comments and fill if you need to have entries in php.ini
PHP_INI_BEGIN()
    STD_PHP_INI_ENTRY("fetch_url.global_value",      "42", PHP_INI_ALL, OnUpdateLong, global_value, zend_fetch_url_globals, fetch_url_globals)
    STD_PHP_INI_ENTRY("fetch_url.global_string", "foobar", PHP_INI_ALL, OnUpdateString, global_string, zend_fetch_url_globals, fetch_url_globals)
PHP_INI_END()
*/
/* }}} */

/* {{{ php_fetch_url_init_globals
 */
/* Uncomment this function if you have INI entries
static void php_fetch_url_init_globals(zend_fetch_url_globals *fetch_url_globals)
{
	fetch_url_globals->global_value = 0;
	fetch_url_globals->global_string = NULL;
}
*/
/* }}} */

/* {{{ PHP_MINIT_FUNCTION
 */
/*
 * Module start-up: registers the FetchUrl class and declares its protected
 * properties together with their default values.
 */
PHP_MINIT_FUNCTION(fetch_url)
{
	/* If you have INI entries, uncomment these lines 
	REGISTER_INI_ENTRIES();
	*/
	zend_class_entry fetch_ce;
	INIT_CLASS_ENTRY(fetch_ce, FETCH_CLASS_NAME, fetch_url_method);

	g_fetch_ce = zend_register_internal_class(&fetch_ce TSRMLS_CC);
	zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("body"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
	/* errmsg is read back as a string and reset to "" by clean(), so it starts out as an empty string too. */
	zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("errmsg"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_long(g_fetch_ce, ZEND_STRL("errno"), 0, ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_null(g_fetch_ce, ZEND_STRL("httpCode"), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("cookies"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("headers"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
	/* NULL until the first setHeader() call turns it into an array. */
	zend_declare_property_null(g_fetch_ce, ZEND_STRL("send_headers"), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_long(g_fetch_ce, ZEND_STRL("allow_redirect"), 1, ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_long(g_fetch_ce, ZEND_STRL("connect_timeout"), 5, ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("method"), ZEND_STRL("get"), ZEND_ACC_PROTECTED TSRMLS_CC);
	/* NOTE(review): "multilpart" is not read by any method in the visible source - confirm before renaming or removing. */
	zend_declare_property_long(g_fetch_ce, ZEND_STRL("multilpart"), 0, ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("data"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_long(g_fetch_ce, ZEND_STRL("read_timeout"), 60, ZEND_ACC_PROTECTED TSRMLS_CC);
	/* NULL until setBinary()/setFile() turn them into arrays. */
	zend_declare_property_null(g_fetch_ce, ZEND_STRL("binary_data"), ZEND_ACC_PROTECTED TSRMLS_CC);
	zend_declare_property_null(g_fetch_ce, ZEND_STRL("upload_filepaths"), ZEND_ACC_PROTECTED TSRMLS_CC);
	return SUCCESS;
}
/* }}} */

/* {{{ PHP_MSHUTDOWN_FUNCTION
 *
 * Module shutdown hook: nothing to release, always reports SUCCESS.
 */
PHP_MSHUTDOWN_FUNCTION(fetch_url)
{
	/* uncomment this line if you have INI entries
	UNREGISTER_INI_ENTRIES();
	*/
	return SUCCESS;
}
/* }}} */

/* Remove if there's nothing to do at request start */
/* {{{ PHP_RINIT_FUNCTION
 *
 * Request start hook: no per-request set-up, always reports SUCCESS.
 */
PHP_RINIT_FUNCTION(fetch_url)
{
	return SUCCESS;
}
/* }}} */

/* Remove if there's nothing to do at request end */
/* {{{ PHP_RSHUTDOWN_FUNCTION
 *
 * Request end hook: no per-request tear-down, always reports SUCCESS.
 */
PHP_RSHUTDOWN_FUNCTION(fetch_url)
{
	return SUCCESS;
}
/* }}} */

/* {{{ PHP_MINFO_FUNCTION
 *
 * Prints a two-column "fetch_url support / enabled" table for phpinfo().
 */
PHP_MINFO_FUNCTION(fetch_url)
{
	php_info_print_table_start();
	php_info_print_table_header(2, "fetch_url support", "enabled");
	php_info_print_table_end();

	/* Remove comments if you have entries in php.ini
	DISPLAY_INI_ENTRIES();
	*/
}
/* }}} */

源码下载地址: http://git.oschina.net/365690485/php-class-fetch_url
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值