服务器之家:专注于服务器技术及软件下载分享
分类导航

PHP教程|ASP.NET教程|Java教程|ASP教程|编程技术|正则表达式|C/C++|IOS|C#|Swift|Android|VB|R语言|JavaScript|易语言|vb.net|

服务器之家 - 编程语言 - C/C++ - 用C实现PHP扩展 Fetch_Url 类数据抓取的方法

用C实现PHP扩展 Fetch_Url 类数据抓取的方法

2020-11-20 11:01C语言教程网 C/C++

本文介绍如何用C实现PHP扩展类 FetchUrl 来抓取网页数据,该扩展基于libcurl实现。

一、类文档说明

复制代码 代码如下:


// Synopsis of the FetchUrl class exported by the extension (signatures only, not runnable PHP).
class FetchUrl{
 function __construct();

 // Returns the response body stored by the last fetch() call.
 function body();

 // Re-initialises the object's request/response data so one FetchUrl object can be reused.
 function clean();

 // Returns the stored error message.
 function errmsg();

 // Returns the stored error code; a value > 0 means an error occurred.
 function errcode();

 /**
  * Performs the request.
  * $url string request URL
  * $callback callable optional anonymous function (accepted, but not used by the C implementation)
  */
 function fetch(string $url, callable $callback = null);

 // Returns the HTTP status code of the response.
 function httpCode();

 // Returns the response cookies as an array.
 function responseCookies();

 // Returns the response header lines as an array.
 function responseHeaders();

 // Whether redirects are allowed (the class property itself is declared with the value 1).
 function setAllowRedirect(bool $allow=false);

 // Sets the connect timeout, in seconds.
 function setConnectTimeout(int $seconds=5);

 // Adds one cookie to the outgoing request.
 function setCookie(string $name, string $value);

 // Adds several cookies (name => value) to the outgoing request.
 function setCookies(array $cookies);

 // Adds request headers: either one name/value pair, or an array of name => value.
 function setHeader($name, string $value = null);

 // Sets the request method (POST/GET).
 function setMethod(string $method="get");

 // Sets the form fields sent with a POST request.
 function setPostData(array $data);

 // Uploads an in-memory buffer as a file: form field name, reported file name, raw bytes.
 function setBinary(string $field, string $filename, string $binary);

 // Uploads a file from disk: form field name, path of the file.
 function setFile(string $field, string $path);

 // Sets the read (total transfer) timeout, in seconds.
 function setReadTimeout(int $seconds=60);

 function __destruct();
}


二、使用案例

复制代码 代码如下:


<?php
/* GET request: fetch http://www.baidu.com */
/*
$fetch_url = new FetchUrl();
$fetch_url->setAllowRedirect(true);
$fetch_url->fetch('http://www.baidu.com');
*/

// Cookies attached to the requests below.
$cookies = [
    'wei_xin_wb_session' => 'value',
    'wei_xin_wxblog_authcoder' => 'value',
];

/* POST request submitting form data */
/*
$fetch_url = new FetchUrl();
$fetch_url->setMethod('post');
$data  = array(
 'step'=>2,
 'pays[1]'=>0,
 'pays[2]'=>0,
 'pays[3]'=>0
);
$fetch_url->setCookies($cookies);
$fetch_url->setPostData($data);
$fetch_url->fetch('http://test.wx.pp.cc/wb_advs/manage?inajax=1');
*/

// POST request sending form fields together with a file upload.
$fetch_url = new FetchUrl();
$fetch_url->setAllowRedirect(true);
$fetch_url->setMethod('post');

$data = [
    'nickname' => '挺好a',
    'wxnickname' => 'good',
    'wxusername' => 'good',
    'intro' => 'good',
];

$fetch_url->setCookies($cookies);
$fetch_url->setPostData($data);

// Upload an in-memory buffer as the "picfile" form field.
$binary = file_get_contents("http://www.baidu.com/img/shouye_b5486898c692066bd2cbaeda86d74448.gif");
$fetch_url->setBinary("picfile", "demo.jpg", $binary);
// Alternative: upload a file straight from disk.
// $fetch_url->setFile("picfile", "C:/Users/Administrator/Desktop/123.jpg");

// Only send the request when none of the setters above recorded an error.
if ($fetch_url->errcode() != 0) {
    echo "errmsg:" . $fetch_url->errmsg() . ", errcode:" . $fetch_url->errcode();
} else {
    $fetch_url->fetch('http://wx.pp.cc/wb_ajax/addwxuser/0');
    if ($fetch_url->httpCode() == 200) {
        $html = $fetch_url->body();
        echo $html;
    }
}

// Dump the response header lines.
print_r($fetch_url->responseHeaders());

// Drop the previous request settings so $fetch_url can be reused.
$fetch_url->clean();

$fetch_url->fetch("http://www.baidu.com");
print_r($fetch_url->responseHeaders());


三、扩展实现
1.php_fetch_url.h

复制代码 代码如下:


/*
  +----------------------------------------------------------------------+
  | PHP Version 5                                                        |
  +----------------------------------------------------------------------+
  | Copyright (c) 1997-2012 The PHP Group                                |
  +----------------------------------------------------------------------+
  | This source file is subject to version 3.01 of the PHP license,      |
  | that is bundled with this package in the file LICENSE, and is        |
  | available through the world-wide-web at the following url:           |
  | http://www.php.net/license/3_01.txt                                  |
  | If you did not receive a copy of the PHP license and are unable to   |
  | obtain it through the world-wide-web, please send a note to          |
  | license@php.net so we can mail you a copy immediately.               |
  +----------------------------------------------------------------------+
  | Author:                                                              |
  +----------------------------------------------------------------------+
*/

 

/* $Id$ */

#ifndef PHP_FETCH_URL_H
#define PHP_FETCH_URL_H

/* Module entry defined in fetch_url.c. */
extern zend_module_entry fetch_url_module_entry;
#define phpext_fetch_url_ptr &fetch_url_module_entry

/* Export decoration for symbols that must be visible outside the shared object. */
#ifdef PHP_WIN32
# define PHP_FETCH_URL_API __declspec(dllexport)
#elif defined(__GNUC__) && __GNUC__ >= 4
# define PHP_FETCH_URL_API __attribute__ ((visibility("default")))
#else
# define PHP_FETCH_URL_API
#endif

/* Flags handed to curl_global_init(). */
#ifdef PHP_WIN32
  #define FETCH_CURL_MODE CURL_GLOBAL_WIN32
#else
  #define FETCH_CURL_MODE CURL_GLOBAL_ALL
#endif

#ifdef ZTS
#include "TSRM.h"
#endif

#define FETCH_CLASS_NAME  "FetchUrl"
#define FETCH_CLASS_CE    g_fetch_ce
/* Expands to the "scope, object" argument pair of the zend_*_property() calls; only valid inside a method body. */
#define FETCH_THIS        Z_OBJCE_P(getThis()), getThis()

/*
 * Records an error on the current object.
 *   msg  - must be a string LITERAL: its length is taken with sizeof().
 *   code - integer stored in the "errno" property.
 * Wrapped in do { } while (0) so that both updates stay together when the
 * macro is used as the body of an unbraced if/else. The parameters are
 * deliberately not named "errno", which <errno.h> defines as a macro.
 */
#define FETCH_ERROR(msg, code) \
    do { \
        zend_update_property_stringl(FETCH_THIS, ZEND_STRL("errmsg"), (msg), sizeof(msg)-1 TSRMLS_CC); \
        zend_update_property_long(FETCH_THIS, ZEND_STRL("errno"), (code) TSRMLS_CC); \
    } while (0)

PHP_MINIT_FUNCTION(fetch_url);
PHP_MSHUTDOWN_FUNCTION(fetch_url);
PHP_RINIT_FUNCTION(fetch_url);
PHP_RSHUTDOWN_FUNCTION(fetch_url);
PHP_MINFO_FUNCTION(fetch_url);

/* Accessor for module globals. NOTE(review): no globals struct is declared in this header, so this macro is currently unusable. */
#ifdef ZTS
#define FETCH_URL_G(v) TSRMG(fetch_url_globals_id, zend_fetch_url_globals *, v)
#else
#define FETCH_URL_G(v) (fetch_url_globals.v)
#endif

#endif /* PHP_FETCH_URL_H */


2.fetch_url.c

复制代码 代码如下:


/*
  +----------------------------------------------------------------------+
  | PHP Version 5                                                        |
  +----------------------------------------------------------------------+
  | Copyright (c) 1997-2012 The PHP Group                                |
  +----------------------------------------------------------------------+
  | This source file is subject to version 3.01 of the PHP license,      |
  | that is bundled with this package in the file LICENSE, and is        |
  | available through the world-wide-web at the following url:           |
  | http://www.php.net/license/3_01.txt                                  |
  | If you did not receive a copy of the PHP license and are unable to   |
  | obtain it through the world-wide-web, please send a note to          |
  | license@php.net so we can mail you a copy immediately.               |
  +----------------------------------------------------------------------+
  | Author:                                                              |
  +----------------------------------------------------------------------+
*/

 

/* $Id$ */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include "php.h"
#include "php_ini.h"
#include "main/SAPI.h"
#include "Zend/zend_interfaces.h"
#include "ext/standard/info.h"
#include "ext/standard/php_var.h"
#include "ext/standard/php_string.h"
#include "ext/standard/php_smart_str.h"
#include "ext/standard/url.h"
#include "ext/pcre/php_pcre.h"
#include "php_fetch_url.h"
#include <curl/curl.h>

/* Class entry of FetchUrl, filled in by PHP_MINIT_FUNCTION(fetch_url). */
zend_class_entry *g_fetch_ce;

/*
 * Argument descriptions used by reflection. The fourth macro argument is the
 * number of REQUIRED arguments; it is kept in step with the format string
 * each method passes to zend_parse_parameters().
 */
ZEND_BEGIN_ARG_INFO_EX(void_arginfo, 0, 0, 0)
ZEND_END_ARG_INFO()

/* fetch(url [, callback]) */
ZEND_BEGIN_ARG_INFO_EX(fetch_arginfo, 0, 0, 1)
    ZEND_ARG_INFO(0, url)
    ZEND_ARG_INFO(0, callback)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(responseCookies_arginfo, 0, 0, 0)
    ZEND_ARG_INFO(0, all)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(responseHeaders_arginfo, 0, 0, 0)
    ZEND_ARG_INFO(0, parse)
ZEND_END_ARG_INFO()

/* setAllowRedirect(allow): the implementation requires the argument. */
ZEND_BEGIN_ARG_INFO_EX(setAllowRedirect_arginfo, 0, 0, 1)
    ZEND_ARG_INFO(0, allow)
ZEND_END_ARG_INFO()

/* setConnectTimeout(seconds): the value is handed to CURLOPT_CONNECTTIMEOUT, which is in seconds. */
ZEND_BEGIN_ARG_INFO_EX(setConnectTimeout_arginfo, 0, 0, 1)
    ZEND_ARG_INFO(0, seconds)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(setCookie_arginfo, 0, 0, 2)
    ZEND_ARG_INFO(0, name)
    ZEND_ARG_INFO(0, value)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(setCookies_arginfo, 0, 0, 1)
    ZEND_ARG_INFO(0, cookies)
ZEND_END_ARG_INFO()

/* setHeader(name, value) or setHeader(array): only the first argument is mandatory. */
ZEND_BEGIN_ARG_INFO_EX(setHeader_arginfo, 0, 0, 1)
    ZEND_ARG_INFO(0, name)
    ZEND_ARG_INFO(0, value)
ZEND_END_ARG_INFO()

ZEND_BEGIN_ARG_INFO_EX(setMethod_arginfo, 0, 0, 1)
    ZEND_ARG_INFO(0, method)
ZEND_END_ARG_INFO()

/* NOTE: the identifier lacks the _arginfo suffix; it is kept because the method table refers to it by this name. */
ZEND_BEGIN_ARG_INFO_EX(setPostData, 0, 0, 1)
    ZEND_ARG_INFO(0, post_data)
    ZEND_ARG_INFO(0, multil)
ZEND_END_ARG_INFO()

/* setReadTimeout(seconds): the value is handed to CURLOPT_TIMEOUT, which is in seconds. */
ZEND_BEGIN_ARG_INFO_EX(setReadTimeout_arginfo, 0, 0, 1)
    ZEND_ARG_INFO(0, seconds)
ZEND_END_ARG_INFO()

/* setBinary(post_field, uploadfile_name, binary_data) */
ZEND_BEGIN_ARG_INFO_EX(setBinary_arginfo, 0, 0, 3)
    ZEND_ARG_INFO(0, post_field)
    ZEND_ARG_INFO(0, uploadfile_name)
    ZEND_ARG_INFO(0, binary_data)
ZEND_END_ARG_INFO()

/* setFile(post_field, path) */
ZEND_BEGIN_ARG_INFO_EX(setFile_arginfo, 0, 0, 2)
    ZEND_ARG_INFO(0, post_field)
    ZEND_ARG_INFO(0, path)
ZEND_END_ARG_INFO()

/* FetchUrl::__construct() - intentionally empty; the method performs no initialisation itself. */
ZEND_METHOD(fetch_url, __construct){

}

ZEND_METHOD(fetch_url, setBinary){
 zval *input_filed_name, *binary_data, *uploadfile_name;
 zval *g_binary_data, *item_data;

 if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zzz", &input_filed_name, &uploadfile_name, &binary_data) == FAILURE){
  RETURN_FALSE;
 }

 if(Z_TYPE_P(input_filed_name) != IS_STRING || Z_TYPE_P(uploadfile_name) != IS_STRING || Z_TYPE_P(binary_data) != IS_STRING){
  RETURN_FALSE;
 }

 g_binary_data = zend_read_property(FETCH_THIS, ZEND_STRL("binary_data"), 0 TSRMLS_CC);

 if(Z_TYPE_P(g_binary_data) == IS_NULL){
  MAKE_STD_ZVAL(g_binary_data);
  array_init(g_binary_data);
 }

 MAKE_STD_ZVAL(item_data);
 array_init(item_data);

 add_index_stringl(item_data, 0, Z_STRVAL_P(uploadfile_name), Z_STRLEN_P(uploadfile_name), 1);
 add_index_stringl(item_data, 1, Z_STRVAL_P(binary_data), Z_STRLEN_P(binary_data), 1);

 add_assoc_zval(g_binary_data, Z_STRVAL_P(input_filed_name), item_data);

 zend_update_property(FETCH_THIS, ZEND_STRL("binary_data"), g_binary_data TSRMLS_CC);
}

ZEND_METHOD(fetch_url, setFile){
 zval *file_path, *input_filed_name;
 zval *upload_filepaths;

 if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zz", &input_filed_name, &file_path) == FAILURE){
  RETURN_FALSE;
 }

 if(Z_TYPE_P(file_path) != IS_STRING || Z_TYPE_P(input_filed_name) != IS_STRING){
  RETURN_FALSE;
 }

 upload_filepaths = zend_read_property(FETCH_THIS, ZEND_STRL("upload_filepaths"), 0 TSRMLS_CC);

 if(Z_TYPE_P(upload_filepaths) == IS_NULL){
  MAKE_STD_ZVAL(upload_filepaths);
  array_init(upload_filepaths);
 }

 add_assoc_stringl(upload_filepaths, Z_STRVAL_P(input_filed_name), Z_STRVAL_P(file_path), Z_STRLEN_P(file_path), 1);

 zend_update_property(FETCH_THIS, ZEND_STRL("upload_filepaths"), upload_filepaths TSRMLS_CC);
}

/* FetchUrl::body() - returns a copy of the string held in the "body" property. */
ZEND_METHOD(fetch_url, body){
    zval *stored_body = zend_read_property(FETCH_THIS, ZEND_STRL("body"), 0 TSRMLS_CC);

    /* Last argument 1 = duplicate, so the caller never shares the property's buffer. */
    RETVAL_STRINGL(Z_STRVAL_P(stored_body), Z_STRLEN_P(stored_body), 1);
    return;
}

/*
 * FetchUrl::clean()
 *
 * Resets the request and response state so the object can be reused:
 * body, error state, cookies, headers, post data, uploads and method.
 * allow_redirect and the two timeouts are left as they are.
 */
ZEND_METHOD(fetch_url, clean){
    /* Response state. */
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("body"), ZEND_STRL("") TSRMLS_CC);
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("headers"), ZEND_STRL("") TSRMLS_CC);
    zend_update_property_null(FETCH_THIS, ZEND_STRL("httpCode") TSRMLS_CC);

    /* Error state. */
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("errmsg"), ZEND_STRL("") TSRMLS_CC);
    zend_update_property_long(FETCH_THIS, ZEND_STRL("errno"), 0 TSRMLS_CC);

    /* Request state. */
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("cookies"), ZEND_STRL("") TSRMLS_CC);
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("data"), ZEND_STRL("") TSRMLS_CC);
    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("method"), ZEND_STRL("get") TSRMLS_CC);

    /* send_headers is either NULL or an array: setHeader() creates the array
     * only when it finds NULL, so resetting it to a string would make the
     * next setHeader() call treat a string zval as a hash table. */
    zend_update_property_null(FETCH_THIS, ZEND_STRL("send_headers") TSRMLS_CC);
    zend_update_property_null(FETCH_THIS, ZEND_STRL("binary_data") TSRMLS_CC);
    zend_update_property_null(FETCH_THIS, ZEND_STRL("upload_filepaths") TSRMLS_CC);
}

ZEND_METHOD(fetch_url, errmsg){
 zval *errmsg = zend_read_property(FETCH_THIS, ZEND_STRL("errmsg"), 0 TSRMLS_CC);

 RETURN_STRINGL(Z_STRVAL_P(errmsg), Z_STRLEN_P(errmsg), 1);
}

/* FetchUrl::errcode() - returns the integer held in the "errno" property. */
ZEND_METHOD(fetch_url, errcode){
    zval *stored_code;

    stored_code = zend_read_property(FETCH_THIS, ZEND_STRL("errno"), 0 TSRMLS_CC);

    RETVAL_LONG(Z_LVAL_P(stored_code));
    return;
}

/*
 * libcurl write callback: appends the received chunk to the smart_str passed
 * as user data and reports the whole chunk as consumed.
 */
static size_t read_data(void *buffer, size_t size, size_t nmemb, void *data){
    const size_t chunk_len = size * nmemb;
    smart_str *sink = (smart_str*)data;

    smart_str_appendl(sink, buffer, chunk_len);

    return chunk_len;
}

ZEND_METHOD(fetch_url, fetch){
 CURLcode return_code;
 CURL *curl_handler;
 struct curl_slist *http_headers = NULL;
 zval *url, *callback, *cookies, *connect_timeout, *allow_redirect, *method, *post_data, *read_timeout,
   *send_headers, *err_no, *errmsg, *binary_data, *upload_filepaths;
 smart_str body_str = {0}, header_str = {0};
 struct curl_httppost *post = NULL;
 struct curl_httppost *last = NULL;

 if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z|z", &url, &callback) == FAILURE){
  RETURN_FALSE;
 }

 err_no = zend_read_property(FETCH_THIS, ZEND_STRL("errno"), 0 TSRMLS_CC);
 errmsg  = zend_read_property(FETCH_THIS, ZEND_STRL("errmsg"), 0 TSRMLS_CC);

 if(Z_LVAL_P(err_no) > 0){
  php_printf("errno:%d, errmsg:%s", Z_LVAL_P(err_no), Z_STRVAL_P(errmsg));
  RETURN_FALSE;
 }

 if(Z_TYPE_P(url) != IS_STRING){
  FETCH_ERROR("fetch url must be string.", 500);
  RETURN_FALSE;
 }

 return_code = curl_global_init(FETCH_CURL_MODE);

 if(return_code != CURLE_OK){
  curl_global_cleanup();
  FETCH_ERROR("curl init failed.", 500);
  RETURN_FALSE;
 }

 curl_handler = curl_easy_init();

 if(NULL == curl_handler){
  curl_easy_cleanup(curl_handler);
  curl_global_cleanup();
  FETCH_ERROR("get curl handler failed.", 500);
  RETURN_FALSE;
 }

 cookies = zend_read_property(FETCH_THIS, ZEND_STRL("cookies"), 0 TSRMLS_CC);
 connect_timeout = zend_read_property(FETCH_THIS, ZEND_STRL("connect_timeout"), 0 TSRMLS_CC);
 read_timeout = zend_read_property(FETCH_THIS, ZEND_STRL("read_timeout"), 0 TSRMLS_CC);
 allow_redirect = zend_read_property(FETCH_THIS, ZEND_STRL("allow_redirect"), 0 TSRMLS_CC);
 method = zend_read_property(FETCH_THIS, ZEND_STRL("method"), 0 TSRMLS_CC);
 post_data = zend_read_property(FETCH_THIS, ZEND_STRL("data"), 0 TSRMLS_CC);
 send_headers = zend_read_property(FETCH_THIS, ZEND_STRL("send_headers"), 0 TSRMLS_CC);
 binary_data = zend_read_property(FETCH_THIS, ZEND_STRL("binary_data"), 0 TSRMLS_CC);
 upload_filepaths = zend_read_property(FETCH_THIS, ZEND_STRL("upload_filepaths"), 0 TSRMLS_CC);


 curl_easy_setopt(curl_handler, CURLOPT_URL, Z_STRVAL_P(url));
 curl_easy_setopt(curl_handler, CURLOPT_COOKIE, Z_STRVAL_P(cookies));
 curl_easy_setopt(curl_handler, CURLOPT_WRITEFUNCTION, &read_data);
 curl_easy_setopt(curl_handler, CURLOPT_WRITEDATA, &body_str);
 curl_easy_setopt(curl_handler, CURLOPT_HEADERDATA, &header_str);
 curl_easy_setopt(curl_handler, CURLOPT_HEADERFUNCTION, &read_data);
 curl_easy_setopt(curl_handler, CURLOPT_TIMEOUT, Z_LVAL_P(read_timeout));
 curl_easy_setopt(curl_handler, CURLOPT_CONNECTTIMEOUT, Z_LVAL_P(connect_timeout));
 curl_easy_setopt(curl_handler, CURLOPT_AUTOREFERER, Z_LVAL_P(allow_redirect));
 curl_easy_setopt(curl_handler, CURLOPT_MAXREDIRS, 5);

 if(strcmp(Z_STRVAL_P(method), "get") == 0){
  curl_easy_setopt(curl_handler, CURLOPT_HTTPGET, 1);
 }else{
  if(Z_TYPE_P(binary_data) != IS_NULL || Z_TYPE_P(upload_filepaths) != IS_NULL){
   zval *delim, *post_arr, *delim_equal;

   MAKE_STD_ZVAL(delim_equal);
   MAKE_STD_ZVAL(delim);
   MAKE_STD_ZVAL(post_arr);

   ZVAL_STRING(delim, "&", 1);
   ZVAL_STRING(delim_equal, "=", 1);
   array_init(post_arr);
   php_explode(delim, post_data, post_arr, LONG_MAX);

   for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(post_arr));
    zend_hash_has_more_elements(Z_ARRVAL_P(post_arr)) == SUCCESS;
    zend_hash_move_forward(Z_ARRVAL_P(post_arr))){
    zval **data_str;
    zval *temp_data, **post_data_name, **post_data_value, *temp_zval;
    if(zend_hash_get_current_data(Z_ARRVAL_P(post_arr), (void**)&data_str) == FAILURE){
     continue;
    }

    if(Z_STRLEN_PP(data_str) > 0){
     MAKE_STD_ZVAL(temp_data);
     array_init(temp_data);
     temp_zval = *data_str;

     php_explode(delim_equal, temp_zval, temp_data, LONG_MAX);

     zend_hash_index_find(Z_ARRVAL_P(temp_data), 0, (void**)&post_data_name);
     zend_hash_index_find(Z_ARRVAL_P(temp_data), 1, (void**)&post_data_value);

     curl_formadd(&post, &last, CURLFORM_COPYNAME, Z_STRVAL_PP(post_data_name), CURLFORM_COPYCONTENTS, Z_STRVAL_PP(post_data_value), CURLFORM_END);

     zval_dtor(temp_data);
    }
   }

   zval_dtor(post_arr);
   zval_dtor(delim);
   zval_dtor(delim_equal);

   if(Z_TYPE_P(binary_data) != IS_NULL)
   for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(binary_data));
    zend_hash_has_more_elements(Z_ARRVAL_P(binary_data)) == SUCCESS;
    zend_hash_move_forward(Z_ARRVAL_P(binary_data))){
    char *input_file_name;
    uint input_file_name_len;
    ulong idx;
    zval **item_data;
    zval **upload_binary_data;
    zval **uploadfile_name;

    if(zend_hash_get_current_key_ex(Z_ARRVAL_P(binary_data), &input_file_name, &input_file_name_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
     continue;
    }

    if(zend_hash_get_current_data(Z_ARRVAL_P(binary_data), (void**)&item_data) == FAILURE){
     continue;
    }

    zend_hash_index_find(Z_ARRVAL_PP(item_data), 0, (void**)&uploadfile_name);
    zend_hash_index_find(Z_ARRVAL_PP(item_data), 1, (void**)&upload_binary_data);

    curl_formadd(&post,
       &last,
       CURLFORM_COPYNAME,
       input_file_name,
       CURLFORM_BUFFER,
       Z_STRVAL_PP(uploadfile_name), //todo:setBinary需要传递文件名参数
       CURLFORM_BUFFERPTR,
       Z_STRVAL_PP(upload_binary_data),
       CURLFORM_BUFFERLENGTH,
       Z_STRLEN_PP(upload_binary_data),
       CURLFORM_END
    );
   }

   if(Z_TYPE_P(upload_filepaths) != IS_NULL)
   for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(upload_filepaths));
    zend_hash_has_more_elements(Z_ARRVAL_P(upload_filepaths)) == SUCCESS;
    zend_hash_move_forward(Z_ARRVAL_P(upload_filepaths))){
    char *input_filed_name;
    uint input_file_name_len;
    ulong idx;
    zval **file_path;

    if(zend_hash_get_current_key_ex(Z_ARRVAL_P(upload_filepaths), &input_filed_name, &input_file_name_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
     continue;
    }

    if(zend_hash_get_current_data(Z_ARRVAL_P(upload_filepaths), (void**)&file_path) == FAILURE){
     continue;
    }

    curl_formadd(&post, &last, CURLFORM_COPYNAME, input_filed_name, CURLFORM_FILE, Z_STRVAL_PP(file_path), CURLFORM_END);
   }

   curl_easy_setopt(curl_handler, CURLOPT_HTTPPOST, post);
   http_headers = curl_slist_append(http_headers, estrdup("Expect:"));//防止出现HTTP 100跳转
  }else{
   curl_easy_setopt(curl_handler, CURLOPT_POSTFIELDS, Z_STRVAL_P(post_data));
   curl_easy_setopt(curl_handler, CURLOPT_POST, 1);
  }
 }

 if(Z_TYPE_P(send_headers) == IS_ARRAY && zend_hash_num_elements(Z_ARRVAL_P(send_headers)) > 0){

  for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(send_headers));
   zend_hash_has_more_elements(Z_ARRVAL_P(send_headers)) == SUCCESS;
   zend_hash_move_forward(Z_ARRVAL_P(send_headers))){
   char *header_key;
   uint header_keylen;
   ulong idx;
   zval **header_val;
   smart_str impl_headers = {0};

   if(zend_hash_get_current_key_ex(Z_ARRVAL_P(send_headers), &header_key, &header_keylen, &idx, 0, NULL) != HASH_KEY_IS_STRING){
    continue;
   }

   if(zend_hash_get_current_data(Z_ARRVAL_P(send_headers), (void**)&header_val) == FAILURE){
    continue;
   }

   smart_str_appendl(&impl_headers, header_key, header_keylen);
   smart_str_appendl(&impl_headers, ": ", 2);
   smart_str_appendl(&impl_headers, Z_STRVAL_PP(header_val), Z_STRLEN_PP(header_val));
   http_headers = curl_slist_append(http_headers, impl_headers.c);
  }
 }

 curl_easy_setopt(curl_handler, CURLOPT_HTTPHEADER, http_headers);

 curl_easy_perform(curl_handler);

 curl_slist_free_all(http_headers);
 curl_formfree(post);
 curl_easy_cleanup(curl_handler);
 curl_global_cleanup();

 smart_str_0(&body_str);
 smart_str_0(&header_str);

 zend_update_property_stringl(FETCH_THIS, ZEND_STRL("headers"), header_str.c, header_str.len TSRMLS_CC);
 zend_update_property_stringl(FETCH_THIS, ZEND_STRL("body"), body_str.c, body_str.len TSRMLS_CC);
}

ZEND_METHOD(fetch_url, httpCode){
 pcre_cache_entry *pce;
 zval *headers;
 zval *result_match, *match_long, **http_code;
 char *regex = estrdup("/^HTTP\\/1\\.1\\s(.*)\\sOK/");

 if((pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC)) == NULL){
  RETURN_FALSE;
 }

 MAKE_STD_ZVAL(result_match);
 MAKE_STD_ZVAL(match_long);

 headers = zend_read_property(FETCH_THIS, ZEND_STRL("headers"), 0 TSRMLS_CC);

 php_pcre_match_impl(pce, Z_STRVAL_P(headers), Z_STRLEN_P(headers), match_long, result_match, 0, 0, 0, 0 TSRMLS_CC);

 if(Z_LVAL_P(match_long) > 0){
  if(zend_hash_index_find(Z_ARRVAL_P(result_match), 1, (void**)&http_code) == FAILURE){
   RETURN_FALSE;
  }else{
   RETURN_STRINGL(Z_STRVAL_PP(http_code), Z_STRLEN_PP(http_code), 0);
  }
 }else{
  RETURN_FALSE;
 }
}

ZEND_METHOD(fetch_url, responseCookies){
 pcre_cache_entry *pce;
 zval *headers, *result_match, *match_long;
 char *regex = estrdup("/Set-Cookie:\\s(.*?);/");

 headers = zend_read_property(FETCH_THIS, ZEND_STRL("headers"), 0 TSRMLS_CC);

 array_init(return_value);

 if(Z_STRLEN_P(headers) > 0){
  if((pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC)) == NULL){
   RETURN_NULL();
  }

  MAKE_STD_ZVAL(result_match);
  MAKE_STD_ZVAL(match_long);

  //void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value, zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
  php_pcre_match_impl(pce, Z_STRVAL_P(headers), Z_STRLEN_P(headers), match_long, result_match, 1, 0, 0, 0 TSRMLS_CC);

  if(Z_LVAL_P(match_long) > 0){
   zval **result;
   HashTable *result_ht;
   char *found = NULL;
   long found_offset;
   char *cookie_name;
   char *cookie_value;

   if(zend_hash_index_find(Z_ARRVAL_P(result_match), 1, (void**)&result) != FAILURE){
    result_ht = Z_ARRVAL_PP(result);

    for(zend_hash_internal_pointer_reset(result_ht);
     zend_hash_has_more_elements(result_ht) == SUCCESS;
     zend_hash_move_forward(result_ht)){
     zval **tmpzval;

     if(zend_hash_get_current_data(result_ht, (void**)&tmpzval) == FAILURE){
      continue;
     }

     found = php_memnstr(Z_STRVAL_PP(tmpzval), "=", 1, Z_STRVAL_PP(tmpzval) + Z_STRLEN_PP(tmpzval));
     found_offset = found - Z_STRVAL_PP(tmpzval);

     cookie_name = estrndup(Z_STRVAL_PP(tmpzval), found_offset);
     cookie_value= estrndup(found+1, strlen(found)-1);

     add_assoc_stringl(return_value, cookie_name, cookie_value, strlen(cookie_value), 1);

     efree(cookie_name);
     efree(cookie_value);
    }
   }
  }
 }else{
  RETURN_NULL();
 }
}

ZEND_METHOD(fetch_url, responseHeaders){
 zval *headers, *delim;
 uint idx;

 headers = zend_read_property(FETCH_THIS, ZEND_STRL("headers"), 0 TSRMLS_CC);

 MAKE_STD_ZVAL(delim);

 array_init(return_value);

 ZVAL_STRING(delim, "\r\n", 1);

 php_explode(delim, headers, return_value, LONG_MAX);

 idx = zend_hash_num_elements(Z_ARRVAL_P(return_value));

 zend_hash_index_del(Z_ARRVAL_P(return_value), idx-1);

 zend_hash_index_del(Z_ARRVAL_P(return_value), idx-2);

 zval_dtor(delim);
}

ZEND_METHOD(fetch_url, setAllowRedirect){
 zval *allow;

 if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &allow) == FAILURE){
  RETURN_FALSE;
 }

 convert_to_long(allow);

 zend_update_property_long(FETCH_THIS, ZEND_STRL("allow_redirect"), Z_LVAL_P(allow) TSRMLS_CC);
}

ZEND_METHOD(fetch_url, setConnectTimeout){
 zval *connect_timeout;

 if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &connect_timeout) == FAILURE){
  RETURN_FALSE;
 }

 convert_to_long(connect_timeout);

 zend_update_property_long(FETCH_THIS, ZEND_STRL("connect_timeout"), Z_LVAL_P(connect_timeout) TSRMLS_CC);
}

/*
 * FetchUrl::setCookie(string $name, string $value)
 *
 * Appends "name=value;" to the cookie string held in the "cookies" property.
 * Returns FALSE when either argument is missing or not a string; otherwise
 * nothing (NULL).
 */
ZEND_METHOD(fetch_url, setCookie){
    zval *zval_cookies;
    zval *cookie_name, *cookie_value;
    smart_str impl_cookies = {0};

    if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "zz", &cookie_name, &cookie_value) == FAILURE){
        RETURN_FALSE;
    }

    if(Z_TYPE_P(cookie_name) != IS_STRING || Z_TYPE_P(cookie_value) != IS_STRING){
        RETURN_FALSE;
    }

    zval_cookies = zend_read_property(FETCH_THIS, ZEND_STRL("cookies"), 0 TSRMLS_CC);

    /* Keep the cookies that are already stored, then add the new pair. */
    if(Z_TYPE_P(zval_cookies) == IS_STRING){
        smart_str_appendl(&impl_cookies, Z_STRVAL_P(zval_cookies), Z_STRLEN_P(zval_cookies));
    }
    smart_str_appendl(&impl_cookies, Z_STRVAL_P(cookie_name), Z_STRLEN_P(cookie_name));
    smart_str_appendc(&impl_cookies, '=');
    smart_str_appendl(&impl_cookies, Z_STRVAL_P(cookie_value), Z_STRLEN_P(cookie_value));
    smart_str_appendc(&impl_cookies, ';');
    smart_str_0(&impl_cookies);

    zend_update_property_stringl(FETCH_THIS, ZEND_STRL("cookies"), impl_cookies.c, impl_cookies.len TSRMLS_CC);

    /* The property stores its own copy; free the scratch buffer. */
    smart_str_free(&impl_cookies);
}

ZEND_METHOD(fetch_url, setCookies){
 zval *zval_cookies;
 zval *cookie_array;
 smart_str impl_cookies = {0};
 HashTable *cookies_ht;

 if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &cookie_array) == FAILURE){
  RETURN_FALSE;
 }

 if(Z_TYPE_P(cookie_array) != IS_ARRAY){
  RETURN_FALSE;
 }

 zval_cookies = zend_read_property(FETCH_THIS, ZEND_STRL("cookies"), 0 TSRMLS_CC);
 cookies_ht = Z_ARRVAL_P(cookie_array);

 smart_str_appendl(&impl_cookies, Z_STRVAL_P(zval_cookies), Z_STRLEN_P(zval_cookies));

 for(zend_hash_internal_pointer_reset(cookies_ht);
  zend_hash_has_more_elements(cookies_ht) == SUCCESS;
  zend_hash_move_forward(cookies_ht))
 {
  zval **value;
  char *key;
  uint key_len;
  ulong idx;

  if(zend_hash_get_current_key_ex(cookies_ht, &key, &key_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
   continue;
  }

  if(zend_hash_get_current_data(cookies_ht, (void**)&value) == FAILURE){
   continue;
  }

  convert_to_string(*value);

  if(Z_TYPE_PP(value) != IS_STRING){
   continue;
  }

  smart_str_appendl(&impl_cookies, key, key_len-1);
  smart_str_appendl(&impl_cookies, "=", 1);
  smart_str_appendl(&impl_cookies, Z_STRVAL_PP(value), Z_STRLEN_PP(value));
  smart_str_appendl(&impl_cookies, ";", 1);
 }

 php_url_decode(impl_cookies.c, impl_cookies.len);
 smart_str_0(&impl_cookies);
 zend_update_property_stringl(FETCH_THIS, ZEND_STRL("cookies"), impl_cookies.c, impl_cookies.len TSRMLS_CC);
}

ZEND_METHOD(fetch_url, setHeader){
 zval *headers, *value=NULL, *send_headers;
 HashTable *headers_ht;

 if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z|z", &headers) == FAILURE){
  RETURN_FALSE;
 }

 send_headers = zend_read_property(FETCH_THIS, ZEND_STRL("send_headers"), 0 TSRMLS_CC);

 if(Z_TYPE_P(send_headers) == IS_NULL){
  MAKE_STD_ZVAL(send_headers);
  array_init(send_headers);
 }

 headers_ht = Z_ARRVAL_P(send_headers);

 if(Z_TYPE_P(headers) == IS_ARRAY){
  for(zend_hash_internal_pointer_reset(Z_ARRVAL_P(headers));
   zend_hash_has_more_elements(Z_ARRVAL_P(headers)) == SUCCESS;
   zend_hash_move_forward(Z_ARRVAL_P(headers))){
   char* key;
   uint key_len;
   ulong idx;
   zval **tmpzval;

   if(zend_hash_get_current_key_ex(Z_ARRVAL_P(headers), &key, &key_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
    continue;
   }

   if(zend_hash_get_current_data(Z_ARRVAL_P(headers), (void**)&tmpzval) == FAILURE){
    continue;
   }

   add_assoc_stringl(send_headers, key, Z_STRVAL_PP(tmpzval), Z_STRLEN_PP(tmpzval), 1);
  }
 }else if(Z_TYPE_P(headers) == IS_STRING && Z_TYPE_P(value) == IS_STRING){
  add_assoc_stringl(send_headers, Z_STRVAL_P(headers), Z_STRVAL_P(value), Z_STRLEN_P(value), 1);
 }else{
  zend_error(E_WARNING, "param error.");
 }

 zend_update_property(FETCH_THIS, ZEND_STRL("send_headers"), send_headers);
}

/*
 * FetchUrl::setMethod(string $method)
 *
 * Selects "get" or "post" (compared case-insensitively) and stores the
 * lower-case name in the "method" property. Returns TRUE on success. For any
 * other method the error "Not support method." / 404 is recorded and FALSE is
 * returned; FALSE is also returned for a missing or non-string argument.
 */
ZEND_METHOD(fetch_url, setMethod){
    zval *zval_method;

    if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &zval_method) == FAILURE){
        RETURN_FALSE;
    }

    if(Z_TYPE_P(zval_method) != IS_STRING){
        RETURN_FALSE;
    }

    /* Compare without lower-casing the argument in place: that buffer belongs
     * to the caller's variable (or to a string literal of the script). */
    if(zend_binary_strcasecmp(Z_STRVAL_P(zval_method), Z_STRLEN_P(zval_method), "get", sizeof("get")-1) == 0){
        zend_update_property_stringl(FETCH_THIS, ZEND_STRL("method"), ZEND_STRL("get") TSRMLS_CC);
    }else if(zend_binary_strcasecmp(Z_STRVAL_P(zval_method), Z_STRLEN_P(zval_method), "post", sizeof("post")-1) == 0){
        zend_update_property_stringl(FETCH_THIS, ZEND_STRL("method"), ZEND_STRL("post") TSRMLS_CC);
    }else{
        FETCH_ERROR("Not support method.", 404);
        RETURN_FALSE;
    }

    RETURN_TRUE;
}

ZEND_METHOD(fetch_url, setPostData){
 zval *data, *post_data;
 HashTable *post_data_ht;
 smart_str temp = {0};

 if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "z", &post_data) == FAILURE){
  RETURN_FALSE;
 }

 data = zend_read_property(FETCH_THIS, ZEND_STRL("data"), 0 TSRMLS_CC);

 if(Z_TYPE_P(post_data) != IS_ARRAY){
  zend_error(E_WARNING, "post data must be array.");
  RETURN_FALSE;
 }

 post_data_ht = Z_ARRVAL_P(post_data);
 smart_str_appendl(&temp, Z_STRVAL_P(data), Z_STRLEN_P(data));

 for(zend_hash_internal_pointer_reset(post_data_ht);
  zend_hash_has_more_elements(post_data_ht) == SUCCESS;
  zend_hash_move_forward(post_data_ht)){
  zval **current_data;
  char *key;
  uint key_len;
  ulong idx;

  if(zend_hash_get_current_key_ex(post_data_ht, &key, &key_len, &idx, 0, NULL) != HASH_KEY_IS_STRING){
   continue;
  }

  if(zend_hash_get_current_data(post_data_ht, (void**)¤t_data) == FAILURE){
   continue;
  }

  convert_to_string(*current_data);

  smart_str_appendl(&temp, key, key_len-1);
  smart_str_appendc(&temp, '=');
  smart_str_appendl(&temp, Z_STRVAL_PP(current_data), Z_STRLEN_PP(current_data));
  smart_str_appendc(&temp, '&');
 }

 smart_str_0(&temp);
 zend_update_property_stringl(FETCH_THIS, ZEND_STRL("data"), temp.c, temp.len TSRMLS_CC);
}

/*
 * FetchUrl::setReadTimeout(int $seconds)
 *
 * Stores the read timeout in the "read_timeout" property. The argument is
 * parsed as an integer by the engine, so numeric strings are accepted just as
 * they are by setConnectTimeout(). Returns FALSE when the argument is
 * missing, not numeric, or negative.
 */
ZEND_METHOD(fetch_url, setReadTimeout){
    long read_timeout;

    if(zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &read_timeout) == FAILURE){
        RETURN_FALSE;
    }

    if(read_timeout < 0){
        zend_error(E_WARNING, "readtimeout must be integer.");
        RETURN_FALSE;
    }

    zend_update_property_long(FETCH_THIS, ZEND_STRL("read_timeout"), read_timeout TSRMLS_CC);
}

/* FetchUrl::__destruct() - intentionally empty; the method releases nothing itself. */
ZEND_METHOD(fetch_url, __destruct){}

/*
 * Method table of the FetchUrl class: one entry per method with its argument
 * info and access flags. The table is read-only and ends with PHP_FE_END,
 * the same terminator the module's function table uses.
 */
static const zend_function_entry fetch_url_method[] = {
    ZEND_ME(fetch_url, __construct, void_arginfo, ZEND_ACC_CTOR|ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setBinary, setBinary_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setFile, setFile_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, body, void_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, clean, void_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, errmsg, void_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, errcode, void_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, fetch, fetch_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, httpCode, void_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, responseCookies, responseCookies_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, responseHeaders, responseHeaders_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setAllowRedirect, setAllowRedirect_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setConnectTimeout, setConnectTimeout_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setCookie, setCookie_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setCookies, setCookies_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setHeader, setHeader_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setMethod, setMethod_arginfo, ZEND_ACC_PUBLIC)
    /* The arginfo symbol for setPostData is declared without the _arginfo suffix. */
    ZEND_ME(fetch_url, setPostData, setPostData, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, setReadTimeout, setReadTimeout_arginfo, ZEND_ACC_PUBLIC)
    ZEND_ME(fetch_url, __destruct, void_arginfo, ZEND_ACC_DTOR|ZEND_ACC_PUBLIC)
    PHP_FE_END
};
/* If you declare any globals in php_fetch_url.h uncomment this:
ZEND_DECLARE_MODULE_GLOBALS(fetch_url)
*/

/* True global resources - no need for thread safety here */
/* NOTE(review): le_fetch_url is not referenced anywhere in the code shown here; presumably left over from the extension skeleton. */
static int le_fetch_url;

/* {{{ fetch_url_functions[]
 *
 * Every user visible function must have an entry in fetch_url_functions[].
 * The table holds only the terminator: the extension exports no procedural functions.
 */
const zend_function_entry fetch_url_functions[] = {
 PHP_FE_END /* Must be the last line in fetch_url_functions[] */
};
/* }}} */

/* {{{ fetch_url_module_entry
 * Module descriptor: extension name, function table, lifecycle callbacks and version string.
 */
zend_module_entry fetch_url_module_entry = {
#if ZEND_MODULE_API_NO >= 20010901
 STANDARD_MODULE_HEADER,
#endif
 "fetch_url",
 fetch_url_functions,
 PHP_MINIT(fetch_url),
 PHP_MSHUTDOWN(fetch_url),
 PHP_RINIT(fetch_url),  /* Replace with NULL if there's nothing to do at request start */
 PHP_RSHUTDOWN(fetch_url), /* Replace with NULL if there's nothing to do at request end */
 PHP_MINFO(fetch_url),
#if ZEND_MODULE_API_NO >= 20010901
 "0.1", /* Replace with version number for your extension */
#endif
 STANDARD_MODULE_PROPERTIES
};
/* }}} */

/* Emits get_module() only when the extension is built as a loadable module. */
#ifdef COMPILE_DL_FETCH_URL
ZEND_GET_MODULE(fetch_url)
#endif

/* {{{ PHP_INI
 */
/* Remove comments and fill if you need to have entries in php.ini
PHP_INI_BEGIN()
    STD_PHP_INI_ENTRY("fetch_url.global_value",      "42", PHP_INI_ALL, OnUpdateLong, global_value, zend_fetch_url_globals, fetch_url_globals)
    STD_PHP_INI_ENTRY("fetch_url.global_string", "foobar", PHP_INI_ALL, OnUpdateString, global_string, zend_fetch_url_globals, fetch_url_globals)
PHP_INI_END()
*/
/* }}} */

/* {{{ php_fetch_url_init_globals
 */
/* Uncomment this function if you have INI entries
static void php_fetch_url_init_globals(zend_fetch_url_globals *fetch_url_globals)
{
 fetch_url_globals->global_value = 0;
 fetch_url_globals->global_string = NULL;
}
*/
/* }}} */

/* {{{ PHP_MINIT_FUNCTION
 */
PHP_MINIT_FUNCTION(fetch_url)
{
 /* If you have INI entries, uncomment these lines
 REGISTER_INI_ENTRIES();
 */
 zend_class_entry fetch_ce;
 INIT_CLASS_ENTRY(fetch_ce, FETCH_CLASS_NAME, fetch_url_method);

 g_fetch_ce = zend_register_internal_class(&fetch_ce TSRMLS_CC);
 zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("body"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_null(g_fetch_ce, ZEND_STRL("errmsg"), ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_long(g_fetch_ce, ZEND_STRL("errno"), 0, ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_null(g_fetch_ce, ZEND_STRL("httpCode"), ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("cookies"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("headers"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_null(g_fetch_ce, ZEND_STRL("send_headers"), ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_long(g_fetch_ce, ZEND_STRL("allow_redirect"), 1, ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_long(g_fetch_ce, ZEND_STRL("connect_timeout"), 5, ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("method"), ZEND_STRL("get"), ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_long(g_fetch_ce, ZEND_STRL("multilpart"), 0, ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_stringl(g_fetch_ce, ZEND_STRL("data"), ZEND_STRL(""), ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_long(g_fetch_ce, ZEND_STRL("read_timeout"), 60, ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_null(g_fetch_ce, ZEND_STRL("binary_data"), ZEND_ACC_PROTECTED TSRMLS_CC);
 zend_declare_property_null(g_fetch_ce, ZEND_STRL("upload_filepaths"), ZEND_ACC_PROTECTED TSRMLS_CC);
 return SUCCESS;
}
/* }}} */

/* {{{ PHP_MSHUTDOWN_FUNCTION
 */
/* Module shutdown hook: does nothing and reports SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(fetch_url)
{
 /* uncomment this line if you have INI entries
 UNREGISTER_INI_ENTRIES();
 */
 return SUCCESS;
}
/* }}} */

/* Remove if there's nothing to do at request start */
/* {{{ PHP_RINIT_FUNCTION
 */
/* Request start hook: does nothing and reports SUCCESS. */
PHP_RINIT_FUNCTION(fetch_url)
{
 return SUCCESS;
}
/* }}} */

/* Remove if there's nothing to do at request end */
/* {{{ PHP_RSHUTDOWN_FUNCTION
 */
/* Request end hook: does nothing and reports SUCCESS. */
PHP_RSHUTDOWN_FUNCTION(fetch_url)
{
 return SUCCESS;
}
/* }}} */

/* {{{ PHP_MINFO_FUNCTION
 */
/* Module info hook: prints a one-row table stating that fetch_url support is enabled. */
PHP_MINFO_FUNCTION(fetch_url)
{
 php_info_print_table_start();
 php_info_print_table_header(2, "fetch_url support", "enabled");
 php_info_print_table_end();

 /* Remove comments if you have entries in php.ini
 DISPLAY_INI_ENTRIES();
 */
}
/* }}} */


源码下载地址:http://l9.yunpan.cn/lk/QEcqErTnKnHIy

延伸 · 阅读

精彩推荐