php curl get网页内容

5 11 月
/**
 * Fetch the body of a URL with cURL, letting cURL follow redirects itself.
 *
 * @param string $url Address to request.
 * @return string|bool Whatever curl_exec() returns: the response body on
 *                     success (CURLOPT_RETURNTRANSFER is on), false on failure.
 */
function get_url_contents($url)
{
    $connect_timeout = 5; // passed as CURLOPT_CONNECTTIMEOUT

    $handle = curl_init();
    curl_setopt_array($handle, array(
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_CONNECTTIMEOUT => $connect_timeout,
        // Follow redirects (e.g. 302) automatically. Kept last so the
        // options above are already applied if this one is rejected.
        CURLOPT_FOLLOWLOCATION => true,
    ));

    $body = curl_exec($handle);
    curl_close($handle);

    return $body;
}

如果遇到 "curl_setopt(): CURLOPT_FOLLOWLOCATION cannot be activated when in safe_mode or an open_basedir is set" 错误,可以换用下面的函数:

/**
 * Fetch the page content at a URL, following redirects through
 * curl_redir_exec() instead of CURLOPT_FOLLOWLOCATION.
 *
 * @param string $url Address to request.
 * @return mixed Whatever curl_redir_exec() returns for the handle.
 */
function get_url($url)
{
    $connect_timeout = 5; // passed as CURLOPT_CONNECTTIMEOUT

    $handle = curl_init();
    curl_setopt_array($handle, array(
        CURLOPT_URL            => $url,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_CONNECTTIMEOUT => $connect_timeout,
    ));

    // curl_redir_exec() is used in place of curl_exec().
    $body = curl_redir_exec($handle);
    curl_close($handle);

    return $body;
}

/**
 * Extract the value of the first Content-Length header found in a raw
 * HTTP response string.
 *
 * The header name is matched case-insensitively and only at the start of a
 * line, so lower-cased headers ("content-length: 12") are recognised and
 * look-alike names such as "X-Content-Length" are not.
 *
 * @param string $str Raw response text (headers, optionally followed by body).
 * @return int The declared length, or 0 when no Content-Length header with a
 *             numeric value is present.
 */
function get_content_length($str)
{
    $matches = array();
    // 'm' lets ^ match after every newline; 'i' ignores header-name case.
    if (!preg_match('/^Content-Length:[ \t]*(\d+)/mi', (string) $str, $matches)) {
        return 0;
    }

    return (int) $matches[1];
}

/**
 * Execute a cURL handle and follow HTTP redirects manually.
 *
 * Intended as a replacement for curl_exec() when CURLOPT_FOLLOWLOCATION
 * cannot be enabled. The function turns on CURLOPT_HEADER and
 * CURLOPT_RETURNTRANSFER on the handle (and leaves them on), performs the
 * request, and, while the response is a redirect with a usable Location
 * header, points the handle at the new URL and recurses.
 *
 * The redirect counter is a static variable, so it is shared by every call
 * in the process; it is reset to 0 on every path that ends a chain.
 *
 * Redirects are re-issued with whatever method/body the handle is configured
 * for; no method rewriting is done for any status code.
 *
 * @param resource $ch A cURL handle with at least CURLOPT_URL set.
 * @return string|bool Body of the final response (headers stripped), the
 *                     body of the redirect response itself when its Location
 *                     cannot be followed, or FALSE when the transfer fails
 *                     or more than 20 requests were needed.
 */
function curl_redir_exec($ch)
{
    static $curl_loops = 0;
    static $curl_max_loops = 20;  // maximum number of requests in one chain

    if ($curl_loops++ >= $curl_max_loops) {
        $curl_loops = 0;
        return FALSE;
    }

    curl_setopt($ch, CURLOPT_HEADER, true);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    $response = curl_exec($ch);
    if ($response === false) {
        // Transfer failed: nothing to split or follow.
        $curl_loops = 0;
        return FALSE;
    }

    // Split headers from body using the header size reported by cURL.
    // This does not depend on a Content-Length header being present, so
    // chunked / length-less responses keep their body.
    $header_size = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $header = (string) substr($response, 0, $header_size);
    $data = (string) substr($response, $header_size);

    $http_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
    if (!in_array($http_code, array(301, 302, 303, 307, 308), true)) {
        $curl_loops = 0;
        return $data;
    }

    // Header names are case-insensitive; trim() also drops the trailing "\r".
    $matches = array();
    $location = preg_match('/^Location:(.*)$/mi', $header, $matches) ? trim($matches[1]) : '';
    $url = ($location !== '') ? parse_url($location) : false;
    if (!is_array($url)) {
        // Missing or unparsable Location: return the redirect response body.
        $curl_loops = 0;
        return $data;
    }

    // URL that was just requested; supplies whatever the Location omits.
    $last_url = parse_url((string) curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
    if (!is_array($last_url)) {
        $last_url = array();
    }
    $last_scheme = isset($last_url['scheme']) ? $last_url['scheme'] : 'http';

    if (isset($url['host'])) {
        // Absolute ("http://host/path") or scheme-relative ("//host/path").
        $scheme = isset($url['scheme']) ? $url['scheme'] : $last_scheme;
        $host = $url['host'];
        $port = isset($url['port']) ? $url['port'] : null;
        $path = (isset($url['path']) && $url['path'] !== '') ? $url['path'] : '/';
    } elseif (!isset($url['scheme']) && isset($last_url['host'])) {
        // Relative reference: reuse scheme, host and port of the last URL.
        $scheme = $last_scheme;
        $host = $last_url['host'];
        $port = isset($last_url['port']) ? $last_url['port'] : null;
        $base_path = (isset($last_url['path']) && $last_url['path'] !== '') ? $last_url['path'] : '/';

        if (!isset($url['path']) || $url['path'] === '') {
            // Query-only reference such as "?page=2".
            $path = $base_path;
        } elseif ($url['path'][0] === '/') {
            $path = $url['path'];
        } else {
            // Path relative to the directory of the last URL. Dot segments
            // ("../") are passed through as-is, not collapsed here.
            $slash = strrpos($base_path, '/');
            $dir = ($slash === false) ? '/' : substr($base_path, 0, $slash + 1);
            $path = $dir . $url['path'];
        }
    } else {
        // No host can be determined (e.g. a non-hierarchical scheme).
        $curl_loops = 0;
        return $data;
    }

    $query = (isset($url['query']) && $url['query'] !== '') ? '?' . $url['query'] : '';
    $new_url = $scheme . '://' . $host . ($port !== null ? ':' . $port : '') . $path . $query;

    curl_setopt($ch, CURLOPT_URL, $new_url);
    return curl_redir_exec($ch);
}

 

发表回复

您的电子邮箱地址不会被公开。 必填项已用 * 标注