PHP从网页中提取关键字等经典代码片段 <转>

最新推荐文章于 2021-03-17 07:31:10 发布

Jiaobengg

最新推荐文章于 2021-03-17 07:31:10 发布

阅读量1k

点赞数

分类专栏： php web 文章标签： php

本文链接：https://blog.csdn.net/jiaobeng/article/details/14128705

版权

web 同时被 2 个专栏收录

2 篇文章 0 订阅

订阅专栏

php

1 篇文章 0 订阅

订阅专栏

查看邮件是否已被阅读

<?
error_reporting(0);
Header("Content-Type: image/jpeg");
 
//Get IP
if (!empty($_SERVER['HTTP_CLIENT_IP']))
{
  $ip=$_SERVER['HTTP_CLIENT_IP'];
}
elseif (!empty($_SERVER['HTTP_X_FORWARDED_FOR']))
{
  $ip=$_SERVER['HTTP_X_FORWARDED_FOR'];
}
else
{
  $ip=$_SERVER['REMOTE_ADDR'];
}
 
//Time
$actual_time = time();
$actual_day = date('Y.m.d', $actual_time);
$actual_day_chart = date('d/m/y', $actual_time);
$actual_hour = date('H:i:s', $actual_time);
 
//GET Browser
$browser = $_SERVER['HTTP_USER_AGENT'];
     
//LOG
$myFile = "log.txt";
$fh = fopen($myFile, 'a+');
$stringData = $actual_day . ' ' . $actual_hour . ' ' . $ip . ' ' . $browser . ' ' . "\r\n";
fwrite($fh, $stringData);
fclose($fh);
 
//Generate Image (Es. dimesion is 1x1)
$newimage = ImageCreate(1,1);
$grigio = ImageColorAllocate($newimage,255,255,255);
ImageJPEG($newimage);
ImageDestroy($newimage);
     
?>

从网页中提取关键字

$meta = get_meta_tags('http://www.emoticode.net/');
$keywords = $meta['keywords'];
// Split keywords
$keywords = explode(',', $keywords );
// Trim them
$keywords = array_map( 'trim', $keywords );
// Remove empty values
$keywords = array_filter( $keywords );
 
print_r( $keywords );

查找页面上的所有链接

$html = file_get_contents('http://www.example.com');
 
$dom = new DOMDocument();
@$dom->loadHTML($html);
 
// grab all the on the page
$xpath = new DOMXPath($dom);
$hrefs = $xpath->evaluate("/html/body//a");
 
for ($i = 0; $i < $hrefs->length; $i++) {
       $href = $hrefs->item($i);
       $url = $href->getAttribute('href');
       echo $url.'<br />';
}

自动转换URL，跳转至超链接

function _make_url_clickable_cb($matches) {
    $ret = '';
    $url = $matches[2];
  
    if ( empty($url) )
        return $matches[0];
    
// removed trailing [.,;:] from URL
    if ( in_array(substr($url, -1), array('.', ',', ';', ':')) === true ) {
        $ret = substr($url, -1);
        $url = substr($url, 0, strlen($url)-1);
    }
    return $matches[1] . "<a href=\"$url\" rel=\"nofollow\">$url</a>" . $ret;
}
  
function _make_web_ftp_clickable_cb($matches) {
    $ret = '';
    $dest = $matches[2];
    $dest = 'http://' . $dest;
  
    if ( empty($dest) )
        return $matches[0];
    
// removed trailing [,;:] from URL
    if ( in_array(substr($dest, -1), array('.', ',', ';', ':')) === true ) {
        $ret = substr($dest, -1);
        $dest = substr($dest, 0, strlen($dest)-1);
    }
    return $matches[1] . "<a href=\"$dest\" rel=\"nofollow\">$dest</a>" . $ret;
}
  
function _make_email_clickable_cb($matches) {
    $email = $matches[2] . '@' . $matches[3];
    return $matches[1] . "<a href=\"mailto:$email\">$email</a>";
}
  
function make_clickable($ret) {
    $ret = ' ' . $ret;
    
// in testing, using arrays here was found to be faster
    $ret = preg_replace_callback('#([\s>])([\w]+?://[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]*)#is', '_make_url_clickable_cb', $ret);
    $ret = preg_replace_callback('#([\s>])((www|ftp)\.[\w\\x80-\\xff\#$%&~/.\-;:=,?@\[\]+]*)#is', '_make_web_ftp_clickable_cb', $ret);
    $ret = preg_replace_callback('#([\s>])([.0-9a-z_+-]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,})#i', '_make_email_clickable_cb', $ret);
  
    
// this one is not in an array because we need it to run last, for cleanup of accidental links within links
    $ret = preg_replace("#(<a( [^>]+?>|>))<a [^>]+?>([^>]+?)</a></a>#i", "$1$3</a>", $ret);
    $ret = trim($ret);
    return $ret;
}

创建数据URL

function data_uri($file, $mime) {

$contents=file_get_contents($file);

$base64=base64_encode($contents);

echo "data:$mime;base64,$base64" ;

}

从服务器上下载&保存一个远程图片

$image = file_get_contents('http://www.url.com/image.jpg');
file_put_contents('/images/image.jpg', $image);
//Where to save the image

移除Remove Microsoft Word HTML Tag

function cleanHTML($html) {
/// <summary>
/// Removes all FONT and SPAN tags, and all Class and Style attributes.
/// Designed to get rid of non-standard Microsoft Word HTML tags.
/// </summary>
// start by completely removing all unwanted tags
 
$html = ereg_replace("<(/)?(font|span|del|ins)[^>]*>","",$html);
 
// then run another pass over the html (twice), removing unwanted attributes
 
$html = ereg_replace("<([^>]*)(class|lang|style|size|face)=("[^"]*"|'[^']*'|[^>]+)([^>]*)>","<\1>",$html);
$html = ereg_replace("<([^>]*)(class|lang|style|size|face)=("[^"]*"|'[^']*'|[^>]+)([^>]*)>","<\1>",$html);
 
return $html
}

检测浏览器语言

function get_client_language($availableLanguages, $default='en'){
    if (isset($_SERVER['HTTP_ACCEPT_LANGUAGE'])) {
        $langs=explode(',',$_SERVER['HTTP_ACCEPT_LANGUAGE']);
 
        foreach ($langs as $value){
            $choice=substr($value,0,2);
            if(in_array($choice, $availableLanguages)){
                return $choice;
            }
        }
    }
    return $default;
}

显示Facebook 粉丝数量

<?php
    $page_id = "YOUR PAGE-ID";
    $xml = @simplexml_load_file("http://api.facebook.com/restserver.php?method=facebook.fql.query&query=SELECT%20fan_count%20FROM%20page%20WHERE%20page_id=".$page_id."") or die ("a lot");
    $fans = $xml->page->fan_count;
    echo $fans;
?>