/**
 * Scrapes embed metadata for a video page hosted on Youku, Tudou or Ku6.
 *
 * The page at $link is downloaded with file_get_contents() and matched against
 * site-specific regular expressions. Each key below is only present in the
 * result when the corresponding pattern matched:
 *
 *   - 'flashvar' : the video id (taken from $link for Youku; from $link or, for
 *                  "index_" pages, from the page body for Ku6; from the page
 *                  body for Tudou)
 *   - 'imageurl' : the thumbnail address
 *   - 'vidurl'   : the swf player address (for Youku this is the part of the
 *                  embed address that precedes "/v.swf")
 *   - 'title'    : the video title
 *
 * Side effect: a Content-Type header matching the site's charset is sent when
 * headers have not been sent yet. It exists only so that test output displays
 * correctly and can be removed.
 *
 * NOTE(review): this file appears to have been extracted from an HTML page and
 * the markup inside several patterns was lost. The <title> patterns below are
 * reconstructions and the Ku6 thumbnail pattern is still incomplete; verify
 * them against the live pages.
 *
 * by hkshadow
 * QQ 2765237
 * date: 2011-06-25 AM 02:32
 * edit: 2011-06-25 PM 17:38
 *
 * @param string $link Address of the video page (anything file_get_contents() can read).
 * @param string $host One of 'youku.com', 'ku6.com' or 'tudou.com'.
 * @return array Associative array with the keys described above; empty for an unknown host.
 */
function CaptureVideo($link, $host) {
    $return = array();

    if ('youku.com' === $host) {
        // Youku serves UTF-8; the header is for test display only and may be removed.
        if (!headers_sent()) {
            header("Content-Type:text/html; charset=utf-8");
        }

        // The id is the run of word characters after "id_". The former
        // character class "[\=|.html]" truncated ids that were not followed
        // by "=" or ".html" (e.g. "id_Xhello" produced "Xhel").
        if (preg_match('/id_(\w+)/', $link, $matches)) {
            $return['flashvar'] = $matches[1];
        }

        // A failed download becomes an empty page so that only link-derived data is returned.
        $text = (string) file_get_contents($link);

        // Reconstructed pattern: the title is the text before the first " - " in <title>.
        preg_match("/<title>(.*?) - (.*)<\/title>/", $text, $title);

        // The original narrowed the search to a container element first, but that
        // pattern had no capture group left, so the thumbnail could never be
        // found. Search the whole page for the ykimg address instead.
        if (preg_match('/http:\/\/g(.*)\.ykimg.com\/(.*)\|"\>/', $text, $imageurl)) {
            $return['imageurl'] = "http://g" . $imageurl[1] . ".ykimg.com/" . $imageurl[2];
        }

        preg_match('/embed src=\"(.*)\/v.swf/', $text, $vidurls);
        if (!empty($vidurls[1])) {
            $return['vidurl'] = $vidurls[1];
        }

        if (isset($title[1])) {
            $return['title'] = $title[1];
        }
    } elseif ('ku6.com' === $host) {
        // Ku6 serves GBK; the header is for test display only and may be removed.
        if (!headers_sent()) {
            header("Content-Type:text/html; charset=gbk");
        }

        $text = (string) file_get_contents($link);

        // Regular pages carry the id in the file name; "index_" pages do not, so
        // for those the id is read from the player address inside the page.
        $isIndexPage = preg_match("/\/index_([\w\-]+)\.html/", $link) > 0;
        if (!$isIndexPage && preg_match("/\/([\w\-]+)\.html/", $link, $matches)) {
            $return['flashvar'] = $matches[1];
        } elseif (preg_match("/refer\/(.*)\/v.swf/", $text, $videourl)) {
            // Only set when the page really contains a player address; the
            // original read an undefined offset here and stored null.
            $return['flashvar'] = $videourl[1];
        }

        preg_match('/http\:(.*)\/v.swf/', $text, $vidurls);
        if (!empty($vidurls[0])) {
            $return['vidurl'] = $vidurls[0];
        }

        preg_match("/\"title\" content=\"(.*)\"\/>/", $text, $title);

        // NOTE(review): this pattern seems to have lost its surrounding markup and
        // currently captures the first line of the page. Restore the element that
        // wraps the thumbnail address before relying on 'imageurl' for Ku6.
        if (preg_match('/(.*)/', $text, $imageurl) && !empty($imageurl[1])) {
            $return['imageurl'] = $imageurl[1];
        }

        if (!empty($title[1])) {
            $return['title'] = $title[1];
        }
    } elseif ('tudou.com' === $host) {
        // Tudou serves GBK; the header is for test display only and may be removed.
        if (!headers_sent()) {
            header("Content-Type:text/html; charset=gbk");
        }

        $tudou = (string) file_get_contents($link);

        if (preg_match("/view\/([\w\-]+)\//", $tudou, $matches)) {
            $return['flashvar'] = $matches[1];
        }

        // Reconstructed pattern: the title is the text before the first "_" in <title>.
        preg_match("/<title>(.*?)_(.*)<\/title>/", $tudou, $title);
        preg_match("/pic:\"(.*)\"/", $tudou, $imageurl);
        preg_match("/,lid = (.*)/", $tudou, $vls);
        preg_match('/,lid_code = lcode = (.*)/', $tudou, $tx);

        // The lid code is quoted in the page source; strip the single quotes.
        $ntx = str_replace("'", "", $tx);
        if (!empty($ntx[1]) && !empty($vls[1])) {
            $return['vidurl'] = "http://www.tudou.com/l/" . $ntx[1] . "/&iid=" . $vls[1] . "/v.swf";
        }

        if (!empty($imageurl[1])) {
            $return['imageurl'] = $imageurl[1];
        }

        if (isset($title[1])) {
            $return['title'] = $title[1];
        }
    }

    return $return;
}
// Demo
// Usage example.
// Only Tudou, Youku and Ku6 are supported so far.
// These sites change their HTML structure from time to time, so if extraction
// stops working, update the corresponding regular expressions.
// by hkshadow 2011-06-25
$host = "youku.com";
$link = 'http://v.youku.com/v_show/id_XMjcxNjU0NjMy.html';
$text = CaptureVideo($link, $host);
echo print_r($text, true);
?>