From:http://www.pulog.org/Resources/198/PHP-spider/
代码是PHP写的,目的是从milw0rm抓取最新的exploit。虽然现在milw0rm已经停止更新了,但只要
把这个程序改改就可以抓其他exploit站点的数据了,如exploit-db。先收藏在这里,有时间改成py的。
<?php
/***************************************
Milw0rm Spider V1.0
Downloads all exploits on milw0rm.com
into ./exploits/title.txt
Coded by Affix <affix@FedoraProject.org>
***************************************/
/**
 * Small scraper that downloads exploit pages from milw0rm.com and stores the
 * contents of each page's <pre> block in ./exploits/<title>.txt.
 *
 * Progress and errors are printed as HTML fragments.
 */
class milw0rm {
    /** Description of the most recent failure reported by save_page(). */
    var $error = "";

    /**
     * Returns the text found between the first occurrence of $start and the
     * first occurrence of $end that follows it.
     *
     * @param string $haystack Text to search.
     * @param string $start    Opening delimiter.
     * @param string $end      Closing delimiter.
     * @return string|false The enclosed text, or false when either delimiter
     *                      is missing.
     */
    function split_tag($haystack, $start, $end) {
        $haystack = (string) $haystack;

        $open = strpos($haystack, $start);
        if ($open === false) {
            return false;
        }
        $content_start = $open + strlen($start);

        // Look for the closing delimiter only AFTER the opening one; searching
        // from the beginning would produce a negative length whenever $end
        // also appears earlier in the text.
        $close = strpos($haystack, $end, $content_start);
        if ($close === false) {
            return false;
        }

        return substr($haystack, $content_start, $close - $content_start);
    }

    /**
     * Builds the path of the file an exploit with the given (already
     * sanitised) name is written to.
     *
     * @param string $name Sanitised exploit title.
     * @return string Path of the form ./exploits/<name>.txt
     */
    function file_path($name) {
        return "./exploits/" . $name . ".txt";
    }

    /**
     * Downloads one exploit page and writes its <pre> content to disk.
     *
     * On failure the reason is stored in $this->error.
     *
     * @param int|string $id Exploit ID appended to the milw0rm URL.
     * @return string|false The sanitised title used as file name, or false on
     *                      failure.
     */
    function save_page($id) {
        $this->error = "";

        $page = file_get_contents("http://milw0rm.com/exploits/" . $id);

        // Pause after every request (successful or not) to throttle the crawl.
        $rnd = rand(10,20);
        print "Sleeping for $rnd Seconds<br>";
        sleep($rnd);

        if ($page === false) {
            $this->error = "Download Failed";
            return false;
        }

        $data = $this->split_tag($page, "<pre>", "</pre>");
        $name = $this->split_tag($page, "<title>", "</title>");
        if ($data === false || $name === false) {
            $this->error = "No Content";
            return false;
        }

        $data = str_replace("milw0rm.com", "Affix' Exploit Spider", $data);
        // Path separators in the title would otherwise be interpreted as
        // directories; surrounding whitespace makes for awkward file names.
        $name = trim(str_replace(array("/", "\\"), "-", $name));
        if ($name === "") {
            $this->error = "No Content";
            return false;
        }

        if (!is_dir("./exploits") && !mkdir("./exploits")) {
            $this->error = "File Write Error!";
            return false;
        }

        // file_put_contents opens, writes and closes in one call and returns
        // false on failure, so an empty (0-byte) exploit body is not mistaken
        // for a write error.
        if (file_put_contents($this->file_path($name), $data) === false) {
            $this->error = "File Write Error!";
            return false;
        }

        return $name;
    }

    /**
     * Crawls every exploit ID in the inclusive range [$from, $to], printing
     * one status line per ID.
     *
     * @param int $from First exploit ID to fetch.
     * @param int $to   Last exploit ID to fetch.
     * @return void
     */
    function start($from=1, $to=20) {
        print "<div align=center><h1>Milw0rm.com Exploit Spider v1.0 Coded By Affix</h1></div><br>";
        for ($id = $from; $id <= $to; $id++) {
            print "Trying to get exploit #$id of $to<br>";
            $file = $this->save_page($id);
            // Compare against false explicitly: a title such as "0" is a
            // valid, successfully written name.
            if ($file !== false) {
                // The title comes from a remote page, so escape it before
                // echoing it back as HTML.
                $title = htmlspecialchars($file, ENT_QUOTES);
                $path = htmlspecialchars($this->file_path($file), ENT_QUOTES);
                print "<font color=green><strong>$title Written to $path!</strong></font><br>";
            } else {
                print "<font color=red>#$id Skipped Due to Error : {$this->error}</font><br>";
            }
        }
    }
}
// Entry point: run the spider over exploit IDs 1 through 6941 (inclusive).
$milw0rm = new milw0rm();
$milw0rm->start(1, 6941);
?>