google sitemap 制作工具

Google SiteMap简介:

  Google推出的Sitemap,是对原来robots.txt的扩展,它使用XML格式来记录整个网站的信息并供Google读取,使搜索引擎能更快更全面的收录网站的内容。Sitemap的作用就好像为网站提供了整站的RSS,而Google就是这些RSS的订阅者,只要网站有更新就会自动通知Google。这样一来,搜索引擎的收录由被动的Pull变成了主动的Push,辛苦的Google爬虫们终于可以松一口气了。
  不过就目前来说,Google Sitemap还不是一个能让每个站长都方便使用的东西。其要求的XML格式虽不是很复杂,但要手工制作还是需要费不少功夫。虽然Google提供了Sitemap自动生成器,但目前只有python语言的版本, 能用上的也是少数,这个在实践中是得到了证实的,我看了N天都是一头雾水。不过Google Sitemap是按照创作共用协议发布的,Sitemap生成器也是开源的,所以相关的工具也很快出现了,我在网上就曾找到针对L-BLOG(此工具你可以参阅本博客直接生成XML的Google SiteMap代码ASP 4 LBLOG版本一文)、O-BLOG、DVBBS自动生成Sitemap的工具,只要简单的修改,各位站长和Blogger们就可以方便的使用这项服务了。
  简单点说,就是你以XML的格式向Google提交一个站点地图,以后Google就会根据这个地图,阶段性地抓取该地图指出的页面。抱怨Google收录页面太少的朋友不妨一试。
  以下是我找到的两个版本(ASP和PHP版)的网站SiteMap.xml生成工具。我现在用的就是下面的ASP版,已经过测试,我的网站缘聚杭州生成的SiteMap文件在:http://www.coosuo.com/SiteMap.xml。而下面的PHP版代码,由于我不是很懂PHP,没有测试过,大家可以试试,如有错误,敬请谅解!

ASP版代码:
程序代码:
<%
Server.ScriptTimeout=50000
' sitemap_gen.asp
' A simple script to automatically produce sitemaps for a webserver, in the Google Sitemap Protocol (GSP)
' by Francesco Passantino
' www.iteam5.net/francesco/sitemap
' v0.2 released 5 june 2005 (Listing a directory tree recursively improvement)
'
' BSD 2.0 license,
' http://www.opensource.org/licenses/bsd-license.php
' 收集整理:        重庆森林@im286.com
' 部分修改:    独人向晚QQ19433114


' ---- Configuration -------------------------------------------------------
session("server")="http://www.coosuo.com"                ' your domain (prefixed to every path written to <loc>)
vDir = "/"                                              ' directory to build the sitemap for, relative to the site root

' ---- Build the sitemap in memory -----------------------------------------
' The document is accumulated in the script-level variable "str"; ShowSubFolders
' appends to that same variable, and getfilelink reads the script-level "root".
Set objfso = CreateObject("Scripting.FileSystemObject")
root = Server.MapPath(vDir)

str = "<?xml version='1.0' encoding='UTF-8'?>" & vbCrLf
str = str & "<urlset xmlns='http://www.google.com/schemas/sitemap/0.84'>" & vbCrLf

' Files that sit directly in the starting folder ...
Set objFolder = objfso.GetFolder(root)
Set colFiles = objFolder.Files
For Each objFile In colFiles
        str = str & getfilelink(objFile.Path, objFile.DateLastModified) & vbCrLf
Next
' ... followed by everything below it.
Call ShowSubFolders(objFolder)

str = str & "</urlset>" & vbCrLf
' Release the FileSystemObject that was actually created above
' (the original released an unrelated, never-assigned variable named "fso").
Set objfso = Nothing

' ---- Write the result as UTF-8 -------------------------------------------
Set objStream = Server.CreateObject("ADODB.Stream")
With objStream
    .Open
    .Charset = "utf-8"
    .WriteText str                                      ' WriteText is a method, so it is called rather than assigned to
    .SaveToFile Server.MapPath("/sitemap.xml"), 2       ' name of the generated XML file; 2 = overwrite an existing file
    .Close
End With
Set objStream = Nothing

If Err.Number = 0 Then
    Response.Write("<script>alert('成功生成站点地图!');history.back();</script>")
    Response.End
End If

' Walks every sub-folder of objFolder depth-first. For each folder accepted by
' Folderpermission, one getfilelink line is appended to the script-level string
' "str" for the folder itself and for every file in it, then the walk descends
' into that folder. Folders rejected by Folderpermission are skipped together
' with everything beneath them.
Sub ShowSubFolders(objFolder)
        Dim childFolder, childFile
        For Each childFolder In objFolder.SubFolders
                If folderpermission(childFolder.Path) Then
                        str = str & getfilelink(childFolder.Path, childFolder.DateLastModified) & vbCrLf
                        For Each childFile In childFolder.Files
                                str = str & getfilelink(childFile.Path, childFile.DateLastModified) & vbCrLf
                        Next
                        ShowSubFolders(childFolder)
                End If
        Next
End Sub


' Builds the <url> element for one file-system entry.
'   file     - physical path of the entry (as returned by the .Path property)
'   datafile - last-modified date of the entry
' Returns the "<url>...</url>" string, or Empty when FileExtensionIsBad rejects
' the entry. Reads the script-level "root" and session("server").
Function getfilelink(file,datafile)
        Dim relPath, filedate

        ' Strip the physical root (case-insensitive compare) to get a site-relative path.
        relPath = Replace(file, root, "", 1, -1, 1)
        ' Physical paths use "\" as separator; URLs need "/".
        relPath = Replace(relPath, "\", "/")

        If FileExtensionIsBad(relPath) Then Exit Function

        ' YYYY-MM-DD with zero-padded month and day.
        filedate = Year(datafile) & "-" & Right("0" & Month(datafile), 2) & "-" & Right("0" & Day(datafile), 2)

        getfilelink = "<url><loc>" & Server.HTMLEncode(session("server") & relPath) & "</loc><lastmod>" & filedate & "</lastmod><changefreq>daily</changefreq><priority>1.0</priority></url>"
        Response.Flush
End Function


' Decides whether a folder may appear in the sitemap.
'   pathName - path of the folder; "\" and "/" separators are treated alike
' Returns False when the path contains any entry of the exclusion list
' (case-insensitive substring match), True otherwise.
Function Folderpermission(pathName)
        Dim PathExclusion, PathExcluded, normalizedPath

        ' Folders to leave out of the sitemap
        PathExclusion=Array("/blog","/temp","/_vti_cnf","_vti_pvt","_vti_log","cgi-bin","/admin","/edu")

        ' Compare on a single separator style: the exclusion entries are written
        ' with "/", which would never match a path that uses "\" separators.
        normalizedPath = Replace(pathName, "\", "/")

        Folderpermission = True
        For Each PathExcluded In PathExclusion
                If InStr(1, normalizedPath, Replace(PathExcluded, "\", "/"), vbTextCompare) > 0 Then
                        Folderpermission = False
                        Exit For
                End If
        Next
End Function


' Returns True when sFileName must NOT be listed in the sitemap: the name is
' blank, or the text after its last "." is not in the allowed list below
' (compared case-insensitively). A name without any "." is compared as a whole.
Function FileExtensionIsBad(sFileName)
        Dim allowedExts, candidate, wantedExt

        ' Adjust to taste (http://www.googleguide.com/file_type.html).
        ' Only files whose extension appears here are included in the sitemap.
        allowedExts = Array("asp","png","jpeg","zip","pdf","ps","html","htm","php","wk1","wk2","wk3","wk4","wk5","wki","wks","wku","lwp","mw","xls","ppt","doc","wks","wps","wdb","wri","rtf","ans","txt")

        ' Pessimistic default; flipped only on an exact extension match.
        FileExtensionIsBad = True
        If Len(Trim(sFileName)) = 0 Then Exit Function

        wantedExt = UCase(Mid(sFileName, InStrRev(sFileName, ".") + 1))
        For Each candidate In allowedExts
                If UCase(candidate) = wantedExt Then
                        FileExtensionIsBad = False
                        Exit Function
                End If
        Next
End Function
%>


PHP版代码:
程序代码:
<?php
// Declare the same encoding (UTF-8) that the XML declaration echoed further down announces.
header('Content-Type: application/xml; charset=UTF-8', true);

/*
 * The credits are kept in a PHP comment instead of an HTML/XML comment:
 * an XML declaration is only valid at the very start of the document, so
 * nothing may be sent to the client before it.
 *
 * @author Tobias Kluge, enarion.net
 * @version 0.2, 2005-06-05 17:40 PT
 * @status working
 * @update Aditya Naik, so1o@so1o.net
 * @Licence: LGPL
 *
 * editor:        MildSeven@im286.com
 */
$website = "http://my.xxxxx.com"; /* change this: prefixed to every path written to <loc> */
$page_root = "/usr/local/psa/home/vhosts/subdomains/my/httpdocs";        /* change this: directory the scan starts from */

/* maybe change this: */
$changefreq = "weekly"; //"always", "hourly", "daily", "weekly", "monthly", "yearly" and "never".
$priority = 0.8;
/* Current date/time as YYYY-MM-DDThh:mm:ss+hh:mm.
   The T must be backslash-escaped: an unescaped T in a date() format is
   replaced by the timezone abbreviation. */
$last_modification = date('Y-m-d\TH:i:s') . substr(date("O"),0,3) . ":" . substr(date("O"),3);

/* list of allowed directories (sub-directories are only scanned when they match) */
$allow_dir = array("web");

/* list of disallowed directories */
$disallow_dir = array("admin", "_notes");

/* list of disallowed file types (matched as substrings of the file name) */
$disallow_file = array(
  ".inc", ".old", ".save", ".txt", ".js", "~",
  ".LCK", ".zip", ".ZIP", ".CSV", ".csv", ".css",
  ".class", ".jar", ".mno", ".bak", ".lck", ".BAK",
);

/**
 * Simple compare function: equals.
 *
 * @param mixed $key   value to look for
 * @param mixed $array list to search; anything that is not an array counts as empty
 * @return bool true when some element of $array is loosely equal (==) to $key
 */
function ar_contains($key, $array) {
  // An unset configuration list arrives here as null; treat it as "no entries"
  // so that no warning text ends up inside the generated XML.
  if (!is_array($array)) {
    return false;
  }
  // in_array() without its strict flag compares with ==, like the loop it replaces.
  return in_array($key, $array);
}

/**
 * Better compare function: contains.
 *
 * @param string $key   text to inspect (a file name or a path)
 * @param mixed  $array list of substrings; anything that is not an array counts as empty
 * @return bool true when at least one non-empty element of $array occurs in $key
 */
function fl_contains($key, $array) {
  if (!is_array($array)) {
    return false;
  }
  foreach ($array as $val) {
    // An empty pattern carries no information; on PHP 8 strpos() would report
    // it as found at offset 0 and every name would match.
    if ((string) $val === '') {
      continue;
    }
    // Compare with !== false: a hit at offset 0 is still a hit.
    if (strpos($key, $val) !== false) {
      return true;
    }
  }

  return false;
}

/**
 * Replaces the substring $old_offset by $offset in every element of $array.
 *
 * @param mixed  $array      list of strings; anything that is not an array yields an empty list
 * @param string $old_offset substring to replace
 * @param string $offset     replacement text
 * @return array the rewritten strings, re-indexed from 0
 */
function changeOffset($array, $old_offset, $offset) {
  // getFiles() may hand over a non-array when a directory cannot be opened.
  if (!is_array($array)) {
    return array();
  }
  // str_replace() processes every element of an array subject;
  // array_values() discards the original keys.
  return array_values(str_replace($old_offset, $offset, $array));
}

/**
 * Walks recursively through all directories starting at $directory and
 * collects every file that passes the filter lists.
 *
 * Filters (read from globals):
 *  - entries whose name starts with "." are skipped;
 *  - a sub-directory is skipped when its name equals, or its path contains,
 *    an entry of $disallow_dir;
 *  - a sub-directory is only descended into when its name equals, or its
 *    path contains, an entry of $allow_dir;
 *  - a file is skipped when its name contains an entry of $disallow_file.
 *
 * Based on code by Lasse Dalegaard, http://php.net/opendir
 *
 * @param string $directory        directory to scan
 * @param string $directory_orig   directory the scan started from (defaults to $directory)
 * @param string $directory_offset text that replaces $directory_orig in the returned paths
 * @return array collected paths; empty when $directory cannot be opened
 */
function getFiles($directory, $directory_orig = "", $directory_offset="") {
  global $disallow_dir, $disallow_file, $allow_dir;

  if ($directory_orig == "") $directory_orig = $directory;

  $found = array();

  $dir = opendir($directory);
  if (!$dir) {
      // Always hand an array back so callers can iterate or merge safely.
      return $found;
  }

  // Compare against false explicitly: an entry named "0" is falsy and would
  // otherwise end the loop early.
  while (false !== ($file = readdir($dir))) {
      // Skips ".", ".." and every other dot-prefixed entry.
      if ($file === '' || $file[0] == '.') continue;

      $path = $directory . "/" . $file;

      if (is_dir($path)) {
          // Disallowed by bare name or by any part of the path.
          if (ar_contains($file, $disallow_dir) || fl_contains($path, $disallow_dir)) continue;

          // Descend only into explicitly allowed directories.
          if (ar_contains($file, $allow_dir) || fl_contains($path, $allow_dir)) {
              $sub = changeOffset(getFiles($path, $directory_orig, $directory_offset), $directory_orig, $directory_offset);
              if (is_array($sub)) {
                  $found = array_merge($found, $sub);
              }
          }
      } else {  // files
          if (fl_contains($file, $disallow_file)) continue;
          $found[] = str_replace($directory_orig, $directory_offset, $path);
      }
  }

  closedir($dir);
  return $found;
}

$a = getFiles($page_root);
if (!is_array($a)) {
    // getFiles() returns nothing when the root directory cannot be opened.
    $a = array();
}

echo '<?xml version="1.0" encoding="UTF-8"?>' . "\n";
?>
<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">
<?php
// Full "<?php" tags are used throughout: the short "<?" form is only parsed
// when short_open_tag is enabled.
foreach ($a as $file) {
    // Content modification time; filectime() would report the inode change time.
    $mtime = filemtime($page_root . $file);
    // UTC offset in effect at that moment, e.g. "+0200".
    $tz = date("O", $mtime);
    // The T is escaped so date() prints it literally: YYYY-MM-DDThh:mm:ss+hh:mm
    $lastmod = date('Y-m-d\TH:i:s', $mtime) . substr($tz, 0, 3) . ":" . substr($tz, 3);
?>
  <url>
      <loc><?php echo htmlspecialchars(utf8_encode($website . $file)); /* escape &, <, > for XML */ ?></loc>
      <lastmod><?php echo $lastmod; ?></lastmod>
      <changefreq><?php echo $changefreq; ?></changefreq>
      <priority><?php echo $priority; ?></priority>
  </url>
<?php
}
?>
</urlset>

参数说明:
$website--你的域名
$page_root--你的站点的绝对路径
$changefreq--更新频率 "always"(始终), "hourly"(每小时), "daily"(每天), "weekly"(每周), "monthly"(每月), "yearly"(每年), "never"(从不)
$priority--页面优先级(0.0 到 1.0)
$allow_dir--允许列表的目录
$disallow_dir--禁止列表的目录
$disallow_file--禁止列表的文件类型

制作过程说明:
  将以上代码复制到本地存为sitemap.asp或sitemap.php,上传到你的服务器网站的根目录下,运行一下就可以在你指定的目录中(建议最好是指定为根目录)自动生成一个sitemap.xml文件了。最后进入google提交页面:https://www.google.com/webmasters/sitemaps/login,有gmail的可以用gmail直接登录。登录后把生成的xml文件地址按步骤提交就可以了。注:你的网站如果增加了新的页面,可以在增加完后再运行一下sitemap.asp或sitemap.php,并再重新提交一下sitemap.xml文件,这样就可以保证sitemap.xml文件的不断更新了! 
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值