1.站点地图构建:
/// <summary>
/// Action result that writes a <see cref="Sitemap"/> to the HTTP response as XML.
/// </summary>
public class SitemapResult : ActionResult
{
    /// <summary>
    /// Creates a result for the given sitemap.
    /// </summary>
    /// <param name="sitemap">The sitemap to serialize when the result executes.</param>
    public SitemapResult(Sitemap sitemap)
    {
        this.Sitemap = sitemap;
    }

    /// <summary>
    /// The sitemap that will be written to the response.
    /// </summary>
    public Sitemap Sitemap { get; private set; }

    /// <summary>
    /// Sets the response content type to "text/xml" and serializes the sitemap
    /// to the response output.
    /// </summary>
    /// <param name="context">The controller context of the current request.</param>
    /// <exception cref="ArgumentNullException"><paramref name="context"/> is null.</exception>
    public override void ExecuteResult(ControllerContext context)
    {
        if (context == null)
        {
            throw new ArgumentNullException("context");
        }

        var response = context.HttpContext.Response;
        response.ContentType = "text/xml";

        XmlSerializer serializer = new XmlSerializer(typeof(Sitemap));
        serializer.Serialize(response.Output, this.Sitemap);
    }
}

/// <summary>
/// Sitemap entity: a list of <see cref="SitemapUrl"/> entries serialized as the
/// "urlset" root element in the sitemaps.org 0.9 namespace.
/// </summary>
[XmlRoot(ElementName = "urlset", Namespace = "http://www.sitemaps.org/schemas/sitemap/0.9")]
[Serializable]
public class Sitemap : List<SitemapUrl>
{
    /// <summary>
    /// Serializes this sitemap as XML to the supplied writer.
    /// </summary>
    /// <param name="writer">Destination writer; it stays open and is owned by the caller.</param>
    // NOTE(review): XmlInclude on a method is normally meant for web-service methods;
    // it looks unnecessary here - confirm before removing.
    [XmlInclude(typeof(SitemapUrl))]
    public void Serialize(TextWriter writer)
    {
        XmlSerializer serializer = new XmlSerializer(typeof(Sitemap));

        // Deliberately not disposed: closing the XmlTextWriter would also close
        // the caller's TextWriter. Flush instead so nothing stays buffered.
        XmlTextWriter xmlTextWriter = new XmlTextWriter(writer);
        serializer.Serialize(xmlTextWriter, this);
        xmlTextWriter.Flush();
    }
}

/// <summary>
/// A single "url" entry of a sitemap.
/// </summary>
[XmlRoot(ElementName = "url")]
[XmlType(TypeName = "url")]
[Serializable]
public class SitemapUrl
{
    // Date format written to (and preferred when reading) the "lastmod" element.
    private const string LastModifiedFormat = "yyyy-MM-dd";

    private DateTime lastModified;

    /// <summary>
    /// Page URL, serialized as the "loc" element.
    /// </summary>
    [XmlElement(ElementName = "loc")]
    public string Location { get; set; }

    /// <summary>
    /// Last modification date, serialized as the "lastmod" element in
    /// yyyy-MM-dd form. If no date has been assigned, the first read
    /// initializes it to the current local time.
    /// </summary>
    /// <remarks>
    /// The setter first tries the exact yyyy-MM-dd form with the invariant
    /// culture, so values produced by the getter always round-trip; any other
    /// text falls back to the culture-sensitive <see cref="DateTime.Parse(string)"/>.
    /// </remarks>
    [XmlElement(ElementName = "lastmod")]
    public string LastModified
    {
        get
        {
            if (DateTime.MinValue.Equals(this.lastModified))
            {
                this.lastModified = DateTime.Now;
            }

            // Invariant culture keeps the output Gregorian with ASCII digits
            // regardless of the thread's current culture.
            return this.lastModified.ToString(
                LastModifiedFormat,
                System.Globalization.CultureInfo.InvariantCulture);
        }
        set
        {
            DateTime parsed;
            bool isIsoDate = DateTime.TryParseExact(
                value,
                LastModifiedFormat,
                System.Globalization.CultureInfo.InvariantCulture,
                System.Globalization.DateTimeStyles.None,
                out parsed);

            if (!isIsoDate)
            {
                // Backward compatible path: accept whatever the current
                // culture understands (throws on null or unparsable text).
                parsed = DateTime.Parse(value);
            }

            this.lastModified = parsed;
        }
    }

    /// <summary>
    /// Expected change frequency, serialized as the "changefreq" element.
    /// </summary>
    [XmlElement(ElementName = "changefreq")]
    public ChangeFrequency ChangeFrequency { get; set; }

    /// <summary>
    /// Relative priority of the URL, serialized as the "priority" element.
    /// </summary>
    [XmlElement(ElementName = "priority")]
    public double Priority { get; set; }
}

/// <summary>
/// Values for the "changefreq" element. Member names are lowercase because
/// they are written to the XML as-is.
/// </summary>
public enum ChangeFrequency
{
    always,
    hourly,
    daily,
    weekly,
    monthly,
    yearly,
    never
}
Controller使用:
/// <summary>
/// Builds the sitemap for one page of examinations and returns it as XML.
/// </summary>
/// <param name="subjectID">Subject identifier passed to GetExamList.</param>
/// <param name="pageIndex">Page index passed to GetExamList.</param>
/// <returns>A <see cref="SitemapResult"/> containing one URL entry per examination.</returns>
public ActionResult Index(int subjectID = 0, int pageIndex = 1)
{
    int total = 0;
    int pageSize = 2000;
    var list = GetExamList(subjectID, pageIndex, pageSize, ref total);

    Sitemap site = new Sitemap();

    // Pass the date in the same yyyy-MM-dd form the sitemap emits instead of a
    // culture-specific long date string that has to be parsed back again.
    var time = DateTime.Now.ToString(
        "yyyy-MM-dd",
        System.Globalization.CultureInfo.InvariantCulture);

    foreach (var p in list)
    {
        // The title is user-facing text and may contain spaces, '&', '#' or
        // non-ASCII characters, so it must be escaped before it is placed in
        // the query string.
        string escapedTitle = Uri.EscapeDataString(
            Convert.ToString(p.ExaminationTitle) ?? string.Empty);

        site.Add(new SitemapUrl()
        {
            ChangeFrequency = ChangeFrequency.weekly,
            LastModified = time,
            Location = string.Format(
                "http://Socials/home/view/{0}?t={1}",
                p.ExaminationID,
                escapedTitle),
            Priority = 0.7
        });
    }

    return new SitemapResult(site);
}
2.判断是否为搜索引擎:
/// <summary>
/// Determines whether a User-Agent string matches the crawler pattern
/// configured in the "SpiderKey" application setting.
/// </summary>
/// <param name="useragent">The User-Agent header value; may be null or empty.</param>
/// <returns>
/// True when the user agent matches the configured pattern (case-insensitive);
/// false when the user agent or the configured pattern is null or empty.
/// </returns>
public static bool IsSearchEngine(string useragent)
{
    if (string.IsNullOrEmpty(useragent))
    {
        return false;
    }

    // Example value (presumably what the author intended; verify against the real config):
    // Googlebot|Feedfetcher-Google|Baiduspider|Yahoo\s*\!\s*Slurp|YodaoBot|Sosoimagespider|Sosospider|Sogou\s*Web\s*Spider
    string spiderPattern = System.Configuration.ConfigurationManager.AppSettings["SpiderKey"];

    // A missing setting used to throw NullReferenceException, and an empty
    // pattern would match every visitor; treat both as "not a crawler".
    if (string.IsNullOrEmpty(spiderPattern))
    {
        return false;
    }

    // The static IsMatch overload goes through the Regex cache, so the pattern
    // is not re-parsed (or re-compiled) on every request.
    return System.Text.RegularExpressions.Regex.IsMatch(
        useragent,
        spiderPattern,
        System.Text.RegularExpressions.RegexOptions.IgnoreCase);
}

/// <summary>
/// Renders the "SearchView" view for search-engine crawlers and the default
/// view for everyone else.
/// </summary>
/// <returns>The view result chosen for the current request.</returns>
public ActionResult View()
{
    // NOTE(review): "entity" is not defined in this snippet; presumably it is
    // loaded elsewhere in the controller - confirm.
    string userAgent = this.HttpContext.Request.ServerVariables["Http_User_Agent"];

    // Search engines get a different view from regular visitors.
    if (IsSearchEngine(userAgent))
    {
        return View("SearchView", entity);
    }

    return View(entity);
}
3. 检验是否成功:
修改浏览器 Http_User_Agent 值,以达到模拟搜索引擎爬虫浏览效果。
以火狐为例:
3.1地址栏键入:about:config 回车
3.2 设置:general.useragent.override–>"Baiduspider" 。可以达到模拟百度爬虫效果。
火狐默认(注:以下示例字符串实际为 Chrome 的 User-Agent):Mozilla/5.0 (Windows; U; Windows NT 6.0; zh-CN) AppleWebKit/525.19 (KHTML, like Gecko) Chrome/0.4.154.29 Safari/525.19
设置完成后,可以浏览 http://www.XXX.com/p-259119935.html 此网址,并与其他未设置的浏览器比较页面浏览效果。