最近写了一个博客类的 ASP.NET 程序,一开始没有什么内容可以往里面写,就先写了一个QQ空间的抓取程序,抓取QQ空间里的文章来充数。
下面是 Demo 的代码:
using HefangKeji;
using System.Text.RegularExpressions;
using System.Web;
namespace Itjingpinshare.temp
{
/// <summary>
/// HTTP handler that downloads a Qzone mobile profile page, extracts the blog
/// article links and the article list entries with regular expressions, and
/// writes them back to the client as a simple HTML listing.
/// </summary>
public class Handler1 : IHttpHandler
{
    // Qzone mobile profile page whose source contains the article list.
    private const string ProfileUrl = "http://m.qzone.com/profile?hostuin=2770710388&no_topbar=0&srctype=10&stat=&g_f=2000000209#mine?res_uin=2770710388&ticket=";

    // Group 1: single-digit key; group 2: the article URL.
    private const string LinkPattern = "\"(\\d)\":\"(http://user.qzone.qq.com/\\d{6,12}/blog/\\d{10})\"";

    // Group 1: text inside <strong class="entry-title">; group 2: text up to the next </p>.
    private const string EntryPattern = "<strong class=\"entry-title\">(.*?)</strong>(.*?)</p>";

    /// <summary>
    /// Fetches the profile page and writes the extracted article list to the response.
    /// </summary>
    /// <param name="context">The current HTTP context; its response receives the generated HTML.</param>
    public void ProcessRequest(HttpContext context)
    {
        context.Response.ContentType = "text/html";

        // Read the source of the Qzone article list page.
        string pageSource = NetHelper.getStrformUrl(ProfileUrl, "utf-8");

        // Write the result to the client browser.
        context.Response.Write(BuildArticleListHtml(pageSource));
    }

    /// <summary>
    /// Builds the HTML listing (linked title plus body text) from the downloaded page source.
    /// </summary>
    /// <param name="pageSource">Raw page source; may be null or empty.</param>
    /// <returns>The generated HTML, or an empty string when there is nothing to render.</returns>
    private static string BuildArticleListHtml(string pageSource)
    {
        // Regex.Matches throws on null input; treat a missing page as "no articles".
        if (string.IsNullOrEmpty(pageSource))
        {
            return string.Empty;
        }

        // Match the article links.
        MatchCollection links = Regex.Matches(pageSource, LinkPattern);
        // Match the article list entries.
        MatchCollection entries = Regex.Matches(pageSource, EntryPattern);

        // StringBuilder avoids re-copying the whole string on every iteration.
        System.Text.StringBuilder output = new System.Text.StringBuilder();
        for (int i = 0; i < entries.Count; i++)
        {
            // Entry i is paired with link match i * 2 (every second link match is used).
            // NOTE(review): presumably each article URL appears twice in the page - confirm.
            int linkIndex = i * 2;
            string title = entries[i].Groups[1].Value;
            string summary = entries[i].Groups[2].Value;

            // NOTE(review): title and summary are scraped markup and are emitted unescaped,
            // as before; encode or sanitize them if the remote page is not trusted.
            if (linkIndex < links.Count)
            {
                output.Append("<a href='").Append(links[linkIndex].Groups[2].Value).Append("'><h1>")
                      .Append(title).Append("</h1></a>");
            }
            else
            {
                // Fewer links than expected: show the title without a link
                // rather than failing the request with an out-of-range index.
                output.Append("<h1>").Append(title).Append("</h1>");
            }

            output.Append("<div>").Append(summary).Append("</div>");
        }

        return output.ToString();
    }

    /// <summary>
    /// Always returns false.
    /// </summary>
    public bool IsReusable
    {
        get
        {
            return false;
        }
    }
}
}
访问结果如下:
楼主还是个学生,最近才学的C#和正则表达式,写得不好,请各位大神帮我完善一下。