怕丢了,说不定以后会用到,存在这里。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;
using System.IO;
using System.Web;
namespace RegexPractice
{
#region Get research result
/// <summary>
/// Runs a fixed list of search queries against www.google.com.hk, extracts the
/// title and link of each result entry from the returned HTML with a regular
/// expression, and saves them to a tab-separated file.
/// </summary>
class Program
{
    /// <summary>Path of the tab-separated output file; it is overwritten on every run.</summary>
    private const string OutputPath = @"c:\abc.tsv";

    /// <summary>Search URL template; {0} receives the URL-encoded query text.</summary>
    private const string SearchUrlFormat = "http://www.google.com.hk/search?hl=zh-CN&q={0}";

    // Earlier patterns, kept for reference (presumably written for other result-page markups):
    //   "(?s)<table cellpadding=\"0\" cellspacing=\"0\" class=\"result\" id=\"(?<id>.*?)\">.*?target=\"_blank\">(?<title>.*?)</a>"
    //   "(?s)<li class=g>.*?class=l>(?<title>.*?)</a>"

    /// <summary>
    /// Matches one result entry. "type" captures whatever follows "class=g" inside
    /// the opening li tag, "link" the first href inside the following h3, and
    /// "title" the anchor content up to the closing a tag.
    /// Built once instead of once per query.
    /// </summary>
    private static readonly Regex ResultRegex = new Regex("(?s)<li class=g(?<type>.*?)>.*?<h3.*?href=\"(?<link>.*?)\".*?>(?<title>.*?)</a>");

    /// <summary>Matches opening and closing em tags so they can be stripped from a title.</summary>
    private static readonly Regex EmTagRegex = new Regex("</?em>");

    /// <summary>
    /// Program entry point: downloads the result page for every query and appends
    /// the extracted rows to the output file.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        string[] queryList = { "张柏芝", "黎姿", "梁洛施", "大s", "李小冉", "董洁" };
        using (StreamWriter sw = new StreamWriter(OutputPath, false))
        {
            foreach (string query in queryList)
            {
                string encodedQuery = HttpUtility.UrlEncode(query, Encoding.UTF8);
                string url = string.Format(SearchUrlFormat, encodedQuery);
                string webContent = DownloadPage(url);
                WriteResults(sw, query, webContent);
            }
        }
    }

    /// <summary>Downloads the body of <paramref name="url"/> as text.</summary>
    /// <param name="url">Absolute HTTP URL to request.</param>
    /// <returns>The full response body decoded as gb2312.</returns>
    private static string DownloadPage(string url)
    {
        // Direct cast instead of "as": a non-HTTP URL now fails with a clear
        // InvalidCastException rather than a NullReferenceException on the next line.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri(url));
        request.CookieContainer = new CookieContainer();

        // NOTE(review): the body is always decoded as gb2312, whatever charset the
        // server declares; if the server answers in another encoding the extracted
        // titles will be garbled - confirm against a live response.
        // The response itself is disposed too, so the connection is released even
        // when reading the stream throws.
        using (WebResponse response = request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader sr = new StreamReader(responseStream, Encoding.GetEncoding("gb2312")))
        {
            return sr.ReadToEnd();
        }
    }

    /// <summary>
    /// Extracts every result entry from <paramref name="html"/> and writes one line
    /// per entry: query, zero-based position among the kept entries, title, link.
    /// </summary>
    /// <param name="writer">Destination for the tab-separated rows.</param>
    /// <param name="query">The original (unencoded) query text, written as the first column.</param>
    /// <param name="html">Raw HTML of the result page.</param>
    private static void WriteResults(TextWriter writer, string query, string html)
    {
        int index = 0;
        foreach (Match m in ResultRegex.Matches(html))
        {
            // Skip entries whose li tag carries id=newsbox or id=imagebox
            // (presumably the embedded news and image blocks rather than ordinary results).
            string type = m.Groups["type"].Value;
            if (type.Contains("id=newsbox") || type.Contains("id=imagebox"))
            {
                continue;
            }

            // Drop the em tags found inside the title.
            string title = EmTagRegex.Replace(m.Groups["title"].Value, "");
            string link = m.Groups["link"].Value;
            writer.WriteLine("{0}\t{1}\t{2}\t{3}", query, index, title, link);
            index++;
        }
    }
}