c#抓取页面,设置POST数组
2009-04-18 22:57
//这几天在做一个项目
// Sharing a snippet taken from a project I have been working on.
using System.IO;
using System.Text;
using System.Data;
using System.Windows;
using System.Windows.Forms;
using System.Net;
using System.Text.RegularExpressions; // required namespaces

namespace WindowsApplication1
{
    /// <summary>
    /// Posts a fixed search form to the 51job keyword-search endpoint and keeps
    /// the returned HTML. Set the address through <see cref="seturl"/>, call
    /// <see cref="GetPageSource"/>, then read the result from <see cref="getdata"/>.
    /// Errors are reported to the user through message boxes.
    /// </summary>
    class GetHtml
    {
        // Endpoint the POST request is sent to.
        private const string SearchUrl = "http://search.51job.com/jobsearch/keyword_search.php";

        // HTML returned by the last successful request; null until then or after a failure.
        private string source;

        // Address supplied by the caller through seturl.
        private string url;

        /// <summary>
        /// Checks whether the input looks like an HTTP or HTTPS URL.
        /// </summary>
        /// <param name="input">Candidate address; may be null.</param>
        /// <returns>
        /// true when the input starts with "http://" or "https://" (scheme compared
        /// case-insensitively); false otherwise, including for null.
        /// </returns>
        private bool Is_A_URL(string input)
        {
            // Regex.IsMatch throws on null, which used to crash GetPageSource when
            // it was called before seturl; treat null as "not a URL" instead.
            if (input == null)
            {
                return false;
            }

            // This check is deliberately simple; build a stricter pattern if needed.
            return Regex.IsMatch(input, "^http(s)?://", RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Sends the search form as a POST request and returns the response body.
        /// </summary>
        /// <param name="webAddress">
        /// NOTE(review): currently unused - the request always goes to
        /// <c>SearchUrl</c>. Kept as-is so existing callers see the same result;
        /// confirm whether the request should target this address instead.
        /// </param>
        /// <returns>
        /// The response text, or null when a <see cref="WebException"/> occurred
        /// (the error message is shown in a message box in that case).
        /// </returns>
        private string GetSource(string webAddress)
        {
            string str = null;
            try
            {
                Encoding encoding = Encoding.GetEncoding("gb2312");

                // Form fields of the POST body.
                // NOTE(review): the keyword is sent as raw gb2312 bytes without
                // percent-encoding; verify the server keeps accepting that.
                string postData = "stype=" + "2"
                    + "&keywordtype=" + "2"
                    + "&keyword=" + "软件工程"
                    + "&jobarea=" + "0802"
                    + "&fromType=" + "1";
                byte[] POST = encoding.GetBytes(postData);

                HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(SearchUrl);
                myRequest.Method = "POST";
                myRequest.ContentType = "application/x-www-form-urlencoded";
                myRequest.ContentLength = POST.Length;

                // Write the body; the using block closes the stream even if Write throws.
                using (Stream newStream = myRequest.GetRequestStream())
                {
                    newStream.Write(POST, 0, POST.Length);
                }

                // Read the result. Response, stream and reader are disposed so the
                // underlying connection is released on success and on failure.
                using (HttpWebResponse myResponse = (HttpWebResponse)myRequest.GetResponse())
                using (Stream responseStream = myResponse.GetResponseStream())
                // Decode with the same gb2312 encoding used for the request rather
                // than Encoding.Default, which depends on the machine's locale.
                using (StreamReader reader = new StreamReader(responseStream, encoding))
                {
                    str = reader.ReadToEnd();
                }
            }
            catch (WebException e)
            {
                MessageBox.Show(e.Message, "Error", MessageBoxButtons.OK);
            }
            return str;
        }

        /// <summary>
        /// Validates the configured address and, when it is acceptable, fetches the
        /// page and stores it for <see cref="getdata"/>. Shows a message box when
        /// the address is missing or does not start with http:// or https://.
        /// </summary>
        public void GetPageSource()
        {
            if (Is_A_URL(url)) // is the address acceptable?
            {
                // The address is passed through unchanged: lowercasing a whole URL
                // would corrupt case-sensitive paths and query strings.
                source = GetSource(url); // fetch the page content
            }
            else
            {
                MessageBox.Show("输入的网址不正确,请重新输入!");
            }
        }

        /// <summary>
        /// Sets the address that <see cref="GetPageSource"/> validates.
        /// </summary>
        public string seturl
        {
            set { url = value; }
        }

        /// <summary>
        /// Gets the HTML stored by the last call to <see cref="GetPageSource"/>,
        /// or null when nothing has been fetched.
        /// </summary>
        public string getdata
        {
            get { return source; }
        }
    }
}