using System;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace web
{
    /// <summary>
    /// Code-behind page that downloads a listing page, extracts the product
    /// detail links from it, downloads each detail page and writes the
    /// extracted fields to the HTTP response, one per line.
    /// </summary>
    public partial class dang : System.Web.UI.Page
    {
        /// <summary>Options used for every pattern that has to span line breaks.</summary>
        private const RegexOptions BlockOptions = RegexOptions.IgnoreCase | RegexOptions.Singleline;

        /// <summary>
        /// Fragments removed from the listing page (in this order) before the
        /// product links are isolated.
        /// </summary>
        private static readonly string[] ListingNoisePatterns =
        {
            @"<!DOCTYPE .*?第1页",
            @"<!--页尾 开始 -->.*?</html>",
            @"<div id=""divBottomPageNavi"".*?</div>.*?</div>.*?</div>.*?</div>",
            @"</div><div class='list_r_title_text3a'>.*?list_r_line""></div>",
            @"<div class=""clear"">.*?",
            @"<div class=""list_r_list"">.*?<h2>",
            @"</h2>.*?</div>.*?</div>",
        };

        /// <summary>
        /// Fragments removed from a detail page (in this order) before the
        /// individual fields are extracted.
        /// </summary>
        private static readonly string[] DetailNoisePatterns =
        {
            @"<!DOCTYPE .*?您最近的浏览历史",
            @"<h2 class=""black14"">.*?<!--价格购买区结束-->",
            @"<a name=""review_point""></a>.*?</html>",
        };

        /// <summary>
        /// Fields written for every detail page, in output order. Each entry is
        /// { pattern locating the field, leading text to strip, trailing text to strip }.
        /// </summary>
        private static readonly string[][] DetailFields =
        {
            new[] { @"<li>出版时间:.*?</li>", "<li>出版时间:", "</li>" },
            new[] { @"<li>字 数:.*?</li>", "<li>字 数:", "</li>" },
            new[] { @"<li>版 次:.*?</li>", "<li>版 次:", "</li>" },
            new[] { @"<li>页 数:.*?</li>", "<li>页 数:", "</li>" },
            new[] { @"<li>印刷时间:.*?</li>", "<li>印刷时间:", "</li>" },
            new[] { @"<li>开 本:.*?</li>", "<li>开 本:", "</li>" },
            new[] { @"<li>印 次:.*?</li>", "<li>印 次:", "</li>" },
            // Fixed: the pattern used to read "<li纸 张:" (missing '>'), so it could
            // never match the "<li>纸 张:" prefix that is stripped afterwards.
            new[] { @"<li>纸 张:.*?</li>", "<li>纸 张:", "</li>" },
            new[] { @"<li>I S B N :.*?</li>", "<li>I S B N :", "</li>" },
            new[] { @"<li>包 装:.*?</li>", "<li>包 装:", "</li>" },
            new[] { @"<div id='publisher_'>出 版 社:.*?</div>", "<div id='publisher_'>出 版 社:", "</div>" },
            new[] { @"<div id='author_' >作 者:.*?</div>", "<div id='author_' >作 者:", "</div>" },
            new[]
            {
                @"<span class=""gray87"">定价:<span class=""del"">.*?</span></span>",
                @"<span class=""gray87"">定价:<span class=""del"">",
                "</span></span>",
            },
            new[]
            {
                @"<span class=""redc30"">价格:.*?</b></span>",
                @"<span class=""redc30"">价格:<b>",
                "</b></span>",
            },
            new[]
            {
                @"内容简介</h2> <div class=""right_content"">.*?</div><div class=""dashed"">",
                @"内容简介</h2> <div class=""right_content"">",
                @"</div><div class=""dashed"">",
            },
            new[]
            {
                @"目录</h2> <div class=""right_content"">.*?</div>",
                @"目录</h2> <div class=""right_content"">",
                "</div>",
            },
        };

        /// <summary>
        /// Downloads the listing page, visits the extracted detail links, writes
        /// the fields found on each detail page to the response, and finally
        /// saves a sample image to disk with <see cref="WebClient.DownloadFile(string, string)"/>.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        protected void Page_Load(object sender, EventArgs e)
        {
            // One client serves every download and is disposed afterwards; the
            // per-iteration client created previously was never used.
            using (WebClient wc = new WebClient())
            {
                byte[] bt = wc.DownloadData(@"网址");
                // NOTE(review): Encoding.Default is the server's ANSI code page;
                // confirm it matches the charset of the downloaded pages.
                string res = Encoding.Default.GetString(bt);
                string[] ress = ExtractDetailLinks(res);

                // The final segment is deliberately not visited, as before.
                // NOTE(review): presumably it holds trailing residue rather than
                // a link - confirm against the live page.
                for (int i = 0; i < ress.Length - 1; i++)
                {
                    string ur = ress[i].Trim();
                    if (ur.Length == 0)
                    {
                        // A blank segment is not a URL; skip it instead of failing the download.
                        continue;
                    }

                    byte[] bt1 = wc.DownloadData(ur);
                    string res1 = Encoding.Default.GetString(bt1);
                    WriteBookDetails(res1);
                }
            }

            // Saving an image to disk with WebClient.
            using (WebClient WCp = new WebClient())
            {
                WCp.DownloadFile(@"http://www.XXX.com/img/XXX_logo.gif", Server.MapPath("XXX.gif"));
            }
        }

        /// <summary>
        /// Reduces the listing page markup to a ';'-separated list of link
        /// targets and splits it.
        /// </summary>
        /// <param name="listingHtml">Decoded markup of the listing page.</param>
        /// <returns>The split segments; an empty array when nothing is left after stripping.</returns>
        private static string[] ExtractDetailLinks(string listingHtml)
        {
            string res = listingHtml;
            foreach (string noise in ListingNoisePatterns)
            {
                res = Regex.Replace(res, noise, string.Empty, BlockOptions);
            }

            // Drop the anchor markup around each href, then use the remaining
            // closing div tags as separators.
            res = Regex.Replace(res, @"<a name=""link_prd_name"" href='", "");
            res = Regex.Replace(res, @"' target=""_blank"">", "ww");
            res = Regex.Replace(res, @"ww.*?</a>", "");
            res = Regex.Replace(res, "</div>", ";");

            if (res.Length == 0)
            {
                // Substring(0, -1) would throw on an empty result.
                return new string[0];
            }

            // The last character is dropped before splitting, as before.
            return res.Substring(0, res.Length - 1).Split(';');
        }

        /// <summary>
        /// Strips the surrounding markup from a detail page and writes every
        /// field that is present, followed by the image URLs.
        /// </summary>
        /// <param name="detailHtml">Decoded markup of one detail page.</param>
        private void WriteBookDetails(string detailHtml)
        {
            string res1 = detailHtml;
            foreach (string noise in DetailNoisePatterns)
            {
                res1 = Regex.Replace(res1, noise, string.Empty, BlockOptions);
            }

            // NOTE(review): the extracted values are remote markup written to the
            // response without HTML encoding; confirm that is intended.
            foreach (string[] field in DetailFields)
            {
                string value = ExtractField(res1, field[0], field[1], field[2]);
                if (value != null)
                {
                    Response.Write(value.Trim() + "<br>");
                }
            }

            WriteImageUrls(res1);
        }

        /// <summary>
        /// Writes the image file name, the small image URL and the derived
        /// large image URL when the product image tag is present. Only the URLs
        /// are written; the image files are not downloaded.
        /// </summary>
        /// <param name="detailHtml">Detail page markup after noise removal.</param>
        private void WriteImageUrls(string detailHtml)
        {
            // Small image URL.
            string phs1 = ExtractField(
                detailHtml,
                @"<img src="".*?id=""img_show_prd""/>",
                @"<img src=""",
                @""" id=""img_show_prd""/>");
            if (phs1 == null)
            {
                return;
            }

            string phsname = Regex.Replace(phs1, @"http.*?\.com/\d.*/\d.*/", string.Empty, BlockOptions);
            Response.Write("图片名" + phsname + "<br>");
            Response.Write(phs1 + "<br>");

            // Large image URL: the last five characters are replaced by "o.jpg".
            // Guarded because Substring throws for URLs shorter than five characters.
            if (phs1.Length >= 5)
            {
                string phbb = phs1.Substring(0, phs1.Length - 5) + "o.jpg";
                Response.Write(phbb + "<br>");
            }
        }

        /// <summary>
        /// Finds the first match of <paramref name="pattern"/> and removes the
        /// given leading and trailing markup from it.
        /// </summary>
        /// <param name="html">Text to search.</param>
        /// <param name="pattern">Regular expression locating the field.</param>
        /// <param name="prefix">Regular expression for the leading text to remove.</param>
        /// <param name="suffix">Regular expression for the trailing text to remove.</param>
        /// <returns>The stripped (untrimmed) value, or <c>null</c> when the pattern does not match.</returns>
        private static string ExtractField(string html, string pattern, string prefix, string suffix)
        {
            Match match = Regex.Match(html, pattern);
            if (!match.Success)
            {
                return null;
            }

            string value = Regex.Replace(match.Value, prefix, "");
            return Regex.Replace(value, suffix, "");
        }
    }
}
// Note: only the .cs file is listed here.