前台:<asp:TextBox ID="txtName" runat="server"></asp:TextBox>
<asp:Button ID="Button1" runat="server" Text="抓取" OnClick="Button1_Click" />
后台:
/// <summary>
/// Requests <paramref name="url"/> and returns the response body decoded as UTF-8 text.
/// </summary>
/// <param name="url">The URL to request; passed unchanged to <c>WebRequest.Create</c>.</param>
/// <returns>
/// The full response body, or <c>null</c> when the request fails with a
/// <see cref="WebException"/>.
/// </returns>
public static string GetHtmlStr(string url)
{
    try
    {
        WebRequest request = WebRequest.Create(url);

        // Dispose the response, its stream and the reader deterministically so the
        // underlying connection is released even when reading throws.
        using (WebResponse response = request.GetResponse())
        using (Stream body = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
        {
            return reader.ReadToEnd();
        }
    }
    catch (WebException)
    {
        // Connection failed: callers receive null instead of an exception.
        return null;
    }
}
/// <summary>
/// Click handler for the fetch button: searches ivsky.com for the keyword typed into
/// <c>txtName</c>, saves every matching thumbnail through <c>DownloadPhotoFromUrl</c>,
/// and writes each image plus its URL into the response.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    string name = txtName.Text.Trim();

    // Escape the keyword so spaces, '&', '#' and similar characters cannot corrupt the query string.
    string url = "https://www.ivsky.com/search.php?q=" + Uri.EscapeDataString(name);

    string htmlstr = GetHtmlStr(url);
    if (htmlstr == null)
    {
        // GetHtmlStr returns null when the request fails; there is nothing to parse.
        Response.Write("<p>抓取失败</p>");
        return;
    }

    HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
    doc.LoadHtml(htmlstr);
    HtmlNode rootnode = doc.DocumentNode;

    // XPath: every <img> under ul.pli > li > div > a. Adjust it to the structure of the target page.
    string xpathstring = "//ul[@class='pli']/li/div/a/img";

    // SelectNodes returns null (not an empty collection) when nothing matches.
    HtmlNodeCollection aa = rootnode.SelectNodes(xpathstring);
    if (aa == null)
    {
        return;
    }

    // Target folder for the downloaded images ("imagess").
    string sPath = @"C:\Users\Administrator\Desktop\JQuery练习4.14爬虫高德支付宝\Weeb\imagess" + "\\";
    Directory.CreateDirectory(sPath);

    Uri pageUri = new Uri(url);
    foreach (HtmlNode img in aa)
    {
        string src = img.GetAttributeValue("src", "");

        // Resolve relative and protocol-relative ("//host/x.jpg") sources against the page URL,
        // and skip anything that is empty or cannot be turned into a URI.
        Uri imageUri;
        if (string.IsNullOrEmpty(src) || !Uri.TryCreate(pageUri, src, out imageUri))
        {
            continue;
        }

        // DownloadPhotoFromUrl casts its request to HttpWebRequest, so only http(s) is usable
        // (this also filters out data: URIs).
        if (imageUri.Scheme != Uri.UriSchemeHttp && imageUri.Scheme != Uri.UriSchemeHttps)
        {
            continue;
        }

        string absoluteSrc = imageUri.AbsoluteUri;
        try
        {
            DownloadPhotoFromUrl(absoluteSrc, sPath);
        }
        catch (WebException)
        {
            // One unreachable image must not abort the remaining downloads;
            // the image is still listed below so the browser can try to load it.
        }

        // The URL comes from a remote page: HTML-encode it before echoing it into our own markup.
        // Double quotes are used for the attribute because HtmlEncode always escapes '"'.
        string encodedSrc = Server.HtmlEncode(absoluteSrc);
        Response.Write("<img src=\"" + encodedSrc + "\" referrerPolicy=\"no-referrer\" >");
        Response.Write("<p>" + encodedSrc + "</p>");
    }
}
/// <summary>
/// Downloads the image at <paramref name="Url"/> and saves it into the folder
/// <paramref name="sPath"/> under a timestamp-based file name ending in <c>.jpg</c>.
/// Nothing is saved when the response status is not 200 OK.
/// </summary>
/// <param name="Url">HTTP(S) address of the image; the request is cast to <c>HttpWebRequest</c>.</param>
/// <param name="sPath">Existing destination folder, with or without a trailing separator.</param>
public static void DownloadPhotoFromUrl(string Url, string sPath)
{
    HttpWebRequest webrequest = (HttpWebRequest)WebRequest.Create(Url);

    // Dispose the response, its stream and the decoded image deterministically so the
    // connection and GDI+ handle are released even when decoding or saving throws.
    using (HttpWebResponse webresponse = (HttpWebResponse)webrequest.GetResponse())
    {
        if (webresponse.StatusCode != HttpStatusCode.OK)
        {
            return;
        }

        using (Stream body = webresponse.GetResponseStream())
        using (System.Drawing.Image image = System.Drawing.Image.FromStream(body))
        {
            // The timestamp only has 1/100 s resolution, so a GUID suffix is appended to stop
            // two quick downloads from overwriting each other.
            string fileName =
                DateTime.Now.ToString("yyyyMMddHHmmssff", System.Globalization.CultureInfo.InvariantCulture)
                + "_" + Guid.NewGuid().ToString("N") + ".jpg";

            // Save into the local folder; Path.Combine copes with a missing trailing separator.
            // NOTE: the file always gets a .jpg extension, whatever format the source image has.
            image.Save(Path.Combine(sPath, fileName));
        }
    }
}
}