C# 网页图片采集

最新推荐文章于 2022-05-16 14:14:19 发布

weixin_30596165

最新推荐文章于 2022-05-16 14:14:19 发布

阅读量134

点赞数

文章标签： c#

原文链接：http://www.cnblogs.com/qq260250932/p/5361043.html

版权

http://blog.csdn.net/a237428367/article/details/5987832

using System;

using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.Windows.Forms;
namespace ImageCollect
{
    /// <summary>
    /// Collects every image referenced by a web page and stores the files in a local folder.
    /// The page is loaded in a WinForms <see cref="WebBrowser"/>; when the top-level document
    /// has finished loading, the "src" of each img element is downloaded over HTTP(S).
    /// </summary>
    public class GatherPic
    {
        private string savePath;
        private string getUrl;
        private WebBrowser wb;
        private int iImgCount;

        /// <summary>Stores the page address and the target folder; nothing is loaded yet.</summary>
        /// <param name="sWebUrl">Address of the page whose images should be collected.</param>
        /// <param name="sSavePath">Folder the downloaded images are written to.</param>
        public GatherPic(string sWebUrl, string sSavePath)
        {
            this.getUrl = sWebUrl;
            this.savePath = sSavePath;
        }

        /// <summary>
        /// Starts loading the page. The images are downloaded later, from the
        /// DocumentCompleted handler, so this method returns before any file is saved.
        /// </summary>
        /// <returns>false when no page address was supplied (a message box is shown); otherwise true.</returns>
        public bool start()
        {
            if (getUrl == null || getUrl.Trim().Equals(""))
            {
                MessageBox.Show("哪来的虾米连网址都没输！");
                return false;
            }

            // Release the browser of a previous run so repeated calls do not pile up controls and handlers.
            if (this.wb != null)
            {
                this.wb.DocumentCompleted -= new WebBrowserDocumentCompletedEventHandler(DocumentCompleted);
                this.wb.Dispose();
            }

            this.wb = new WebBrowser();
            // Subscribe before navigating so the completion event cannot be missed.
            this.wb.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(DocumentCompleted);
            this.wb.Navigate(getUrl);
            return true;
        }

        /// <summary>Handles WebBrowser.DocumentCompleted and triggers the image collection.</summary>
        private void DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            if (wb == null || wb.Document == null)
            {
                return;
            }

            // The event also fires for every frame/iframe; only react to the top-level document.
            if (e.Url != wb.Document.Url)
            {
                return;
            }

            SearchImgList();
        }

        /// <summary>
        /// Finds all img elements of the loaded document and downloads each one into the save folder.
        /// Does nothing when no document has been loaded yet.
        /// </summary>
        public void SearchImgList()
        {
            if (this.wb == null || this.wb.Document == null)
            {
                return;
            }

            HtmlElementCollection elemColl = this.wb.Document.GetElementsByTagName("img");
            this.iImgCount = elemColl.Count;
            foreach (HtmlElement elem in elemColl)
            {
                SaveImageFromWeb(elem.GetAttribute("src"), this.savePath);
            }
        }

        /// <summary>
        /// Downloads one remote image into a folder, using the last segment of the URL path as file name.
        /// </summary>
        /// <param name="imgUrl">Absolute http/https address of the image.</param>
        /// <param name="path">Folder to save into; it is created when missing.</param>
        /// <returns>
        /// 1 when the image was saved; 0 when the address is missing, not an absolute http(s) URL,
        /// has no usable file name, the response is not an image, or the download/write failed.
        /// </returns>
        public int SaveImageFromWeb(string imgUrl, string path)
        {
            if (string.IsNullOrEmpty(imgUrl) || path == null)
            {
                return 0;
            }

            // Only http/https can be fetched with HttpWebRequest; data:, file:, relative or malformed
            // values (all of which occur in real "src" attributes) are skipped instead of throwing.
            Uri imageUri;
            if (!Uri.TryCreate(imgUrl, UriKind.Absolute, out imageUri)
                || (imageUri.Scheme != Uri.UriSchemeHttp && imageUri.Scheme != Uri.UriSchemeHttps))
            {
                return 0;
            }

            string fileName = BuildLocalFileName(imageUri);
            if (fileName.Trim('.').Length == 0)
            {
                // URL ends in "/" (or resolves to "." / ".."): there is nothing to name the file after.
                return 0;
            }

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(imageUri);
                request.UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)";
                request.Timeout = 10000;

                using (WebResponse response = request.GetResponse())
                {
                    string contentType = response.ContentType;
                    if (contentType == null
                        || !contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase))
                    {
                        return 0;
                    }

                    Directory.CreateDirectory(path);
                    string targetFile = Path.Combine(path, fileName);

                    using (Stream stream = response.GetResponseStream())
                    using (FileStream fso = new FileStream(targetFile, FileMode.Create))
                    {
                        // Copy until end of stream. ContentLength is not used as the loop bound because
                        // it is -1 when the server does not announce a length, and a connection that
                        // ends early would otherwise never satisfy the bound.
                        byte[] buffer = new byte[1024];
                        int read;
                        while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            fso.Write(buffer, 0, read);
                        }
                    }

                    return 1;
                }
            }
            catch (WebException)
            {
                return 0;
            }
            catch (UriFormatException)
            {
                return 0;
            }
            catch (IOException)
            {
                // Download interrupted or the file could not be written.
                return 0;
            }
        }

        /// <summary>
        /// Derives a local file name from the URL path (query string and fragment are excluded),
        /// replacing characters that are not allowed in file names with '_'.
        /// </summary>
        private static string BuildLocalFileName(Uri imageUri)
        {
            string urlPath = imageUri.AbsolutePath;
            string name = Uri.UnescapeDataString(urlPath.Substring(urlPath.LastIndexOf('/') + 1));
            foreach (char invalid in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(invalid, '_');
            }
            return name;
        }
    }
}

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.Windows.Forms;

namespace ImageCollect
{
    /// <summary>
    /// Collects every image referenced by a web page and stores the files in a local folder.
    /// The page is loaded in a WinForms <see cref="WebBrowser"/>; when the top-level document
    /// has finished loading, the "src" of each img element is downloaded over HTTP(S).
    /// </summary>
    public class GatherPic
    {
        private string savePath;
        private string getUrl;
        private WebBrowser wb;
        private int iImgCount;

        /// <summary>Stores the page address and the target folder; nothing is loaded yet.</summary>
        /// <param name="sWebUrl">Address of the page whose images should be collected.</param>
        /// <param name="sSavePath">Folder the downloaded images are written to.</param>
        public GatherPic(string sWebUrl, string sSavePath)
        {
            this.getUrl = sWebUrl;
            this.savePath = sSavePath;
        }

        /// <summary>
        /// Starts loading the page. The images are downloaded later, from the
        /// DocumentCompleted handler, so this method returns before any file is saved.
        /// </summary>
        /// <returns>false when no page address was supplied (a message box is shown); otherwise true.</returns>
        public bool start()
        {
            if (getUrl == null || getUrl.Trim().Equals(""))
            {
                MessageBox.Show("哪来的虾米连网址都没输！");
                return false;
            }

            // Release the browser of a previous run so repeated calls do not pile up controls and handlers.
            if (this.wb != null)
            {
                this.wb.DocumentCompleted -= new WebBrowserDocumentCompletedEventHandler(DocumentCompleted);
                this.wb.Dispose();
            }

            this.wb = new WebBrowser();
            // Subscribe before navigating so the completion event cannot be missed.
            this.wb.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(DocumentCompleted);
            this.wb.Navigate(getUrl);
            return true;
        }

        /// <summary>Handles WebBrowser.DocumentCompleted and triggers the image collection.</summary>
        private void DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
        {
            if (wb == null || wb.Document == null)
            {
                return;
            }

            // The event also fires for every frame/iframe; only react to the top-level document.
            if (e.Url != wb.Document.Url)
            {
                return;
            }

            SearchImgList();
        }

        /// <summary>
        /// Finds all img elements of the loaded document and downloads each one into the save folder.
        /// Does nothing when no document has been loaded yet.
        /// </summary>
        public void SearchImgList()
        {
            if (this.wb == null || this.wb.Document == null)
            {
                return;
            }

            HtmlElementCollection elemColl = this.wb.Document.GetElementsByTagName("img");
            this.iImgCount = elemColl.Count;
            foreach (HtmlElement elem in elemColl)
            {
                SaveImageFromWeb(elem.GetAttribute("src"), this.savePath);
            }
        }

        /// <summary>
        /// Downloads one remote image into a folder, using the last segment of the URL path as file name.
        /// </summary>
        /// <param name="imgUrl">Absolute http/https address of the image.</param>
        /// <param name="path">Folder to save into; it is created when missing.</param>
        /// <returns>
        /// 1 when the image was saved; 0 when the address is missing, not an absolute http(s) URL,
        /// has no usable file name, the response is not an image, or the download/write failed.
        /// </returns>
        public int SaveImageFromWeb(string imgUrl, string path)
        {
            if (string.IsNullOrEmpty(imgUrl) || path == null)
            {
                return 0;
            }

            // Only http/https can be fetched with HttpWebRequest; data:, file:, relative or malformed
            // values (all of which occur in real "src" attributes) are skipped instead of throwing.
            Uri imageUri;
            if (!Uri.TryCreate(imgUrl, UriKind.Absolute, out imageUri)
                || (imageUri.Scheme != Uri.UriSchemeHttp && imageUri.Scheme != Uri.UriSchemeHttps))
            {
                return 0;
            }

            string fileName = BuildLocalFileName(imageUri);
            if (fileName.Trim('.').Length == 0)
            {
                // URL ends in "/" (or resolves to "." / ".."): there is nothing to name the file after.
                return 0;
            }

            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(imageUri);
                request.UserAgent = "Mozilla/6.0 (MSIE 6.0; Windows NT 5.1; Natas.Robot)";
                request.Timeout = 10000;

                using (WebResponse response = request.GetResponse())
                {
                    string contentType = response.ContentType;
                    if (contentType == null
                        || !contentType.StartsWith("image/", StringComparison.OrdinalIgnoreCase))
                    {
                        return 0;
                    }

                    Directory.CreateDirectory(path);
                    string targetFile = Path.Combine(path, fileName);

                    using (Stream stream = response.GetResponseStream())
                    using (FileStream fso = new FileStream(targetFile, FileMode.Create))
                    {
                        // Copy until end of stream. ContentLength is not used as the loop bound because
                        // it is -1 when the server does not announce a length, and a connection that
                        // ends early would otherwise never satisfy the bound.
                        byte[] buffer = new byte[1024];
                        int read;
                        while ((read = stream.Read(buffer, 0, buffer.Length)) > 0)
                        {
                            fso.Write(buffer, 0, read);
                        }
                    }

                    return 1;
                }
            }
            catch (WebException)
            {
                return 0;
            }
            catch (UriFormatException)
            {
                return 0;
            }
            catch (IOException)
            {
                // Download interrupted or the file could not be written.
                return 0;
            }
        }

        /// <summary>
        /// Derives a local file name from the URL path (query string and fragment are excluded),
        /// replacing characters that are not allowed in file names with '_'.
        /// </summary>
        private static string BuildLocalFileName(Uri imageUri)
        {
            string urlPath = imageUri.AbsolutePath;
            string name = Uri.UnescapeDataString(urlPath.Substring(urlPath.LastIndexOf('/') + 1));
            foreach (char invalid in Path.GetInvalidFileNameChars())
            {
                name = name.Replace(invalid, '_');
            }
            return name;
        }
    }
}

调用方法

[c-sharp] view plain copy print ?

// Collect all images of the page into E:/XXX. start() only begins loading the page;
// the images are saved later, when the browser raises DocumentCompleted.
GatherPic g = new GatherPic("http://www.baidu.com", "E:/XXX");
g.start();

=====================================================

在web项目中使用WebBrowser类-----给网站抓图

最近做一个WEB项目，其中要求有个功能就是程序能网页抓图，举个例子：在test.aspx页面上放一个TextBox和一个Button，TextBox用来输入要抓取的网页地址，然后按了Button之后，服务器要对前面输入的网址进行抓图，然后显示出来。我把抓图的业务逻辑做成一个类：

using System;
using System.Data;
using System.Windows.Forms;
using System.Drawing;

/// <summary>
/// WebSnap2: loads a web page in a WinForms WebBrowser control and captures it as a bitmap.
/// </summary>
public class WebSnap2
{

    public WebSnap2()
    {
        // No initialisation required.
    }

    /// <summary>
    /// Loads the given page and returns an image of its full scrollable area.
    /// </summary>
    /// <param name="Url">Address of the page to capture.</param>
    /// <returns>The captured bitmap; the caller owns it and should dispose it.</returns>
    /// <exception cref="InvalidOperationException">The loaded page has no document body to measure.</exception>
    public Bitmap StartSnap(string Url)
    {
        // using: the browser control is released even when the capture throws.
        using (WebBrowser myWB = this.GetPage(Url))
        {
            return this.SnapWeb(myWB);
        }
    }

    /// <summary>
    /// Creates a browser control, navigates to the address and pumps window messages
    /// until the control reports that loading is complete.
    /// </summary>
    private WebBrowser GetPage(string Url)
    {
        WebBrowser myWB = new WebBrowser();
        try
        {
            myWB.ScrollBarsEnabled = false;
            myWB.Navigate(Url);
            // NOTE(review): this loop has no timeout and spins until ReadyState becomes Complete;
            // a page that never finishes loading blocks the caller indefinitely.
            while (myWB.ReadyState != WebBrowserReadyState.Complete)
            {
                System.Windows.Forms.Application.DoEvents();
            }
            return myWB;
        }
        catch
        {
            // Do not leak the control when navigation fails.
            myWB.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Resizes the browser to the document's scroll size (plus a 10 px margin) and draws it into a bitmap.
    /// </summary>
    private Bitmap SnapWeb(WebBrowser wb)
    {
        HtmlDocument hd = wb.Document;
        if (hd == null || hd.Body == null)
        {
            throw new InvalidOperationException("The loaded page has no document body to capture.");
        }

        int height = Convert.ToInt32(hd.Body.GetAttribute("scrollHeight")) + 10;
        int width = Convert.ToInt32(hd.Body.GetAttribute("scrollWidth")) + 10;
        wb.Height = height;
        wb.Width = width;

        Bitmap bmp = new Bitmap(width, height);
        try
        {
            wb.DrawToBitmap(bmp, new Rectangle(0, 0, width, height));
        }
        catch
        {
            // The bitmap is only handed to the caller on success.
            bmp.Dispose();
            throw;
        }
        return bmp;
    }

}

然后在test.aspx的button_click事件里面调用：

        // The capture class defined above is named WebSnap2.
        WebSnap2 ws = new WebSnap2();
        using (Bitmap bmp = ws.StartSnap(TextBox1.Text))
        using (System.IO.MemoryStream ms = new System.IO.MemoryStream())
        {
            bmp.Save(ms, System.Drawing.Imaging.ImageFormat.Jpeg);
            Response.ContentType = "image/jpeg";
            // ToArray() returns exactly the bytes written; GetBuffer() exposes the whole internal
            // buffer, whose unused tail would be appended to the JPEG data.
            Response.BinaryWrite(ms.ToArray());
        }

转载于:https://www.cnblogs.com/qq260250932/p/5361043.html

weixin_30596165

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
C# 网页图片采集

http://blog.csdn.net/a237428367/article/details/5987832 using System; using System.Collections.Generic; using System.Linq; using System.Text; using System.Text.RegularExpressions; ...
复制链接

扫一扫