抓取图片及时保存

抓取的正则

  • 匹配的正则表达式
//背景图 @"url\s*\(([^\)]+)\)"
//Html标签图 @"<img.*?src=""([^""]*)"".*?>"
  • 图片抓取
        /// <summary>
        /// Downloads the image at <paramref name="networkFileUrl"/> and re-posts it to the image
        /// server as a single-file multipart/form-data upload.
        /// </summary>
        /// <param name="networkFileUrl">Absolute URL of the remote image to fetch.</param>
        /// <param name="serverUri">Address of the image server's upload endpoint.</param>
        /// <param name="querystring">Optional parameters appended, URL-encoded, to <paramref name="serverUri"/>.</param>
        /// <param name="cookies">Optional cookies sent along with the upload request.</param>
        /// <returns>The <c>url</c> member of the JSON document returned by the image server.</returns>
        /// <exception cref="InvalidOperationException">The server reply did not deserialize to an upload result.</exception>
        public  string SendNetworkFileToServer(string networkFileUrl, string serverUri, NameValueCollection querystring = null, CookieContainer cookies = null)
        {
            Uri sourceUri = new Uri(networkFileUrl);
            byte[] imagesBytes = DownloadNetworkFile(sourceUri);

            string contentType;
            string fileNameAndExtension;
            DescribeNetworkImage(sourceUri, networkFileUrl, out contentType, out fileNameAndExtension);

            string fileFormName = "file";
            string boundary = "----------" + DateTime.Now.Ticks.ToString("x");

            HttpWebRequest webrequest = (HttpWebRequest)WebRequest.Create(BuildUploadUri(serverUri, querystring));
            webrequest.CookieContainer = cookies;
            webrequest.ContentType = "multipart/form-data; boundary=" + boundary;
            webrequest.Method = "POST";
            webrequest.Accept = "*/*";
            webrequest.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36";

            // Header of the single file part.
            StringBuilder sb = new StringBuilder();
            sb.Append("--").Append(boundary).Append("\r\n");
            sb.Append("Content-Disposition: form-data; name=\"").Append(fileFormName);
            sb.Append("\"; filename=\"").Append(fileNameAndExtension).Append("\"\r\n");
            sb.Append("Content-Type: ").Append(contentType).Append("\r\n");
            sb.Append("\r\n");
            byte[] postHeaderBytes = Encoding.ASCII.GetBytes(sb.ToString());

            // Closing delimiter: a multipart body must end with "--boundary--",
            // not with a plain part delimiter.
            byte[] boundaryBytes = Encoding.ASCII.GetBytes("\r\n--" + boundary + "--\r\n");

            webrequest.ContentLength = postHeaderBytes.Length + imagesBytes.Length + boundaryBytes.Length;

            // Close the request stream before asking for the response so the body is fully flushed,
            // and so the stream is released even when a write fails.
            using (Stream requestStream = webrequest.GetRequestStream())
            {
                requestStream.Write(postHeaderBytes, 0, postHeaderBytes.Length);
                requestStream.Write(imagesBytes, 0, imagesBytes.Length);
                requestStream.Write(boundaryBytes, 0, boundaryBytes.Length);
            }

            string result;
            using (WebResponse response = webrequest.GetResponse())
            using (Stream responseBody = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(responseBody))
            {
                result = reader.ReadToEnd();
            }

            var objResult = Newtonsoft.Json.JsonConvert.DeserializeObject<UploadResult>(result);
            if (objResult == null)
            {
                throw new InvalidOperationException("The image server returned an empty or unreadable upload result.");
            }
            return objResult.url;
        }

        /// <summary>
        /// Reads the whole body of the resource at <paramref name="sourceUri"/> into memory.
        /// </summary>
        private static byte[] DownloadNetworkFile(Uri sourceUri)
        {
            WebRequest downloadRequest = WebRequest.Create(sourceUri);
            using (WebResponse downloadResponse = downloadRequest.GetResponse())
            using (Stream responseStream = downloadResponse.GetResponseStream())
            using (MemoryStream buffer = new MemoryStream())
            {
                // Copy until end-of-stream instead of trusting ContentLength: the length is -1 when
                // the server does not announce it, and a truncated body must not loop forever.
                responseStream.CopyTo(buffer);
                return buffer.ToArray();
            }
        }

        /// <summary>
        /// Derives the upload content type and file name from the image URL.
        /// Unrecognized URLs fall back to JPEG, matching the method's historical default.
        /// </summary>
        private static void DescribeNetworkImage(Uri sourceUri, string rawUrl, out string contentType, out string fileName)
        {
            // Later entries win when the whole-URL fallback finds several matches.
            string[] knownExtensions = { ".jpg", ".jpeg", ".png", ".bmp", ".gif" };

            // Last path segment only, so query strings and fragments cannot leak into the file name.
            // AbsolutePath stays percent-encoded, which keeps the header ASCII-safe.
            string path = sourceUri.AbsolutePath;
            string lastSegment = path.Substring(path.LastIndexOf('/') + 1);
            int dotIndex = lastSegment.LastIndexOf('.');
            string stem = dotIndex >= 0 ? lastSegment.Substring(0, dotIndex) : lastSegment;
            string extension = dotIndex >= 0 ? lastSegment.Substring(dotIndex) : string.Empty;

            string matched = null;
            foreach (string known in knownExtensions)
            {
                if (string.Equals(extension, known, StringComparison.OrdinalIgnoreCase))
                {
                    matched = known;
                }
            }

            if (matched == null)
            {
                // The path carries no recognized extension (e.g. "/getimage?file=a.png"):
                // scan the whole URL and keep the entire last segment as the name stem.
                stem = lastSegment;
                foreach (string known in knownExtensions)
                {
                    if (rawUrl.IndexOf(known, StringComparison.OrdinalIgnoreCase) > 0)
                    {
                        matched = known;
                    }
                }
            }

            if (stem.Length == 0)
            {
                stem = "image";
            }

            string normalizedExtension;
            switch (matched)
            {
                case ".png":
                    // Legacy alias kept so the server sees the same value as before.
                    contentType = "image/x-png";
                    normalizedExtension = ".png";
                    break;
                case ".bmp":
                    contentType = "image/bmp";
                    normalizedExtension = ".bmp";
                    break;
                case ".gif":
                    contentType = "image/gif";
                    normalizedExtension = ".gif";
                    break;
                default:
                    // ".jpg", ".jpeg", and anything unrecognized.
                    contentType = "image/jpeg";
                    normalizedExtension = ".jpg";
                    break;
            }

            fileName = stem + normalizedExtension;
        }

        /// <summary>
        /// Appends <paramref name="querystring"/> to <paramref name="serverUri"/> with URL-encoded keys and values.
        /// </summary>
        private static Uri BuildUploadUri(string serverUri, NameValueCollection querystring)
        {
            if (querystring == null || querystring.Count == 0)
            {
                return new Uri(serverUri);
            }

            StringBuilder address = new StringBuilder(serverUri);
            // Continue an existing query string instead of starting a second one.
            char separator = serverUri.IndexOf('?') >= 0 ? '&' : '?';
            foreach (string key in querystring.Keys)
            {
                address.Append(separator);
                address.Append(Uri.EscapeDataString(key ?? string.Empty));
                address.Append('=');
                address.Append(Uri.EscapeDataString(querystring.Get(key) ?? string.Empty));
                separator = '&';
            }
            return new Uri(address.ToString());
        }
  • 并发处理
// _filterImageSrc.ImagesSrcPair is of type ConcurrentDictionary<string, string>:
// the key is the matched value before replacement, the value is the processed image URL.
        /// <summary>
        /// Translates every image source held in <c>_filterImageSrc.ImagesSrcPair</c> in parallel
        /// (at most 32 at a time) and stores each resulting destination URL back under its key.
        /// Keys for which no result was produced keep their current value.
        /// </summary>
        public void ProcessTranslateImageSrc()
        {
            var srcPairs = this._filterImageSrc.ImagesSrcPair;
            // Per the note above this is a ConcurrentDictionary, whose Keys property returns a snapshot,
            // so writing values back while looping over imageUrls below is safe.
            var imageUrls = srcPairs.Keys;
            if (imageUrls.Count == 0)
            {
                // WithDegreeOfParallelism rejects 0, so there is nothing to do for an empty set.
                return;
            }

            var excuteList = imageUrls.AsParallel()
                .WithDegreeOfParallelism(Math.Min(imageUrls.Count, 32))
                .Select(imageUrl => ProcessTranslateItem(imageUrl, this._defaultServerUri, this._defaultNameValueCollection, null))
                .ToList();

            // Index the results once by source URL instead of scanning the whole list for every key.
            var resultsBySource = excuteList.Where(item => item != null).ToLookup(item => item.SourceUrl);
            foreach (var imageUrl in imageUrls)
            {
                // First match wins, as before; a missing match no longer dereferences null.
                var translated = resultsBySource[imageUrl].FirstOrDefault();
                if (translated != null)
                {
                    srcPairs[imageUrl] = translated.DestUrl;
                }
            }
        }
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值