/// <summary>
/// Click handler that starts two worker threads, each running
/// <see cref="GatherProduct"/> over its own range of page numbers.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void button3_Click(object sender, EventArgs e)
{
    // Each list is a (start, end) pair: GatherProduct reads element 0 as the
    // first page number and element 1 as the exclusive upper bound.
    ArrayList firstRange = new ArrayList();
    firstRange.Add(200);
    firstRange.Add(300);

    ArrayList secondRange = new ArrayList();
    secondRange.Add(300);
    secondRange.Add(400);

    Thread firstThread = new Thread(new ParameterizedThreadStart(GatherProduct));
    Thread secondThread = new Thread(new ParameterizedThreadStart(GatherProduct));

    // Every thread must receive its own range. Handing the same list to both
    // would fetch pages 200-299 twice and never fetch pages 300-399.
    firstThread.Start(firstRange);
    secondThread.Start(secondRange);
}
/// <summary>
/// Thread entry point. Downloads every page in a page-number range, extracts
/// product and company links from the HTML with regular expressions, and
/// stores each product/company pair through <c>BLL.pharmnetProduct.Add</c>.
/// </summary>
/// <param name="obj">
/// An <see cref="ArrayList"/> whose element 0 is the first page number and
/// whose element 1 is the exclusive upper bound of the range.
/// </param>
/// <exception cref="ArgumentException">
/// <paramref name="obj"/> is null or holds fewer than two elements.
/// </exception>
/// <exception cref="InvalidCastException">
/// <paramref name="obj"/> is not an <see cref="ArrayList"/>.
/// </exception>
public void GatherProduct(object obj)
{
    ArrayList arr = (ArrayList)obj;
    if (arr == null || arr.Count < 2)
    {
        throw new ArgumentException("Expected an ArrayList holding the start and end page numbers.", "obj");
    }

    // The bounds never change while the loop runs, so parse them once.
    int firstPage = Convert.ToInt32(arr[0].ToString());
    int endPage = Convert.ToInt32(arr[1].ToString());

    // The patterns are identical for every page; build the Regex objects once.
    RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;
    Regex productRegex = new Regex(
        "href=\"(?<ProductURL>[^>]*)\"\\sclass=black2\\starget=_blank><strong>(?<ProductName>[^<]*)</strong>",
        options);
    Regex companyRegex = new Regex(
        "<TD\\sheight=\"20\"\\sbgcolor=\"f8f8f8\"\\sclass=\"black12\"><a\\shref=\"(?<CompanyURL>[^>]*)\"\\sclass=orange>(?<CompanyName>[^<]*)</a></TD>",
        options);

    for (int page = firstPage; page < endPage; page++)
    {
        string sHtml = utility.GetPageHTML("url" + page + ".html");

        // Short pause after each download (presumably to throttle requests).
        Thread.Sleep(10);

        MatchCollection productMatches = productRegex.Matches(sHtml);
        MatchCollection companyMatches = companyRegex.Matches(sHtml);

        // Products and companies are paired by position, so only as many rows
        // as the shorter of the two match lists can be stored.
        // NOTE(review): assumes both lists appear in the same order on the page.
        int pairCount = Math.Min(productMatches.Count, companyMatches.Count);
        for (int j = 0; j < pairCount; j++)
        {
            Model.pharmnetProduct productModel = new Model.pharmnetProduct();
            productModel.ProductName = productMatches[j].Groups["ProductName"].Value;
            productModel.ProductURL = productMatches[j].Groups["ProductURL"].Value;
            productModel.CompanyName = companyMatches[j].Groups["CompanyName"].Value;
            productModel.CompanyURL = companyMatches[j].Groups["CompanyURL"].Value;
            new BLL.pharmnetProduct().Add(productModel);
        }
    }
}
/// <summary>
/// Downloads the resource at <paramref name="url"/> with an HTTP GET request
/// and returns the response body as a string.
/// </summary>
/// <param name="url">Absolute http/https URL of the page to fetch.</param>
/// <returns>
/// The response body decoded with <see cref="Encoding.Default"/> (which is
/// platform-dependent).
/// </returns>
/// <exception cref="ArgumentException">
/// <paramref name="url"/> does not produce an <see cref="HttpWebRequest"/>
/// (for example a file: or ftp: URL).
/// </exception>
/// <exception cref="WebException">The request failed.</exception>
public static string GetPageHTML(string url)
{
    HttpWebRequest wr = WebRequest.Create(url) as HttpWebRequest;
    if (wr == null)
    {
        throw new ArgumentException("Only http and https URLs are supported.", "url");
    }

    // HTTP method tokens are case-sensitive; use the canonical upper-case form.
    wr.Method = "GET";
    wr.Accept = "*/*";
    wr.Headers.Add("Accept-Language: zh-cn");
    wr.Headers.Add("UA-CPU: x86");

    // Let the framework advertise gzip/deflate AND transparently decompress
    // the reply. Sending the Accept-Encoding header by hand without this would
    // hand compressed bytes straight to the StreamReader.
    wr.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

    wr.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Embedded Web Browser from: http://bsalsa.com/; InfoPath.2; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
    wr.KeepAlive = true;
    wr.ServicePoint.Expect100Continue = false;
    wr.AllowAutoRedirect = false;

    // The using blocks release the connection and the reader even when reading
    // fails. Exceptions propagate unchanged, keeping their original stack trace.
    using (HttpWebResponse wre = (HttpWebResponse)wr.GetResponse())
    using (StreamReader sreader = new StreamReader(wre.GetResponseStream(), Encoding.Default))
    {
        return sreader.ReadToEnd();
    }
}
多线程数据采集
最新推荐文章于 2023-01-13 08:45:00 发布