asp.net新闻采集(wyz)

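        /// <summary>
        /// Click handler that generates news entries: fetches five news listing pages,
        /// passes each fetched fragment to matchA, and shows a summary of the counts
        /// matchA returned (total and per category).
        /// </summary>
        /// <param name="sender">Event source (not used by the handler).</param>
        /// <param name="e">Event data (not used by the handler).</param>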

        protected void btnAdd_Click(object sender, EventArgs e)

        {

            int incount1 = 0;//成功插入的条数

            int incount2 = 0;//

            int incount3 = 0;//

            int incount4 = 0;//

            int incount5 = 0;//

            var str = getHTTPPage("http://news.chinawutong.com/xwkx/lydt/", "lanmuname", "pagelist", 46, 86); //陆运

            incount1 = matchA(str, "http://news.chinawutong.com/", 4);//4表示公路运输咨询

            var strzx = getHTTPPage("http://news.chinawutong.com/xwkx/zxzx/", "lanmuname", "pagelist", 46, 86);//最新

            incount2 = matchA(strzx, "http://news.chinawutong.com/", 2);//

            var strhot = getHTTPPage("http://news.chinawutong.com/ztrw/zdsj/", "lanmuname", "pagelist", 46, 86);//热点

            incount3 = matchA(strhot, "http://news.chinawutong.com/", 3);//

            var strxxh = getHTTPPage("http://news.chinawutong.com/xwkx/xxhzx/", "lanmuname", "pagelist", 46, 86);//信息化

            incount4 = matchA(strxxh, "http://news.chinawutong.com/", 5);//

            var strss = getHTTPPage("http://news.chinawutong.com/wtzl/ssrd/", "lanmuname", "pagelist", 46, 86);//时事

            incount5 = matchA(strss, "http://news.chinawutong.com/", 6);//

            int incount = incount1 + incount2 + incount3 + incount4 + incount5; //生成总条数

            ShowInfo("本次成功生成共 " + incount + " 条.公路运输 " + incount1 + " 条.最新资讯 " + incount2 + " 条.物流热点 " + incount3 + " 条.物流信息化 " + incount4 + " 条.时事热点 " + incount5 + " 条.");

        }

 

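        /// <summary>
        /// Matches anchor (&lt;a&gt;) tags in an HTML fragment and adds a linked news
        /// entry for each matched title that is not already stored. Only the first
        /// 20 matches are considered.
        /// </summary>
        /// <param name="teststr">HTML fragment to scan for anchor tags.</param>
        /// <param name="preurl">Prefix prepended to each captured href to form the stored link.</param>
        /// <param name="categoryID">Category ID assigned to every entry that is added.</param>
        /// <returns>The number of news entries added.</returns>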

        public int matchA(string teststr, string preurl, int categoryID)

        {

            MatchCollection mc = Regex.Matches(teststr, "<a.+?href=\"(.+?)\".*>(.+)</a>");            

            int incount = 0;//成功插入的条数

            //匹配组序号从0开始

            for (int i = 0; i < mc.Count; i++)

            {

                //先MatchCollection序列中取出match,然后进行输出

                Match match = mc[i];

                if (match != null)

                {

                    if (i < 20)

                    {

                        if (newsService.GetAll().Where(n => n.Title == match.Groups[2].Value).Count() < 1)

                        {

                            incount++;

                            var _news = new model.News();

                            //如果数据库里没有这个标题则插入

                            _news.Title = match.Groups[2].Value;

                            _news.NewsContent = preurl + match.Groups[1].Value;

                            _news.CreateDate = DateTime.Now;

                            _news.UpdateDate = DateTime.Now;

                            _news.IsLink = 1;

                            _news.CategoryID = categoryID;//

                            _news.OriginateID = 2;//表示来源

                            newsService.Add(_news);                            

                        }

                    }

                }

            }

            newsService.Save(); //最后保存

            return incount; //返回条数

        }

 

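        /// <summary>
        /// Downloads a page, decodes it as gb2312, and returns the part of its text
        /// located between two marker strings, adjusted by the given offsets.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="starts">Marker text; its first occurrence anchors the start of the slice.</param>
        /// <param name="ends">Marker text; its first occurrence anchors the end of the slice.</param>
        /// <param name="startnum">Number of characters added to the start marker's index.</param>
        /// <param name="endnum">Number of characters subtracted from the end marker's index.</param>
        /// <returns>The text between the two adjusted positions.</returns>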

        public string getHTTPPage(string url, string starts, string ends, int startnum, int endnum)

        {

            //string url = "http://news.chinawutong.com/xwkx/lydt/";     //想要抓取的页面的地址

            HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(url);

            HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse();

            //webRequest.UserAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2; .NET CLR 1.1.4322; .NET CLR 2.0.50727; InfoPath.1) Web-Sniffer/1.0.24 ";

            Stream stream = webResponse.GetResponseStream();

            System.IO.StreamReader streamReader = new StreamReader(stream, System.Text.Encoding.GetEncoding("gb2312"));

            string content = streamReader.ReadToEnd();

            streamReader.Close();

            webResponse.Close();

            int start = content.IndexOf(starts) + startnum;

            int end = content.IndexOf(ends) - endnum;

            content = content.Substring(start, (end - start));

            return content;

        }

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值