/// <summary>
/// Click handler that generates news: downloads five list pages from
/// news.chinawutong.com, imports the links found on each page through
/// <c>matchA</c>, and reports the per-section and total insert counts
/// through <c>ShowInfo</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
protected void btnAdd_Click(object sender, EventArgs e)
{
    const string linkPrefix = "http://news.chinawutong.com/";

    // Section list pages, in the order they are reported in the summary message.
    string[] sectionUrls =
    {
        "http://news.chinawutong.com/xwkx/lydt/",   // land transport
        "http://news.chinawutong.com/xwkx/zxzx/",   // latest news
        "http://news.chinawutong.com/ztrw/zdsj/",   // hot topics
        "http://news.chinawutong.com/xwkx/xxhzx/",  // informatization
        "http://news.chinawutong.com/wtzl/ssrd/"    // current affairs
    };

    // Category id stored with the entries of the section at the same index
    // (4 is the road transport news category).
    int[] categoryIds = { 4, 2, 3, 5, 6 };

    int[] inserted = new int[sectionUrls.Length];
    int total = 0;

    // Each section is fetched and imported before the next one is requested.
    for (int k = 0; k < sectionUrls.Length; k++)
    {
        string listHtml = getHTTPPage(sectionUrls[k], "lanmuname", "pagelist", 46, 86);
        inserted[k] = matchA(listHtml, linkPrefix, categoryIds[k]);
        total += inserted[k];
    }

    ShowInfo("本次成功生成共 " + total + " 条.公路运输 " + inserted[0] + " 条.最新资讯 " + inserted[1] + " 条.物流热点 " + inserted[2] + " 条.物流信息化 " + inserted[3] + " 条.时事热点 " + inserted[4] + " 条.");
}
/// <summary>
/// Scans an HTML fragment for anchor tags and adds one link-type news entry
/// for every anchor whose link text is not already used as a news title.
/// Only the first 20 anchors of the fragment are examined.
/// </summary>
/// <param name="teststr">HTML fragment to scan; <c>null</c> is treated as empty.</param>
/// <param name="preurl">Prefix prepended to each extracted href to build the stored link.</param>
/// <param name="categoryID">Category id assigned to every entry added by this call.</param>
/// <returns>The number of news entries added by this call.</returns>
public int matchA(string teststr, string preurl, int categoryID)
{
    // Upper bound on anchors examined per fragment (examined, not inserted).
    const int maxLinks = 20;

    // Group 1 = href value, group 2 = link text. The quantifiers are bounded
    // ([^>], [^"], lazy .+?) so that several anchors on one line are matched
    // separately instead of pairing the first href with the last link text.
    const string anchorPattern = "<a\\s[^>]*?href=\"([^\"]+)\"[^>]*>(.+?)</a>";

    MatchCollection mc = Regex.Matches(teststr ?? string.Empty, anchorPattern);

    int incount = 0; // number of entries added
    int examined = 0;

    // One timestamp per batch so CreateDate and UpdateDate are identical.
    DateTime now = DateTime.Now;

    // Titles already handled in this batch. Entries are only saved after the
    // loop, so a title repeated within the fragment is tracked here rather
    // than relying on the existing-title lookup.
    var batchTitles = new System.Collections.Generic.HashSet<string>();

    // foreach keeps the MatchCollection lazy: matching stops once the cap is hit.
    foreach (Match match in mc)
    {
        if (examined++ >= maxLinks)
        {
            break;
        }

        // Hoisted into locals so the lambda below captures a plain string.
        string href = match.Groups[1].Value;
        string title = match.Groups[2].Value;

        if (!batchTitles.Add(title))
        {
            continue; // same title appeared earlier in this fragment
        }

        if (newsService.GetAll().Any(n => n.Title == title))
        {
            continue; // a news entry with this title already exists
        }

        // NOTE(review): the link is built by plain concatenation, so an href that is
        // already absolute or starts with "/" is stored as-is after the prefix — confirm
        // the list pages only emit relative hrefs.
        var _news = new model.News();
        _news.Title = title;
        _news.NewsContent = preurl + href;
        _news.CreateDate = now;
        _news.UpdateDate = now;
        _news.IsLink = 1;       // presumably marks the entry as an external link — confirm
        _news.CategoryID = categoryID;
        _news.OriginateID = 2;  // indicates the source
        newsService.Add(_news);
        incount++;
    }

    newsService.Save(); // persist once, after all entries have been added
    return incount;
}
/// <summary>
/// Downloads a page, decodes it as gb2312, and returns the slice of its HTML
/// that lies between two marker strings, adjusted by the given offsets.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="starts">Marker whose first occurrence anchors the start of the slice.</param>
/// <param name="ends">Marker anchoring the end of the slice; the first occurrence at or
/// after the start marker is used.</param>
/// <param name="startnum">Number of characters added to the start marker's index to get
/// the first character of the slice.</param>
/// <param name="endnum">Number of characters subtracted from the end marker's index to get
/// the end (exclusive) of the slice.</param>
/// <returns>
/// The selected slice of the page, or an empty string when either marker is missing or
/// the offsets describe an empty or out-of-range slice.
/// </returns>
public string getHTTPPage(string url, string starts, string ends, int startnum, int endnum)
{
    HttpWebRequest webRequest = (HttpWebRequest)WebRequest.Create(url);

    string content;
    // using blocks release the connection, stream and reader even if reading throws.
    using (HttpWebResponse webResponse = (HttpWebResponse)webRequest.GetResponse())
    using (Stream stream = webResponse.GetResponseStream())
    using (StreamReader streamReader = new StreamReader(stream, System.Text.Encoding.GetEncoding("gb2312")))
    {
        content = streamReader.ReadToEnd();
    }

    // Markers are markup fragments, so compare ordinally rather than by culture.
    int startMarker = content.IndexOf(starts, StringComparison.Ordinal);
    if (startMarker < 0)
    {
        return string.Empty;
    }

    // Searching from the start marker ignores an end marker that appears earlier in the page.
    int endMarker = content.IndexOf(ends, startMarker, StringComparison.Ordinal);
    if (endMarker < 0)
    {
        return string.Empty;
    }

    int start = startMarker + startnum;
    int end = endMarker - endnum;

    // Substring would throw for these ranges; report "nothing found" instead.
    if (start < 0 || end < start || end > content.Length)
    {
        return string.Empty;
    }

    return content.Substring(start, end - start);
}