using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.SqlClient;
using System.Collections;
using System.Text.RegularExpressions;
using System.IO;
using System.Net;
namespace CatchWeb
{
public class CreateSql
{
public CreateSql()
{
}
/// <summary>
/// 获取时间id
/// </summary>
public void CatchOtherID()
{
Log objlog = new Log();
try
{
string PageText = "";
string uri = "http://www.cnemc.cn/YeAll.aspx?id=2";//抓取时间id的网址
TheCatchMethod tmd = new TheCatchMethod(uri);
WebClient objClient = new WebClient();
objClient.Credentials = CredentialCache.DefaultCredentials;
Byte[] pageData = objClient.DownloadData(uri);
PageText = Encoding.UTF8.GetString(pageData);
string param = tmd.GetDateID(PageText);
CatchOther(param);
}
catch(Exception ex)
{
objlog.WriteLog(System.DateTime.Now.AddDays(-1).ToShortDateString + ex.Message);
}
}
/// <summary>
/// 抓取网页数据信息 type = 1
/// </summary>
public void CatchOther(string param)
{
Log objlog = new Log();
try
{
string PageText = "";
string uri = "http://www.cnemc.cn/ReportCont.aspx?" + param; //+ System.DateTime.Now.AddDays(-1).ToShortDateString();
TheCatchMethod tmd = new TheCatchMethod(uri);
WebClient objClient = new WebClient();
objClient.Credentials = CredentialCache.DefaultCredentials;
Byte[] pageData = objClient.DownloadData(uri);
PageText = Encoding.Default.GetString(pageData);
//此处要更改
bool bl = GetSql(tmd.GetDate(PageText), System.DateTime.Now.AddDays(-1).ToShortDateString(), "1");
if (bl == true)
{
objlog.WriteLog(System.DateTime.Now.AddDays(-1).ToShortDateString() + "数据抓取成功!");
}
}
catch (Exception ex)
{
objlog.WriteLog(System.DateTime.Now.AddDays(-1) + ex.Message);
}
}
/// <summary>
/// 抓取网页数据信息 type = 0
/// </summary>
public void CatchBJ()
{
Log objlog = new Log();
try
{
string PageText = "";
string uri = "http://bjepb.gov.cn/air2008/Air.aspx?time=" + System.DateTime.Now.AddDays(-1).ToShortDateString();
TheCatchMethod tmd = new TheCatchMethod(uri);
WebClient objClient = new WebClient();
objClient.Credentials = CredentialCache.DefaultCredentials;
Byte[] pageData = objClient.DownloadData(uri);
PageText = Encoding.Unicode.GetString(pageData);
bool bl = GetSql(tmd.WebGet(PageText), System.DateTime.Now.AddDays(-1).ToShortDateString(),"0");
if (bl == true)
{
objlog.WriteLog(System.DateTime.Now.AddDays(-1).ToShortDateString() + "数据抓取成功!");
}
}
catch (Exception ex)
{
objlog.WriteLog(System.DateTime.Now.AddDays(-1).ToShortDateString() + ex.Message);
}
}
/// <summary>
/// 构建sql
/// </summary>
/// <param name="arrs"></param>
/// <returns></returns>
public bool GetSql(ArrayList arrs,string date,string type)
{
ArrayList arrsqls = new ArrayList();
foreach (ArrayList arr in arrs)
{
string strval = "";
for (int i = 0; i < arr.Count; i++)
{
if (arr[i].ToString() == "")
{
if (i == 1)
{
strval += "-1,";
}
else
{
strval += "null,";
}
}
else
{
if (i == 1)
{
strval += arr[i].ToString() + ",";
}
else
{
strval += "'" + arr[i].ToString() + "',";
}
}
}
strval += "'" + type + "'";
string delsql = "delete 空气质量日报 where 日期='" + date + "' and 子站='" + arr[0].ToString() + "';";
string insertsql="insert into 空气质量日报 values ('" + date + "'," + strval + ")";
arrsqls.Insert(0, delsql);
arrsqls.Add(insertsql);
}
return AlterSql(arrsqls);
}
/// <summary>
/// 判读是否可以导入 true 可以 false 不可以
/// </summary>
/// <param name="place"></param>
/// <returns></returns>
public bool IfCanImport(string place)
{
bool bl = false;
string sql = "select * from 空气质量日报 where 类别='1' and 子站='" + place + "'";
AppData.DataAccess da = new AppData.DataAccess();
DataTable dt = da.GetDataTable(sql);
if (dt.Rows.Count > 0)
{
bl = true;
}
return bl;
}
/// <summary>
/// 执行sql
/// </summary>
/// <param name="arrsqls"></param>
/// <returns></returns>
public bool AlterSql(ArrayList arrsqls)
{
AppData.DataAccess da = new AppData.DataAccess();
return da.ExecuteCommand(arrsqls);
}
}
}
//通过正则取数据的小小框架,别人要用的时候可能需要改哦
using System;
using System.Collections.Generic;
using System.Text;
using System.Net;
using System.IO;
using System.Text.RegularExpressions;
using System.Data;
using System.Data.SqlClient;
using System.Collections;
namespace CatchWeb
{
public class TheCatchMethod
{
private string uri;
public TheCatchMethod(string uri)
{
this.uri = uri;
}
/// <summary>
/// 网页抓取Bj
/// </summary>
public ArrayList WebGet(string text)
{
Log lg = new Log();
//要返回的DataTable
ArrayList arrs = new ArrayList();
text = text.Replace("/r/n", "");
MatchCollection mathCltion = Regex.Matches(text, @"<table[^>]*>(?:/s|/S)*?</table>", RegexOptions.IgnoreCase | RegexOptions.Multiline);
try
{
if (mathCltion.Count > 1)
{
string tabname = mathCltion[3].Groups[0].Value;
MatchCollection TrMatchs = Regex.Matches(tabname, @"<tr[^>]*>((/s)*(.*?)(/s)*(.*?)(/s)*(.*?)(/s)*(.*?)(/s)*)</tr>", RegexOptions.IgnoreCase | RegexOptions.Multiline);
if (TrMatchs.Count > 1)
{
for (int i = 2; i < TrMatchs.Count; i++)
{
string trText = TrMatchs[i].Groups[0].Value;
MatchCollection TdMaths = Regex.Matches(trText, @"<td[^>]*>((/s)*(.*?)(/s)*(.*?)(/s)*(.*?)(/s)*(.*?)(/s)*)</td>", RegexOptions.IgnoreCase | RegexOptions.Multiline);
if (TdMaths.Count >= 4)
{
ArrayList arr = new ArrayList();
for (int j = 0; j < TdMaths.Count; j++)
{
arr.Add(TdMaths[j].Groups[0].Value.Replace("<td>", "").Replace("</td>", "").Replace("<font color=/"Black/">", "").Replace("</font>", ""));
}
arrs.Add(arr);
}
}
}
}
}
catch (Exception ex)
{
lg.WriteLog(System.DateTime.Now.AddDays(-1).ToShortDateString() + ex.Message);
}
return arrs;
}
/// <summary>
/// 网页抓取Other
/// </summary>
public ArrayList GetDate(string text)
{
Log lg = new Log();
//要返回的DataTable
ArrayList arrs = new ArrayList();
text = text.Replace("/r/n", "");
MatchCollection mathCltion = Regex.Matches(text, @"<table[^>]*>(?:/s|/S)*?</table>", RegexOptions.IgnoreCase | RegexOptions.Multiline);
try
{
if (mathCltion.Count > 4)
{
string tabtext = mathCltion[4].Groups[0].Value;
MatchCollection TrMatchs = Regex.Matches(tabtext, @"<tr[^>]*>((/s)*(.*?)(/s)*(.*?)(/s)*(.*?)(/s)*(.*?)(/s)*)</tr>", RegexOptions.IgnoreCase | RegexOptions.Multiline);
if (TrMatchs.Count > 1)
{
for (int i = 2; i < TrMatchs.Count; i++)
{
string trText = TrMatchs[i].Groups[0].Value;
MatchCollection TdMaths = Regex.Matches(trText, @"<td[^>]*>((/s)*(.*?)(/s)*(.*?)(/s)*(.*?)(/s)*(.*?)(/s)*)</td>", RegexOptions.IgnoreCase | RegexOptions.Multiline);
if (TdMaths.Count >= 4)
{
ArrayList arr = new ArrayList();
for (int j = 0; j < TdMaths.Count; j++)
{
//<td nowrap="nowrap" align="Left"><font face="宋体">鞍山</font></td>
arr.Add(TdMaths[j].Groups[0].Value.Replace("<td nowrap=/"nowrap/" align=/"Left/"><font face=/"宋体/">", "").Replace("</font></td>", ""));
}
CreateSql cql = new CreateSql();
if (cql.IfCanImport(arr[0].ToString()))
{
arrs.Add(arr);
}
else
{
continue;
}
}
}
}
}
}
catch(Exception ex)
{
lg.WriteLog(System.DateTime.Now.AddDays(-1).ToShortDateString() + ex.Message);
}
return arrs;
}
/// <summary>
/// 网页抓取Other时间id
/// </summary>
public string GetDateID(string text)
{
Log lg = new Log();
string id = "";
//要返回的DataTable
ArrayList arrs = new ArrayList();
text = text.Replace("/r/n", "");
//href//s*=//s*(?:/"(?<1>[^/"]*)/"|(?<1>//S+)) 连接正则
try
{
MatchCollection mathCltion = Regex.Matches(text, @"<table[^>]*>(?:/s|/S)*?</table>", RegexOptions.IgnoreCase | RegexOptions.Multiline);
if (mathCltion.Count > 0)
{
text = mathCltion[3].Groups[0].Value;
mathCltion = Regex.Matches(text, "href//s*=//s*(?:/"(?<1>[^/"]*)/"|(?<1>//S+))", RegexOptions.IgnoreCase | RegexOptions.Multiline);
if (mathCltion.Count > 0)
{
for (int i = 0; i < mathCltion.Count; i++)
{
//mathCltion[i].Groups[0].Value href='ReportCont.aspx?type=2&id=311'>20080616
string temp = mathCltion[i].Groups[0].Value.Replace("</a></td><td><a", "");
string textYear = temp.Substring(temp.Length - 8, 4) + "-" + temp.Substring(temp.Length - 4, 2) + "-" + temp.Substring(temp.Length - 2, 2);
if (Convert.ToDateTime(textYear).ToShortDateString() == System.DateTime.Now.AddDays(-1).ToShortDateString())
{
id = temp.Replace("href='ReportCont.aspx?", "");//type=2&id=311'>20080616
id = id.Substring(0, id.Length - 10);
break;
}
}
}
else
{
}
}
}
catch (Exception ex)
{
lg.WriteLog(System.DateTime.Now.AddDays(-1).ToShortDateString() + ex.Message);
}
return id;
}
}
}