目录
首先大家在工作以后都会总结一下自己常用的工具类。
这里分享下我的工具类:
下面开始挨个介绍:
1.DataTable帮助类
using System;
using System.Collections.Generic;
using System.Text;
using System.Reflection;
using System.Data;
using System.Collections;
namespace Core.DBUtility
{
/// <summary>
/// DataTable帮助类
/// </summary>
public class DataTableHelper
{
/// <summary>
/// Appends an "identityid" column to the table and numbers the existing rows 1, 2, 3, ...
/// If the table already has an "identityid" column it is returned untouched.
/// </summary>
/// <param name="dt">Table to modify in place.</param>
/// <returns>The same <paramref name="dt"/> instance that was passed in.</returns>
public static DataTable AddIdentityColumn(DataTable dt)
{
    const string identityColumn = "identityid";
    if (dt.Columns.Contains(identityColumn))
    {
        return dt;
    }

    // The column is added without an explicit type and the sequence numbers are
    // written as strings ("1", "2", ...), not as integers.
    dt.Columns.Add(identityColumn);
    int sequence = 0;
    foreach (DataRow row in dt.Rows)
    {
        sequence++;
        row[identityColumn] = sequence.ToString();
    }
    return dt;
}
/// <summary>
/// Tells whether a table reference is non-null and holds at least one row.
/// </summary>
/// <param name="dt">Table to inspect; may be null.</param>
/// <returns>true when <paramref name="dt"/> is not null and has rows; otherwise false.</returns>
public static bool IsHaveRows(DataTable dt)
{
    if (dt == null)
    {
        return false;
    }
    return dt.Rows.Count > 0;
}
/// <summary>
/// Converts every row of a DataTable into an instance of <typeparamref name="T"/>.
/// A column is copied into the public property whose name equals the column name
/// (lookup via <see cref="Type.GetProperty(string)"/>); columns without a matching
/// writable property, and cells that are null/DBNull, are skipped.
/// </summary>
/// <typeparam name="T">Entity type; instances are created with Activator.CreateInstance.</typeparam>
/// <param name="table">Source table; may be null or empty.</param>
/// <returns>One entity per row, or an empty list when the table is null or has no rows.</returns>
/// <remarks>
/// Values are assigned as-is with PropertyInfo.SetValue; no type conversion is attempted,
/// so the column's data type has to be assignable to the property type.
/// </remarks>
public static IList<T> DataTableToList<T>(DataTable table)
    where T : class
{
    IList<T> list = new List<T>();
    if (!IsHaveRows(table))
    {
        return list;
    }

    // Resolve the column -> property mapping once. The original performed this
    // reflection lookup for every cell of every row, although it only depends on
    // the column name and on T.
    DataColumnCollection columns = table.Columns;
    PropertyInfo[] targets = new PropertyInfo[columns.Count];
    Type modelType = typeof(T);
    for (int i = 0; i < columns.Count; i++)
    {
        PropertyInfo pi = modelType.GetProperty(columns[i].ColumnName);
        if (pi != null && pi.CanWrite)
        {
            targets[i] = pi;
        }
    }

    foreach (DataRow dr in table.Rows)
    {
        T model = Activator.CreateInstance<T>();
        for (int i = 0; i < targets.Length; i++)
        {
            if (targets[i] == null)
            {
                continue;
            }
            object drValue = dr[i];
            if (drValue != null && !Convert.IsDBNull(drValue))
            {
                targets[i].SetValue(model, drValue, null);
            }
        }
        list.Add(model);
    }
    return list;
}
/// <summary>
/// Converts a list of entities into a DataTable named after <typeparamref name="T"/>,
/// with one column per public instance property and one row per non-null element.
/// </summary>
/// <typeparam name="T">Entity type whose public instance properties become columns.</typeparam>
/// <param name="list">Entities to convert.</param>
/// <returns>
/// The populated table, or null when <paramref name="list"/> is null or empty
/// (kept for compatibility with existing callers).
/// </returns>
/// <remarks>
/// Null elements in the list are skipped. Nullable&lt;X&gt; properties produce a column of
/// type X, and null property values are stored as DBNull.
/// </remarks>
public static DataTable ListToDataTable<T>(IList<T> list)
    where T : class
{
    if (list == null || list.Count <= 0)
    {
        return null;
    }

    DataTable dt = new DataTable(typeof(T).Name);

    // Only readable, non-indexer properties can be read with GetValue(obj, null).
    List<PropertyInfo> readable = new List<PropertyInfo>();
    foreach (PropertyInfo pi in typeof(T).GetProperties(BindingFlags.Public | BindingFlags.Instance))
    {
        if (!pi.CanRead || pi.GetIndexParameters().Length > 0)
        {
            continue;
        }
        readable.Add(pi);

        // DataColumn rejects Nullable<X> as a data type; use X and represent
        // "no value" with DBNull instead.
        Type columnType = Nullable.GetUnderlyingType(pi.PropertyType) ?? pi.PropertyType;
        dt.Columns.Add(new DataColumn(pi.Name, columnType));
    }

    foreach (T item in list)
    {
        if (item == null)
        {
            continue;
        }
        DataRow row = dt.NewRow();
        foreach (PropertyInfo pi in readable)
        {
            // A raw null cannot be assigned to a value-typed column.
            row[pi.Name] = pi.GetValue(item, null) ?? DBNull.Value;
        }
        dt.Rows.Add(row);
    }
    return dt;
}
/// <summary>
/// Converts a generic list into a DataTable containing a column for every property.
/// Shorthand for the two-argument overload called without a column filter.
/// </summary>
/// <typeparam name="T">Element type of the list.</typeparam>
/// <param name="list">Items to convert.</param>
/// <returns>The resulting table.</returns>
public static DataTable ToDataTable<T>(IList<T> list)
{
    // A null filter means "no restriction" to the overload that does the work.
    string[] noFilter = null;
    return ToDataTable<T>(list, noFilter);
}
/// <summary>
/// Converts a generic list into a DataTable, optionally restricted to the named properties.
/// </summary>
/// <typeparam name="T">Element type of the list.</typeparam>
/// <param name="list">Items to convert; null or empty yields an empty table.</param>
/// <param name="propertyName">
/// Names of the properties to include as columns. When null or empty, every public
/// property is included.
/// </param>
/// <returns>A table with one row per list element; never null.</returns>
/// <remarks>
/// The property set is read from the runtime type of the first element, so all elements
/// are expected to be of that type (or derived from it). Nullable&lt;X&gt; properties
/// produce a column of type X.
/// </remarks>
public static DataTable ToDataTable<T>(IList<T> list, params string[] propertyName)
{
    DataTable result = new DataTable();
    if (list == null || list.Count == 0)
    {
        return result;
    }

    List<string> wanted = new List<string>();
    if (propertyName != null)
    {
        wanted.AddRange(propertyName);
    }

    // Decide once which properties take part; the original repeated the name
    // filter for every row.
    List<PropertyInfo> selected = new List<PropertyInfo>();
    foreach (PropertyInfo pi in list[0].GetType().GetProperties())
    {
        if (wanted.Count == 0 || wanted.Contains(pi.Name))
        {
            selected.Add(pi);
            // DataColumn rejects Nullable<X> as a data type; fall back to X.
            Type columnType = Nullable.GetUnderlyingType(pi.PropertyType) ?? pi.PropertyType;
            result.Columns.Add(pi.Name, columnType);
        }
    }

    for (int i = 0; i < list.Count; i++)
    {
        object[] values = new object[selected.Count];
        for (int c = 0; c < selected.Count; c++)
        {
            values[c] = selected[c].GetValue(list[i], null);
        }
        result.LoadDataRow(values, true);
    }
    return result;
}
/// <summary>
/// Creates an empty DataTable with one string-typed column per entry of
/// <paramref name="nameList"/>.
/// </summary>
/// <param name="nameList">Column names, in the order the columns should appear.</param>
/// <returns>The new table, or null when <paramref name="nameList"/> contains no names.</returns>
public static DataTable CreateTable(List<string> nameList)
{
    int columnCount = nameList.Count;
    if (columnCount <= 0)
    {
        return null;
    }

    DataTable table = new DataTable();
    for (int i = 0; i < columnCount; i++)
    {
        table.Columns.Add(nameList[i], typeof(string));
    }
    return table;
}
/// <summary>
/// Creates an empty DataTable from a column definition string. Definitions are separated
/// by ',' or ';' and each is either a bare name or "name|type":
///   1) a,b,c,d,e
///   2) a|int,b|string,c|bool,d|decimal
/// </summary>
/// <param name="nameString">Column definition string; null or empty yields a table without columns.</param>
/// <returns>A table with the described columns and no rows.</returns>
/// <remarks>
/// Whitespace around names and type names is ignored, so "a, b|int" defines columns
/// "a" and "b". Unknown type names fall back to string (see ConvertType).
/// </remarks>
public static DataTable CreateTable(string nameString)
{
    DataTable dt = new DataTable();
    if (string.IsNullOrEmpty(nameString))
    {
        return dt;
    }

    foreach (string item in nameString.Split(new char[] { ',', ';' }))
    {
        // Skip the empty pieces produced by doubled or trailing separators.
        if (item.Trim().Length == 0)
        {
            continue;
        }

        string[] subItems = item.Split('|');
        string columnName = subItems[0].Trim();
        if (subItems.Length == 2)
        {
            dt.Columns.Add(columnName, ConvertType(subItems[1]));
        }
        else
        {
            // No type given (or a malformed definition): default column type.
            dt.Columns.Add(columnName);
        }
    }
    return dt;
}

/// <summary>
/// Maps a C# keyword or System type name (case-insensitive, optional "System." prefix)
/// to the corresponding CLR type.
/// </summary>
/// <param name="typeName">Type name such as "int", "Int32" or "System.DateTime".</param>
/// <returns>The matching type, or typeof(string) when the name is not recognized.</returns>
private static Type ConvertType(string typeName)
{
    // Invariant lower-casing keeps the lookup independent of the current culture
    // (e.g. the Turkish dotless i would otherwise break "INT").
    typeName = typeName.Trim().ToLowerInvariant().Replace("system.", "");
    switch (typeName)
    {
        case "boolean":
        case "bool":
            return typeof(bool);
        case "int16":
        case "short":
            return typeof(short);
        case "int32":
        case "int":
            return typeof(int);
        case "long":
        case "int64":
            return typeof(long);
        case "uint16":
        case "ushort":
            return typeof(ushort);
        case "uint32":
        case "uint":
            return typeof(uint);
        case "uint64":
        case "ulong":
            return typeof(ulong);
        case "single":
        case "float":
            return typeof(float);
        case "guid":
            return typeof(Guid);
        case "decimal":
            return typeof(decimal);
        case "double":
            return typeof(double);
        case "datetime":
            return typeof(DateTime);
        case "byte":
            return typeof(byte);
        case "char":
            return typeof(char);
        case "string":
        default:
            return typeof(string);
    }
}
/// <summary>
/// Copies the rows of a DataRowCollection into a new array, preserving their order.
/// </summary>
/// <param name="drc">Row collection to copy from.</param>
/// <returns>An array holding the same DataRow instances as <paramref name="drc"/>.</returns>
public static DataRow[] GetDataRowArray(DataRowCollection drc)
{
    DataRow[] snapshot = new DataRow[drc.Count];
    int position = 0;
    while (position < snapshot.Length)
    {
        snapshot[position] = drc[position];
        position++;
    }
    return snapshot;
}
/// <summary>
/// Builds a new DataTable from an array of rows. The schema and the default-view sort
/// expression are taken from the table that owns the first row, so every row is expected
/// to share that structure.
/// </summary>
/// <param name="rows">Rows to copy.</param>
/// <returns>
/// A new table containing copies of the rows, or a new table without columns when
/// <paramref name="rows"/> is empty.
/// </returns>
public static DataTable GetTableFromRows(DataRow[] rows)
{
    if (rows.Length > 0)
    {
        DataTable copy = rows[0].Table.Clone();
        copy.DefaultView.Sort = rows[0].Table.DefaultView.Sort;
        foreach (DataRow source in rows)
        {
            // Values are copied by position; the second argument accepts the changes
            // so the loaded rows are not left in the Added state.
            copy.LoadDataRow(source.ItemArray, true);
        }
        return copy;
    }
    return new DataTable();
}
/// <summary>
/// Sets the sort expression of the table's default view to the given sort terms,
/// joined with commas. Tables without rows are left untouched.
/// </summary>
/// <param name="dt">Table whose DefaultView.Sort is set.</param>
/// <param name="sorts">Sort terms such as "Name" or "Age DESC".</param>
/// <returns>The same <paramref name="dt"/> instance; only its default view is affected.</returns>
public static DataTable SortedTable(DataTable dt, params string[] sorts)
{
    if (dt.Rows.Count <= 0)
    {
        return dt;
    }

    StringBuilder expression = new StringBuilder();
    foreach (string term in sorts)
    {
        expression.Append(term).Append(',');
    }
    // Every term was followed by a comma; strip the trailing ones.
    dt.DefaultView.Sort = expression.ToString().TrimEnd(',');
    return dt;
}
/// <summary>
/// Returns the rows of a table that satisfy a filter expression.
/// </summary>
/// <param name="dt">Source table.</param>
/// <param name="condition">
/// Filter expression in DataTable.Select syntax. Null, empty or whitespace-only means
/// "no filter".
/// </param>
/// <returns>
/// <paramref name="dt"/> itself when there is no filter; otherwise a new table with the
/// same schema containing only the matching rows.
/// </returns>
public static DataTable FilterDataTable(DataTable dt, string condition)
{
    // A null condition is treated like an empty one instead of failing on Trim().
    if (condition == null || condition.Trim().Length == 0)
    {
        return dt;
    }

    // Clone() copies only the schema; the original also allocated a throw-away
    // DataTable before overwriting the variable with the clone.
    DataTable filtered = dt.Clone();
    foreach (DataRow match in dt.Select(condition))
    {
        filtered.ImportRow(match);
    }
    return filtered;
}
}
}
2.DbhelperOleDb类
using System;
using System.Collections;
using System.Data;
using System.Data.OleDb;
namespace Core.DBUtility
{
/// <summary>
/// Copyright (C) 2004-2008 LiTianPing
/// 数据访问基础类(基于OleDb)
/// 可以用户可以修改满足自己项目的需要。
/// </summary>
public abstract class DbHelperOleDb
{
// Database connection string used by every helper in this class. Per the original note
// it is meant to be configured from web.config and may be reassigned at run time to
// point at a different database. It starts out empty, so it must be set before first use.
public static string connectionString = "";// PubConstant.ConnectionString;
// Empty constructor; every member visible in this class is static.
public DbHelperOleDb()
{
}
#region 公用方法
/// <summary>
/// Returns the next id for a table, computed as max(<paramref name="FieldName"/>) + 1.
/// </summary>
/// <param name="FieldName">Name of the numeric id column.</param>
/// <param name="TableName">Name of the table.</param>
/// <returns>The current maximum plus one, or 1 when the query yields no value.</returns>
/// <remarks>
/// Both names are concatenated into the SQL text as-is, so they must come from trusted
/// code, never from user input.
/// </remarks>
public static int GetMaxID(string FieldName, string TableName)
{
    object next = GetSingle("select max(" + FieldName + ")+1 from " + TableName);
    return next == null ? 1 : int.Parse(next.ToString());
}
/// <summary>
/// Runs a scalar query (typically a "select count(...)") and reports whether its
/// result is a non-zero number.
/// </summary>
/// <param name="strSql">Query whose first column of the first row is an integer.</param>
/// <returns>false when the result is null, DBNull or 0; otherwise true.</returns>
public static bool Exists(string strSql)
{
    object scalar = GetSingle(strSql);
    bool noValue = Object.Equals(scalar, null) || Object.Equals(scalar, System.DBNull.Value);
    int count = noValue ? 0 : int.Parse(scalar.ToString());
    return count != 0;
}
/// <summary>
/// Runs a parameterized scalar query (typically a "select count(...)") and reports
/// whether its result is a non-zero number.
/// </summary>
/// <param name="strSql">Query whose first column of the first row is an integer.</param>
/// <param name="cmdParms">Parameters for the query.</param>
/// <returns>false when the result is null, DBNull or 0; otherwise true.</returns>
public static bool Exists(string strSql, params OleDbParameter[] cmdParms)
{
    object scalar = GetSingle(strSql, cmdParms);
    bool noValue = Object.Equals(scalar, null) || Object.Equals(scalar, System.DBNull.Value);
    int count = noValue ? 0 : int.Parse(scalar.ToString());
    return count != 0;
}
#endregion
#region 执行简单SQL语句
/// <summary>
/// Executes a SQL statement and returns the number of affected rows.
/// </summary>
/// <param name="SQLString">SQL statement to execute.</param>
/// <returns>Number of rows affected.</returns>
/// <exception cref="Exception">
/// Thrown with the provider's message when the database reports an error; the original
/// OleDbException is available as InnerException.
/// </exception>
public static int ExecuteSql(string SQLString)
{
    using (OleDbConnection connection = new OleDbConnection(connectionString))
    using (OleDbCommand cmd = new OleDbCommand(SQLString, connection))
    {
        try
        {
            connection.Open();
            return cmd.ExecuteNonQuery();
        }
        catch (System.Data.OleDb.OleDbException E)
        {
            // Same exception type and message as before, but the provider exception
            // (error codes, stack trace) is no longer thrown away. The connection is
            // closed by the using block.
            throw new Exception(E.Message, E);
        }
    }
}
/// <summary>
/// Executes several SQL statements inside one database transaction: either all of
/// them are committed or none is.
/// </summary>
/// <param name="SQLStringList">
/// The statements, in execution order. Entries whose trimmed text is one character
/// or shorter are skipped.
/// </param>
/// <exception cref="Exception">
/// Thrown with the provider's message when the database reports an error; the original
/// OleDbException is available as InnerException. The transaction is rolled back first.
/// </exception>
public static void ExecuteSqlTran(ArrayList SQLStringList)
{
    using (OleDbConnection conn = new OleDbConnection(connectionString))
    {
        conn.Open();
        using (OleDbTransaction tx = conn.BeginTransaction())
        using (OleDbCommand cmd = new OleDbCommand())
        {
            cmd.Connection = conn;
            cmd.Transaction = tx;
            try
            {
                for (int n = 0; n < SQLStringList.Count; n++)
                {
                    string strsql = SQLStringList[n].ToString();
                    if (strsql.Trim().Length > 1)
                    {
                        cmd.CommandText = strsql;
                        cmd.ExecuteNonQuery();
                    }
                }
                tx.Commit();
            }
            catch (System.Data.OleDb.OleDbException E)
            {
                tx.Rollback();
                throw new Exception(E.Message, E);
            }
            catch
            {
                // Any other failure (e.g. a null entry in the list) must not leave
                // the earlier statements pending either.
                tx.Rollback();
                throw;
            }
        }
    }
}
/// <summary>
/// Executes a SQL statement that takes a single text parameter named "@content".
/// </summary>
/// <param name="SQLString">SQL statement containing the parameter placeholder.</param>
/// <param name="content">
/// Parameter value, e.g. article text with special characters that should not be
/// concatenated into the SQL itself.
/// </param>
/// <returns>Number of rows affected.</returns>
/// <exception cref="Exception">
/// Thrown with the provider's message when the database reports an error; the original
/// OleDbException is available as InnerException.
/// </exception>
public static int ExecuteSql(string SQLString, string content)
{
    using (OleDbConnection connection = new OleDbConnection(connectionString))
    using (OleDbCommand cmd = new OleDbCommand(SQLString, connection))
    {
        System.Data.OleDb.OleDbParameter myParameter = new System.Data.OleDb.OleDbParameter("@content", OleDbType.VarChar);
        myParameter.Value = content;
        cmd.Parameters.Add(myParameter);
        try
        {
            connection.Open();
            return cmd.ExecuteNonQuery();
        }
        catch (System.Data.OleDb.OleDbException E)
        {
            // Keep the provider exception as InnerException instead of discarding it.
            throw new Exception(E.Message, E);
        }
    }
}
/// <summary>
/// Executes a SQL statement that takes a single binary parameter named "@fs",
/// e.g. to insert image data into a binary column.
/// </summary>
/// <param name="strSQL">SQL statement containing the parameter placeholder.</param>
/// <param name="fs">Bytes to store.</param>
/// <returns>Number of rows affected.</returns>
/// <exception cref="Exception">
/// Thrown with the provider's message when the database reports an error; the original
/// OleDbException is available as InnerException.
/// </exception>
public static int ExecuteSqlInsertImg(string strSQL, byte[] fs)
{
    using (OleDbConnection connection = new OleDbConnection(connectionString))
    using (OleDbCommand cmd = new OleDbCommand(strSQL, connection))
    {
        System.Data.OleDb.OleDbParameter myParameter = new System.Data.OleDb.OleDbParameter("@fs", OleDbType.Binary);
        myParameter.Value = fs;
        cmd.Parameters.Add(myParameter);
        try
        {
            connection.Open();
            return cmd.ExecuteNonQuery();
        }
        catch (System.Data.OleDb.OleDbException E)
        {
            // Keep the provider exception as InnerException instead of discarding it.
            throw new Exception(E.Message, E);
        }
    }
}
/// <summary>
/// Executes a query and returns the first column of the first row of its result.
/// </summary>
/// <param name="SQLString">Scalar query, e.g. "select count(*) from ...".</param>
/// <returns>The scalar value, or null when the result is empty or DBNull.</returns>
/// <exception cref="Exception">
/// Thrown with the provider's message when the database reports an error; the original
/// OleDbException is available as InnerException.
/// </exception>
public static object GetSingle(string SQLString)
{
    using (OleDbConnection connection = new OleDbConnection(connectionString))
    using (OleDbCommand cmd = new OleDbCommand(SQLString, connection))
    {
        try
        {
            connection.Open();
            object obj = cmd.ExecuteScalar();
            // Normalize "no row" (null) and "null value" (DBNull) to a plain null.
            if (Object.Equals(obj, null) || Object.Equals(obj, System.DBNull.Value))
            {
                return null;
            }
            return obj;
        }
        catch (System.Data.OleDb.OleDbException e)
        {
            // Keep the provider exception as InnerException; the using block closes
            // the connection.
            throw new Exception(e.Message, e);
        }
    }
}
/// <summary>
/// Executes a query and returns an open OleDbDataReader over its result.
/// </summary>
/// <param name="strSQL">Query to execute.</param>
/// <returns>
/// An open reader. The caller must close or dispose it; doing so also closes the
/// underlying connection.
/// </returns>
/// <exception cref="Exception">
/// Thrown with the provider's message when the database reports an error; the original
/// OleDbException is available as InnerException.
/// </exception>
public static OleDbDataReader ExecuteReader(string strSQL)
{
    OleDbConnection connection = new OleDbConnection(connectionString);
    OleDbCommand cmd = new OleDbCommand(strSQL, connection);
    try
    {
        connection.Open();
        // The connection cannot be wrapped in a using block because the reader outlives
        // this method. CloseConnection ties its lifetime to the reader; previously
        // nothing ever closed it.
        return cmd.ExecuteReader(CommandBehavior.CloseConnection);
    }
    catch (System.Data.OleDb.OleDbException e)
    {
        connection.Dispose();
        throw new Exception(e.Message, e);
    }
    catch
    {
        // No reader was handed out, so nobody else can release the connection.
        connection.Dispose();
        throw;
    }
}
/// <summary>
/// Executes a query and returns its result as a DataSet.
/// </summary>
/// <param name="SQLString">Query to execute.</param>
/// <returns>A DataSet whose result table is named "ds".</returns>
/// <exception cref="Exception">
/// Thrown with the provider's message when the database reports an error; the original
/// OleDbException is available as InnerException.
/// </exception>
public static DataSet Query(string SQLString)
{
    using (OleDbConnection connection = new OleDbConnection(connectionString))
    {
        DataSet ds = new DataSet();
        try
        {
            connection.Open();
            // The adapter is disposable too; the original never released it.
            using (OleDbDataAdapter command = new OleDbDataAdapter(SQLString, connection))
            {
                command.Fill(ds, "ds");
            }
        }
        catch (System.Data.OleDb.OleDbException ex)
        {
            // Keep the provider exception as InnerException instead of discarding it.
            throw new Exception(ex.Message, ex);
        }
        return ds;
    }
}
#endregion
#region 执行带参数的SQL语句
/// <summary>
/// Executes a parameterized SQL statement and returns the number of affected rows.
/// </summary>
/// <param name="SQLString">SQL statement to execute.</param>
/// <param name="cmdParms">Parameters for the statement; may be null.</param>
/// <returns>Number of rows affected.</returns>
/// <exception cref="Exception">
/// Thrown with the provider's message when the database reports an error; the original
/// OleDbException is available as InnerException.
/// </exception>
public static int ExecuteSql(string SQLString, params OleDbParameter[] cmdParms)
{
    using (OleDbConnection connection = new OleDbConnection(connectionString))
    using (OleDbCommand cmd = new OleDbCommand())
    {
        try
        {
            PrepareCommand(cmd, connection, null, SQLString, cmdParms);
            int rows = cmd.ExecuteNonQuery();
            // Detach the parameters so the caller can reuse them on another command.
            cmd.Parameters.Clear();
            return rows;
        }
        catch (System.Data.OleDb.OleDbException E)
        {
            // Keep the provider exception as InnerException instead of discarding it.
            throw new Exception(E.Message, E);
        }
    }
}
/// <summary>
/// Executes several parameterized SQL statements inside one database transaction:
/// either all of them are committed or none is.
/// </summary>
/// <param name="SQLStringList">
/// Statements to run: each key is the SQL text and each value is that statement's
/// OleDbParameter[] (or null).
/// </param>
/// <remarks>
/// A Hashtable does not preserve insertion order, so the statements must not depend on
/// being executed in a particular sequence.
/// </remarks>
public static void ExecuteSqlTran(Hashtable SQLStringList)
{
    using (OleDbConnection conn = new OleDbConnection(connectionString))
    {
        conn.Open();
        using (OleDbTransaction trans = conn.BeginTransaction())
        using (OleDbCommand cmd = new OleDbCommand())
        {
            try
            {
                foreach (DictionaryEntry myDE in SQLStringList)
                {
                    string cmdText = myDE.Key.ToString();
                    OleDbParameter[] cmdParms = (OleDbParameter[])myDE.Value;
                    PrepareCommand(cmd, conn, trans, cmdText, cmdParms);
                    cmd.ExecuteNonQuery();
                    // Detach the parameters before the command is reused.
                    cmd.Parameters.Clear();
                }
                // Commit once, after every statement has succeeded. The original
                // committed inside the loop, which ended the transaction after the
                // first statement and made any further statement fail.
                trans.Commit();
            }
            catch
            {
                trans.Rollback();
                throw;
            }
        }
    }
}
/// <summary>
/// Executes a parameterized query and returns the first column of the first row of
/// its result.
/// </summary>
/// <param name="SQLString">Scalar query, e.g. "select count(*) from ...".</param>
/// <param name="cmdParms">Parameters for the query; may be null.</param>
/// <returns>The scalar value, or null when the result is empty or DBNull.</returns>
/// <exception cref="Exception">
/// Thrown with the provider's message when the database reports an error; the original
/// OleDbException is available as InnerException.
/// </exception>
public static object GetSingle(string SQLString, params OleDbParameter[] cmdParms)
{
    using (OleDbConnection connection = new OleDbConnection(connectionString))
    using (OleDbCommand cmd = new OleDbCommand())
    {
        try
        {
            PrepareCommand(cmd, connection, null, SQLString, cmdParms);
            object obj = cmd.ExecuteScalar();
            // Detach the parameters so the caller can reuse them on another command.
            cmd.Parameters.Clear();
            // Normalize "no row" (null) and "null value" (DBNull) to a plain null.
            if (Object.Equals(obj, null) || Object.Equals(obj, System.DBNull.Value))
            {
                return null;
            }
            return obj;
        }
        catch (System.Data.OleDb.OleDbException e)
        {
            // Keep the provider exception as InnerException instead of discarding it.
            throw new Exception(e.Message, e);
        }
    }
}
/// <summary>
/// Executes a parameterized query and returns an open OleDbDataReader over its result.
/// </summary>
/// <param name="SQLString">Query to execute.</param>
/// <param name="cmdParms">Parameters for the query; may be null.</param>
/// <returns>
/// An open reader. The caller must close or dispose it; doing so also closes the
/// underlying connection.
/// </returns>
/// <exception cref="Exception">
/// Thrown with the provider's message when the database reports an error; the original
/// OleDbException is available as InnerException.
/// </exception>
public static OleDbDataReader ExecuteReader(string SQLString, params OleDbParameter[] cmdParms)
{
    OleDbConnection connection = new OleDbConnection(connectionString);
    OleDbCommand cmd = new OleDbCommand();
    try
    {
        PrepareCommand(cmd, connection, null, SQLString, cmdParms);
        // The connection cannot be wrapped in a using block because the reader outlives
        // this method. CloseConnection ties its lifetime to the reader; previously
        // nothing ever closed it.
        OleDbDataReader myReader = cmd.ExecuteReader(CommandBehavior.CloseConnection);
        cmd.Parameters.Clear();
        return myReader;
    }
    catch (System.Data.OleDb.OleDbException e)
    {
        connection.Dispose();
        throw new Exception(e.Message, e);
    }
    catch
    {
        // No reader was handed out, so nobody else can release the connection.
        connection.Dispose();
        throw;
    }
}
/// <summary>
/// Executes a parameterized query and returns its result as a DataSet.
/// </summary>
/// <param name="SQLString">Query to execute.</param>
/// <param name="cmdParms">Parameters for the query; may be null.</param>
/// <returns>A DataSet whose result table is named "ds".</returns>
/// <exception cref="Exception">
/// Thrown with the provider's message when filling the DataSet fails; the original
/// OleDbException is available as InnerException.
/// </exception>
public static DataSet Query(string SQLString, params OleDbParameter[] cmdParms)
{
    using (OleDbConnection connection = new OleDbConnection(connectionString))
    using (OleDbCommand cmd = new OleDbCommand())
    {
        PrepareCommand(cmd, connection, null, SQLString, cmdParms);
        using (OleDbDataAdapter da = new OleDbDataAdapter(cmd))
        {
            DataSet ds = new DataSet();
            try
            {
                da.Fill(ds, "ds");
                // Detach the parameters so the caller can reuse them on another command.
                cmd.Parameters.Clear();
            }
            catch (System.Data.OleDb.OleDbException ex)
            {
                // Keep the provider exception as InnerException instead of discarding it.
                throw new Exception(ex.Message, ex);
            }
            return ds;
        }
    }
}
/// <summary>
/// Readies a command for execution: opens the connection if it is not open yet,
/// binds connection, text and (optional) transaction, and attaches the parameters.
/// </summary>
/// <param name="cmd">Command to configure.</param>
/// <param name="conn">Connection the command runs on; opened here when necessary.</param>
/// <param name="trans">Transaction to enlist in, or null for none.</param>
/// <param name="cmdText">SQL text; always executed as CommandType.Text.</param>
/// <param name="cmdParms">Parameters to attach, or null for none.</param>
private static void PrepareCommand(OleDbCommand cmd, OleDbConnection conn, OleDbTransaction trans, string cmdText, OleDbParameter[] cmdParms)
{
    bool alreadyOpen = conn.State == ConnectionState.Open;
    if (!alreadyOpen)
    {
        conn.Open();
    }

    cmd.Connection = conn;
    cmd.CommandText = cmdText;
    if (trans != null)
    {
        cmd.Transaction = trans;
    }
    cmd.CommandType = CommandType.Text;

    if (cmdParms == null)
    {
        return;
    }
    for (int i = 0; i < cmdParms.Length; i++)
    {
        cmd.Parameters.Add(cmdParms[i]);
    }
}
#endregion
}
}
3.SearchCondition 类
using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.Data.Common;
using System.Data;
using System.Text.RegularExpressions;
namespace Core.DBUtility
{
/// <summary>
/// 查询条件组合辅助类
/// </summary>
public class SearchCondition
{
#region 添加查询条件
// Backing store for the registered conditions; AddCondition stores each SearchInfo
// under a freshly generated Guid key.
private readonly Hashtable conditionTable = new Hashtable();
/// <summary>
/// The conditions registered so far.
/// </summary>
public Hashtable ConditionTable
{
    get
    {
        return conditionTable;
    }
}
/// <summary>
/// Registers one search condition.
/// </summary>
/// <example>
/// Usage 1:
/// SearchCondition searchObj = new SearchCondition();
/// searchObj.AddCondition("Test", 1, SqlOperator.NotEqual);
/// searchObj.AddCondition("Test2", "Test2Value", SqlOperator.Like);
///
/// Usage 2: calls can be chained to add several conditions
/// SearchCondition searchObj = new SearchCondition();
/// searchObj.AddCondition("Test", 1, SqlOperator.NotEqual).AddCondition("Test2", "Test2Value", SqlOperator.Like);
/// </example>
/// <param name="fielName">Field (column) name.</param>
/// <param name="fieldValue">Value to compare the field with.</param>
/// <param name="sqlOperator">Comparison operator.</param>
/// <returns>This SearchCondition, so that calls can be chained.</returns>
public SearchCondition AddCondition(string fielName, object fieldValue, SqlOperator sqlOperator)
{
    // Conditions are keyed by a fresh Guid rather than by field name, so the same
    // field can be used in several conditions.
    object key = System.Guid.NewGuid();
    SearchInfo condition = new SearchInfo(fielName, fieldValue, sqlOperator);
    this.conditionTable.Add(key, condition);
    return this;
}
/// <summary>
/// Registers one search condition and states whether it should be dropped when its
/// value is empty.
/// </summary>
/// <example>
/// Usage 1:
/// SearchCondition searchObj = new SearchCondition();
/// searchObj.AddCondition("Test", 1, SqlOperator.NotEqual, false);
/// searchObj.AddCondition("Test2", "Test2Value", SqlOperator.Like, true);
///
/// Usage 2: calls can be chained to add several conditions
/// SearchCondition searchObj = new SearchCondition();
/// searchObj.AddCondition("Test", 1, SqlOperator.NotEqual, false).AddCondition("Test2", "Test2Value", SqlOperator.Like, true);
/// </example>
/// <param name="fielName">Field (column) name.</param>
/// <param name="fieldValue">Value to compare the field with.</param>
/// <param name="sqlOperator">Comparison operator.</param>
/// <param name="excludeIfEmpty">When true, the condition is ignored if the value is null or empty.</param>
/// <returns>This SearchCondition, so that calls can be chained.</returns>
public SearchCondition AddCondition(string fielName, object fieldValue, SqlOperator sqlOperator, bool excludeIfEmpty)
{
    // Conditions are keyed by a fresh Guid rather than by field name, so the same
    // field can be used in several conditions.
    object key = System.Guid.NewGuid();
    SearchInfo condition = new SearchInfo(fielName, fieldValue, sqlOperator, excludeIfEmpty);
    this.conditionTable.Add(key, condition);
    return this;
}
/// <summary>
/// Registers one search condition as a member of a named group. The conditions of a
/// group are OR-ed together inside parentheses, e.g. ( Test = "AA1" OR Test = "AA2" ).
/// </summary>
/// <param name="fielName">Field (column) name.</param>
/// <param name="fieldValue">Value to compare the field with.</param>
/// <param name="sqlOperator">Comparison operator.</param>
/// <param name="excludeIfEmpty">When true, the condition is ignored if the value is null or empty.</param>
/// <param name="groupName">Name of the group this condition belongs to.</param>
/// <returns>This SearchCondition, so that calls can be chained.</returns>
public SearchCondition AddCondition(string fielName, object fieldValue, SqlOperator sqlOperator,
    bool excludeIfEmpty, string groupName)
{
    // Conditions are keyed by a fresh Guid rather than by field name, so the same
    // field can be used in several conditions.
    object key = System.Guid.NewGuid();
    SearchInfo condition = new SearchInfo(fielName, fieldValue, sqlOperator, excludeIfEmpty, groupName);
    this.conditionTable.Add(key, condition);
    return this;
}
#endregion
/// <summary>
/// Builds the WHERE clause for the registered conditions as literal SQL (no parameters),
/// for example:
/// <![CDATA[
/// Where (1=1) AND Test4 < 'Value4' AND Test6 >= 'Value6' AND Test7 <= 'value7' AND Test <> '1' AND Test5 > 'Value5' AND Test2 Like '%Value2%' AND Test3 = 'Value3'
/// ]]>
/// Grouped conditions are emitted first (OR-ed within each group), followed by the
/// ungrouped conditions joined with AND.
/// </summary>
/// <param name="dbType">Target database; selects the Oracle/Access specific literal syntax.</param>
/// <returns>A clause starting with " Where (1=1) ".</returns>
/// <remarks>
/// Field names and values are inserted into the SQL text without escaping. Callers must
/// not pass unvalidated user input (a single quote in a value breaks or alters the
/// statement).
/// </remarks>
public string BuildConditionSql(DatabaseType dbType)
{
    StringBuilder sb = new StringBuilder(" Where (1=1) ");
    sb.Append(BuildGroupCondiction(dbType));

    foreach (DictionaryEntry de in this.conditionTable)
    {
        SearchInfo searchInfo = (SearchInfo)de.Value;
        object fieldValue = searchInfo.FieldValue;

        // Derive everything that depends on the value null-safely. The original called
        // FieldValue.GetType() before its own null check, so a null value always threw.
        string valueText = fieldValue == null ? string.Empty : fieldValue.ToString();
        bool isEmptyValue = string.IsNullOrEmpty(valueText);
        TypeCode typeCode = fieldValue == null ? TypeCode.Empty : Type.GetTypeCode(fieldValue.GetType());

        // Empty values are skipped when the condition asks for it.
        if (searchInfo.ExcludeIfEmpty && isEmptyValue)
        {
            continue;
        }
        // Grouped conditions were already emitted by BuildGroupCondiction.
        if (!string.IsNullOrEmpty(searchInfo.GroupName))
        {
            continue;
        }

        string op = this.ConvertSqlOperator(searchInfo.SqlOperator);

        if (searchInfo.SqlOperator == SqlOperator.Like || searchInfo.SqlOperator == SqlOperator.NotLike)
        {
            sb.AppendFormat(" AND {0} {1} '{2}'", searchInfo.FieldName, op, string.Format("%{0}%", fieldValue));
        }
        else if (searchInfo.SqlOperator == SqlOperator.LikeStartAt)
        {
            sb.AppendFormat(" AND {0} {1} '{2}'", searchInfo.FieldName, op, string.Format("{0}%", fieldValue));
        }
        else if (searchInfo.SqlOperator == SqlOperator.In)
        {
            // The value is expected to already be a comma separated SQL list.
            sb.AppendFormat(" AND {0} {1} {2}", searchInfo.FieldName, op, string.Format("({0})", fieldValue));
        }
        else if (dbType == DatabaseType.Oracle)
        {
            // Test the date-time form first: IsDate only anchors the start of the text,
            // so it also accepts "yyyy-MM-dd HH:mm" and used to shadow this branch.
            // HH24 is needed because IsDateHourMinute accepts hours 00-23.
            if (IsDateHourMinute(valueText))
            {
                sb.AppendFormat(" AND {0} {1} to_date('{2}','YYYY-MM-dd HH24:mi')", searchInfo.FieldName, op, fieldValue);
            }
            else if (IsDate(valueText))
            {
                sb.AppendFormat(" AND {0} {1} to_date('{2}','YYYY-MM-dd')", searchInfo.FieldName, op, fieldValue);
            }
            else if (!searchInfo.ExcludeIfEmpty && isEmptyValue)
            {
                // Query for "empty" values. This is now limited to values that really
                // are empty; before, any non-date value with ExcludeIfEmpty == false
                // ended up here and lost its comparison.
                if (searchInfo.SqlOperator == SqlOperator.Equal)
                {
                    sb.AppendFormat(" AND ({0} is null or {0}='')", searchInfo.FieldName);
                }
                else if (searchInfo.SqlOperator == SqlOperator.NotEqual)
                {
                    sb.AppendFormat(" AND {0} is not null", searchInfo.FieldName);
                }
            }
            else
            {
                sb.AppendFormat(" AND {0} {1} '{2}'", searchInfo.FieldName, op, fieldValue);
            }
        }
        else if (dbType == DatabaseType.Access)
        {
            if (searchInfo.SqlOperator == SqlOperator.Equal && typeCode == TypeCode.String && isEmptyValue)
            {
                // Comparing with an empty string should also match NULL.
                sb.AppendFormat(" AND ({0} {1} '{2}' OR {0} IS NULL)", searchInfo.FieldName, op, fieldValue);
            }
            else if (typeCode == TypeCode.DateTime)
            {
                // Access date literals are delimited with '#'.
                sb.AppendFormat(" AND {0} {1} #{2}#", searchInfo.FieldName, op, fieldValue);
            }
            else if (typeCode == TypeCode.Byte || typeCode == TypeCode.Decimal || typeCode == TypeCode.Double ||
                typeCode == TypeCode.Int16 || typeCode == TypeCode.Int32 || typeCode == TypeCode.Int64 ||
                typeCode == TypeCode.SByte || typeCode == TypeCode.Single || typeCode == TypeCode.UInt16 ||
                typeCode == TypeCode.UInt32 || typeCode == TypeCode.UInt64)
            {
                // Numeric values are written without quotes.
                sb.AppendFormat(" AND {0} {1} {2}", searchInfo.FieldName, op, fieldValue);
            }
            else
            {
                sb.AppendFormat(" AND {0} {1} '{2}'", searchInfo.FieldName, op, fieldValue);
            }
        }
        else
        {
            // SQL Server and any other database type.
            sb.AppendFormat(" AND {0} {1} '{2}'", searchInfo.FieldName, op, fieldValue);
        }
    }
    return sb.ToString();
}
/// <summary>
/// Builds the SQL for grouped conditions. The conditions of each group are OR-ed together
/// and wrapped as " AND ( ... )"; one such clause is emitted per group name.
/// </summary>
/// <param name="dbType">Target database; selects the Oracle/Access specific literal syntax.</param>
/// <returns>The concatenated group clauses, or an empty string when there are none.</returns>
/// <remarks>
/// Field names and values are inserted into the SQL text without escaping. Callers must
/// not pass unvalidated user input.
/// </remarks>
private string BuildGroupCondiction(DatabaseType dbType)
{
    StringBuilder sql = new StringBuilder();
    foreach (string groupName in GetGroupNames().Keys)
    {
        StringBuilder sb = new StringBuilder();
        foreach (DictionaryEntry de in this.conditionTable)
        {
            SearchInfo searchInfo = (SearchInfo)de.Value;
            object fieldValue = searchInfo.FieldValue;

            // Derive everything that depends on the value null-safely. The original called
            // FieldValue.GetType() before its own null check, so a null value always threw.
            string valueText = fieldValue == null ? string.Empty : fieldValue.ToString();
            bool isEmptyValue = string.IsNullOrEmpty(valueText);
            TypeCode typeCode = fieldValue == null ? TypeCode.Empty : Type.GetTypeCode(fieldValue.GetType());

            // Empty values are skipped when the condition asks for it.
            if (searchInfo.ExcludeIfEmpty && isEmptyValue)
            {
                continue;
            }
            // Only the members of the current group are of interest here.
            if (!groupName.Equals(searchInfo.GroupName, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            string op = this.ConvertSqlOperator(searchInfo.SqlOperator);

            if (searchInfo.SqlOperator == SqlOperator.Like || searchInfo.SqlOperator == SqlOperator.NotLike)
            {
                sb.AppendFormat(" OR {0} {1} '{2}'", searchInfo.FieldName, op, string.Format("%{0}%", fieldValue));
            }
            else if (searchInfo.SqlOperator == SqlOperator.LikeStartAt)
            {
                sb.AppendFormat(" OR {0} {1} '{2}'", searchInfo.FieldName, op, string.Format("{0}%", fieldValue));
            }
            else if (searchInfo.SqlOperator == SqlOperator.In)
            {
                // Same list syntax as for ungrouped conditions; previously the list was
                // emitted as a quoted string literal, which is not a valid IN operand.
                sb.AppendFormat(" OR {0} {1} {2}", searchInfo.FieldName, op, string.Format("({0})", fieldValue));
            }
            else if (dbType == DatabaseType.Oracle)
            {
                // Test the date-time form first: IsDate only anchors the start of the
                // text, so it also accepts "yyyy-MM-dd HH:mm" and used to shadow this
                // branch. HH24 is needed because IsDateHourMinute accepts hours 00-23.
                if (IsDateHourMinute(valueText))
                {
                    sb.AppendFormat(" OR {0} {1} to_date('{2}','YYYY-MM-dd HH24:mi')", searchInfo.FieldName, op, fieldValue);
                }
                else if (IsDate(valueText))
                {
                    sb.AppendFormat(" OR {0} {1} to_date('{2}','YYYY-MM-dd')", searchInfo.FieldName, op, fieldValue);
                }
                else if (!searchInfo.ExcludeIfEmpty && isEmptyValue)
                {
                    // Query for "empty" values. This is now limited to values that
                    // really are empty; before, any non-date value with
                    // ExcludeIfEmpty == false ended up here and lost its comparison.
                    if (searchInfo.SqlOperator == SqlOperator.Equal)
                    {
                        sb.AppendFormat(" OR ({0} is null or {0}='')", searchInfo.FieldName);
                    }
                    else if (searchInfo.SqlOperator == SqlOperator.NotEqual)
                    {
                        sb.AppendFormat(" OR {0} is not null", searchInfo.FieldName);
                    }
                }
                else
                {
                    sb.AppendFormat(" OR {0} {1} '{2}'", searchInfo.FieldName, op, fieldValue);
                }
            }
            else if (dbType == DatabaseType.Access)
            {
                if (typeCode == TypeCode.DateTime)
                {
                    // Access date literals are delimited with '#'.
                    sb.AppendFormat(" OR {0} {1} #{2}#", searchInfo.FieldName, op, fieldValue);
                }
                else if (typeCode == TypeCode.Byte || typeCode == TypeCode.Decimal || typeCode == TypeCode.Double ||
                    typeCode == TypeCode.Int16 || typeCode == TypeCode.Int32 || typeCode == TypeCode.Int64 ||
                    typeCode == TypeCode.SByte || typeCode == TypeCode.Single || typeCode == TypeCode.UInt16 ||
                    typeCode == TypeCode.UInt32 || typeCode == TypeCode.UInt64)
                {
                    // Numeric values are written without quotes.
                    sb.AppendFormat(" OR {0} {1} {2}", searchInfo.FieldName, op, fieldValue);
                }
                else
                {
                    sb.AppendFormat(" OR {0} {1} '{2}'", searchInfo.FieldName, op, fieldValue);
                }
            }
            else
            {
                // SQL Server and any other database type. (Like is handled above, so
                // the extra Like check the original had here could never be reached.)
                sb.AppendFormat(" OR {0} {1} '{2}'", searchInfo.FieldName, op, fieldValue);
            }
        }

        if (sb.Length > 0)
        {
            // Every member was appended as " OR ..."; drop the leading " OR" of the
            // first one before wrapping the group in parentheses.
            sql.AppendFormat(" AND ({0})", sb.ToString().Substring(3));
        }
    }
    return sql.ToString();
}
/// <summary>
/// Collects the distinct group names used by the registered conditions.
/// </summary>
/// <returns>
/// A Hashtable whose keys (and values) are the group names. Names that differ only in
/// letter case count as the same group and are listed once, under the spelling that was
/// encountered first.
/// </returns>
private Hashtable GetGroupNames()
{
    Hashtable htGroupNames = new Hashtable();
    foreach (DictionaryEntry de in this.conditionTable)
    {
        SearchInfo searchInfo = (SearchInfo)de.Value;
        string candidate = searchInfo.GroupName;
        if (string.IsNullOrEmpty(candidate))
        {
            continue;
        }

        // BuildGroupCondiction matches group members case-insensitively, so the names
        // have to be de-duplicated the same way. Otherwise "A" and "a" would each
        // produce the same group clause once.
        bool alreadyListed = false;
        foreach (string existing in htGroupNames.Keys)
        {
            if (existing.Equals(candidate, StringComparison.OrdinalIgnoreCase))
            {
                alreadyListed = true;
                break;
            }
        }
        if (!alreadyListed)
        {
            htGroupNames.Add(candidate, candidate);
        }
    }
    return htGroupNames;
}
// <summary>
// 创建用于Enterprise Library的DbCommand对象。
// 该对象包含了可以运行的参数化语句和参数列表。
// <example>
// 函数用法如下:
// <code>
// <para>
// Database db = DatabaseFactory.CreateDatabase();
// SearchCondition searchObj = new SearchCondition();
// searchObj.AddCondition("Name", "测试" , SqlOperator.Like)
// .AddCondition("ID", 1, SqlOperator.MoreThanOrEqual);
// DbCommand dbComand = searchObj.BuildDbCommand(db, "select Comments from Test", " Order by Name");
// using (IDataReader dr = db.ExecuteReader(dbComand))
// {
// while (dr.Read())
// {
// this.txtSql.Text += "\r\n" + dr["Comments"].ToString();
// }
// }
// </para>
// </code>
// </example>
// </summary>
// <remarks>Enterprise Library的DbCommand对象</remarks>
// <param name="db">Database对象</param>
// <param name="mainSql">除了Where条件和排序语句的主Sql语句</param>
// <param name="orderSql">排序语句</param>
// <returns>Enterprise Library的DbCommand对象</returns>
//public DbCommand BuildDbCommand(Database db, string mainSql, string orderSql)
//{
// string sql = " Where (1=1) ";
// string fieldName = string.Empty;
// SearchInfo searchInfo = null;
// StringBuilder sb = new StringBuilder();
// foreach (DictionaryEntry de in this.ConditionTable)
// {
// searchInfo = (SearchInfo)de.Value;
// //如果选择ExcludeIfEmpty为True,并且该字段为空值的话,跳过
// if (searchInfo.ExcludeIfEmpty &&
// (searchInfo.FieldValue == null || string.IsNullOrEmpty(searchInfo.FieldValue.ToString())))
// {
// continue;
// }
// sb.AppendFormat(" AND {0} {1} @{0} ", searchInfo.FieldName, this.ConvertSqlOperator(searchInfo.SqlOperator));
// }
// sql += sb.ToString();
// sql = mainSql + sql + orderSql;
// DbCommand dbCommand = db.GetSqlStringCommand(sql);
// foreach (DictionaryEntry de in this.ConditionTable)
// {
// searchInfo = (SearchInfo)de.Value;
// //如果选择ExcludeIfEmpty为True,并且该字段为空值的话,跳过
// if (searchInfo.ExcludeIfEmpty && string.IsNullOrEmpty((string)searchInfo.FieldValue))
// {
// continue;
// }
// if (searchInfo.SqlOperator == SqlOperator.Like)
// {
// if ( !string.IsNullOrEmpty(searchInfo.FieldValue.ToString()) )
// db.AddInParameter(dbCommand, searchInfo.FieldName,
// this.GetFieldDbType(searchInfo.FieldValue), string.Format("%{0}%", searchInfo.FieldValue));
// }
// else
// {
// db.AddInParameter(dbCommand, searchInfo.FieldName,
// this.GetFieldDbType(searchInfo.FieldValue), searchInfo.FieldValue);
// }
// }
// return dbCommand;
//}
#region 辅助函数
/// <summary>
/// Translates a SqlOperator value into the SQL text of the operator, padded with a
/// space on each side.
/// </summary>
/// <param name="sqlOperator">Operator to translate.</param>
/// <returns><![CDATA[The operator text, e.g. " > ", " <> ", " >= "; " = " for any value not listed.]]></returns>
private string ConvertSqlOperator(SqlOperator sqlOperator)
{
    switch (sqlOperator)
    {
        case SqlOperator.LessThan:
            return " < ";
        case SqlOperator.LessThanOrEqual:
            return " <= ";
        case SqlOperator.MoreThan:
            return " > ";
        case SqlOperator.MoreThanOrEqual:
            return " >= ";
        case SqlOperator.NotEqual:
            return " <> ";
        case SqlOperator.Like:
        case SqlOperator.LikeStartAt:
            // Both use the same keyword; they differ only in where the caller puts
            // the wildcards.
            return " Like ";
        case SqlOperator.NotLike:
            return " NOT Like ";
        case SqlOperator.In:
            return " in ";
        case SqlOperator.Equal:
        default:
            return " = ";
    }
}
/// <summary>
/// Picks the DbType that corresponds to the runtime type of a value.
/// </summary>
/// <param name="fieldValue">Value to inspect; must not be null.</param>
/// <returns>The matching DbType, or DbType.String for any type not listed below.</returns>
private DbType GetFieldDbType(object fieldValue)
{
    Type valueType = fieldValue.GetType();

    if (valueType == typeof(short)) { return DbType.Int16; }
    if (valueType == typeof(ushort)) { return DbType.UInt16; }
    if (valueType == typeof(float)) { return DbType.Single; }
    if (valueType == typeof(uint)) { return DbType.UInt32; }
    if (valueType == typeof(int)) { return DbType.Int32; }
    if (valueType == typeof(ulong)) { return DbType.UInt64; }
    if (valueType == typeof(long)) { return DbType.Int64; }
    if (valueType == typeof(double)) { return DbType.Double; }
    if (valueType == typeof(decimal)) { return DbType.Decimal; }
    if (valueType == typeof(byte)) { return DbType.Byte; }
    if (valueType == typeof(bool)) { return DbType.Boolean; }
    if (valueType == typeof(DateTime)) { return DbType.DateTime; }
    if (valueType == typeof(Guid)) { return DbType.Guid; }

    // Strings and every other type are passed as text.
    return DbType.String;
}
/// <summary>
/// Checks whether a string is a calendar date written as year, month, day
/// (four-digit year; optional "-", "/" or whitespace separators; leap years are honoured).
/// </summary>
/// <param name="strValue">The text to validate; null is treated as not a date.</param>
/// <returns>True when the whole string is a valid date, otherwise false.</returns>
internal bool IsDate(string strValue)
{
    if (strValue == null)
    {
        return false;
    }
    // The pattern is anchored at both ends. Without the trailing "$" a valid prefix was enough,
    // so inputs such as "2004-02-30" or "2004-13-45" were accepted.
    return Regex.IsMatch(strValue, @"^((\d{2}(([02468][048])|([13579][26]))[\-\/\s]?((((0?[13578])|(1[02]))[\-\/\s]?((0?[1-9])|([1-2][0-9])|(3[01])))|(((0?[469])|(11))[\-\/\s]?((0?[1-9])|([1-2][0-9])|(30)))|(0?2[\-\/\s]?((0?[1-9])|([1-2][0-9])))))|(\d{2}(([02468][1235679])|([13579][01345789]))[\-\/\s]?((((0?[13578])|(1[02]))[\-\/\s]?((0?[1-9])|([1-2][0-9])|(3[01])))|(((0?[469])|(11))[\-\/\s]?((0?[1-9])|([1-2][0-9])|(30)))|(0?2[\-\/\s]?((0?[1-9])|(1[0-9])|(2[0-8]))))))$");
}
/// <summary>
/// Checks whether a string has the exact form "yyyy-MM-dd HH:mm",
/// for example "2004-07-12 14:25", "1900-01-01 00:00" or "9999-12-31 23:59".
/// </summary>
/// <param name="strValue">The text to validate.</param>
/// <returns>True when the whole string matches the pattern, otherwise false.</returns>
internal bool IsDateHourMinute(string strValue)
{
    // Years 1900-9999; day ranges depend on the month; February is allowed up to the 29th in any year.
    const string dateHourMinutePattern = @"^(19[0-9]{2}|[2-9][0-9]{3})-((0(1|3|5|7|8)|10|12)-(0[1-9]|1[0-9]|2[0-9]|3[0-1])|(0(4|6|9)|11)-(0[1-9]|1[0-9]|2[0-9]|30)|(02)-(0[1-9]|1[0-9]|2[0-9]))\x20(0[0-9]|1[0-9]|2[0-3])(:[0-5][0-9]){1}$";
    Regex validator = new Regex(dateHourMinutePattern);
    bool matches = validator.IsMatch(strValue);
    return matches;
}
#endregion
}
}
4. Html操作类
using System;
using System.IO.Compression;
using System.Text;
using System.Net;
using System.IO;
using System.Threading;
using System.Text.RegularExpressions;
using System.Web;
namespace Core.Html
{
/// <summary>
///1、获取HTML
///1.1获取指定页面的HTML代码 GetHtml(string url, string postData, bool isPost, CookieContainer cookieContainer)
///1.2获取HTML GetHtml(string url, CookieContainer cookieContainer)
///2、获取字符流
///2.1获取字符流 GetStream(string url, CookieContainer cookieContainer)
///3、清除HTML标记
///3.1清除HTML标记 RemoveHTML(string Htmlstring)
///4、匹配页面的链接
///4.1获取页面的链接正则 GetHref(string HtmlCode)
///5、匹配页面的图片地址
///5.1匹配页面的图片地址 GetImgSrc(string HtmlCode, string imgHttp)
///5.2匹配<img src="" />中的图片路径实际链接 GetImg(string ImgString, string imgHttp)
///6、抓取远程页面内容
///6.1以GET方式抓取远程页面内容 Get_Http(string tUrl)
///6.2以POST方式抓取远程页面内容 Post_Http(string url, string postData, string encodeType)
///7、压缩HTML输出
///7.1压缩HTML输出 ZipHtml(string Html)
///8、过滤HTML标签
///8.1过滤指定HTML标签 DelHtml(string s_TextStr, string html_Str)
///8.2过滤HTML中的不安全标签 RemoveUnsafeHtml(string content)
///HTML转换成TEXT HtmlToTxt(string strHtml)
///字符串转换为Html StringToHtml(string str)
///Html转换成字符串 HtmlToString(string strHtml)
///获取URL编码 GetEncoding(string url)
///判断URL是否有效 GetUrlError(string url)
///返回 HTML 字符串的编码解码结果 HtmlEncode(string inputData) / HtmlDecode(string str)
/// </summary>
public class HtmlHelper
{
#region 私有字段
// Cookie container exposed through the CookieContainer property.
private static CookieContainer cc = new CookieContainer();
// Content-Type header value assigned to the requests built by GetHtml / GetStream.
private static string contentType = "application/x-www-form-urlencoded";
// Accept header value assigned to the requests built by GetHtml / GetStream.
private static string accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg," +
" application/x-shockwave-flash, application/x-silverlight, " +
"application/vnd.ms-excel, application/vnd.ms-powerpoint, " +
"application/msword, application/x-ms-application," +
" application/x-ms-xbap," +
" application/vnd.ms-xpsdocument, application/xaml+xml, application/x-silverlight-2-b1, */*";
// User-Agent header value assigned to the requests built by GetHtml / GetStream.
private static string userAgent = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1;" +
" .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)";
// Encoding used by GetHtml to decode response bodies; replaceable through the Encoding property.
private static Encoding encoding = Encoding.GetEncoding("utf-8");
// Base delay; NetworkDelay returns a random value in [delay, delay * 2), which is passed to Thread.Sleep (milliseconds).
private static int delay = 1000;
// Retry ceiling compared against currentTry; also assigned to ServicePoint.ConnectionLimit.
private static int maxTry = 300;
// Attempt counter shared by GetHtml / GetStream (incremented per attempt, decremented or reset afterwards).
private static int currentTry = 0;
#endregion
#region 公有属性
// One shared generator for the delay jitter. Creating a new Random on every read can return
// the same value for reads made in quick succession; Random is not thread-safe, hence the lock.
private static readonly Random delayRandom = new Random();
private static readonly object delayRandomLock = new object();
/// <summary>
/// The cookie container shared by this helper.
/// </summary>
public static CookieContainer CookieContainer
{
    get
    {
        return cc;
    }
}
/// <summary>
/// The encoding used to decode page source fetched by GetHtml.
/// </summary>
public static Encoding Encoding
{
    get
    {
        return encoding;
    }
    set
    {
        encoding = value;
    }
}
/// <summary>
/// Pause applied before each request, in milliseconds.
/// Reading returns a random value in [base, base * 2); writing sets the base value.
/// </summary>
/// <exception cref="ArgumentOutOfRangeException">The assigned base delay is negative.</exception>
public static int NetworkDelay
{
    get
    {
        int baseDelay = delay;
        lock (delayRandomLock)
        {
            return delayRandom.Next(baseDelay, baseDelay * 2);
        }
    }
    set
    {
        // A negative base would make every later read throw inside Random.Next; reject it here.
        if (value < 0)
        {
            throw new ArgumentOutOfRangeException("value", "The delay must not be negative.");
        }
        delay = value;
    }
}
/// <summary>
/// Maximum number of retries after a failed request.
/// </summary>
public static int MaxTry
{
    get
    {
        return maxTry;
    }
    set
    {
        maxTry = value;
    }
}
#endregion
#region 1、获取HTML
/// <summary>
/// 1.1 Fetches the HTML of a page, sending <paramref name="postData"/> with the request.
/// </summary>
/// <remarks>
/// Each attempt is preceded by a pause of <c>NetworkDelay</c> milliseconds. A failed attempt is
/// retried up to <c>MaxTry</c> more times and the first successful response is returned.
/// When the server answers with an HTTP error status, the body of that error response is returned.
/// </remarks>
/// <param name="url">Address of the page.</param>
/// <param name="postData">Form data to send; when null or empty a plain GET is issued.</param>
/// <param name="isPost">True to send the data as a POST body; false to send it as a GET query string.</param>
/// <param name="cookieContainer">Cookie container attached to the request.</param>
/// <returns>The response text, or an empty string when every attempt failed.</returns>
public static string GetHtml(string url, string postData, bool isPost, CookieContainer cookieContainer)
{
    if (string.IsNullOrEmpty(postData))
    {
        return GetHtml(url, cookieContainer);
    }
    if (!isPost)
    {
        // A GET request cannot carry a body (GetRequestStream throws for it), so the data
        // travels in the query string instead.
        string separator = url.IndexOf('?') >= 0 ? "&" : "?";
        return GetHtml(url + separator + postData, cookieContainer);
    }
    byte[] byteRequest = Encoding.Default.GetBytes(postData);
    // Retry with a local counter; the result of a successful retry is returned to the caller.
    for (int attempt = 0; ; attempt++)
    {
        Thread.Sleep(NetworkDelay);
        HttpWebRequest httpWebRequest = null;
        try
        {
            httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
            httpWebRequest.CookieContainer = cookieContainer;
            httpWebRequest.ContentType = contentType;
            httpWebRequest.ServicePoint.ConnectionLimit = maxTry;
            httpWebRequest.Referer = url;
            httpWebRequest.Accept = accept;
            httpWebRequest.UserAgent = userAgent;
            httpWebRequest.Method = "POST";
            httpWebRequest.ContentLength = byteRequest.Length;
            httpWebRequest.AllowAutoRedirect = false;
            using (Stream requestStream = httpWebRequest.GetRequestStream())
            {
                requestStream.Write(byteRequest, 0, byteRequest.Length);
            }
            HttpWebResponse httpWebResponse;
            try
            {
                httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();
            }
            catch (WebException ex)
            {
                // An HTTP error status still has a body worth returning; a failure without a
                // response (DNS, connection, timeout) is handed to the retry logic below.
                httpWebResponse = ex.Response as HttpWebResponse;
                if (httpWebResponse == null)
                {
                    throw;
                }
            }
            using (httpWebResponse)
            using (Stream responseStream = httpWebResponse.GetResponseStream())
            using (StreamReader streamReader = new StreamReader(responseStream, encoding))
            {
                return streamReader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            // Best effort by design: any failure counts as one failed attempt.
            if (httpWebRequest != null)
            {
                httpWebRequest.Abort();
            }
            if (attempt >= maxTry)
            {
                return string.Empty;
            }
        }
    }
}
/// <summary>
/// 1.2 Fetches the HTML of a page with a GET request.
/// </summary>
/// <remarks>
/// Each attempt is preceded by a pause of <c>NetworkDelay</c> milliseconds. A failed attempt is
/// retried up to <c>MaxTry</c> more times and the first successful response is returned.
/// </remarks>
/// <param name="url">Address of the page.</param>
/// <param name="cookieContainer">Cookie container attached to the request.</param>
/// <returns>The response text, or an empty string when every attempt failed.</returns>
public static string GetHtml(string url, CookieContainer cookieContainer)
{
    // Retry with a local counter; the result of a successful retry is returned to the caller.
    for (int attempt = 0; ; attempt++)
    {
        Thread.Sleep(NetworkDelay);
        HttpWebRequest httpWebRequest = null;
        try
        {
            httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
            httpWebRequest.CookieContainer = cookieContainer;
            httpWebRequest.ContentType = contentType;
            httpWebRequest.ServicePoint.ConnectionLimit = maxTry;
            httpWebRequest.Referer = url;
            httpWebRequest.Accept = accept;
            httpWebRequest.UserAgent = userAgent;
            httpWebRequest.Method = "GET";
            using (HttpWebResponse httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse())
            using (Stream responseStream = httpWebResponse.GetResponseStream())
            using (StreamReader streamReader = new StreamReader(responseStream, encoding))
            {
                return streamReader.ReadToEnd();
            }
        }
        catch (Exception)
        {
            // Best effort by design: any failure counts as one failed attempt.
            if (httpWebRequest != null)
            {
                httpWebRequest.Abort();
            }
            if (attempt >= maxTry)
            {
                return string.Empty;
            }
        }
    }
}
#endregion
#region 2、获取字符流
/// <summary>
/// 2.1 Opens the response body of a GET request as a stream.
/// </summary>
/// <remarks>
/// A failed attempt is retried up to <c>MaxTry</c> more times, pausing <c>NetworkDelay</c>
/// milliseconds between attempts. The caller owns the returned stream and must close it.
/// </remarks>
/// <example>
/// <code>
/// System.Net.CookieContainer cookie = new System.Net.CookieContainer();
/// Stream s = HtmlHelper.GetStream("http://ptlogin2.qq.com/getimage?aid=15000102&amp;0.43878429697395826", cookie);
/// picVerify.Image = Image.FromStream(s);
/// </code>
/// </example>
/// <param name="url">Address to request.</param>
/// <param name="cookieContainer">Cookie container attached to the request.</param>
/// <returns>The response stream, or null when every attempt failed.</returns>
public static Stream GetStream(string url, CookieContainer cookieContainer)
{
    for (int attempt = 0; ; attempt++)
    {
        HttpWebRequest httpWebRequest = null;
        HttpWebResponse httpWebResponse = null;
        try
        {
            httpWebRequest = (HttpWebRequest)WebRequest.Create(url);
            httpWebRequest.CookieContainer = cookieContainer;
            httpWebRequest.ContentType = contentType;
            httpWebRequest.ServicePoint.ConnectionLimit = maxTry;
            httpWebRequest.Referer = url;
            httpWebRequest.Accept = accept;
            httpWebRequest.UserAgent = userAgent;
            httpWebRequest.Method = "GET";
            httpWebResponse = (HttpWebResponse)httpWebRequest.GetResponse();
            // The response is intentionally left open: the returned stream reads from it.
            return httpWebResponse.GetResponseStream();
        }
        catch (Exception)
        {
            if (httpWebRequest != null)
            {
                httpWebRequest.Abort();
            }
            if (httpWebResponse != null)
            {
                httpWebResponse.Close();
            }
            if (attempt >= maxTry)
            {
                return null;
            }
        }
        // Retry this same request (the earlier code fetched the page as text and discarded it).
        Thread.Sleep(NetworkDelay);
    }
}
#endregion
#region 3、清除HTML标记
///<summary>
///3.1 Strips HTML markup from a string and decodes a few common entities.
///</summary>
///<remarks>
///Script blocks (including multi-line ones) are removed together with their content. Any "&lt;" or
///"&gt;" left at the end, including ones produced by decoding entities, is removed as well.
///</remarks>
///<param name="Htmlstring">HTML source text; null or empty yields an empty string.</param>
///<returns>The text with markup removed.</returns>
public static string RemoveHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }
    // Remove scripts. Singleline lets "." cross line breaks so multi-line scripts are removed too.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    // Remove tags.
    Htmlstring = Regex.Replace(Htmlstring, "<.+?>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);
    // Decode common entities; any other numeric entity is dropped.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\xa1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\xa2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\xa3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\xa9", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);
    // string.Replace returns a new string; the results must be assigned to take effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");
    return Htmlstring;
}
#endregion
#region 4、匹配页面的链接
#region 4.1获取页面的链接正则
/// <summary>
/// 4.1 Collects every <c>href=...</c> occurrence found in the given HTML.
/// </summary>
/// <param name="HtmlCode">HTML source to scan.</param>
/// <returns>
/// The matches, lower-cased, with the text "href=" removed and trimmed, each followed by "|";
/// an empty string when nothing matches.
/// </returns>
public string GetHref(string HtmlCode)
{
    const string hrefPattern = @"(h|H)(r|R)(e|E)(f|F) *= *('|"")?((\w|\\|\/|\.|:|-|_)+)[\S]*";
    StringBuilder collected = new StringBuilder();
    MatchCollection hits = Regex.Matches(HtmlCode, hrefPattern);
    for (int index = 0; index < hits.Count; index++)
    {
        string lowered = hits[index].Value.ToLower();
        collected.Append(lowered.Replace("href=", "").Trim());
        collected.Append("|");
    }
    return collected.ToString();
}
#endregion
#region 4.2取得所有链接URL
/// <summary>
/// 4.2 Extracts the href portion of every <c>&lt;a href=...&gt;...&lt;/a&gt;</c> element.
/// </summary>
/// <param name="html">HTML source to scan; null or empty yields an empty string.</param>
/// <returns>
/// One line per link containing everything between "href=" and the closing "&gt;" of the opening
/// tag, in the original letter case.
/// </returns>
public static string GetAllURL(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }
    StringBuilder sb = new StringBuilder();
    // IgnoreCase replaces the earlier ToLower() so the extracted URLs keep their case.
    Match m = Regex.Match(html, "<a href=(.*?)>.*?</a>", RegexOptions.IgnoreCase);
    while (m.Success)
    {
        sb.AppendLine(m.Groups[1].Value);
        // NextMatch returns a new Match; it must be assigned or the loop never advances.
        m = m.NextMatch();
    }
    return sb.ToString();
}
#endregion
#region 4.3获取所有连接文本
/// <summary>
/// 4.3 Extracts the link text of every <c>&lt;a href=...&gt;text&lt;/a&gt;</c> element.
/// </summary>
/// <param name="html">HTML source to scan; null or empty yields an empty string.</param>
/// <returns>One line per link whose text is 1 to 100 characters long, in the original letter case.</returns>
public static string GetAllLinkText(string html)
{
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }
    StringBuilder sb = new StringBuilder();
    // The previous pattern "(1,100})" matched only that literal text; the quantifier "{1,100}" on "."
    // was evidently intended. It is lazy so that the capture stops at the first "</a>".
    Match m = Regex.Match(html, "<a href=.*?>(.{1,100}?)</a>", RegexOptions.IgnoreCase);
    while (m.Success)
    {
        sb.AppendLine(m.Groups[1].Value);
        // NextMatch returns a new Match; it must be assigned or the loop never advances.
        m = m.NextMatch();
    }
    return sb.ToString();
}
#endregion
#endregion
#region 5、匹配页面的图片地址
/// <summary>
/// 5.1 Collects the image address of every <c>&lt;img ...&gt;</c> tag in the given HTML.
/// </summary>
/// <param name="HtmlCode">HTML source to scan; it is lower-cased before matching.</param>
/// <param name="imgHttp">Prefix added to addresses that do not look absolute (see GetImg).</param>
/// <returns>The addresses produced by GetImg, each followed by "|".</returns>
public string GetImgSrc(string HtmlCode, string imgHttp)
{
    StringBuilder joined = new StringBuilder();
    MatchCollection imgTags = Regex.Matches(HtmlCode.ToLower(), @"<img.+?>");
    for (int index = 0; index < imgTags.Count; index++)
    {
        string tag = imgTags[index].Value.ToLower().Trim();
        joined.Append(GetImg(tag, imgHttp));
        joined.Append("|");
    }
    return joined.ToString();
}
/// <summary>
/// 5.2 Extracts the image path from an <c>&lt;img src="" /&gt;</c> tag.
/// </summary>
/// <param name="ImgString">The img tag text; it is lower-cased before matching.</param>
/// <param name="imgHttp">Prefix added when the path contains none of the known domain suffixes.</param>
/// <returns>
/// The text following "src=" up to the last matching image extension. It is returned unchanged when
/// it contains ".net", ".com", ".org", ".cn", ".cc", ".info", ".biz" or ".tv"; otherwise
/// <paramref name="imgHttp"/> is prepended.
/// </returns>
public string GetImg(string ImgString, string imgHttp)
{
    StringBuilder path = new StringBuilder();
    foreach (Match m in Regex.Matches(ImgString.ToLower(), @"src=.+\.(bmp|jpg|gif|png|)"))
    {
        path.Append(m.Value.ToLower().Trim().Replace("src=", ""));
    }
    string MatchVale = path.ToString();
    // A known domain suffix anywhere in the path is taken to mean the address is already absolute.
    string[] domainSuffixes = { ".net", ".com", ".org", ".cn", ".cc", ".info", ".biz", ".tv" };
    foreach (string suffix in domainSuffixes)
    {
        if (MatchVale.IndexOf(suffix) != -1)
        {
            return MatchVale;
        }
    }
    return imgHttp + MatchVale;
}
#endregion
#region 6、抓取远程页面内容
/// <summary>
/// 6.1 Downloads a page with a GET request and returns its text.
/// </summary>
/// <param name="tUrl">Address to request.</param>
/// <returns>
/// The response text decoded with the system default encoding, every line terminated by "\r\n";
/// on failure, the exception message.
/// </returns>
public static string Get_Http(string tUrl)
{
    try
    {
        HttpWebRequest hwr = (HttpWebRequest)WebRequest.Create(tUrl);
        hwr.Timeout = 19600;
        using (HttpWebResponse hwrs = (HttpWebResponse)hwr.GetResponse())
        using (Stream myStream = hwrs.GetResponseStream())
        using (StreamReader sr = new StreamReader(myStream, Encoding.Default))
        {
            StringBuilder sb = new StringBuilder();
            string line;
            // ReadLine returns null only at the real end of the stream. Peek() can report -1 while
            // a network stream is merely waiting for more data, which truncated the result.
            while ((line = sr.ReadLine()) != null)
            {
                sb.Append(line).Append("\r\n");
            }
            return sb.ToString();
        }
    }
    catch (Exception ee)
    {
        // Existing contract: failures are reported through the returned text.
        return ee.Message;
    }
}
/// <summary>
/// 6.2 Downloads a page with a POST request and returns its text.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="postData">Form-encoded parameter list sent as the request body.</param>
/// <param name="encodeType">Name of the encoding used to encode <paramref name="postData"/>.</param>
/// <returns>The response text decoded with the system default encoding; on failure, the exception message.</returns>
public static string Post_Http(string url, string postData, string encodeType)
{
    try
    {
        Encoding requestEncoding = Encoding.GetEncoding(encodeType);
        byte[] payload = requestEncoding.GetBytes(postData);
        HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);
        myRequest.Method = "POST";
        myRequest.ContentType = "application/x-www-form-urlencoded";
        myRequest.ContentLength = payload.Length;
        using (Stream newStream = myRequest.GetRequestStream())
        {
            newStream.Write(payload, 0, payload.Length);
        }
        // NOTE(review): the response is decoded with Encoding.Default, not with encodeType - confirm this is intended.
        using (HttpWebResponse myResponse = (HttpWebResponse)myRequest.GetResponse())
        using (StreamReader reader = new StreamReader(myResponse.GetResponseStream(), Encoding.Default))
        {
            return reader.ReadToEnd();
        }
    }
    catch (Exception ex)
    {
        // Existing contract: failures are reported through the returned text.
        return ex.Message;
    }
}
#endregion
#region 7、压缩HTML输出
/// <summary>
/// 7.1 Compacts HTML output by removing whitespace between tags and line breaks with the
/// indentation that follows them.
/// </summary>
/// <param name="Html">HTML text to compact.</param>
/// <returns>The compacted HTML; body tags matched case-insensitively are rewritten as lower-case "body".</returns>
public static string ZipHtml(string Html)
{
    // Whitespace that sits between a closing ">" and the next "<".
    string betweenTagsCollapsed = Regex.Replace(Html, @">\s+?<", "><");
    // Line breaks together with any whitespace that follows them.
    string lineBreaksRemoved = Regex.Replace(betweenTagsCollapsed, @"\r\n\s*", "");
    Regex bodyElement = new Regex(@"<body([\s|\S]*?)>([\s|\S]*?)</body>", RegexOptions.IgnoreCase);
    return bodyElement.Replace(lineBreaksRemoved, @"<body$1>$2</body>");
}
#endregion
#region 8、过滤HTML标签
#region 8.1过滤指定HTML标签
/// <summary>
/// 8.1 Removes the opening and closing tags of one HTML element, keeping the content between them.
/// </summary>
/// <param name="s_TextStr">Text to filter; null or empty yields an empty string.</param>
/// <param name="html_Str">Tag name to remove, for example a, img, p or div. It is used as a regular-expression fragment.</param>
/// <returns>The text without the specified tags.</returns>
public static string DelHtml(string s_TextStr, string html_Str)
{
    if (string.IsNullOrEmpty(s_TextStr))
    {
        return "";
    }
    // The word boundary stops "a" from also matching tags such as <abbr> or <area>, which used to
    // lose their opening tag while keeping the closing one. With an empty tag name the boundary is
    // omitted so the pattern behaves as before.
    string boundary = string.IsNullOrEmpty(html_Str) ? "" : @"\b";
    string rStr = Regex.Replace(s_TextStr, "<" + html_Str + boundary + "[^>]*>", "", RegexOptions.IgnoreCase);
    rStr = Regex.Replace(rStr, "</" + html_Str + ">", "", RegexOptions.IgnoreCase);
    return rStr;
}
#endregion
#region 8.2过滤HTML中的不安全标签
/// <summary>
/// 8.2 Defuses potentially unsafe HTML.
/// </summary>
/// <remarks>
/// Attribute-like names that start with "o" (such as onclick=) lose that leading "o", and a "." is
/// inserted after the words script, frame, form, meta, behavior and style when they are followed
/// by whitespace, "|", ":" or "&gt;".
/// </remarks>
/// <param name="content">HTML text to filter.</param>
/// <returns>The filtered text.</returns>
public static string RemoveUnsafeHtml(string content)
{
    Regex eventAttribute = new Regex(@"(\<|\s+)o([a-z]+\s?=)", RegexOptions.IgnoreCase);
    Regex riskyKeyword = new Regex(@"(script|frame|form|meta|behavior|style)([\s|:|>])+", RegexOptions.IgnoreCase);
    string withoutEventAttributes = eventAttribute.Replace(content, "$1$2");
    return riskyKeyword.Replace(withoutEventAttributes, "$1.$2");
}
#endregion
#endregion
#region 转换HTML操作
#region HTML转行成TEXT
/// <summary>
/// Converts HTML to plain text by deleting scripts, tags, line breaks followed by whitespace,
/// common entities and comment markers.
/// </summary>
/// <param name="strHtml">HTML text to convert.</param>
/// <returns>The remaining text, with any leftover "&lt;", "&gt;" and "\r\n" removed.</returns>
public static string HtmlToTxt(string strHtml)
{
    string[] aryReg ={
        @"<script[^>]*?>.*?</script>",
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        @"&#(\d+);",
        @"-->",
        @"<!--.*\n"
    };
    string strOutput = strHtml;
    // Every pattern is deleted outright (entities are removed, not decoded).
    foreach (string pattern in aryReg)
    {
        strOutput = Regex.Replace(strOutput, pattern, string.Empty, RegexOptions.IgnoreCase);
    }
    // string.Replace returns a new string; the results must be assigned to take effect.
    strOutput = strOutput.Replace("<", "");
    strOutput = strOutput.Replace(">", "");
    strOutput = strOutput.Replace("\r\n", "");
    return strOutput;
}
#endregion
#region 字符串转换为 Html
/// <summary>
/// Converts plain text to HTML: escapes markup characters and turns line breaks into
/// <c>&lt;br /&gt;</c>.
/// </summary>
/// <param name="str">Plain text; null or empty yields an empty string.</param>
/// <returns>
/// The text with "&amp;", space, double quote, "&lt;" and "&gt;" replaced by the entities
/// &amp;amp; &amp;nbsp; &amp;quot; &amp;lt; &amp;gt;, single quotes doubled, and each line break
/// replaced by one <c>&lt;br /&gt;</c>.
/// </returns>
public static string StringToHtml(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }
    // "&" goes first so the ampersands of the entities added below are not escaped again.
    str = str.Replace("&", "&amp;");
    str = str.Replace(" ", "&nbsp;");
    str = str.Replace("'", "''");
    str = str.Replace("\"", "&quot;");
    str = str.Replace("<", "&lt;");
    str = str.Replace(">", "&gt;");
    // "\r\n" is handled before its parts; otherwise a Windows line break became two <br /> tags.
    str = str.Replace("\r\n", "<br />");
    str = str.Replace("\n", "<br />");
    str = str.Replace("\r", "<br />");
    return str;
}
#endregion
#region Html转换成字符串
/// <summary>
/// Converts HTML to plain text: <c>&lt;br&gt;</c> variants become line breaks, all other tags are
/// removed and common entities are decoded.
/// </summary>
/// <param name="strHtml">HTML text; null or empty yields an empty string.</param>
/// <returns>The plain-text form of the input.</returns>
public static string HtmlToString(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }
    strHtml = strHtml.Replace("<br>", "\r\n");
    strHtml = strHtml.Replace(@"<br />", "\r\n");
    strHtml = strHtml.Replace(@"<br/>", "\r\n");
    // Tags are stripped before entities are decoded, so text such as "&lt;b&gt;" survives as "<b>"
    // instead of being removed as if it were markup.
    strHtml = Regex.Replace(strHtml, @"<\/?[^>]+>", "", RegexOptions.IgnoreCase);
    strHtml = strHtml.Replace("&gt;", ">");
    strHtml = strHtml.Replace("&lt;", "<");
    strHtml = strHtml.Replace("&nbsp;", " ");
    strHtml = strHtml.Replace("&quot;", "\"");
    // "&amp;" is decoded last so that "&amp;lt;" becomes "&lt;" and is not decoded twice.
    strHtml = strHtml.Replace("&amp;", "&");
    return strHtml;
}
#endregion
#endregion
#region 获取URL编码
/// <summary>
/// Determines the character set name of the page at the given URL.
/// </summary>
/// <remarks>
/// The page is downloaded (redirects are not followed, 20 second timeout). The first "charset=" value
/// found in the page text wins, then the character set reported by the response. Responses that are
/// not 200 OK or that declare a length of 1 MB or more are not read.
/// </remarks>
/// <param name="url">Address of the page.</param>
/// <returns>The character set name; the body name of the system default encoding when it cannot be determined.</returns>
public string GetEncoding(string url)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = 20000;
        request.AllowAutoRedirect = false;
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024)
            {
                bool gzipped = response.ContentEncoding != null
                    && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase);
                string html;
                using (Stream body = response.GetResponseStream())
                using (StreamReader reader = gzipped
                    ? new StreamReader(new GZipStream(body, CompressionMode.Decompress))
                    : new StreamReader(body, Encoding.ASCII))
                {
                    html = reader.ReadToEnd();
                }
                // An optional opening quote is skipped so that both charset=utf-8 and
                // charset="utf-8" yield "utf-8" (the quoted form used to yield an empty string).
                Match declared = Regex.Match(html, @"charset\b\s*=\s*[""']?(?<charset>[^""'\s;/>]+)");
                if (declared.Success)
                {
                    return declared.Groups["charset"].Value;
                }
                if (!string.IsNullOrEmpty(response.CharacterSet))
                {
                    return response.CharacterSet;
                }
            }
        }
    }
    catch (Exception)
    {
        // Best effort by design: any failure falls through to the default below.
    }
    return Encoding.Default.BodyName;
}
#endregion
#region 判断URL是否有效
/// <summary>
/// Checks whether a URL can be requested successfully.
/// </summary>
/// <param name="url">URL to check; may point to a page, an image and so on.</param>
/// <returns>
/// 200 when the request succeeds; the HTTP status code of the error response when the server
/// answers with an error; 401 when the request fails with something other than a WebException.
/// </returns>
public int GetUrlError(string url)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(new Uri(url));
        ServicePointManager.Expect100Continue = false;
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            return 200;
        }
    }
    catch (WebException exception)
    {
        HttpWebResponse errorResponse = exception.Response as HttpWebResponse;
        if (exception.Status != WebExceptionStatus.ProtocolError || errorResponse == null)
        {
            // NOTE(review): failures without an HTTP response (DNS, connection, timeout) are still
            // reported as 200, as before - confirm whether callers rely on this.
            return 200;
        }
        // Read the status from the response. Searching the exception message for "500 " or "401 "
        // never matched the usual "(500)" wording, so those errors were reported as 200.
        using (errorResponse)
        {
            return (int)errorResponse.StatusCode;
        }
    }
    catch
    {
        return 401;
    }
}
#endregion
#region 返回 HTML 字符串的编码解码结果
/// <summary>
/// HTML-encodes a string by delegating to <c>HttpUtility.HtmlEncode</c>.
/// </summary>
/// <param name="inputData">Text to encode.</param>
/// <returns>The encoded text.</returns>
public static string HtmlEncode(string inputData)
{
    string encoded = HttpUtility.HtmlEncode(inputData);
    return encoded;
}
/// <summary>
/// HTML-decodes a string by delegating to <c>HttpUtility.HtmlDecode</c>.
/// </summary>
/// <param name="str">Text to decode.</param>
/// <returns>The decoded text.</returns>
public static string HtmlDecode(string str)
{
    string decoded = HttpUtility.HtmlDecode(str);
    return decoded;
}
#endregion
#region 加载文件块
/// <summary>
/// Resolves a path through <c>Page.ResolveUrl</c>.
/// </summary>
/// <param name="Path">Path to resolve.</param>
/// <param name="p">Page used to resolve the path.</param>
/// <returns>The value returned by <c>ResolveUrl</c>.</returns>
public static string File(string Path, System.Web.UI.Page p)
{
    string resolvedUrl = p.ResolveUrl(Path);
    return resolvedUrl;
}
#endregion
#region 加载CSS样式文件
/// <summary>
/// Builds the <c>&lt;link&gt;</c> element that loads a CSS style sheet.
/// </summary>
/// <param name="cssPath">Path of the style sheet; resolved through <c>Page.ResolveUrl</c>.</param>
/// <param name="p">Page used to resolve the path.</param>
/// <returns>The link element followed by "\r\n".</returns>
public static string CSS(string cssPath, System.Web.UI.Page p)
{
    string href = p.ResolveUrl(cssPath);
    return string.Concat("<link href=\"", href, "\" rel=\"stylesheet\" type=\"text/css\" />", "\r\n");
}
#endregion
#region 加载JavaScript脚本文件
/// <summary>
/// Builds the <c>&lt;script&gt;</c> element that loads a JavaScript file.
/// </summary>
/// <param name="jsPath">Path of the script file; resolved through <c>Page.ResolveUrl</c>.</param>
/// <param name="p">Page used to resolve the path.</param>
/// <returns>The script element followed by "\r\n".</returns>
public static string JS(string jsPath, System.Web.UI.Page p)
{
    string src = p.ResolveUrl(jsPath);
    return string.Concat("<script type=\"text/javascript\" src=\"", src, "\"></script>", "\r\n");
}
#endregion
/// <summary>
/// Parses a cookie string into a <c>CookieCollection</c>.
/// </summary>
/// <param name="cookieString">
/// Cookies written as "name=value;Domain=domain;Path=path", separated by commas, for example
/// "SID=abc;Domain=.google.com;Path=/,LSID=def;Domain=www.google.com;Path=/accounts".
/// </param>
/// <returns>One cookie per entry that matches the expected format.</returns>
public CookieCollection GetCookieCollection(string cookieString)
{
    CookieCollection ccc = new CookieCollection();
    // Groups: 1 = name, 2 = value, 3 = domain, 4 = path.
    Regex re = new Regex("([^;,]+)=([^;,]+);Domain=([^;,]+);Path=([^;,]+)", RegexOptions.IgnoreCase);
    foreach (Match m in re.Matches(cookieString))
    {
        string name = m.Groups[1].Value;
        string value = m.Groups[2].Value;
        string domain = m.Groups[3].Value;
        string path = m.Groups[4].Value;
        // The Cookie constructor takes (name, value, path, domain); the domain group used to be
        // passed as the path too.
        ccc.Add(new Cookie(name, value, path, domain));
    }
    return ccc;
}
#region 从HTML中获取文本,保留br,p,img
/// <summary>
/// Strips HTML tags from a string while keeping tags whose name starts with br, p or img.
/// </summary>
/// <param name="HTML">HTML text to filter.</param>
/// <returns>The text with all other tags removed.</returns>
public static string GetTextFromHTML(string HTML)
{
    // The negative lookahead leaves any tag beginning with "br", "p", "/p" or "img" untouched.
    const string removableTagPattern = @"</?(?!br|/?p|img)[^>]*>";
    return Regex.Replace(HTML, removableTagPattern, "", RegexOptions.IgnoreCase);
}
#endregion
#region 获取HTML页面内制定Key的Value内容
/// <summary>
/// Reads the value of a hidden input field with the given name from an HTML page.
/// </summary>
/// <param name="html">HTML source to search.</param>
/// <param name="key">Name of the hidden field; matched literally (case-insensitively).</param>
/// <returns>The value of the first matching field, or an empty string when none is found.</returns>
public string GetHiddenKeyValue(string html, string key)
{
    // Escape the name so regex metacharacters in it are matched literally; field names such as
    // "ctl00$Main$id" contain "$", which otherwise acts as an anchor and prevents any match.
    string escapedKey = Regex.Escape(key == null ? string.Empty : key);
    string sRegex = string.Format("<input\\s*type=\"hidden\".*?name=\"{0}\".*?\\s*value=[\"|'](?<value>.*?)[\"|'^/]", escapedKey);
    Regex re = new Regex(sRegex, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
    Match mc = re.Match(html);
    return mc.Success ? mc.Groups["value"].Value : "";
}
#endregion
/// <summary>
/// Replaces line breaks ("\r\n" and "\n") with the HTML line break <c>&lt;br /&gt;</c>.
/// </summary>
/// <param name="str">Text to convert; null yields an empty string.</param>
/// <returns>The converted text.</returns>
public static string StrFormat(string str)
{
    if (str == null)
    {
        return "";
    }
    return str.Replace("\r\n", "<br />").Replace("\n", "<br />");
}
/// <summary>
/// Replaces the characters "," "'" and ";" with the placeholder tokens "&amp;def", "&amp;dot" and "&amp;dec".
/// </summary>
/// <param name="strHtml">Text to encode; null or empty yields an empty string.</param>
/// <returns>The encoded text.</returns>
public static string EncodeHtml(string strHtml)
{
    // Null is treated like the empty string instead of failing with a NullReferenceException.
    if (string.IsNullOrEmpty(strHtml))
    {
        return "";
    }
    strHtml = strHtml.Replace(",", "&def");
    strHtml = strHtml.Replace("'", "&dot");
    strHtml = strHtml.Replace(";", "&dec");
    return strHtml;
}
/// <summary>
/// Escapes a string for use inside a script string literal: backslashes, single quotes and
/// double quotes are prefixed with a backslash.
/// </summary>
/// <param name="str">Text to escape.</param>
/// <returns>The escaped text.</returns>
public static string ReplaceStrToScript(string str)
{
    // Backslashes are doubled first so the backslashes added for the quotes stay single.
    string escaped = str.Replace("\\", "\\\\");
    escaped = escaped.Replace("'", "\\'").Replace("\"", "\\\"");
    return escaped;
}
}
}
5.HtmlPager 类
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
namespace Core.Html
{
/// <summary>
/// HTMl 分页
/// </summary>
public static class HtmlPager
{
/// <summary>
/// Renders the pager table without extra query-string parameters.
/// </summary>
/// <param name="pageCount">Total number of pages.</param>
/// <param name="currentPage">Current page number.</param>
/// <returns>The pager HTML.</returns>
public static string GetPager(int pageCount, int currentPage)
{
    string[] noNames = new string[] { };
    string[] noValues = new string[] { };
    return GetPager(pageCount, currentPage, noNames, noValues);
}
/// <summary>
/// Renders the pager as an HTML table: a title cell, "current/total", optional first/previous links,
/// up to four page numbers on each side of the current page, and optional next/last links.
/// Links have the form "?page=N" followed by the extra parameters.
/// </summary>
/// <param name="pageCount">Total number of pages; 0 is treated as 1.</param>
/// <param name="currentPage">Current page number; 0 is treated as 1.</param>
/// <param name="FieldName">Names of the extra query-string parameters.</param>
/// <param name="FieldValue">Values of the extra query-string parameters, parallel to <paramref name="FieldName"/>.</param>
/// <returns>The pager HTML.</returns>
public static string GetPager(int pageCount, int currentPage, string[] FieldName, string[] FieldValue)
{
    const int stepNum = 4;
    // Extra parameters appended to every link: "&name=value" per field.
    StringBuilder extra = new StringBuilder();
    for (int k = 0; k < FieldName.Length; k++)
    {
        extra.Append("&").Append(FieldName[k].ToString()).Append("=").Append(FieldValue[k].ToString());
    }
    string pString = extra.ToString();
    if (pageCount == 0)
    {
        pageCount = 1;
    }
    if (currentPage == 0)
    {
        currentPage = 1;
    }
    StringBuilder sb = new StringBuilder();
    sb.Append("<table cellpadding=0 cellspacing=1 class=\"pager\">\r<tr>\r");
    sb.Append("<td class=pagerTitle> 分页 </td>\r");
    sb.Append("<td class=pagerTitle> ").Append(currentPage.ToString()).Append("/").Append(pageCount.ToString()).Append(" </td>\r");
    // First and last page number of the visible window.
    int pageRoot = (currentPage - stepNum < 2) ? 1 : currentPage - stepNum;
    int pageFoot = (currentPage + stepNum >= pageCount) ? pageCount : currentPage + stepNum;
    bool windowStartsAtFirstPage = pageRoot == 1;
    if (!windowStartsAtFirstPage || currentPage > 1)
    {
        sb.Append("<td> <a href='?page=1" + pString + "' title='首页'>首页</a> </td>");
        if (windowStartsAtFirstPage)
        {
            // Only this case terminates the "first page" cell with a carriage return.
            sb.Append("\r");
        }
        sb.Append("<td> <a href='?page=" + Convert.ToString(currentPage - 1) + pString + "' title='上页'>上页</a> </td>\r");
    }
    for (int pageNo = pageRoot; pageNo <= pageFoot; pageNo++)
    {
        string label = pageNo.ToString();
        if (pageNo == currentPage)
        {
            sb.Append("<td class='current'> " + label + " </td>\r");
        }
        else
        {
            sb.Append("<td> <a href='?page=" + label + pString + "' title='第" + label + "页'>" + label + "</a> </td>\r");
        }
        if (pageNo == pageCount)
        {
            break;
        }
    }
    if (pageFoot != pageCount || pageCount > currentPage)
    {
        sb.Append("<td> <a href='?page=" + Convert.ToString(currentPage + 1) + pString + "' title='下页'>下页</a> </td>\r");
        sb.Append("<td> <a href='?page=" + pageCount.ToString() + pString + "' title='尾页'>尾页</a> </td>\r");
    }
    sb.Append("</tr>\r</table>");
    return sb.ToString();
}
/// <summary>
/// Renders the pager as an HTML table whose links are built as prefix + page number + suffix
/// (suitable for static or rewritten URLs).
/// </summary>
/// <param name="pageCount">Total number of pages; 0 is treated as 1.</param>
/// <param name="currentPage">Current page number; 0 is treated as 1.</param>
/// <param name="prefix">Text placed before the page number in every link.</param>
/// <param name="suffix">Text placed after the page number in every link.</param>
/// <returns>The pager HTML.</returns>
public static string GetHtmlPager(int pageCount, int currentPage, string prefix, string suffix)
{
    const int stepNum = 4;
    if (pageCount == 0)
    {
        pageCount = 1;
    }
    if (currentPage == 0)
    {
        currentPage = 1;
    }
    StringBuilder sb = new StringBuilder();
    sb.Append("<table cellpadding=0 cellspacing=1 class=\"pager\">\r<tr>\r");
    sb.Append("<td class=pagerTitle> 分页 </td>\r");
    sb.Append("<td class=pagerTitle> ").Append(currentPage.ToString()).Append("/").Append(pageCount.ToString()).Append(" </td>\r");
    // First and last page number of the visible window.
    int pageRoot = (currentPage - stepNum < 2) ? 1 : currentPage - stepNum;
    int pageFoot = (currentPage + stepNum >= pageCount) ? pageCount : currentPage + stepNum;
    bool windowStartsAtFirstPage = pageRoot == 1;
    if (!windowStartsAtFirstPage || currentPage > 1)
    {
        sb.Append("<td> <a href='" + prefix + "1" + suffix + "' title='首页'>首页</a> </td>");
        if (windowStartsAtFirstPage)
        {
            // Only this case terminates the "first page" cell with a carriage return.
            sb.Append("\r");
        }
        sb.Append("<td> <a href='" + prefix + Convert.ToString(currentPage - 1) + suffix + "' title='上页'>上页</a> </td>\r");
    }
    for (int pageNo = pageRoot; pageNo <= pageFoot; pageNo++)
    {
        string label = pageNo.ToString();
        if (pageNo == currentPage)
        {
            sb.Append("<td class='current'> " + label + " </td>\r");
        }
        else
        {
            sb.Append("<td> <a href='" + prefix + label + suffix + "' title='第" + label + "页'>" + label + "</a> </td>\r");
        }
        if (pageNo == pageCount)
        {
            break;
        }
    }
    if (pageFoot != pageCount || pageCount > currentPage)
    {
        sb.Append("<td> <a href='" + prefix + Convert.ToString(currentPage + 1) + suffix + "' title='下页'>下页</a> </td>\r");
        sb.Append("<td> <a href='" + prefix + pageCount.ToString() + suffix + "' title='尾页'>尾页</a> </td>\r");
    }
    sb.Append("</tr>\r</table>");
    return sb.ToString();
}
#region 分页
/// <summary>
/// Renders a simple previous/next pager wrapped in <c>&lt;p class="next"&gt;</c>.
/// </summary>
/// <param name="url">Base address of the links.</param>
/// <param name="para">Extra query-string text appended after the page parameter (for example "&amp;k=v").</param>
/// <param name="sumpage">Total number of pages; values above 500 are capped at 500.</param>
/// <param name="page">Current page; values outside 1..sumpage are treated as 1.</param>
/// <returns>The pager HTML, or an empty string when there is exactly one page or none.</returns>
public static string paging(string url, string para, int sumpage, int page)
{
    if (sumpage == 1)
    {
        return string.Empty;
    }
    if (sumpage > 500)
    {
        sumpage = 500;
    }
    // Pages below 1 are clamped like pages above the total; otherwise the "previous" link
    // pointed at page 0 or a negative page.
    if (page > sumpage || page < 1)
    {
        page = 1;
    }
    StringBuilder sb = new StringBuilder();
    if (sumpage > 0)
    {
        if (page == 1)
        {
            sb.AppendFormat("<p class=\"next\"><a href=\"{0}?page={1}{2}\">{3}</a> ", url, page + 1, para, "下一页");
        }
        else if (sumpage == page)
        {
            sb.AppendFormat("<p class=\"next\"><a href=\"{0}?page={1}{2}\">{3}</a> ", url, page - 1, para, "上一页");
        }
        else
        {
            sb.AppendFormat("<p class=\"next\"><a href=\"{0}?page={1}{2}\">{3}</a> <a href=\"{4}?page={5}{6}\">{7}</a> ",
                url, page + 1, para, "下一页", url, page - 1, para, "上一页");
        }
        sb.AppendFormat("第{0}/{1}页</p>", page, sumpage);
    }
    return sb.ToString();
}
/// <summary>
/// Renders a simple previous/next pager and shows or hides the hosting user control accordingly.
/// </summary>
/// <param name="url">Base address of the links.</param>
/// <param name="para">Extra query-string text appended after the page parameter.</param>
/// <param name="sumpage">Total number of pages; values above 500 are capped at 500.</param>
/// <param name="page">Current page; values outside 1..sumpage are treated as 1.</param>
/// <param name="myPaging">Control hosting the pager; made visible only when pager HTML is produced.</param>
/// <returns>The pager HTML, or an empty string when there is exactly one page or none.</returns>
public static string paging(string url, string para, int sumpage, int page, System.Web.UI.UserControl myPaging)
{
    myPaging.Visible = false;
    if (sumpage == 1)
    {
        return string.Empty;
    }
    if (sumpage > 500)
    {
        sumpage = 500;
    }
    // Pages below 1 are clamped like pages above the total; otherwise the "previous" link
    // pointed at page 0 or a negative page.
    if (page > sumpage || page < 1)
    {
        page = 1;
    }
    StringBuilder sb = new StringBuilder();
    if (sumpage > 0)
    {
        myPaging.Visible = true;
        if (page == 1)
        {
            sb.AppendFormat("<a href=\"{0}?page={1}{2}\">{3}</a> ", url, page + 1, para, "下一页");
        }
        else if (sumpage == page)
        {
            sb.AppendFormat("<a href=\"{0}?page={1}{2}\">{3}</a> ", url, page - 1, para, "上一页");
        }
        else
        {
            sb.AppendFormat("<a href=\"{0}?page={1}{2}\">{3}</a> <a href=\"{4}?page={5}{6}\">{7}</a> ",
                url, page + 1, para, "下一页", url, page - 1, para, "上一页");
        }
        sb.AppendFormat("第{0}/{1}页", page, sumpage);
    }
    return sb.ToString();
}
/// <summary>
/// Renders a previous/next pager with relative links ("?page=N") and a record count.
/// </summary>
/// <param name="para">Extra query-string text appended after the page parameter.</param>
/// <param name="sumpage">Total number of pages.</param>
/// <param name="page">Current page; values outside 1..sumpage are treated as 1.</param>
/// <param name="count">Total number of records, shown after the page indicator.</param>
/// <returns>The pager HTML (only the indicator when there is a single page), or an empty string when there are no pages.</returns>
public static string paging(string para, int sumpage, int page, int count)
{
    // Pages below 1 are clamped like pages above the total; otherwise the "previous" link
    // pointed at page 0 or a negative page.
    if (page > sumpage || page < 1)
    {
        page = 1;
    }
    StringBuilder sb = new StringBuilder();
    if (sumpage > 0)
    {
        if (sumpage != 1)
        {
            if (page == 1)
            {
                sb.AppendFormat("<a href=\"?page={0}{1}\">{2}</a> ", page + 1, para, "下一页");
            }
            else if (sumpage == page)
            {
                sb.AppendFormat("<a href=\"?page={0}{1}\">{2}</a> ", page - 1, para, "上一页");
            }
            else
            {
                sb.AppendFormat("<a href=\"?page={0}{1}\">{2}</a> <a href=\"?page={3}{4}\">{5}</a> ",
                    page - 1, para, "上一页", page + 1, para, "下一页");
            }
        }
        sb.AppendFormat("第{0}/{1}页 共{2}条", page, sumpage, count);
    }
    return sb.ToString();
}
/// <summary>
/// Renders a numbered pager (blocks of ten pages with first/last and previous/next block links)
/// into a label.
/// </summary>
/// <param name="clinktail">Extra query-string text appended after the page parameter of every link.</param>
/// <param name="sumpage">Total number of pages; when it is 0 or less the label text is cleared.</param>
/// <param name="page">Current page.</param>
/// <param name="page_view">Label whose <c>Text</c> receives the pager HTML.</param>
public static void paging(string clinktail, int sumpage, int page, System.Web.UI.WebControls.Label page_view)
{
    if (sumpage <= 0)
    {
        page_view.Text = "";
        return;
    }
    int n = sumpage; // total pages
    int x = page;    // current page
    // Last page number of the previous block of ten (0 for pages 1-10, 10 for pages 11-20, ...).
    int blockStart = ((x - 1) / 10) * 10;
    // The second string that used to be assembled alongside this one was never used and has been dropped.
    StringBuilder pageview = new StringBuilder();
    if (x > 1)
    {
        pageview.Append(" <a class='pl' href='?page=1").Append(clinktail).Append("'>第1页</a> | ");
    }
    else
    {
        pageview.Append(" <font color='#666666'> 第1页 </font> | ");
    }
    if (x > blockStart && x > 10)
    {
        // NOTE(review): "onclink" is not a real attribute, so 'return false;' never runs and the link
        // works. Correcting the spelling to "onclick" would disable the link - left unchanged.
        pageview.Append("<a class='pl' href='?page=").Append(blockStart).Append(clinktail).Append("' onclink='return false;'>上10页</a>");
    }
    int endpage = (blockStart + 10 >= n) ? n : blockStart + 10;
    for (int i = blockStart + 1; i <= endpage; ++i)
    {
        if (i == x)
        {
            pageview.Append(" <font color='#FF0000'><b>").Append(i).Append("</b></font>");
        }
        else
        {
            pageview.Append(" <a class='pl' href='?page=").Append(i).Append(clinktail).Append("'>").Append(i).Append("</a>");
        }
    }
    if (endpage != n)
    {
        pageview.Append(" <a class='pl' href='?page=").Append(endpage + 1).Append(clinktail).Append("' class='pl' onclink='return false;'>下10页</a> | ");
    }
    else
    {
        pageview.Append(" | ");
    }
    if (x < n)
    {
        pageview.Append(" <a class='pl' href='?page=").Append(n).Append(clinktail).Append("' class='pl'>第").Append(n).Append("页</a> ");
    }
    else
    {
        pageview.Append("<font color='#666666'> 第").Append(n).Append("页 </font>");
    }
    page_view.Text = pageview.ToString();
}
/// <summary>
/// Builds a pager with icon links for first / previous / next / last page plus textual
/// previous / next links, followed by a summary. Links are relative query strings.
/// </summary>
/// <param name="para">Extra query-string text appended verbatim after the page number.</param>
/// <param name="sumpage">Total number of pages. Nothing is rendered when it is zero or negative.</param>
/// <param name="page">Current 1-based page number. Values outside 1..sumpage fall back to page 1.</param>
/// <param name="count">Total record count shown in the summary.</param>
/// <returns>The pager markup, or an empty string when there are no pages.</returns>
public static string paging2(string para, int sumpage, int page, int count)
{
    const string leftIcon = "<img src=\"images/left-icon.gif\" border=\"0\"/>";
    const string rightIcon = "<img src=\"images/right-icon.gif\" border=\"0\"/>";
    // The disabled right icon on the last page is emitted without a border attribute.
    const string rightIconDisabled = "<img src=\"images/right-icon.gif\" />";

    // Out-of-range requests (too large, zero or negative) are treated as the first page.
    if (page > sumpage || page < 1)
    {
        page = 1;
    }

    StringBuilder sb = new StringBuilder();
    if (sumpage <= 0)
    {
        return sb.ToString();
    }

    if (sumpage != 1)
    {
        // First-page icon
        sb.AppendFormat("<a href=\"?page={0}{1}\"><img src=\"images/first-icon.gif\" border=\"0\"/></a> ", 1, para);

        if (page == 1)
        {
            // On the first page "previous" is rendered as inert anchors.
            sb.AppendFormat("<a>{0}</a>", leftIcon);
            sb.AppendFormat("<a>上一页</a><a href=\"?page={0}{1}\">{2}</a> ", page + 1, para, "下一页");
            sb.AppendFormat("<a href=\"?page={0}{1}\">{2}</a>", page + 1, para, rightIcon);
        }
        else if (page == sumpage)
        {
            // On the last page "next" is rendered as inert anchors.
            sb.AppendFormat("<a href=\"?page={0}{1}\">{2}</a>", page - 1, para, leftIcon);
            sb.AppendFormat("<a href=\"?page={0}{1}\">{2}</a><a>下一页</a> ", page - 1, para, "上一页");
            sb.AppendFormat("<a>{0}</a>", rightIconDisabled);
        }
        else
        {
            sb.AppendFormat("<a href=\"?page={0}{1}\">{2}</a>", page - 1, para, leftIcon);
            sb.AppendFormat("<a href=\"?page={0}{1}\">{2}</a> <a href=\"?page={3}{4}\">{5}</a> ",
                page - 1, para, "上一页", page + 1, para, "下一页");
            sb.AppendFormat("<a href=\"?page={0}{1}\">{2}</a>", page + 1, para, rightIcon);
        }

        // Last-page icon
        sb.AppendFormat(" <a href=\"?page={0}{1}\"><img src=\"images/last-icon.gif\" border=\"0\"/></a> ", sumpage, para);
    }

    sb.AppendFormat("第{0}页/共{1}页 共{2}条", page, sumpage, count);
    return sb.ToString();
}
/// <summary>
/// Builds a text-only pager (first, previous, next, last) whose links target
/// <paramref name="url"/> with a <c>?page=N</c> query string, followed by a summary.
/// </summary>
/// <param name="url">Page address placed in front of the query string of every link.</param>
/// <param name="para">Extra query-string text appended verbatim after the page number.</param>
/// <param name="sumpage">Total number of pages. Nothing is rendered when it is zero or negative.</param>
/// <param name="page">Current 1-based page number. Values outside 1..sumpage fall back to page 1.</param>
/// <param name="count">Total record count shown in the summary.</param>
/// <returns>The pager markup, or an empty string when there are no pages.</returns>
public static string paging3(string url, string para, int sumpage, int page, int count)
{
    // Out-of-range requests (too large, zero or negative) are treated as the first page.
    if (page > sumpage || page < 1)
    {
        page = 1;
    }

    StringBuilder sb = new StringBuilder();
    if (sumpage <= 0)
    {
        return sb.ToString();
    }

    if (sumpage != 1)
    {
        // First page
        sb.AppendFormat("<a href=\"{2}?page={0}{1}\">首页</a>", 1, para, url);

        if (page == 1)
        {
            // "Previous" is an inert anchor on the first page.
            sb.AppendFormat("<a>上一页</a><a href=\"{3}?page={0}{1}\">{2}</a> ", page + 1, para, "下一页", url);
        }
        else if (page == sumpage)
        {
            // "Next" is an inert anchor on the last page.
            sb.AppendFormat("<a href=\"{3}?page={0}{1}\">{2}</a><a>下一页</a> ", page - 1, para, "上一页", url);
        }
        else
        {
            sb.AppendFormat("<a href=\"{6}?page={0}{1}\">{2}</a> <a href=\"{6}?page={3}{4}\">{5}</a> ",
                page - 1, para, "上一页", page + 1, para, "下一页", url);
        }

        // Last page
        sb.AppendFormat("<a href=\"{2}?page={0}{1}\">末页</a> ", sumpage, para, url);
    }

    sb.AppendFormat("第{0}页/共{1}页 共{2}条", page, sumpage, count);
    return sb.ToString();
}
#endregion
}
}
6.HtmlUtils类
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Sockets;
using System.Text;
using System.Text.RegularExpressions;
using System.Data;
using System.Web;
using System.Xml;
namespace Common
{
public class HtmlUtils
{
#region BaseMethod
/// <summary>
/// Returns one string for every regex match found in the input.
/// </summary>
/// <param name="sInput">Text to search.</param>
/// <param name="sRegex">Pattern, evaluated with IgnoreCase, IgnorePatternWhitespace and Multiline.</param>
/// <param name="iGroupIndex">Capture-group number to extract (starting at 1); zero or less returns the whole match.</param>
/// <returns>The extracted values in match order; empty when nothing matches.</returns>
public static List<string> GetList(string sInput, string sRegex, int iGroupIndex)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline;
    bool wholeMatch = iGroupIndex <= 0;
    List<string> values = new List<string>();

    // Walk the matches one at a time instead of materialising a MatchCollection.
    for (Match m = Regex.Match(sInput, sRegex, options); m.Success; m = m.NextMatch())
    {
        values.Add(wholeMatch ? m.Value : m.Groups[iGroupIndex].Value);
    }

    return values;
}
/// <summary>
/// Returns one string for every regex match found in the input, selecting a named group.
/// </summary>
/// <param name="sInput">Text to search.</param>
/// <param name="sRegex">Pattern, evaluated with IgnoreCase, IgnorePatternWhitespace and Multiline.</param>
/// <param name="sGroupName">Name of the capture group to extract; an empty string returns the whole match.</param>
/// <returns>The extracted values in match order; empty when nothing matches.</returns>
public static List<string> GetList(string sInput, string sRegex, string sGroupName)
{
    Regex pattern = new Regex(sRegex, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
    IEnumerable<Match> matches = pattern.Matches(sInput).Cast<Match>();

    IEnumerable<string> values = sGroupName != ""
        ? matches.Select(m => m.Groups[sGroupName].Value)
        : matches.Select(m => m.Value);

    return values.ToList();
}
/// <summary>
/// Returns the text of the first regex match in the input.
/// </summary>
/// <param name="sInput">Text to search.</param>
/// <param name="sRegex">Pattern, evaluated with IgnoreCase, IgnorePatternWhitespace and Multiline.</param>
/// <param name="iGroupIndex">Capture-group number to return (starting at 1); zero or less returns the whole match.</param>
/// <returns>The matched text, or an empty string when the pattern does not match.</returns>
public static string GetText(string sInput, string sRegex, int iGroupIndex)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline;
    Match first = Regex.Match(sInput, sRegex, options);
    if (!first.Success)
    {
        return "";
    }

    return iGroupIndex > 0 ? first.Groups[iGroupIndex].Value : first.Value;
}
/// <summary>
/// Returns the text of the first regex match in the input, selecting a named group.
/// </summary>
/// <param name="sInput">Text to search.</param>
/// <param name="sRegex">Pattern, evaluated with IgnoreCase, IgnorePatternWhitespace and Multiline.</param>
/// <param name="sGroupName">Name of the capture group to return; an empty string returns the whole match.</param>
/// <returns>The matched text, or an empty string when the pattern does not match.</returns>
public static string GetText(string sInput, string sRegex, string sGroupName)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline;
    Match first = Regex.Match(sInput, sRegex, options);
    if (!first.Success)
    {
        return "";
    }

    if (sGroupName != "")
    {
        return first.Groups[sGroupName].Value;
    }

    return first.Value;
}
/// <summary>
/// For every regex match, replaces the matched text (or one of its capture groups)
/// with <paramref name="sReplace"/>.
/// </summary>
/// <remarks>
/// The replacement is a plain-text substitution of the captured value, so every
/// occurrence of that text in the input is replaced, not only the one at the match
/// position. Existing callers rely on this, so it is kept.
/// </remarks>
/// <param name="sInput">Text to process.</param>
/// <param name="sRegex">Pattern, evaluated with IgnoreCase, IgnorePatternWhitespace and Multiline.</param>
/// <param name="sReplace">Replacement text.</param>
/// <param name="iGroupIndex">Capture-group number whose text is replaced; zero or less uses the whole match.</param>
/// <returns>The input with the substitutions applied.</returns>
public static string Replace(string sInput, string sRegex, string sReplace, int iGroupIndex)
{
    Regex re = new Regex(sRegex, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);

    // The match collection is computed against the original input; sInput is then
    // rewritten step by step.
    foreach (Match mc in re.Matches(sInput))
    {
        string target = iGroupIndex > 0 ? mc.Groups[iGroupIndex].Value : mc.Value;

        // A pattern that can match the empty string (or a group that did not take
        // part in the match) yields "", which string.Replace rejects with an
        // ArgumentException. There is nothing to substitute in that case.
        if (target.Length == 0)
        {
            continue;
        }

        sInput = sInput.Replace(target, sReplace);
    }

    return sInput;
}
/// <summary>
/// For every regex match, replaces the matched text (or one of its named capture
/// groups) with <paramref name="sReplace"/>.
/// </summary>
/// <remarks>
/// The replacement is a plain-text substitution of the captured value, so every
/// occurrence of that text in the input is replaced, not only the one at the match
/// position. Existing callers rely on this, so it is kept.
/// </remarks>
/// <param name="sInput">Text to process.</param>
/// <param name="sRegex">Pattern, evaluated with IgnoreCase, IgnorePatternWhitespace and Multiline.</param>
/// <param name="sReplace">Replacement text.</param>
/// <param name="sGroupName">Name of the capture group whose text is replaced; an empty string uses the whole match.</param>
/// <returns>The input with the substitutions applied.</returns>
public static string Replace(string sInput, string sRegex, string sReplace, string sGroupName)
{
    Regex re = new Regex(sRegex, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);

    // The match collection is computed against the original input; sInput is then
    // rewritten step by step.
    foreach (Match mc in re.Matches(sInput))
    {
        string target = sGroupName != "" ? mc.Groups[sGroupName].Value : mc.Value;

        // An empty capture would make string.Replace throw an ArgumentException;
        // there is nothing to substitute in that case.
        if (target.Length == 0)
        {
            continue;
        }

        sInput = sInput.Replace(target, sReplace);
    }

    return sInput;
}
/// <summary>
/// Splits the input on a regex and keeps the trimmed pieces that are long enough.
/// </summary>
/// <param name="sInput">Text to split.</param>
/// <param name="sRegex">Delimiter pattern, evaluated with IgnoreCase, IgnorePatternWhitespace and Multiline.</param>
/// <param name="iStrLen">Minimum length (after trimming) a piece must have to be kept.</param>
/// <returns>The surviving pieces, trimmed, in their original order.</returns>
public static List<string> Split(string sInput, string sRegex, int iStrLen)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline;
    List<string> kept = new List<string>();

    foreach (string piece in Regex.Split(sInput, sRegex, options))
    {
        string trimmed = piece.Trim();
        if (trimmed.Length >= iStrLen)
        {
            kept.Add(trimmed);
        }
    }

    return kept;
}
#endregion BaseMethod
#region 获得特定内容
/// <summary>
/// Extracts the href value of every anchor tag in the input.
/// </summary>
/// <param name="sInput">HTML text to scan.</param>
/// <returns>The href values (single-quoted, double-quoted or unquoted) in document order.</returns>
public static List<string> GetLinks(string sInput)
{
    const string anchorHrefPattern = @"<a[^>]+href=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>";
    return GetList(sInput, anchorHrefPattern, "href");
}
/// <summary>
/// Extracts the href value of the first anchor tag in the input.
/// </summary>
/// <param name="sInput">HTML text to scan.</param>
/// <returns>The href value, or an empty string when no anchor with an href is found.</returns>
public static string GetLinkHelp(string sInput)
{
    const string anchorHrefPattern = @"<a[^>]+href=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>";
    return GetText(sInput, anchorHrefPattern, "href");
}
/// <summary>
/// Returns every complete img tag (the whole tag text, not just its src) found in the input.
/// </summary>
/// <param name="sInput">HTML text to scan.</param>
/// <returns>The matched img tags in document order.</returns>
public static List<string> GetImgTag(string sInput)
{
    const string imgTagPattern = "<img[^>]+src=\\s*(?:'(?<src>[^']+)'|\"(?<src>[^\"]+)\"|(?<src>[^>\\s]+))\\s*[^>]*>";
    // An empty group name asks GetList for the whole match.
    return GetList(sInput, imgTagPattern, "");
}
/// <summary>
/// Extracts the src value of the first img tag in the input.
/// </summary>
/// <param name="sInput">HTML text to scan.</param>
/// <returns>The src value, or an empty string when no img tag with a src is found.</returns>
public static string GetImgSrc(string sInput)
{
    const string imgTagPattern = "<img[^>]+src=\\s*(?:'(?<src>[^']+)'|\"(?<src>[^\"]+)\"|(?<src>[^>\\s]+))\\s*[^>]*>";
    return GetText(sInput, imgTagPattern, "src");
}
/// <summary>
/// Extracts the scheme-and-host prefix (for example <c>http://www.163.com</c>) from a URL.
/// </summary>
/// <param name="sInput">Text containing an http or https URL.</param>
/// <returns>The first scheme-and-host match, or an empty string when none is found.</returns>
public static string GetDomain(string sInput)
{
    const string schemeAndHostPattern = @"http(s)?://([\w-]+\.)+(\w){2,}";
    // Group index 0 asks GetText for the whole match.
    return GetText(sInput, schemeAndHostPattern, 0);
}
#endregion 获得特定内容
#region 根据表达式,获得文章内容
/// <summary>
/// Extracts an article title with a caller-supplied pattern, strips markup from it
/// and limits it to 99 characters.
/// </summary>
/// <param name="sInput">Text to search.</param>
/// <param name="sRegex">Pattern that defines a named group <c>Title</c>.</param>
/// <returns>The cleaned title, truncated to at most 99 characters.</returns>
public static string GetTitle(string sInput, string sRegex)
{
    const int maxLength = 99;
    string title = ClearTag(GetText(sInput, sRegex, "Title"));
    return title.Length > maxLength ? title.Substring(0, maxLength) : title;
}
/// <summary>
/// Extracts the content of the page's title element.
/// </summary>
/// <param name="sInput">HTML text to scan.</param>
/// <returns>The title text (the pattern requires at least ten characters), or an empty string when it does not match.</returns>
public static string GetTitle(string sInput)
{
    const string titlePattern = @"<Title[^>]*>(?<Title>[\s\S]{10,})</Title>";
    return GetText(sInput, titlePattern, "Title");
}
/// <summary>
/// Removes from the input every run of text that is immediately followed by a
/// <c>&lt;</c> character, using the text-substitution semantics of <c>Replace</c>.
/// </summary>
/// <param name="sInput">Text to process.</param>
/// <returns>The input after the matched "Head" runs have been replaced with nothing.</returns>
public static string GetHtml(string sInput)
{
    // "Head" captures one or more non-'<' characters directly in front of a '<'.
    const string textBeforeTagPattern = @"(?<Head>[^<]+)<";
    return Replace(sInput, textBeforeTagPattern, "", "Head");
}
/// <summary>
/// Extracts the content of the page's body element.
/// </summary>
/// <param name="sInput">HTML text to scan.</param>
/// <returns>The body content (the pattern requires at least ten characters), or an empty string when it does not match.</returns>
public static string GetBodyHelp(string sInput)
{
    const string bodyPattern = @"<Body[^>]*>(?<Body>[\s\S]{10,})</body>";
    return GetText(sInput, bodyPattern, "Body");
}
/// <summary>
/// Extracts body content with a caller-supplied pattern.
/// </summary>
/// <param name="sInput">Text to search.</param>
/// <param name="sRegex">Pattern that defines a named group <c>Body</c>.</param>
/// <returns>The text captured by the <c>Body</c> group of the first match, or an empty string.</returns>
public static string GetBody(string sInput, string sRegex)
{
    const string groupName = "Body";
    return GetText(sInput, sRegex, groupName);
}
/// <summary>
/// Extracts the article source with a caller-supplied pattern, strips markup from it
/// and limits it to 99 characters.
/// </summary>
/// <param name="sInput">Text to search.</param>
/// <param name="sRegex">Pattern that defines a named group <c>Source</c>.</param>
/// <returns>The cleaned source text, truncated to at most 99 characters.</returns>
public static string GetSource(string sInput, string sRegex)
{
    const int maxLength = 99;
    string source = ClearTag(GetText(sInput, sRegex, "Source"));
    return source.Length > maxLength ? source.Substring(0, maxLength) : source;
}
/// <summary>
/// Extracts the author name with a caller-supplied pattern, strips markup from it
/// and limits it to 99 characters.
/// </summary>
/// <param name="sInput">Text to search.</param>
/// <param name="sRegex">Pattern that defines a named group <c>Author</c>.</param>
/// <returns>The cleaned author text, truncated to at most 99 characters.</returns>
public static string GetAuthor(string sInput, string sRegex)
{
    const int maxLength = 99;
    string author = ClearTag(GetText(sInput, sRegex, "Author"));
    return author.Length > maxLength ? author.Substring(0, maxLength) : author;
}
/// <summary>
/// Extracts pagination link addresses with a caller-supplied pattern.
/// </summary>
/// <param name="sInput">Text to search.</param>
/// <param name="sRegex">Pattern that defines a named group <c>href</c>.</param>
/// <returns>The text captured by the <c>href</c> group of every match.</returns>
public static List<string> GetPageLinks(string sInput, string sRegex)
{
    const string groupName = "href";
    return GetList(sInput, sRegex, groupName);
}
/// <summary>
/// Resolves a link found on a page to an absolute URL.
/// </summary>
/// <param name="sInput">Address of the page the link was found on.</param>
/// <param name="sRelativeUrl">The link as written in the page: absolute, root-relative (<c>/x</c>), parent-relative (<c>../x</c>), current-directory (<c>./x</c>) or a bare relative path.</param>
/// <returns>
/// The absolute URL. A blank <paramref name="sRelativeUrl"/> resolves to the page's
/// own directory URL.
/// </returns>
public static string GetUrl(string sInput, string sRelativeUrl)
{
    // Directory form of the page address, always ending in "/"
    // (for example http://www.163.com/news/).
    string sUrl = _GetStandardUrlDepth(sInput);

    // Already absolute ("https" is covered by the "http" prefix test).
    if (sRelativeUrl.StartsWith("http", StringComparison.OrdinalIgnoreCase))
    {
        return sRelativeUrl.Trim();
    }

    // Root-relative: attach to scheme + host of the page.
    if (sRelativeUrl.StartsWith("/", StringComparison.Ordinal))
    {
        return GetDomain(sInput) + sRelativeUrl;
    }

    if (sRelativeUrl.StartsWith("../", StringComparison.Ordinal))
    {
        sUrl = sUrl.Substring(0, sUrl.Length - 1);

        // Only leading "../" segments climb the directory tree. Testing with
        // StartsWith (the original used IndexOf) keeps a "../" that appears later in
        // the path from chopping three unrelated characters off the front.
        while (sRelativeUrl.StartsWith("../", StringComparison.Ordinal))
        {
            string parent = sUrl.Substring(0, sUrl.LastIndexOf("/", StringComparison.Ordinal));

            // Stop at the host: once the remainder is just "http:/" or "https:/" there
            // are more "../" segments than directories. Such links are wrong, but
            // browsers still resolve them against the root.
            if (parent.Length > 6 && !parent.EndsWith(":/", StringComparison.Ordinal))
            {
                sUrl = parent;
            }

            sRelativeUrl = sRelativeUrl.Substring(3);
        }

        return sUrl + "/" + sRelativeUrl.Trim();
    }

    if (sRelativeUrl.StartsWith("./", StringComparison.Ordinal))
    {
        return sUrl + sRelativeUrl.Trim().Substring(2);
    }

    // Bare relative path such as 2007images/modecss.css
    if (sRelativeUrl.Trim() != "")
    {
        return sUrl + sRelativeUrl.Trim();
    }

    // Blank link: resolve to the page's own directory. The original assigned this to
    // the parameter instead of the result and therefore returned an empty string.
    return sUrl;
}
/// <summary>
/// Normalises a page address to its lower-cased directory form.
/// </summary>
/// <param name="url">Page address, with or without an http:// or https:// prefix.</param>
/// <returns>
/// The address in the form <c>http://www.163.com/</c> or <c>http://www.163.com/news/</c>:
/// always ending in "/", with a trailing file name (a last segment containing a dot)
/// removed. The scheme defaults to http:// when none is present.
/// </returns>
private static string _GetStandardUrlDepth(string url)
{
    // Invariant lower-casing keeps the result independent of the current culture.
    string rest = url.Trim().ToLowerInvariant();
    string header = "http://";

    // Only a scheme at the very start of the address counts. The original searched and
    // replaced anywhere in the string, which mangled URLs that embed another URL.
    if (rest.StartsWith("https://", StringComparison.Ordinal))
    {
        header = "https://";
        rest = rest.Substring(header.Length);
    }
    else if (rest.StartsWith("http://", StringComparison.Ordinal))
    {
        rest = rest.Substring(header.Length);
    }

    int lastSlash = rest.LastIndexOf('/');
    if (lastSlash == -1)
    {
        // Host only, e.g. www.163.com
        rest += "/";
    }
    else if (lastSlash == rest.Length - 1)
    {
        // Already a directory, e.g. www.163.com/news/
    }
    else if (rest.IndexOf('.', lastSlash) != -1)
    {
        // Ends in a file name, e.g. www.163.com/news/hello.htm
        rest = rest.Substring(0, lastSlash + 1);
    }
    else
    {
        // Last segment has no dot, so it is treated as a directory name.
        rest += "/";
    }

    return header + rest;
}
/// <summary>
/// Derives a comma-separated keyword list from a piece of text (typically a title).
/// </summary>
/// <remarks>
/// The text is split on punctuation; within each piece, runs of Latin letters longer
/// than two characters become keywords, and the text left between those runs also
/// becomes keywords when longer than two characters. Duplicates are dropped.
/// </remarks>
/// <param name="sInput">Text to analyse.</param>
/// <returns>
/// The keywords joined with commas, or <paramref name="sInput"/> itself when no keyword
/// was found; in both cases limited to 99 characters.
/// </returns>
public static string GetKeyWord(string sInput)
{
    // Delimiters in both ASCII and full-width form: comma, plus, ideographic full stop,
    // semicolon, colon, curly quotes, ideographic comma, underscore and parentheses.
    // A character class is used because the original alternation (with a bare "+" and
    // unbalanced parentheses) is not a valid pattern and made Regex construction throw.
    const string delimiterPattern =
        "[,\uFF0C+\uFF0B\u3002;\uFF1B:\uFF1A\u201C\u201D\u3001_(\uFF08)\uFF09]";

    // Corrected from "[a-zA-z]", whose A-z range also covers [ \ ] ^ _ and the backtick.
    Regex latinWord = new Regex(@"[a-zA-Z]+", RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace);

    List<string> candidates = new List<string>();
    foreach (string segment in Split(sInput, delimiterPattern, 2))
    {
        // Pull out Latin words and replace them with commas so the surrounding
        // (non-Latin) text can be split around them.
        string remainder = segment;
        foreach (Match word in latinWord.Matches(segment))
        {
            if (word.Value.Length > 2)
            {
                candidates.Add(word.Value);
            }
            remainder = remainder.Replace(word.Value, ",");
        }

        foreach (string piece in remainder.Split(','))
        {
            if (piece.Trim().Length > 2)
            {
                candidates.Add(piece);
            }
        }
    }

    // Keep the first occurrence of each keyword, preserving order.
    HashSet<string> seen = new HashSet<string>();
    List<string> unique = new List<string>();
    foreach (string candidate in candidates)
    {
        if (candidate.Length > 2 && seen.Add(candidate))
        {
            unique.Add(candidate);
        }
    }

    string sReturn = unique.Count > 0 ? string.Join(",", unique.ToArray()) : sInput;
    if (sReturn.Length > 99)
    {
        sReturn = sReturn.Substring(0, 99);
    }
    return sReturn;
}
/// <summary>
/// Cleans scraped HTML for re-display: removes scripts, iframes and form controls,
/// applies caller-supplied removal patterns, rewrites relative addresses to absolute
/// ones and routes configured image addresses through an anti-hotlinking proxy.
/// </summary>
/// <param name="sOriContent">The raw HTML.</param>
/// <param name="sOtherRemoveReg">Additional removal patterns, one per line (separated by CR LF). May be null or empty.</param>
/// <param name="sPageUrl">Address of the page the HTML came from; used to resolve relative addresses.</param>
/// <param name="dtAntiLink">
/// Anti-hotlinking rules. The code reads the columns <c>Domain</c>, <c>Type</c> and
/// <c>imgUrl</c>. May be null, in which case no rule is applied.
/// </param>
/// <returns>The cleaned HTML.</returns>
public static string GetContent(string sOriContent, string sOtherRemoveReg, string sPageUrl, DataTable dtAntiLink)
{
    const RegexOptions looseOptions = RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase;
    // Address every matching image reference is redirected through.
    const string proxyPrefix = "http://stat.580k.com/t.asp?url=";

    string sFormartted = sOriContent;

    // Remove potentially dangerous markup.
    sFormartted = Regex.Replace(sFormartted, @"<script[\s\S]*?</script>", "", looseOptions);
    sFormartted = Regex.Replace(sFormartted, @"<iframe[^>]*>[\s\S]*?</iframe>", "", looseOptions);
    Regex r = new Regex(@"<input[\s\S]+?>|<form[\s\S]+?>|</form[\s\S]*?>|<select[\s\S]+?>?</select>|<textarea[\s\S]*?>?</textarea>|<file[\s\S]*?>|<noscript>|</noscript>", RegexOptions.IgnoreCase);
    sFormartted = r.Replace(sFormartted, "");

    // Caller-supplied removal patterns; a null or empty list means none.
    if (!string.IsNullOrEmpty(sOtherRemoveReg))
    {
        string[] sOtherReg = sOtherRemoveReg.Split(new string[] { "\r\n" }, StringSplitOptions.RemoveEmptyEntries);
        foreach (string sRemoveReg in sOtherReg)
        {
            sFormartted = Replace(sFormartted, sRemoveReg, "", 0);
        }
    }

    // Image addresses
    sFormartted = _ReplaceUrl("<img[\\s\\S]+?src\\s*=\\s*(?:'(?<src>[^']+)'|\"(?<src>[^\"]+)\"|(?<src>[^>\\s]+))\\s*[^>]*>", "src", sFormartted, sPageUrl);

    // Anti-hotlinking rules for the page's domain.
    if (dtAntiLink != null)
    {
        string domain = GetDomain(sPageUrl);
        // Quotes are doubled so the value cannot break out of the filter expression.
        DataRow[] drs = dtAntiLink.Select("Domain='" + domain.Replace("'", "''") + "'");
        foreach (DataRow dr in drs)
        {
            string imgUrl = dr["imgUrl"].ToString();
            if (imgUrl.Length == 0)
            {
                // string.Replace rejects an empty search string.
                continue;
            }

            switch (Convert.ToInt32(dr["Type"]))
            {
                case 1: // substitute: the rule's address is replaced by the proxy prefix
                    sFormartted = sFormartted.Replace(imgUrl, proxyPrefix);
                    break;
                default: // prepend: the proxy prefix is put in front of the rule's address
                    sFormartted = sFormartted.Replace(imgUrl, proxyPrefix + imgUrl);
                    break;
            }
        }
    }

    // Anchor links
    sFormartted = _ReplaceUrl(@"<a[^>]+href\s*=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>", "href", sFormartted, sPageUrl);
    // Style sheets
    sFormartted = _ReplaceUrl(@"<link[^>]+href\s*=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>", "href", sFormartted, sPageUrl);
    // background attribute
    sFormartted = _ReplaceUrl(@"background\s*=\s*(?:'(?<img>[^']+)'|""(?<img>[^""]+)""|(?<img>[^>\s]+))", "img", sFormartted, sPageUrl);
    // Inline style background: background-image:url(...)
    sFormartted = _ReplaceUrl(@"background-image\s*:\s*url\s*\x28(?<img>[^\x29]+)\x29", "img", sFormartted, sPageUrl);
    // Flash movies
    sFormartted = _ReplaceUrl(@"<param\s[^>]+""movie""[^>]+value\s*=\s*""(?<flash>[^"">]+\x2eswf)""[^>]*>", "flash", sFormartted, sPageUrl);
    // XSL style sheets, only for XML documents
    if (IsXml(sFormartted))
    {
        sFormartted = _ReplaceUrl(@"<\x3fxml-stylesheet\s+[^\x3f>]+href=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)"")\s*[^\x3f>]*\x3f>", "href", sFormartted, sPageUrl);
    }

    return sFormartted;
}
/// <summary>
/// Rewrites the address captured by a named group in every match of a pattern to an
/// absolute URL.
/// </summary>
/// <param name="strRe">Pattern (Multiline, IgnorePatternWhitespace, IgnoreCase) that captures the address in <paramref name="subMatch"/>.</param>
/// <param name="subMatch">Name of the capture group holding the address.</param>
/// <param name="sFormartted">HTML text to process.</param>
/// <param name="sPageUrl">Address of the page, used as the base for resolution.</param>
/// <returns>The text with each captured address replaced by the result of <c>GetUrl</c>.</returns>
private static string _ReplaceUrl(string strRe, string subMatch, string sFormartted, string sPageUrl)
{
    Regex re = new Regex(strRe, RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase);

    return re.Replace(sFormartted, delegate(Match mc)
    {
        Group address = mc.Groups[subMatch];
        if (!address.Success || address.Length == 0)
        {
            return mc.Value;
        }

        // Splice the absolute URL in at the group's own position. The original used
        // string.Replace on the tag text, which also rewrote every other occurrence
        // of the same characters (e.g. href="a" corrupted the "a" in "<a").
        int offset = address.Index - mc.Index;
        return mc.Value.Substring(0, offset)
            + GetUrl(sPageUrl, address.Value)
            + mc.Value.Substring(offset + address.Length);
    });
}
/// <summary>
/// Tells whether the text contains an XML declaration (<c>&lt;?xml</c> followed by whitespace).
/// </summary>
/// <param name="sFormartted">Text to inspect.</param>
/// <returns>True when at least one XML declaration opener is present.</returns>
public static bool IsXml(string sFormartted)
{
    // \x3f is '?', written as an escape in the pattern.
    return Regex.IsMatch(sFormartted, @"<\x3fxml\s+", RegexOptions.IgnoreCase);
}
#endregion 根据表达式,获得文章内容
#region HTML相关操作
/// <summary>
/// Strips HTML tags, the common entities (&amp;nbsp; &amp;gt; &amp;lt; &amp;amp;) and
/// line-break / tab characters from a fragment, leaving plain text.
/// </summary>
/// <param name="sHtml">HTML fragment; null is treated like an empty string.</param>
/// <returns>The fragment with markup, those entities and CR / LF / TAB removed.</returns>
public static string ClearTag(string sHtml)
{
    if (string.IsNullOrEmpty(sHtml))
    {
        return "";
    }

    // The entity alternatives are spelled out as entities. In decoded form
    // ("( )", "(>)", "(<)", "(&)") the blank group matches the empty string under
    // IgnorePatternWhitespace and shadows every alternative after it, so entities and
    // line breaks were never removed.
    Regex re = new Regex(
        @"(<[^>\s]*\b(\w)+\b[^>]*>)|(<>)|(&nbsp;)|(&gt;)|(&lt;)|(&amp;)|\r|\n|\t",
        RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace);
    return re.Replace(sHtml, "");
}
/// <summary>
/// Removes everything that matches a caller-supplied pattern from the text.
/// </summary>
/// <param name="sHtml">Text to clean.</param>
/// <param name="sRegex">Pattern of what to remove, evaluated with IgnoreCase, Multiline and IgnorePatternWhitespace.</param>
/// <returns>The text with every match deleted.</returns>
public static string ClearTag(string sHtml, string sRegex)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace;
    return Regex.Replace(sHtml, sRegex, "", options);
}
/// <summary>
/// Converts HTML text into JavaScript that writes it out, one
/// <c>document.writeln("...")</c> statement per input line.
/// </summary>
/// <param name="sHtml">HTML text; lines may be separated by CR LF, LF or CR.</param>
/// <returns>The generated script, each statement terminated by CR LF.</returns>
public static string ConvertToJS(string sHtml)
{
    StringBuilder script = new StringBuilder();

    // Split on every newline flavour: a bare LF or CR left inside a JavaScript string
    // literal is a syntax error.
    foreach (string line in Regex.Split(sHtml, @"\r\n|\r|\n"))
    {
        // Backslashes must be escaped before quotes, otherwise the backslash added
        // for a quote would itself be doubled.
        string escaped = line.Replace("\\", "\\\\").Replace("\"", "\\\"");
        script.Append("document.writeln(\"").Append(escaped).Append("\");\r\n");
    }

    return script.ToString();
}
/// <summary>
/// Removes one kind of HTML tag from a string.
/// </summary>
/// <param name="str">HTML text to process.</param>
/// <param name="tag">Tag name without angle brackets (for example <c>b</c>). Null or blank leaves the text unchanged.</param>
/// <param name="isContent">
/// True removes each element together with its content; false removes only the opening
/// and closing tags and keeps what is between them.
/// </param>
/// <returns>The text with the tag removed.</returns>
public static string DelTag(string str, string tag, bool isContent)
{
    // A blank name would turn the pattern into "any tag", so it is treated as
    // "nothing to remove".
    if (tag == null || tag.Trim().Length == 0)
    {
        return str;
    }

    // Escape the name so it is matched literally; \b after it keeps "b" from also
    // matching "br" or "body".
    string name = Regex.Escape(tag.Trim());

    if (isContent)
    {
        // Opening tag, shortest possible content, closing tag.
        return Regex.Replace(str, "<" + name + @"\b[^>]*>[\s\S]*?</" + name + @"\s*>", "", RegexOptions.IgnoreCase);
    }

    // Opening or closing tag. The final '>' is optional so that a tag cut off at the
    // end of the text is removed as well.
    return Regex.Replace(str, "</?" + name + @"\b[^>]*>?", "", RegexOptions.IgnoreCase);
}
/// <summary>
/// Removes several kinds of HTML tag from a string by applying <c>DelTag</c> once per name.
/// </summary>
/// <param name="str">HTML text to process.</param>
/// <param name="tagA">Comma-separated tag names.</param>
/// <param name="isContent">True removes the elements with their content; false removes only the tags.</param>
/// <returns>The text after all listed tags have been removed.</returns>
public static string DelTagArray(string str, string tagA, bool isContent)
{
    // Fold the text through DelTag, one tag name at a time, in list order.
    return tagA.Split(',').Aggregate(str, (current, tagName) => DelTag(current, tagName, isContent));
}
#endregion HTML相关操作
#region 根据内容获得链接
/// <summary>
/// Extracts the target address from a single anchor tag.
/// </summary>
/// <remarks>
/// A file-like address quoted inside a <c>javascript</c> href or onclick handler takes
/// precedence. Otherwise the plain href is HTML-decoded and cleaned with
/// <c>RemoveByReg</c>.
/// </remarks>
/// <param name="sContent">Markup of one anchor tag.</param>
/// <returns>The extracted address, or an empty string when none is found.</returns>
public static string GetLink(string sContent)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace;

    // Address passed as a string argument inside a javascript handler (heuristic).
    // The double-quoted form is matched as the entity &quot;, which is how a quote
    // appears inside an attribute value. The bare quote characters previously in this
    // verbatim literal terminated the string and did not compile.
    Regex js = new Regex(@"(href|onclick)=[^>]+javascript[^>]+(('(?<href>[\w\d/-]+\.[^']*)')|(&quot;(?<href>[\w\d/-]+\.[^;]*)&quot;))[^>]*>", options);
    Match scripted = js.Match(sContent);
    if (scripted.Success)
    {
        return scripted.Groups["href"].Value;
    }

    Regex re = new Regex(@"<a\s+[^>]*href\s*=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))\s*[^>]*>", options);
    Match plain = re.Match(sContent);
    if (!plain.Success)
    {
        return "";
    }

    string href = System.Web.HttpUtility.HtmlDecode(plain.Groups["href"].Value);
    // Strip ";..." suffixes (up to the next ? or &) and javascript: pseudo-addresses.
    return RemoveByReg(href, @";[^?&]*|javascript:.*");
}
/// <summary>
/// Returns the inner content of an anchor element, ignoring mailto links.
/// </summary>
/// <param name="sContent">Markup containing an anchor element.</param>
/// <returns>
/// Everything between the first opening anchor tag and the last closing anchor tag, or
/// an empty string when the markup is a mailto link or contains no anchor.
/// </returns>
public static string GetTextByLink(string sContent)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline;

    // E-mail links carry no page to follow.
    if (Regex.IsMatch(sContent, @"(href|onclick)=[^>]+mailto[^>]+@[^>]+>", options))
    {
        return "";
    }

    Match anchor = Regex.Match(sContent, @"<a(?:\s+[^>]*)?>([\s\S]*)?</a>", options);
    return anchor.Success ? anchor.Groups[1].Value : "";
}
/// <summary>
/// Collects the usable links of a page (address mapped to link text), discarding any
/// feed descriptions found along the way.
/// </summary>
/// <param name="sContent">Page content.</param>
/// <param name="sUrl">Address of the page.</param>
/// <returns>The links produced by the three-argument overload.</returns>
public static Dictionary<string, string> GetLinks(string sContent, string sUrl)
{
    // Descriptions are collected by the overload but not needed by this caller.
    Dictionary<string, string> discardedDescriptions = new Dictionary<string, string>();
    Dictionary<string, string> links = GetLinks(sContent, sUrl, ref discardedDescriptions);
    return links;
}
/// <summary>
/// Collects the usable links of a page (address mapped to link text). Links are taken
/// from the page itself and from same-domain external scripts it references; when none
/// are found the content is parsed as a feed instead.
/// </summary>
/// <param name="sContent">Page content.</param>
/// <param name="sUrl">Address of the page.</param>
/// <param name="lisDes">Passed through to <c>GetLinksFromRss</c> when the feed fallback is used.</param>
/// <returns>The collected links, or the feed items when the page yielded no links.</returns>
public static Dictionary<string, string> GetLinks(string sContent, string sUrl, ref Dictionary<string, string> lisDes)
{
    Dictionary<string, string> links = new Dictionary<string, string>();
    _GetLinks(sContent, sUrl, ref links);

    string pageDomain = GetDomain(sUrl).ToLower();

    // Links may be written out by external scripts, so those are fetched and scanned too.
    const string scriptSrcPattern = @"<script[^>]+src\s*=\s*(?:'(?<src>[^']+)'|""(?<src>[^""]+)""|(?<src>[^>\s]+))\s*[^>]*>";
    MatchCollection scripts = Regex.Matches(sContent, scriptSrcPattern,
        RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase);

    // Scripts are visited last to first.
    int index = scripts.Count;
    while (--index >= 0)
    {
        string scriptUrl = GetUrl(sUrl, scripts[index].Groups["src"].Value);

        // Only scripts served from the page's own domain are mined.
        bool sameDomain = pageDomain.CompareTo(GetDomain(scriptUrl).ToLower()) == 0;
        if (!sameDomain)
        {
            continue;
        }

        string scriptBody = GetHtmlByUrl(scriptUrl);
        if (scriptBody.Length != 0)
        {
            _GetLinks(scriptBody, scriptUrl, ref links);
        }
    }

    return links.Count == 0 ? GetLinksFromRss(sContent, sUrl, ref lisDes) : links;
}
/// <summary>
/// Scans markup for anchor elements and adds the ones that pass a series of filters to
/// <paramref name="lisA"/> (address mapped to link text).
/// </summary>
/// <param name="sContent">Markup to scan.</param>
/// <param name="sUrl">Address the markup came from; passed to GetUrlByRelative to make addresses absolute.</param>
/// <param name="lisA">Dictionary that receives the accepted links. Existing keys and values are never overwritten.</param>
private static void _GetLinks(string sContent, string sUrl, ref Dictionary<string, string> lisA)
{
// Alternation of words and symbols that are removed from the link text before its
// length is checked (navigation, login, advertising and similar boilerplate).
// NOTE(review): the line breaks inside this verbatim literal are part of the pattern;
// whether they are ignored depends on the options RemoveByReg uses - confirm.
const string sFilter =
@"首页|下载|中文|English|反馈|讨论区|投诉|建议|联系|关于|about|诚邀|工作|简介|新闻|掠影|风采
|登录|注销|注册|使用|体验|立即|收藏夹|收藏|添加|加入
|更多|more|专题|精选|热卖|热销|推荐|精彩
|加盟|联盟|友情|链接|相关
|订阅|阅读器|RSS
|免责|条款|声明|我的|我们|组织|概况|有限|免费|公司|法律|导航|广告|地图|隐私
|〖|〗|【|】|(|)|[|]|『|』|\.";
// Whole anchor elements (opening tag with an href through the closing tag).
Regex re = new Regex(@"<a\s+[^>]*href\s*=\s*[^>]+>[\s\S]*?</a>", RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
// Any single or double quote character.
Regex re2 = new Regex(@"""|'", RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
MatchCollection mcs = re.Matches(sContent);
//foreach (Match mc in mcs)
// Anchors are processed from the last match to the first.
for (int i = mcs.Count - 1; i >= 0; i--)
{
Match mc = mcs[i];
string strHref = GetLink(mc.Value).Trim();
strHref = strHref.Replace("\\\"", "");// strip escaped quotes left over from links written by JavaScript
strHref = strHref.Replace("\\\'", "");
string strTemp = RemoveByReg(strHref, @"^http.*/$");// drop addresses that start with "http" and end with "/"
if (strTemp.Length < 2)
{
continue;
}
// Filter out advertising and otherwise meaningless links
string strText = ClearTag(GetTextByLink(mc.Value)).Trim();
strTemp = RemoveByReg(strText, sFilter);
// Skip links whose remaining text encodes to fewer than 9 bytes in the system default encoding.
if (Encoding.Default.GetBytes(strTemp).Length < 9)
{
continue;
}
// Skip links whose text contains a quote character.
if (re2.IsMatch(strText))
{
continue;
}
// Switch to the absolute address
strHref = GetUrlByRelative(sUrl, strHref);
if (strHref.Length <= 18)// e.g. http://www.163.com has length 18
{
continue;
}
// Find the '#' character and remove it together with everything after it;
// if what remains is just the domain address, skip the link
int charIndex = strHref.IndexOf('#');
if (charIndex > -1)
{
strHref = strHref.Substring(0, charIndex);
}
strHref = strHref.Trim(new char[] { '/', '\\' });
string tmpDomainURL = GetDomain(strHref);
if (strHref.Equals(tmpDomainURL, StringComparison.OrdinalIgnoreCase))
{
continue;
}
// Keep the first link seen for each address and for each link text.
if (!lisA.ContainsKey(strHref) && !lisA.ContainsValue(strText))
{
lisA.Add(strHref, strText);
}
}
}
/// <summary>
/// Tells whether the markup references at least one external script (a script tag
/// with a src attribute).
/// </summary>
/// <param name="sHtml">Markup to inspect.</param>
/// <returns>True when a script tag with a src attribute is present.</returns>
public static bool IsExistsScriptLink(string sHtml)
{
    const string scriptSrcPattern = @"<script[^>]+src\s*=\s*(?:'(?<src>[^']+)'|""(?<src>[^""]+)""|(?<src>[^>\s]+))\s*[^>]*>";
    const RegexOptions options = RegexOptions.Multiline | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase;
    return Regex.IsMatch(sHtml, scriptSrcPattern, options);
}
/// <summary>
/// Keeps only the links whose text contains at least one of the given keywords.
/// </summary>
/// <param name="listA">Links to filter (address mapped to link text).</param>
/// <param name="listKey">Keywords, matched literally and case-insensitively. Null returns <paramref name="listA"/> unchanged; an empty list keeps every link with non-empty text.</param>
/// <returns>A new dictionary with the matching links, or <paramref name="listA"/> itself when <paramref name="listKey"/> is null.</returns>
public static Dictionary<string, string> GetLinksByKey(Dictionary<string, string> listA, List<string> listKey)
{
    if (listKey == null)
    {
        return listA;
    }

    // One alternative per keyword; each keyword is escaped so it is matched literally.
    string[] alternatives = listKey
        .Select(key => "([\\s\\S]*" + _ForReguSpeciChar(key) + "[\\s\\S]*)")
        .ToArray();
    string pattern = alternatives.Length > 0 ? string.Join("|", alternatives) : "[\\s\\S]+";
    Regex keyMatcher = new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);

    Dictionary<string, string> matching = new Dictionary<string, string>();
    foreach (KeyValuePair<string, string> link in listA)
    {
        if (!keyMatcher.IsMatch(link.Value))
        {
            continue;
        }
        if (!matching.ContainsKey(link.Key))
        {
            matching.Add(link.Key, link.Value);
        }
    }

    return matching;
}
/// <summary>
/// Escapes text so it can be embedded in a regular expression and matched literally.
/// </summary>
/// <param name="txtRegular">Literal text, for example a search keyword.</param>
/// <returns>The text with every regex metacharacter escaped.</returns>
private static string _ForReguSpeciChar(string txtRegular)
{
    // Regex.Escape covers the hand-maintained list this method used to carry
    // (. $ ^ { [ ( | ) * + ? #) and additionally the backslash and white space. The
    // latter matters because callers compile the result with IgnorePatternWhitespace,
    // under which unescaped spaces in a keyword are silently dropped.
    return Regex.Escape(txtRegular);
}
/// <summary>
/// Reads the items of a feed (address mapped to title), discarding the item descriptions.
/// </summary>
/// <param name="sContent">Feed XML.</param>
/// <param name="sUrl">Address of the feed, used to resolve relative item links.</param>
/// <returns>The items produced by the three-argument overload.</returns>
public static Dictionary<string, string> GetLinksFromRss(string sContent, string sUrl)
{
    // Descriptions are collected by the overload but not needed by this caller.
    Dictionary<string, string> discardedDescriptions = new Dictionary<string, string>();
    Dictionary<string, string> items = GetLinksFromRss(sContent, sUrl, ref discardedDescriptions);
    return items;
}
/// <summary>
/// Reads the items of a feed. RSS 2.0, RSS 1.0 (RDF), Atom 0.3 and Atom 1.0 are tried
/// in that order; the first format that yields any item nodes is used.
/// </summary>
/// <param name="sContent">Feed XML. Null or unparsable content yields an empty result.</param>
/// <param name="sUrl">Address of the feed, passed to GetUrlByRelative to resolve item links.</param>
/// <param name="lisDes">Receives item descriptions (address mapped to description or content). May be null.</param>
/// <returns>
/// Item addresses mapped to titles. Items are visited last to first; items without a
/// link or title, and repeated addresses, are skipped.
/// </returns>
public static Dictionary<string, string> GetLinksFromRss(string sContent, string sUrl, ref Dictionary<string, string> lisDes)
{
    Dictionary<string, string> listResult = new Dictionary<string, string>();
    if (sContent == null)
    {
        return listResult;
    }

    XmlDocument xml = new XmlDocument();
    // Feed content is untrusted: do not resolve external DTDs or entities.
    xml.XmlResolver = null;
    try
    {
        xml.LoadXml(sContent.Trim());
    }
    catch (XmlException)
    {
        // Not well-formed XML, so it is not a feed.
        return listResult;
    }

    XmlNamespaceManager nsMgr = new XmlNamespaceManager(xml.NameTable);
    nsMgr.AddNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
    nsMgr.AddNamespace("rss", "http://purl.org/rss/1.0/");
    nsMgr.AddNamespace("atom03", "http://purl.org/atom/ns#");
    nsMgr.AddNamespace("atom", "http://www.w3.org/2005/Atom");

    // Per format: item path, link path, title path, description path, and the attribute
    // that holds the link ("" when the link is the element's text).
    string[][] formats = new string[][]
    {
        new string[] { "/rss/channel/item", "link", "title", "description", "" },
        new string[] { "/rdf:RDF//rss:item", "rss:link", "rss:title", "rss:description", "" },
        new string[] { "/atom03:feed/atom03:entry", "atom03:link", "atom03:title", "atom03:content", "href" },
        new string[] { "/atom:feed/atom:entry", "atom:link", "atom:title", "atom:content", "href" }
    };

    foreach (string[] format in formats)
    {
        XmlNodeList nodes = xml.SelectNodes(format[0], nsMgr);
        if (nodes == null || nodes.Count == 0)
        {
            continue;
        }

        for (int i = nodes.Count - 1; i >= 0; i--)
        {
            try
            {
                XmlNode linkNode = nodes[i].SelectSingleNode(format[1], nsMgr);
                XmlNode titleNode = nodes[i].SelectSingleNode(format[2], nsMgr);
                if (linkNode == null || titleNode == null)
                {
                    continue;
                }

                string rawLink;
                if (format[4].Length == 0)
                {
                    rawLink = linkNode.InnerText;
                }
                else
                {
                    XmlAttribute linkAttribute = linkNode.Attributes[format[4]];
                    if (linkAttribute == null)
                    {
                        continue;
                    }
                    rawLink = linkAttribute.InnerText;
                }

                string sLink = GetUrlByRelative(sUrl, rawLink);
                if (listResult.ContainsKey(sLink))
                {
                    continue;
                }
                listResult.Add(sLink, titleNode.InnerText);

                // The description is optional: an item without one is still listed.
                XmlNode descriptionNode = nodes[i].SelectSingleNode(format[3], nsMgr);
                if (descriptionNode != null && lisDes != null && !lisDes.ContainsKey(sLink))
                {
                    lisDes.Add(sLink, descriptionNode.InnerText);
                }
            }
            catch (Exception)
            {
                // Deliberate best effort: one item that cannot be processed (for
                // example a link the URL helper rejects) must not lose the rest.
            }
        }

        return listResult;
    }

    return listResult;
}
/// <summary>
/// Reads the channel title of an RSS 2.0 feed.
/// </summary>
/// <param name="sContent">Feed XML.</param>
/// <returns>
/// The text of <c>/rss/channel/title</c>, or an empty string when the content is null,
/// is not well-formed XML or has no such element.
/// </returns>
public static string GetTitleFromRss(string sContent)
{
    if (string.IsNullOrEmpty(sContent))
    {
        return "";
    }

    XmlDocument xml = new XmlDocument();
    // Feed content is untrusted: do not resolve external DTDs or entities.
    xml.XmlResolver = null;
    try
    {
        xml.LoadXml(sContent.Trim());
    }
    catch (XmlException)
    {
        return "";
    }

    XmlNode titleNode = xml.SelectSingleNode("/rss/channel/title");
    return titleNode == null ? "" : titleNode.InnerText;
}
#region 已过时的方法
/// <summary>
/// Obsolete. Returns the anchor elements of a page whose text is long enough and
/// matches one of the keywords; when none qualify, the content is read as a feed instead.
/// </summary>
/// <param name="sContent">Page content.</param>
/// <param name="listKey">Keywords, inserted into the pattern as-is (not escaped). An empty list accepts any text.</param>
/// <returns>The markup of the qualifying anchors, or the result of <c>GetLinksByKeyFromRss</c> when there are none.</returns>
[Obsolete("已过时的方法。")]
public static List<string> GetLinksByKey(string sContent, /*string sUrl,*/ List<string> listKey)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline;

    // Step 1: collect anchors, dropping addresses that start with "http" and end with "/".
    List<string> anchors = new List<string>();
    foreach (Match found in Regex.Matches(sContent, @"<a\s+[^>]*href\s*=\s*[^>]+>[\s\S]*?</a>", options))
    {
        if (RemoveByReg(GetLink(found.Value), @"^http.*/$").Length > 0)
        {
            anchors.Add(found.Value);
        }
    }

    // Step 2: build the keyword alternation.
    StringBuilder keyPattern = new StringBuilder();
    foreach (string key in listKey)
    {
        if (keyPattern.Length > 0)
        {
            keyPattern.Append('|');
        }
        keyPattern.Append("([\\s\\S]*").Append(key).Append("[\\s\\S]*)");
    }
    if (keyPattern.Length == 0)
    {
        keyPattern.Append("[\\s\\S]+");
    }
    Regex reKey = new Regex(keyPattern.ToString(), options);

    // Step 3: keep anchors whose text has at least five characters, still encodes to
    // more than 8 bytes after boilerplate words are removed, and matches a keyword.
    Regex anchorText = new Regex(@"<a\s+[^>]+>([\s\S]{5,})?</a>", options);
    List<string> listResult = new List<string>();
    foreach (string anchor in anchors)
    {
        Match textMatch = anchorText.Match(anchor);
        if (!textMatch.Success)
        {
            continue;
        }

        string text = ClearTag(textMatch.Groups[1].Value.Trim());
        text = RemoveByReg(text, @"更多|登录|添加|推荐|收藏夹|加盟|关于|订阅|阅读器|我的|有限|免费|公司|more|RSS|about|\.");
        if (Encoding.Default.GetBytes(text).Length > 8 && reKey.Match(text).Success)
        {
            listResult.Add(anchor);
        }
    }

    // Feed fallback when the page yielded nothing.
    return listResult.Count == 0 ? GetLinksByKeyFromRss(sContent, listKey) : listResult;
}
/// <summary>
/// Reads the entries of a feed and returns each one as an HTML anchor
/// (<c>&lt;a href="link"&gt;title&lt;/a&gt;</c>).
/// Three layouts are tried in order: RSS 2.0, RSS 1.0 (RDF) and Atom using the
/// <c>http://purl.org/atom/ns#</c> namespace. The first layout that contains entries wins.
/// </summary>
/// <param name="sContent">Feed XML; null, blank or malformed input yields an empty list.</param>
/// <param name="listKey">
/// Not used for filtering. The parameter is kept only so existing callers keep compiling;
/// it may be null.
/// </param>
/// <returns>One anchor string per entry that has both a link and a title; never null.</returns>
[Obsolete("已过时的方法。")]
public static List<string> GetLinksByKeyFromRss(string sContent, List<string> listKey)
{
    List<string> listResult = new List<string>();
    if (string.IsNullOrEmpty(sContent))
    {
        return listResult;
    }

    XmlDocument xml = new XmlDocument();
    try
    {
        xml.LoadXml(sContent.Trim());
    }
    catch (XmlException)
    {
        // Not a well-formed XML document, hence not a feed.
        return listResult;
    }

    XmlNamespaceManager nsMgr = new XmlNamespaceManager(xml.NameTable);
    nsMgr.AddNamespace("rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
    nsMgr.AddNamespace("rss", "http://purl.org/rss/1.0/");
    nsMgr.AddNamespace("atom", "http://purl.org/atom/ns#");

    // RSS 2.0: elements live in no namespace.
    XmlNodeList nodes = xml.SelectNodes("/rss/channel/item", nsMgr);
    if (nodes.Count > 0)
    {
        foreach (XmlNode item in nodes)
        {
            AppendFeedAnchor(listResult, item.SelectSingleNode("link", nsMgr), item.SelectSingleNode("title", nsMgr));
        }
        return listResult;
    }

    // RSS 1.0 (RDF)
    nodes = xml.SelectNodes("/rdf:RDF//rss:item", nsMgr);
    if (nodes.Count > 0)
    {
        foreach (XmlNode item in nodes)
        {
            AppendFeedAnchor(listResult, item.SelectSingleNode("rss:link", nsMgr), item.SelectSingleNode("rss:title", nsMgr));
        }
        return listResult;
    }

    // Atom: the address is carried by the href attribute of the link element.
    nodes = xml.SelectNodes("/atom:feed/atom:entry", nsMgr);
    foreach (XmlNode entry in nodes)
    {
        XmlNode link = entry.SelectSingleNode("atom:link", nsMgr);
        XmlNode href = (link == null || link.Attributes == null) ? null : link.Attributes["href"];
        AppendFeedAnchor(listResult, href, entry.SelectSingleNode("atom:title", nsMgr));
    }
    return listResult;
}

/// <summary>
/// Adds one anchor to <paramref name="target"/>; entries lacking a link or a title are skipped.
/// </summary>
private static void AppendFeedAnchor(List<string> target, XmlNode linkNode, XmlNode titleNode)
{
    if (linkNode == null || titleNode == null)
    {
        return;
    }
    target.Add("<a href=\"" + linkNode.InnerText + "\">" + titleNode.InnerText + "</a>");
}
#endregion
/// <summary>
/// Deletes every match of a regular expression from a string.
/// </summary>
/// <param name="sContent">Text to clean.</param>
/// <param name="sRegex">
/// Pattern, compiled with IgnoreCase, IgnorePatternWhitespace and Multiline
/// (so unescaped whitespace inside the pattern is ignored).
/// </param>
/// <returns><paramref name="sContent"/> with all matches removed.</returns>
public static string RemoveByReg(string sContent, string sRegex)
{
    Regex re = new Regex(sRegex, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
    // Let the regex engine delete the matches at their positions. Replacing the matched
    // *text* with string.Replace fails on zero-length matches (ArgumentException) and also
    // deletes identical text that the pattern did not match (anchors, look-arounds).
    return re.Replace(sContent, "");
}
/// <summary>
/// Substitutes every match of a regular expression.
/// </summary>
/// <param name="sContent">Text to process.</param>
/// <param name="sReplace">Replacement; regex substitution syntax such as <c>$1</c> is honoured.</param>
/// <param name="sRegex">Pattern, evaluated with IgnoreCase, IgnorePatternWhitespace and Multiline.</param>
/// <returns>The text after substitution.</returns>
public static string ReplaceByReg(string sContent, string sReplace, string sRegex)
{
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline;
    return Regex.Replace(sContent, sRegex, sReplace, options);
}
/// <summary>
/// Returns the inside of a page's body element.
/// </summary>
/// <param name="sContent">Complete page markup.</param>
/// <returns>
/// The markup with every run of text that ends in an opening body tag removed, and with the
/// closing body tag plus the following closing html tag removed. Text without those tags
/// is returned unchanged.
/// </returns>
public static string GetBody(string sContent)
{
    const RegexOptions common = RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace;

    // Everything up to and including the opening tag.
    string afterOpeningTag = Regex.Replace(sContent, @"[\s\S]*?<\bbody\b[^>]*>", "", common);

    // The closing pair; this pattern is evaluated right-to-left.
    return Regex.Replace(afterOpeningTag, @"</\bbody\b[^>]*>\s*</html>", "", common | RegexOptions.RightToLeft);
}
#endregion 根据超链接地址获取页面内容
#region 根据内容作字符串分析
/// <summary>
/// Returns the first match of a regular expression with any trailing underscores removed.
/// </summary>
/// <param name="sContent">Text to search.</param>
/// <param name="sRegex">Pattern, compiled with IgnoreCase, IgnorePatternWhitespace and Multiline.</param>
/// <returns>The matched text without trailing '_' characters, or an empty string when nothing matches.</returns>
public static string GetTextByReg(string sContent, string sRegex)
{
    Regex re = new Regex(sRegex, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
    Match mc = re.Match(sContent);
    if (!mc.Success)
    {
        return "";
    }

    // Only the tail is trimmed. The former loop went through RemoveEndWith, which deletes
    // the FIRST underscore of the string, so underscores inside the text were lost as well.
    return mc.Value.TrimEnd('_');
}
/// <summary>
/// Returns the text captured by a named group of the first match.
/// </summary>
/// <param name="sContent">Text to search.</param>
/// <param name="sRegex">Pattern, evaluated with IgnoreCase, IgnorePatternWhitespace and Multiline.</param>
/// <param name="sGroupName">Name of the capture group to read.</param>
/// <returns>The captured text, or an empty string when the pattern does not match.</returns>
public static string GetTextByReg(string sContent, string sRegex, string sGroupName)
{
    // Example pattern (group name "Coding"):
    //   charset=[\s]*(?<Coding>[^'"]+)[\s]*['"]?[\s]*[/]?>
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline;
    Match found = Regex.Match(sContent, sRegex, options);
    return found.Success ? found.Groups[sGroupName].Value : "";
}
/// <summary>
/// Resolves a (possibly relative) link against the address of the page it was found on.
/// </summary>
/// <param name="sUrl">Absolute address of the containing page.</param>
/// <param name="sRUrl">Link to resolve; may be relative or absolute.</param>
/// <returns>
/// The absolute address of the link. If either argument cannot be parsed,
/// <paramref name="sUrl"/> is returned unchanged.
/// </returns>
public static string GetUrlByRelative(string sUrl, string sRUrl)
{
    try
    {
        // Example of a last segment that does contain a dot and is therefore treated as a file:
        // http://q.yesky.com/grp/dsc/view.do;jsessionid=A6324FD46B4893303124F70C0B2AAC1E?grpId=201595&rvId=8215876
        Uri baseUri = new Uri(sUrl);

        if (!baseUri.AbsolutePath.EndsWith("/", StringComparison.Ordinal))
        {
            int last = baseUri.Segments.Length - 1;
            // A last path segment without a dot (after its first character) is taken to be
            // a directory, so the base needs a trailing slash for correct resolution.
            if (last > 0 && baseUri.Segments[last].IndexOf('.') < 1)
            {
                // Append the slash to the PATH. Appending it to the whole URL would put it
                // into the query string or fragment whenever one is present.
                baseUri = new Uri(baseUri.GetLeftPart(UriPartial.Path) + "/" + baseUri.Query);
            }
        }

        return new Uri(baseUri, sRUrl).AbsoluteUri;
    }
    catch
    {
        // Best effort: unparsable input falls back to the page address.
        return sUrl;
    }
}
/// <summary>
/// Collects the value of the capture group named "href" from every match of a pattern.
/// </summary>
/// <param name="sContent">Text to search.</param>
/// <param name="sRegex">
/// Pattern, compiled with IgnoreCase, IgnorePatternWhitespace and Multiline. A pattern without
/// a group called "href" contributes one empty string per match.
/// </param>
/// <returns>The captured values in match order.</returns>
public static List<string> GetListByReg(string sContent, string sRegex)
{
    Regex pattern = new Regex(sRegex, RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Multiline);
    List<string> captured = new List<string>();
    for (Match hit = pattern.Match(sContent); hit.Success; hit = hit.NextMatch())
    {
        captured.Add(hit.Groups["href"].Value);
    }
    return captured;
}
/// <summary>
/// Reduces an absolute URL to its scheme and authority, e.g. "http://host:8080".
/// </summary>
/// <param name="sUrl">Absolute URL.</param>
/// <returns>
/// "scheme://authority", or <paramref name="sUrl"/> itself when it cannot be parsed as a URI.
/// </returns>
public static string GetDomainUrl(string sUrl)
{
    Uri parsed;
    try
    {
        parsed = new Uri(sUrl);
        return string.Concat(parsed.Scheme, "://", parsed.Authority);
    }
    catch
    {
        // Unparsable input is handed back untouched.
        return sUrl;
    }
}
/// <summary>
/// Splits a keyword string into its non-empty parts.
/// </summary>
/// <param name="sOri">
/// Keywords separated by an ASCII comma, a full-width comma (U+FF0C), a backslash,
/// a slash or an ideographic comma (U+3001).
/// </param>
/// <returns>
/// The parts in input order (not trimmed), or null when <paramref name="sOri"/> is null or blank.
/// </returns>
public static List<string> GetKeys(string sOri)
{
    if (sOri == null || sOri.Trim().Length == 0)
    {
        return null;
    }

    // The separator list used to contain ',' twice; the second entry is restored to the
    // full-width comma and written as an escape so it survives re-encoding of the file.
    char[] separators = { ',', '\uFF0C', '\\', '/', '\u3001' };
    return new List<string>(sOri.Split(separators, StringSplitOptions.RemoveEmptyEntries));
}
/// <summary>
/// Breaks a phrase into distinct keywords and joins them with commas.
/// </summary>
/// <param name="sOri">Text to break up; null or empty gives an empty string.</param>
/// <returns>
/// A comma-separated list. The text is first cut at punctuation (ASCII and full-width comma,
/// plus, semicolon and colon, the ideographic full stop and comma, curly double quotes and
/// underscore). Within each piece, runs of Latin letters become keywords of their own and the
/// text between them becomes further keywords. Only keywords longer than two characters are
/// kept, and each keyword appears once, in order of first appearance.
/// </returns>
public static string Split(string sOri)
{
    if (string.IsNullOrEmpty(sOri))
    {
        return "";
    }

    // Written with escapes: the former pattern contained "(+{1})", which is not a valid
    // regular expression and made the constructor throw on every call.
    Regex delimiters = new Regex("[,\uFF0C+\uFF0B\u3002;\uFF1B:\uFF1A\u201C\u201D\u3001_]");
    // "A-Z", not "A-z": the latter also covers [ \ ] ^ _ and the backtick.
    Regex latinRun = new Regex("[a-zA-Z]+");

    List<string> keywords = new List<string>();
    foreach (string segment in delimiters.Split(sOri))
    {
        if (segment.Length <= 2)
        {
            continue;
        }

        // Latin words first ...
        foreach (Match word in latinRun.Matches(segment))
        {
            if (word.Value.Length > 2 && !keywords.Contains(word.Value))
            {
                keywords.Add(word.Value);
            }
        }

        // ... then whatever remains between them.
        string remainder = latinRun.Replace(segment, ",");
        foreach (string piece in remainder.Split(','))
        {
            if (piece.Trim().Length > 2 && !keywords.Contains(piece))
            {
                keywords.Add(piece);
            }
        }
    }

    return string.Join(",", keywords.ToArray());
}
#endregion
#region 杂项
/// <summary>
/// Reduces an HTML page to its plain text on a single line (all line breaks are removed).
/// </summary>
/// <param name="sHtml">Page markup.</param>
/// <returns>
/// The trimmed text that is left after removing the head section, script/iframe/style/title/
/// meta/option elements, non-breaking spaces, complete anchor elements and all remaining tags.
/// </returns>
public static string GetTxtFromHtml(string sHtml)
{
    // The head section carries no body text.
    string content = RemoveByReg(sHtml, @"<head[^>]*>[\s\S]*?</head>");

    // Elements whose content is never visible text.
    content = RemoveByReg(content, @"(<script[^>]*>[\s\S]*?</script>)|(<IFRAME[^>]*>[\s\S]*?</IFRAME>)|(<style[^>]*>[\s\S]*?</style>|<title[^>]*>[\s\S]*?</title>|<meta[^>]*>|<option[^>]*>[\s\S]*?</option>)");

    // Non-breaking spaces (entity and U+00A0), line feeds and tabs.
    // RemoveByReg ignores unescaped whitespace in patterns, so a literal space inside the
    // group would be an EMPTY group that matches at every position; it has to be spelled out.
    content = RemoveByReg(content, @"(&nbsp;)|(\u00A0)|([\n\t]+)");

    // Common layout tags.
    string layoutTags = @"(<table(\s+[^>]*)*>)|(<td(\s+[^>]*)*>)|(<tr(\s+[^>]*)*>)|(<p(\s+[^>]*)*>)|(<div(\s+[^>]*)*>)|(<ul(\s+[^>]*)*>)|(<li(\s+[^>]*)*>)|</table>|</td>|</tr>|</p>|<br>|</div>|</li>|</ul>|<p />|<br />";
    content = ReplaceByReg(content, "", layoutTags);
    content = ReplaceByReg(content, "", @"[\f\n\r\v]+");

    // Links are dropped together with their text.
    content = RemoveByReg(content, @"<a(\s+[^>]*)*>[\s\S]*?</a>");
    // Strip whatever tags are left to obtain the bare text.
    content = RemoveByReg(content, "<[^>]+>");

    content = content.Replace("\n", "");
    content = content.Replace("\r", "");
    return content.Trim();
}
/// <summary>
/// Reduces an HTML page to its plain text while keeping the line breaks of the source.
/// </summary>
/// <param name="sHtml">Page markup.</param>
/// <returns>
/// The trimmed text that is left after removing the head section, script/iframe/style/title/
/// meta/option elements, non-breaking spaces, tabs, complete anchor elements and all
/// remaining tags. Unlike <c>GetTxtFromHtml</c>, newline characters are not removed.
/// </returns>
public static string GetTxtFromHtml2(string sHtml)
{
    // The head section carries no body text.
    string content = RemoveByReg(sHtml, @"<head[^>]*>[\s\S]*?</head>");

    // Elements whose content is never visible text.
    content = RemoveByReg(content, @"(<script[^>]*>[\s\S]*?</script>)|(<IFRAME[^>]*>[\s\S]*?</IFRAME>)|(<style[^>]*>[\s\S]*?</style>|<title[^>]*>[\s\S]*?</title>|<meta[^>]*>|<option[^>]*>[\s\S]*?</option>)");

    // Non-breaking spaces (entity and U+00A0) and tabs; newlines are deliberately kept.
    // RemoveByReg ignores unescaped whitespace in patterns, so a literal space inside the
    // group would be an EMPTY group that matches at every position; it has to be spelled out.
    content = RemoveByReg(content, @"(&nbsp;)|(\u00A0)|([\t]+)");

    // Common layout tags.
    string layoutTags = @"(<table(\s+[^>]*)*>)|(<td(\s+[^>]*)*>)|(<tr(\s+[^>]*)*>)|(<p(\s+[^>]*)*>)|(<div(\s+[^>]*)*>)|(<ul(\s+[^>]*)*>)|(<li(\s+[^>]*)*>)|</table>|</td>|</tr>|</p>|<br>|</div>|</li>|</ul>|<p />|<br />";
    content = ReplaceByReg(content, "", layoutTags);

    // Links are dropped together with their text.
    content = RemoveByReg(content, @"<a(\s+[^>]*)*>[\s\S]*?</a>");
    // Strip whatever tags are left to obtain the bare text.
    content = RemoveByReg(content, "<[^>]+>");

    return content.Trim();
}
#endregion
/// <summary>
/// Removes one occurrence of a suffix from the end of a string.
/// </summary>
/// <param name="sOrg">Text to shorten.</param>
/// <param name="sEnd">Suffix to remove (compared ordinally).</param>
/// <returns>
/// <paramref name="sOrg"/> without the trailing <paramref name="sEnd"/>; the unchanged input
/// when it does not end with the suffix or when either argument is null or empty.
/// </returns>
public static string RemoveEndWith(string sOrg, string sEnd)
{
    if (string.IsNullOrEmpty(sOrg) || string.IsNullOrEmpty(sEnd))
    {
        return sOrg;
    }

    if (sOrg.EndsWith(sEnd, StringComparison.Ordinal))
    {
        // Cut at the END. Locating the suffix with IndexOf removed its first occurrence,
        // which is a different position whenever the suffix also appears earlier.
        return sOrg.Substring(0, sOrg.Length - sEnd.Length);
    }
    return sOrg;
}
#region 根据超链接地址获取页面内容
/// <summary>
/// Downloads the document at the given address, letting the character encoding be detected.
/// </summary>
/// <param name="sUrl">Address to download.</param>
/// <returns>Whatever the two-argument overload returns when called with the encoding "auto".</returns>
public static string GetHtmlByUrl(string sUrl)
{
// "auto" asks the overload chain to work out the encoding itself.
return GetHtmlByUrl(sUrl, "auto");
}
/// <summary>
/// Downloads the document at the given address using the given encoding.
/// </summary>
/// <param name="sUrl">Address to download.</param>
/// <param name="sCoding">Encoding name; null, empty or "auto" requests detection.</param>
/// <returns>The result of the <c>ref</c> overload.</returns>
public static string GetHtmlByUrl(string sUrl, string sCoding)
{
// sUrl is a by-value parameter here, so any address written back by the ref overload
// stays local to this method and is not visible to the caller.
return GetHtmlByUrl(ref sUrl, sCoding);
}
/// <summary>
/// Downloads a document and decodes it to text.
/// </summary>
/// <param name="sUrl">
/// Address to download. On return it holds the address that actually answered
/// (<c>ResponseUri</c>), which differs from the request after a redirect.
/// </param>
/// <param name="sCoding">
/// Encoding name. Null, empty or "auto" means: use the charset of the Content-Type header,
/// otherwise the charset declared inside the document, otherwise gb2312.
/// </param>
/// <returns>
/// The decoded text; an empty string when no response was obtained or any error occurred.
/// An encoding name the runtime does not know falls back to gb2312.
/// </returns>
public static string GetHtmlByUrl(ref string sUrl, string sCoding)
{
    string content = "";
    try
    {
        HttpWebResponse response = _MyGetResponse(sUrl);
        if (response == null)
        {
            return content;
        }

        byte[] buffer;
        string contentType;
        try
        {
            sUrl = response.ResponseUri.AbsoluteUri;
            using (Stream stream = response.GetResponseStream())
            {
                buffer = GetContent(stream);
            }
            // Must be read before the response is closed.
            contentType = response.GetResponseHeader("Content-Type");
        }
        finally
        {
            // Released on every path, including failures while reading.
            response.Close();
        }

        string charset;
        bool detect = string.IsNullOrEmpty(sCoding) || string.Equals(sCoding, "auto", StringComparison.OrdinalIgnoreCase);
        if (detect)
        {
            // 1) Response header.
            Match m = Regex.Match(contentType, "[\\s\\S]*charset=(?<charset>[\\S]*)", RegexOptions.IgnoreCase);
            charset = m.Success ? m.Groups["charset"].Value : "";

            // The header value may be quoted or followed by further parameters.
            int parameterEnd = charset.IndexOf(';');
            if (parameterEnd >= 0)
            {
                charset = charset.Substring(0, parameterEnd);
            }
            charset = charset.Trim('"', '\'');

            // NOTE(review): special case kept from the previous implementation; the origin of
            // the truncated "-8" value is not visible here.
            if (charset == "-8")
            {
                charset = "utf-8";
            }

            if (charset == "")
            {
                // 2) Declaration inside the document, read from a provisional gb2312 decoding.
                //    The optional quote after "charset=" covers <meta charset="...">.
                content = Encoding.GetEncoding("gb2312").GetString(buffer);
                string declaration = "(<meta[^>]*charset=[\"']?(?<charset>[^>'\"]*)[\\s\\S]*?>)|(xml[^>]+encoding=(\"|')*(?<charset>[^>'\"]*)[\\s\\S]*?>)";
                m = Regex.Match(content, declaration, RegexOptions.IgnoreCase);
                if (!m.Success)
                {
                    // 3) Nothing declared anywhere: the gb2312 decoding is the result.
                    return content;
                }
                charset = m.Groups["charset"].Value.Trim();
            }
        }
        else
        {
            // Invariant lower-casing: culture rules (e.g. Turkish dotless i) would corrupt names.
            charset = sCoding.ToLowerInvariant();
        }

        try
        {
            content = Encoding.GetEncoding(charset).GetString(buffer);
        }
        catch (ArgumentException)
        {
            // Unknown encoding name.
            content = Encoding.GetEncoding("gb2312").GetString(buffer);
        }
    }
    catch
    {
        // Best-effort download: every failure is reported as "no content".
        content = "";
    }
    return content;
}
/// <summary>
/// Issues an HTTP GET and returns the response. A request that fails with a
/// <see cref="WebException"/> is repeated once with a cookie container attached.
/// </summary>
/// <param name="sUrl">Absolute address; an invalid address makes the Uri constructor throw.</param>
/// <returns>
/// The open response (the caller must close it), or null when both attempts failed or a
/// non-web exception occurred.
/// </returns>
private static HttpWebResponse _MyGetResponse(string sUrl)
{
    const int timeoutMs = 10000;
    Uri target = new Uri(sUrl);

    // Attempt 0: no cookies. Attempt 1: with an empty cookie container.
    for (int attempt = 0; attempt < 2; attempt++)
    {
        try
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(target);
            request.ReadWriteTimeout = 120000; // 120 seconds for reading/writing the stream
            request.Timeout = timeoutMs;
            request.MaximumAutomaticRedirections = 50;
            // This is the value that was effectively in force; an earlier assignment of -1
            // was overwritten immediately and has been dropped.
            request.MaximumResponseHeadersLength = 5;
            request.AllowAutoRedirect = true;
            if (attempt > 0)
            {
                request.CookieContainer = new CookieContainer();
            }
            request.UserAgent = "Mozilla/6.0 (compatible; MSIE 6.0; Windows NT 5.1)";
            return (HttpWebResponse)request.GetResponse();
        }
        catch (WebException we)
        {
            // An error response (e.g. 404) still holds a connection; release it before
            // retrying or giving up.
            if (we.Response != null)
            {
                we.Response.Close();
            }
        }
        catch
        {
            return null;
        }
    }
    return null;
}
/// <summary>
/// Reads a stream to its end and returns the bytes.
/// </summary>
/// <param name="stream">Stream to drain; it is not closed here.</param>
/// <returns>
/// All bytes read. If reading fails part-way (or the stream is null), the bytes collected
/// so far are returned, possibly an empty array.
/// </returns>
private static byte[] GetContent(Stream stream)
{
    const int bufferSize = 4096;

    // MemoryStream instead of an ArrayList: no boxing of every single byte.
    using (MemoryStream collected = new MemoryStream())
    {
        try
        {
            byte[] chunk = new byte[bufferSize];
            int read;
            while ((read = stream.Read(chunk, 0, bufferSize)) > 0)
            {
                collected.Write(chunk, 0, read);
            }
        }
        catch
        {
            // Best effort: a broken connection still yields the part already received.
        }
        return collected.ToArray();
    }
}
/// <summary>
/// Requests an address and returns the response headers as text.
/// </summary>
/// <param name="sUrl">Absolute address; an invalid address makes the Uri constructor throw.</param>
/// <returns>
/// One "name:value" line per header, each terminated by CR LF. Request failures are
/// swallowed, in which case the text collected so far (usually empty) is returned.
/// </returns>
public static string GetHttpHead(string sUrl)
{
    StringBuilder head = new StringBuilder();
    Uri uri = new Uri(sUrl);
    try
    {
        WebRequest req = WebRequest.Create(uri);
        // Dispose the response so the underlying connection is released.
        using (WebResponse resp = req.GetResponse())
        {
            WebHeaderCollection headers = resp.Headers;
            foreach (string key in headers.AllKeys)
            {
                head.Append(key).Append(":").Append(headers[key]).Append("\r\n");
            }
        }
    }
    catch
    {
        // Best effort: an unreachable address simply yields no headers.
    }
    return head.ToString();
}
/// <summary>
/// Handles frameset pages: collects the src of every &lt;frame&gt; tag found in the markup.
/// </summary>
/// <param name="url">Address of the page; passed on together with each src value.</param>
/// <param name="content">Page markup to scan.</param>
/// <returns>The array produced by the private three-argument overload (empty when there is no frame).</returns>
public static string[] DealWithFrame(string url, string content)
{
// src may be double-quoted, single-quoted or unquoted; all three forms capture into "src".
string regFrame = @"<frame\s+[^>]*src\s*=\s*(?:""(?<src>[^""]+)""|'(?<src>[^']+)'|(?<src>[^\s>""']+))[^>]*>";
return DealWithFrame(regFrame, url, content);
}
/// <summary>
/// Handles inline frames: collects the src of every &lt;iframe&gt; tag found in the markup.
/// </summary>
/// <param name="url">Address of the page; passed on together with each src value.</param>
/// <param name="content">Page markup to scan.</param>
/// <returns>The array produced by the private three-argument DealWithFrame overload (empty when there is no iframe).</returns>
public static string[] DealWithIFrame(string url, string content)
{
// src may be double-quoted, single-quoted or unquoted; all three forms capture into "src".
string regiFrame = @"<iframe\s+[^>]*src\s*=\s*(?:""(?<src>[^""]+)""|'(?<src>[^']+)'|(?<src>[^\s>""']+))[^>]*>";
return DealWithFrame(regiFrame, url, content);
}
/// <summary>
/// Shared worker of the frame helpers: runs a pattern over the markup and converts the
/// "src" group of every match with <c>GetUrl</c>.
/// </summary>
/// <param name="strReg">Pattern (matched case-insensitively) exposing a group named "src".</param>
/// <param name="url">Address of the page, handed to <c>GetUrl</c> with each src value.</param>
/// <param name="content">Page markup to scan.</param>
/// <returns>One entry per match, in document order.</returns>
private static string[] DealWithFrame(string strReg, string url, string content)
{
    List<string> sources = new List<string>();
    Regex framePattern = new Regex(strReg, RegexOptions.IgnoreCase);
    foreach (Match hit in framePattern.Matches(content))
    {
        // GetUrl is defined elsewhere in this file (presumably it makes the src absolute).
        sources.Add(GetUrl(url, hit.Groups["src"].Value));
    }
    return sources.ToArray();
}
#endregion 根据超链接地址获取页面内容
#region 获得多个页面
/// <summary>
/// Fetches several documents over ONE raw TCP connection by writing hand-built HTTP GET
/// requests to a socket and collecting whatever text comes back.
/// </summary>
/// <param name="listUrl">
/// Key/address pairs. The connection is opened to the host and port of the FIRST entry only;
/// every entry then contributes its own path, query and Host header to a request on that
/// same socket.
/// </param>
/// <param name="sCoding">Encoding name used both to encode the request and to decode the reply.</param>
/// <returns>
/// One pair per processed entry: the entry's key and the raw text received for it. No
/// status-line or header parsing is done here. Entries after a failure are missing.
/// </returns>
/// <remarks>
/// The "SocketTimeOut" application setting is parsed before the try block, so a missing or
/// non-numeric setting throws to the caller. All later errors are swallowed.
/// </remarks>
public static List<KeyValuePair<int, string>> GetHtmlByUrlList(List<KeyValuePair<int, string>> listUrl, string sCoding)
{
// Send/receive timeout taken from configuration (outside the try: failures propagate).
int iTimeOut = int.Parse(System.Configuration.ConfigurationManager.AppSettings["SocketTimeOut"]);
StringBuilder sbHtml = new StringBuilder();
List<KeyValuePair<int, string>> listResult = new List<KeyValuePair<int, string>>();
int nBytes = 0;
Socket sock = null;
IPHostEntry ipHostInfo = null;
try
{
// Initialisation: resolve and connect to the host of the first URL.
Uri site = new Uri(listUrl[0].Value.ToString());
try
{
ipHostInfo = System.Net.Dns.GetHostEntry(site.Host);
}
catch (Exception Ex)
{
// NOTE(review): "throw Ex" resets the stack trace; the exception is caught again by the
// outer catch below, so this inner try/catch has no visible effect.
throw Ex;
}
// NOTE(review): the first resolved address is used with an IPv4 (InterNetwork) socket;
// whether AddressList[0] is always IPv4 is not established here.
IPAddress ipAddress = ipHostInfo.AddressList[0];
IPEndPoint remoteEP = new IPEndPoint(ipAddress, site.Port);
sock = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);
sock.SendTimeout = iTimeOut;
sock.ReceiveTimeout = iTimeOut;
try
{
sock.Connect(remoteEP);
}
catch (Exception Ex)
{
// Same pattern as above: rethrown only to be swallowed by the outer catch.
throw Ex;
}
foreach (KeyValuePair<int, string> kvUrl in listUrl)
{
site = new Uri(kvUrl.Value);
// Request text; note the URL-decoded path and the trailing NUL character that is sent too.
string sendMsg = "GET " + HttpUtility.UrlDecode(site.PathAndQuery) + " HTTP/1.1\r\n" +
"Accept: image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, application/vnd.ms-excel, application/msword, application/vnd.ms-powerpoint, */*\r\n" +
"Accept-Language:en-us\r\n" +
"Accept-Encoding:gb2312, deflate\r\n" +
"User-Agent: Mozilla/4.0\r\n" +
"Host: " + site.Host + "\r\n\r\n" + '\0';
// Send
byte[] msg = Encoding.GetEncoding(sCoding).GetBytes(sendMsg);
if ((nBytes = sock.Send(msg)) == 0)
{
// Nothing could be sent: give up and return what has been collected so far.
sock.Shutdown(SocketShutdown.Both);
sock.Close();
return listResult;
}
// Receive
byte[] bytes = new byte[2048];
// 0x7F: bytes above this value are outside the ASCII range.
byte bt = Convert.ToByte('\x7f');
do
{
int count = 0;
try
{
// One slot of the buffer is kept free (Length - 1) for the extra byte read below.
nBytes = sock.Receive(bytes, bytes.Length - 1, 0);
}
catch (Exception Ex)
{
// A receive error (including a timeout) ends the loop for this URL.
string str = Ex.Message;
nBytes = -1;
}
if (nBytes <= 0) break;
if (bytes[nBytes - 1] > bt)
{
// Count the run of non-ASCII bytes at the end of the chunk.
for (int i = nBytes - 1; i >= 0; i--)
{
if (bytes[i] > bt)
count++;
else
break;
}
// An odd run length triggers the read of exactly one more byte, appended to the chunk
// (presumably to avoid cutting a two-byte character in half; TODO confirm).
if (count % 2 == 1)
{
count = sock.Receive(bytes, nBytes, 1, 0);
if (count < 0)
break;
nBytes = nBytes + count;
}
}
else
// Chunk ends in an ASCII byte: write a NUL into the slot after the data.
bytes[nBytes] = (byte)'\0';
string s = Encoding.GetEncoding(sCoding).GetString(bytes, 0, nBytes);
sbHtml.Append(s);
} while (nBytes > 0);
// Store the text for this URL and start a fresh builder for the next one.
listResult.Add(new KeyValuePair<int, string>(kvUrl.Key, sbHtml.ToString()));
sbHtml = null;
sbHtml = new StringBuilder();
}
}
catch (Exception Ex)
{
// Every error ends the batch silently; results gathered so far are still returned.
string s = Ex.Message;
try
{
sock.Shutdown(SocketShutdown.Both);
sock.Close();
}
catch { }
}
finally
{
// Runs on every path (also after the catch above already closed the socket, and when
// sock is still null); the resulting exceptions are swallowed.
try
{
sock.Shutdown(SocketShutdown.Both);
sock.Close();
}
catch { }
}
return listResult;
}
#endregion 根据超链接地址获取页面内容
/// <summary>
/// Kind of document reported by <c>GetPageType</c>.
/// </summary>
public enum PageType : int
{
    /// <summary>Ordinary HTML page (value 0).</summary>
    HTML = 0,

    /// <summary>RSS feed (value 1).</summary>
    RSS = 1
}
/// <summary>
/// Decides whether a downloaded document is an HTML page or an RSS feed.
/// </summary>
/// <param name="sUrl">Address the document was loaded from; used to resolve the feed link.</param>
/// <param name="sHtml">
/// Document text. When the page advertises a feed through a
/// <c>&lt;link type="application/rss+xml"&gt;</c> tag that has an href, the feed is downloaded
/// and this argument is REPLACED by the downloaded text.
/// </param>
/// <returns>
/// <c>PageType.RSS</c> when an advertised feed was followed or the document itself contains
/// an <c>&lt;rss ...&gt;</c> tag; otherwise <c>PageType.HTML</c>.
/// </returns>
public static PageType GetPageType(string sUrl, ref string sHtml)
{
    // Does the page advertise a feed?
    Match feedLink = Regex.Match(
        sHtml,
        @"<link\s+[^>]*((type=""application/rss\+xml"")|(type=application/rss\+xml))[^>]*>",
        RegexOptions.IgnoreCase);

    if (!feedLink.Success)
    {
        // No advertisement: check whether the document is a feed itself.
        bool isFeed = Regex.IsMatch(sHtml, @"<rss\s+[^>]*>", RegexOptions.IgnoreCase);
        return isFeed ? PageType.RSS : PageType.HTML;
    }

    // Pull the address out of the link tag (single-quoted, double-quoted or bare).
    Match address = Regex.Match(
        feedLink.Value,
        @"href=\s*(?:'(?<href>[^']+)'|""(?<href>[^""]+)""|(?<href>[^>\s]+))",
        RegexOptions.IgnoreCase);
    if (!address.Success)
    {
        return PageType.HTML;
    }

    // The address may be relative; GetUrl is defined elsewhere in this file
    // (presumably it resolves the link against sUrl).
    string rssFile = GetUrl(sUrl, address.Groups["href"].Value);
    sHtml = GetHtmlByUrl(rssFile);
    return PageType.RSS;
}
}
}