/// <summary>
/// Builds the first and last day of the week that contains <paramref name="dt"/>.
/// The week runs from Sunday through Saturday.
/// </summary>
/// <param name="dt">Any date inside the week of interest.</param>
/// <returns>
/// The Sunday and the Saturday of that week, each formatted with the custom
/// format " yyyyMMdd " and joined by " - ".
/// </returns>
private string weekrange(System.DateTime dt)
{
    // DayOfWeek converts to 0 for Sunday through 6 for Saturday.
    int weeknow = Convert.ToInt32(dt.DayOfWeek);
    int daydiff = -weeknow;
    int dayadd = 6 - weeknow;

    // Offset from dt itself: offsetting from DateTime.Now combined today's date
    // with dt's weekday and produced a wrong range for most inputs.
    string datebegin = dt.AddDays(daydiff).ToString(" yyyyMMdd ");
    string dateend = dt.AddDays(dayadd).ToString(" yyyyMMdd ");
    return datebegin + " - " + dateend;
}
/// <summary>
/// Downloads the document at <paramref name="url"/>, extracts the first
/// <c>&lt;tag&gt;...&lt;/tag&gt;</c> element and returns it with HTML markup removed.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="tag">Name of the element to extract, without angle brackets (for example <c>title</c>).</param>
/// <returns>
/// The result of <c>StripHTML</c> applied to the first matching element; the
/// input to <c>StripHTML</c> is an empty string when no such element is found.
/// </returns>
private string GetHTML(string url, string tag)
{
    string result;
    WebRequest request = WebRequest.Create(url);

    // Dispose the response and its stream even when reading throws, so the
    // underlying connection is always released.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(resStream, Encoding.Default, false))
    {
        // One Stream.Read call may return only part of the body, and a fixed
        // buffer truncates large pages; read until the end of the stream instead.
        result = reader.ReadToEnd();
    }

    // "(?<name>" is named-group syntax in .NET, so the opening tag has to be
    // written as a literal "<" followed by the escaped tag name.
    string name = Regex.Escape(tag);
    Regex reg = new Regex(
        "<" + name + @"\b[^>]*>.*?</" + name + @"\s*>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);
    string output = reg.Match(result).Value;
    return StripHTML(output);
}
/// <summary>
/// Removes HTML markup from a string and decodes a small set of character entities.
/// </summary>
/// <param name="strHtml">Source text that may contain HTML.</param>
/// <returns>
/// The text with script elements, tags, comment delimiters and unrecognised
/// numeric entities removed, the listed named entities decoded, and any
/// remaining "&lt;", "&gt;" and CR/LF pairs stripped. An empty string is
/// returned when <paramref name="strHtml"/> is null or empty.
/// </returns>
public static string StripHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // Patterns are applied in order; matches of aryReg[i] are replaced by aryRep[i].
    string[] aryReg = {
        @"<script[^>]*?>.*?</script>",
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        @"&#(\d+);",
        @"-->",
        @"<!--.*\n"
    };
    string[] aryRep = {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\u00A1", // inverted exclamation mark, chr(161)
        "\u00A2", // cent sign, chr(162)
        "\u00A3", // pound sign, chr(163)
        "\u00A9", // copyright sign, chr(169)
        "",
        "\r\n",
        ""
    };

    string strOutput = strHtml;
    for (int i = 0; i < aryReg.Length; i++)
    {
        strOutput = Regex.Replace(strOutput, aryReg[i], aryRep[i], RegexOptions.IgnoreCase);
    }

    // string.Replace returns a new string; the results must be assigned or the
    // calls have no effect.
    strOutput = strOutput.Replace("<", "");
    strOutput = strOutput.Replace(">", "");
    strOutput = strOutput.Replace("\r\n", "");
    return strOutput;
}
/// <summary>
/// Builds the first and last day of the week that contains <paramref name="dt"/>.
/// The week runs from Sunday through Saturday.
/// </summary>
/// <param name="dt">Any date inside the week of interest.</param>
/// <returns>
/// The Sunday and the Saturday of that week, each formatted with the custom
/// format " yyyyMMdd " and joined by " - ".
/// </returns>
private string weekrange(System.DateTime dt)
{
    // DayOfWeek converts to 0 for Sunday through 6 for Saturday.
    int weeknow = Convert.ToInt32(dt.DayOfWeek);
    int daydiff = -weeknow;
    int dayadd = 6 - weeknow;

    // Offset from dt itself: offsetting from DateTime.Now combined today's date
    // with dt's weekday and produced a wrong range for most inputs.
    string datebegin = dt.AddDays(daydiff).ToString(" yyyyMMdd ");
    string dateend = dt.AddDays(dayadd).ToString(" yyyyMMdd ");
    return datebegin + " - " + dateend;
}
/// <summary>
/// Downloads the document at <paramref name="url"/>, extracts the first
/// <c>&lt;tag&gt;...&lt;/tag&gt;</c> element and returns it with HTML markup removed.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="tag">Name of the element to extract, without angle brackets (for example <c>title</c>).</param>
/// <returns>
/// The result of <c>StripHTML</c> applied to the first matching element; the
/// input to <c>StripHTML</c> is an empty string when no such element is found.
/// </returns>
private string GetHTML(string url, string tag)
{
    string result;
    WebRequest request = WebRequest.Create(url);

    // Dispose the response and its stream even when reading throws, so the
    // underlying connection is always released.
    using (WebResponse response = request.GetResponse())
    using (Stream resStream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(resStream, Encoding.Default, false))
    {
        // One Stream.Read call may return only part of the body, and a fixed
        // buffer truncates large pages; read until the end of the stream instead.
        result = reader.ReadToEnd();
    }

    // "(?<name>" is named-group syntax in .NET, so the opening tag has to be
    // written as a literal "<" followed by the escaped tag name.
    string name = Regex.Escape(tag);
    Regex reg = new Regex(
        "<" + name + @"\b[^>]*>.*?</" + name + @"\s*>",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);
    string output = reg.Match(result).Value;
    return StripHTML(output);
}
/// <summary>
/// Removes HTML markup from a string and decodes a small set of character entities.
/// </summary>
/// <param name="strHtml">Source text that may contain HTML.</param>
/// <returns>
/// The text with script elements, tags, comment delimiters and unrecognised
/// numeric entities removed, the listed named entities decoded, and any
/// remaining "&lt;", "&gt;" and CR/LF pairs stripped. An empty string is
/// returned when <paramref name="strHtml"/> is null or empty.
/// </returns>
public static string StripHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    // Patterns are applied in order; matches of aryReg[i] are replaced by aryRep[i].
    string[] aryReg = {
        @"<script[^>]*?>.*?</script>",
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        @"&#(\d+);",
        @"-->",
        @"<!--.*\n"
    };
    string[] aryRep = {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",
        "\u00A1", // inverted exclamation mark, chr(161)
        "\u00A2", // cent sign, chr(162)
        "\u00A3", // pound sign, chr(163)
        "\u00A9", // copyright sign, chr(169)
        "",
        "\r\n",
        ""
    };

    string strOutput = strHtml;
    for (int i = 0; i < aryReg.Length; i++)
    {
        strOutput = Regex.Replace(strOutput, aryReg[i], aryRep[i], RegexOptions.IgnoreCase);
    }

    // string.Replace returns a new string; the results must be assigned or the
    // calls have no effect.
    strOutput = strOutput.Replace("<", "");
    strOutput = strOutput.Replace(">", "");
    strOutput = strOutput.Replace("\r\n", "");
    return strOutput;
}