ASP.NET 中导出文档有很多方法。其中比较推荐、兼容性较好的做法是:把页面导出为 Word/Excel 兼容的 MHTML 格式,并将响应的内容类型(Content-Type)设置为 Word 或 Excel。
这种方法的好处是可以建立一个通用的库。本文给出了一个通用的导出类,实践中使用效果较好。(PS:HTML 解析类写得比较仓促,各位如有兴趣可重写一下~)
///
///@Author Simsure
///@Version 1.0
///
using System;
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Net;
using System.IO;
/// <summary>
/// Packs an HTML page together with the style sheets and images it references
/// into one MHTML (MIME multipart/related) document and writes it to the
/// current HTTP response using a Word or Excel content type.
/// </summary>
public class MHtmlDocument
{
    /// <summary>Delimiter line written before every MIME part ("--" followed by the boundary).</summary>
    protected const string _NEXT_PART = "------=_NextPart_WIND";
    /// <summary>Boundary value announced in the top-level Content-Type header.</summary>
    protected const string _4NEXT_PART = "----=_NextPart_WIND";
    private const string CRLF = "\r\n";

    protected string _encoding = "utf-8";
    // Directory (no trailing slash) of the loaded page; relative links are resolved against it.
    protected string _pageVirutalPath;
    // "scheme://authority" of the loaded page; rooted links ("/x") are resolved against it.
    protected string _rootHttpVirtualPath;
    protected bool _enableBase64 = false;
    protected HtmlDocument _baseDocument;
    protected HttpRequest _request;
    protected HttpResponse _response;
    protected static readonly log4net.ILog _log = log4net.LogManager.GetLogger("MHtmlDocument");

    /// <summary>Character set name used for the HTML part and the response.</summary>
    public string Encoding
    {
        get { return _encoding; }
    }

    /// <summary>"scheme://authority" prefix computed by <see cref="BuildVirtualPath"/>.</summary>
    public string RootHttpVirtualPath
    {
        get { return _rootHttpVirtualPath; }
    }

    /// <summary>Directory URL of the loaded page computed by <see cref="BuildVirtualPath"/>.</summary>
    public string PageVirtualPath
    {
        get { return _pageVirutalPath; }
    }

    /// <summary>When true, binary parts are written base64-encoded; otherwise as raw bytes.</summary>
    public bool EnableBase64
    {
        get { return _enableBase64; }
        set { _enableBase64 = value; }
    }

    /// <summary>The parsed HTML document whose links will be embedded.</summary>
    public HtmlDocument BaseDocument
    {
        get { return _baseDocument; }
    }

    /// <summary>
    /// Binds the instance to the request and response of <c>HttpContext.Current</c>.
    /// </summary>
    public MHtmlDocument()
    {
        _request = HttpContext.Current.Request;
        _response = HttpContext.Current.Response;
        _baseDocument = new HtmlDocument();
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and parses it.
    /// </summary>
    /// <param name="url">Absolute http(s) URL, rooted path ("/a/b.aspx") or path relative to the current request.</param>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="url"/> is empty.</exception>
    public void LoadFromUrl(string url)
    {
        if (url == null)
            throw new ArgumentNullException("url");
        if (url.Length == 0)
            throw new ArgumentException("url must not be empty.", "url");

        // Resolve the document URL BEFORE BuildVirtualPath re-bases the page path.
        // Resolving it afterwards applied the directory part of a relative URL
        // twice ("sub/a.aspx" became ".../sub/sub/a.aspx").
        string absoluteUrl = IsAbsoluteHttpUrl(url) ? url : ResolveAgainstCurrentRequest(url);
        BuildVirtualPath(url);

        string contentType;
        string content = GetHttpString(absoluteUrl, out contentType, out _encoding);
        _baseDocument.Load(content, _encoding);
    }

    /// <summary>
    /// Renders <paramref name="ctrl"/> to HTML and loads the result, using
    /// <paramref name="url"/> as the base for resolving links.
    /// </summary>
    public void LoadFromControl(string url, System.Web.UI.Control ctrl)
    {
        using (StringWriter sw = new StringWriter())
        using (System.Web.UI.HtmlTextWriter text = new System.Web.UI.HtmlTextWriter(sw))
        {
            ctrl.RenderControl(text);
            text.Flush();
            Load(url, sw.ToString());
        }
    }

    /// <summary>
    /// Parses already available HTML; <paramref name="url"/> is only used as
    /// the base for resolving the links found in <paramref name="content"/>.
    /// </summary>
    public void Load(string url, string content)
    {
        BuildVirtualPath(url);
        _baseDocument.Load(content, _encoding);
    }

    /// <summary>Registers an additional resource to embed when exporting.</summary>
    public void AddLinkFile(LinkInfo link)
    {
        _baseDocument.LinkInfos.Add(link);
    }

    #region Export
    /// <summary>Exports as "Export.doc".</summary>
    public void ExportWord()
    {
        ExportWord("Export");
    }

    /// <summary>Exports as "<paramref name="fileName"/>.doc" with the Word content type.</summary>
    public void ExportWord(string fileName)
    {
        Export(fileName + ".doc", "application/ms-word");
    }

    /// <summary>Exports as "Export.xls".</summary>
    public void ExportExcel()
    {
        ExportExcel("Export");
    }

    /// <summary>Exports as "<paramref name="fileName"/>.xls" with the Excel content type.</summary>
    public void ExportExcel(string fileName)
    {
        Export(fileName + ".xls", "application/ms-excel");
    }

    /// <summary>
    /// Clears the buffered response and writes the MHTML document to it as an
    /// attachment. The response is neither flushed nor ended here.
    /// </summary>
    /// <param name="fileName">Download file name (URL-encoded into Content-Disposition).</param>
    /// <param name="contentType">Value for the response Content-Type header.</param>
    public void Export(string fileName, string contentType)
    {
        _response.Buffer = true;
        _response.Clear();
        _response.ContentEncoding = System.Text.Encoding.GetEncoding(_encoding);
        _response.AppendHeader("Content-Disposition", "attachment;filename=" + HttpUtility.UrlEncode(fileName));
        _response.ContentType = contentType;

        AddMimePartHeader();
        AddHtmlPart();
        foreach (LinkInfo link in _baseDocument.LinkInfos)
        {
            // Links without a URI have nothing to download.
            if (!string.IsNullOrEmpty(link.uri))
                AddLinkPart(link);
        }
        AddFootPart();
    }
    #endregion

    #region Output
    void Output(string s)
    {
        _response.Write(s);
    }

    void Output(byte[] buf)
    {
        if (buf != null && buf.Length > 0)
            _response.BinaryWrite(buf);
    }

    // Writes the top-level MIME headers. The boundary parameter is now closed
    // with its quote; the original left the quoted string unterminated.
    void AddMimePartHeader()
    {
        Output("MIME-Version: 1.0" + CRLF);
        Output("Content-Type: multipart/related;" + CRLF);
        Output("\ttype=\"text/html\";" + CRLF);
        Output("\tboundary=\"" + _4NEXT_PART + "\"");
    }

    // Blank line that terminates the previous header block or part body.
    void AddPartSeperator()
    {
        Output(CRLF + CRLF);
    }

    void AddHtmlPartHeader(string location)
    {
        AddPartHeader(location, "text/html; charset=\"" + _encoding + "\"", "quoted-printable");
    }

    void AddCssPartHeader(string location)
    {
        AddPartHeader(location, "text/css", "quoted-printable");
    }

    // Writes the delimiter line followed by the three headers of one part.
    void AddPartHeader(string location, string contentType, string transferEncoding)
    {
        AddPartSeperator();
        Output(_NEXT_PART);
        Output(CRLF);
        Output("Content-Transfer-Encoding: ");
        Output(transferEncoding);
        Output(CRLF);
        Output("Content-Location: ");
        Output(location);
        Output(CRLF);
        Output("Content-Type: ");
        Output(contentType);
        Output(CRLF + CRLF);
    }

    void AddHtmlPart()
    {
        AddHtmlPartHeader("Html");
        Output(_baseDocument.QuotedPrintableString);
    }

    // Downloads one linked resource and writes it as its own MIME part.
    void AddLinkPart(LinkInfo link)
    {
        string contentType;
        if (link.type == LinkType.Text)
        {
            AddCssPartHeader(link.id);
            string encodingType;
            string text = GetHttpString(link.uri, out contentType, out encodingType);
            // The header announces quoted-printable, so the body has to be
            // encoded too; raw CSS containing '=' would otherwise be mis-decoded.
            Output(QuotedPrintableEncoding.Encode(text, _encoding));
            return;
        }

        byte[] buf = GetHttpData(link.uri, out contentType);
        if (buf == null)
            return; // download failed (already logged); do not emit a part with an empty Content-Type

        AddPartHeader(link.id, contentType, _enableBase64 ? "base64" : "binary");
        if (_enableBase64)
        {
            // MIME limits base64 lines to 76 characters.
            Output(Convert.ToBase64String(buf, Base64FormattingOptions.InsertLineBreaks));
        }
        else
        {
            Output(buf);
        }
    }

    // Writes the closing delimiter ("--boundary--").
    void AddFootPart()
    {
        AddPartSeperator();
        Output(CRLF + CRLF);
        Output(_NEXT_PART);
        Output("--");
    }
    #endregion

    #region util
    /// <summary>Downloads <paramref name="url"/> as text, discarding content type and charset.</summary>
    public string GetHttpString(string url)
    {
        string contentType, encodingType;
        return GetHttpString(url, out contentType, out encodingType);
    }

    /// <summary>
    /// Downloads <paramref name="url"/> (resolved through <see cref="BuildPath(string)"/>)
    /// and decodes it with the charset announced by the server.
    /// </summary>
    /// <param name="url">Absolute or relative URL.</param>
    /// <param name="contentType">Lower-cased Content-Type of the response; a text/html utf-8 default if the request fails.</param>
    /// <param name="encodingType">Charset used for decoding; "utf-8" if none or an unknown one was announced.</param>
    /// <returns>The response body, or an empty string if the request fails (the error is logged).</returns>
    public string GetHttpString(string url, out string contentType, out string encodingType)
    {
        url = BuildPath(url);
        AddSessionIdToUrl(ref url);
        contentType = "text/html; charset=\"utf-8\"";
        encodingType = "utf-8";
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        try
        {
            using (WebResponse response = request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            {
                contentType = response.ContentType.ToLowerInvariant();
                HttpWebResponse httpResponse = response as HttpWebResponse;
                string charset = httpResponse == null ? null : httpResponse.CharacterSet;
                System.Text.Encoding encode = ResolveEncoding(charset, out encodingType);
                using (StreamReader reader = new StreamReader(responseStream, encode))
                {
                    return reader.ReadToEnd();
                }
            }
        }
        catch (Exception e)
        {
            _log.Error(e.ToString());
        }
        return string.Empty;
    }

    /// <summary>
    /// Downloads <paramref name="url"/> (resolved through <see cref="BuildPath(string)"/>) as bytes.
    /// </summary>
    /// <param name="url">Absolute or relative URL.</param>
    /// <param name="contentType">Content-Type of the response; empty if the request fails.</param>
    /// <returns>Exactly the bytes received, or null if the request fails (the error is logged).</returns>
    public byte[] GetHttpData(string url, out string contentType)
    {
        url = BuildPath(url);
        AddSessionIdToUrl(ref url);
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        contentType = string.Empty;
        try
        {
            using (WebResponse response = request.GetResponse())
            using (Stream responseStream = response.GetResponseStream())
            using (MemoryStream mem = new MemoryStream())
            {
                contentType = response.ContentType;
                byte[] buf = new byte[4096];
                int len;
                while ((len = responseStream.Read(buf, 0, buf.Length)) > 0)
                    mem.Write(buf, 0, len);
                // ToArray copies only the written bytes; GetBuffer would also
                // return the unused capacity as trailing zero bytes.
                return mem.ToArray();
            }
        }
        catch (Exception e)
        {
            _log.Error(e.ToString());
        }
        return null;
    }

    /// <summary>
    /// Computes <see cref="RootHttpVirtualPath"/> and <see cref="PageVirtualPath"/>
    /// for the page at <paramref name="url"/>. Does nothing when it is null.
    /// </summary>
    public void BuildVirtualPath(string url)
    {
        if (url == null) return;

        if (IsAbsoluteHttpUrl(url))
        {
            Uri uri = new Uri(url);
            _rootHttpVirtualPath = uri.Scheme + "://" + uri.Authority;
            _pageVirutalPath = _rootHttpVirtualPath + uri.AbsolutePath;
        }
        else
        {
            Uri current = _request.Url;
            _rootHttpVirtualPath = current.Scheme + "://" + current.Authority;
            _pageVirutalPath = ResolveAgainstCurrentRequest(url);
        }

        // Drop the query string, then the last path segment, leaving the directory.
        int pos = _pageVirutalPath.IndexOf('?');
        if (pos > 0)
            _pageVirutalPath = _pageVirutalPath.Substring(0, pos);
        pos = _pageVirutalPath.LastIndexOf('/');
        if (pos > 0)
            _pageVirutalPath = _pageVirutalPath.Substring(0, pos);
    }

    /// <summary>Resolves <paramref name="path"/> against the loaded page's root and directory.</summary>
    public string BuildPath(string path)
    {
        return BuildPath(_rootHttpVirtualPath, _pageVirutalPath, path);
    }

    // Resolves 'relative' to an absolute URL:
    //   http(s)://...  -> returned unchanged
    //   ./x, ../x      -> pageBasePath with one trailing segment removed per ".."
    //   /x             -> rootPath + relative
    //   x              -> pageBasePath + "/" + relative
    // A null or empty reference yields pageBasePath itself.
    string BuildPath(string rootPath, string pageBasePath, string relative)
    {
        if (string.IsNullOrEmpty(relative)) return pageBasePath;
        if (IsAbsoluteHttpUrl(relative)) return relative;

        if (relative[0] == '.')
        {
            int levelsUp = FindPathDeepth(ref relative);
            int j = pageBasePath.Length - 1;
            // Walk backwards, consuming one '/' per level to climb.
            for (; j >= 0; --j)
            {
                if (levelsUp <= 0) break;
                if (pageBasePath[j] == '/') --levelsUp;
            }
            return pageBasePath.Substring(0, j + 1) + '/' + relative;
        }

        if (relative[0] == '/') return rootPath + relative;
        return pageBasePath + "/" + relative;
    }

    // Strips the leading "./" and "../" segments from 'relative' and returns
    // the number of "../" segments found.
    int FindPathDeepth(ref string relative)
    {
        if (relative[0] != '.') return 0;
        int c = 0;
        int i = 0;
        for (; i < relative.Length; ++i)
        {
            if (relative[i] != '.' && relative[i] != '/') break;
            if (relative[i] == '.' && relative.Length > i + 1)
            {
                if (relative[i + 1] == '.')
                {
                    ++c;
                    ++i;
                }
                else if (relative[i + 1] == '/')
                {
                    ++i;
                }
            }
        }
        relative = relative.Substring(i);
        return c;
    }

    // True only for URLs that start with the http:// or https:// scheme, so a
    // relative path that merely begins with "http" is not treated as absolute.
    static bool IsAbsoluteHttpUrl(string url)
    {
        return url.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
            || url.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
    }

    // Resolves a non-absolute URL against the URL of the current request.
    // An empty reference resolves to the current page itself.
    string ResolveAgainstCurrentRequest(string url)
    {
        Uri current = _request.Url;
        string root = current.Scheme + "://" + current.Authority;
        string currentPage = root + current.AbsolutePath;
        if (url.Length == 0) return currentPage;
        if (url[0] == '/') return root + url;

        int p = currentPage.LastIndexOf('/');
        string currentDir = p > 0 ? currentPage.Substring(0, p) : currentPage;
        return BuildPath(root, currentDir, url);
    }

    // Maps a charset announced by a server to an Encoding. Surrounding quotes
    // are removed; a missing or unknown charset falls back to UTF-8.
    static System.Text.Encoding ResolveEncoding(string charset, out string encodingName)
    {
        if (!string.IsNullOrEmpty(charset))
        {
            string cleaned = charset.Trim().Trim('"', '\'');
            if (cleaned.Length > 0)
            {
                try
                {
                    System.Text.Encoding known = System.Text.Encoding.GetEncoding(cleaned);
                    encodingName = cleaned;
                    return known;
                }
                catch (ArgumentException)
                {
                    // Unknown charset name: use the fallback below.
                }
            }
        }
        encodingName = "utf-8";
        return System.Text.Encoding.UTF8;
    }

    void AddSessionIdToUrl(ref string url)
    {
        // Extension point: append whatever session data the target pages need.
    }
    #endregion
}
/// <summary>
/// Minimal single-pass HTML scanner. It copies the markup to an output buffer,
/// removes hidden input elements, replaces the href of link tags and the src
/// of img tags with generated ids, and records those resources in
/// <see cref="LinkInfos"/>. The result is exposed quoted-printable encoded.
/// </summary>
public class HtmlDocument
{
    private int _pos = -1;                       // index of the character being scanned in _html
    private string _html;
    private int _remove = -1;                    // output index from which a hidden element is cut
    private string _qt;                          // quoted-printable result of the last Load
    private string _encoding = "utf-8";
    private const string SCRIPT = "script";
    private StringBuilder _builder = new StringBuilder();
    private int _builderTageStartPos = 0;        // output index of the '<' of the current tag
    // NOTE(review): nothing in this class pushes onto _hiddenTags, so it is
    // always empty; the checks are kept in case hidden containers are added.
    private Stack<string> _hiddenTags = new Stack<string>();
    private Stack<string> _scriptTags = new Stack<string>();
    private Tag _tag = new Tag();
    private List<LinkInfo> _links = new List<LinkInfo>();
    private static Regex _hrefAttribute = new Regex("href\\s*=\\s*[\"'](?<AttrValue>.*?)[\"']", RegexOptions.Compiled | RegexOptions.IgnoreCase);
    private static Regex _srcAttribute = new Regex("src\\s*=\\s*[\"'](?<AttrValue>.*?)[\"']", RegexOptions.Compiled | RegexOptions.IgnoreCase);
    private static Regex _typeAttribute = new Regex("type\\s*=\\s*[\"'](?<AttrValue>.*?)[\"']", RegexOptions.Compiled | RegexOptions.IgnoreCase);
    // Maps an already seen resource URI to the id generated for it.
    private SortedList<string, string> _hash = new SortedList<string, string>(StringComparer.CurrentCultureIgnoreCase);

    public HtmlDocument()
    {
        _tag.content = new StringBuilder();
    }

    /// <summary>Quoted-printable encoding of the processed markup of the last load.</summary>
    public string QuotedPrintableString
    {
        get
        {
            return _qt;
        }
    }

    /// <summary>Resources referenced by the document (plus any added by the caller).</summary>
    public List<LinkInfo> LinkInfos
    {
        get { return _links; }
    }

    /// <summary>Parses <paramref name="html"/> using the current encoding.</summary>
    public void Load(string html)
    {
        Load(html, _encoding);
    }

    /// <summary>
    /// Parses <paramref name="html"/> and prepares <see cref="QuotedPrintableString"/>.
    /// </summary>
    /// <param name="html">Markup to process; null is treated as empty.</param>
    /// <param name="encoding">Charset name used for the quoted-printable encoding.</param>
    public void Load(string html, string encoding)
    {
        _pos = -1;
        _html = html ?? string.Empty;
        _encoding = encoding;
        // Start from a clean output; without this a second Load appended its
        // markup to that of the first. Collected links are kept on purpose.
        _builder.Length = 0;
        _hiddenTags.Clear();
        _scriptTags.Clear();
        Parse();
    }

    // Copies text to the output and hands every '<' to ReadTag.
    void Parse()
    {
        while (++_pos < _html.Length)
        {
            switch (_html[_pos])
            {
                case '<':
                    ReadTag();
                    break;
                default:
                    if (_hiddenTags.Count == 0)
                        _builder.Append(_html[_pos]);
                    break;
            }
        }
        _qt = QuotedPrintableEncoding.Encode(_builder, _encoding);
    }

    // Reads one tag starting at the '<' under _pos, mirrors it to the output
    // and, when a tag name could be determined, passes it to ProcessTag.
    void ReadTag()
    {
        _tag.name = null;
        _tag.type = TagType.TagBegin;
        _tag.content.Length = 0;
        _tag.content.Append('<');
        _builder.Append('<');
        _builderTageStartPos = _builder.Length - 1;
        int endIndex = -1;      // index in _tag.content just after "</"
        bool empty = true;      // no name character seen yet
        int scriptPos = -1;     // progress of the match against "script"
        bool isScript = false;
        bool accept = false;
        if (_hiddenTags.Count == 0) _remove = _builder.Length - 1;

        while (++_pos < _html.Length)
        {
            char c = _html[_pos];
            if (empty && (c == 's' || c == 'S'))
            {
                isScript = true;
                scriptPos = 0;
            }
            if (scriptPos >= 0 && scriptPos < SCRIPT.Length && char.ToLowerInvariant(c) != SCRIPT[scriptPos++])
                isScript = false;

            _tag.content.Append(c);
            if (_hiddenTags.Count == 0)
                _builder.Append(c);

            switch (c)
            {
                case '>':
                    accept = true;
                    break;
                case '<':
                    if (_scriptTags.Count > 0)
                    {
                        // Inside a script a '<' need not open a tag (e.g. "a < b").
                        // Give the second '<' back to the scanner and undo the copy
                        // just appended; the original removed the character BEFORE
                        // it, which dropped one character and duplicated the '<'.
                        --_pos;
                        _tag.content.Remove(_tag.content.Length - 1, 1);
                        if (_hiddenTags.Count == 0)
                            _builder.Remove(_builder.Length - 1, 1);
                        accept = true;
                    }
                    break;
                case ' ':
                    TryCaptureTagName(empty);
                    break;
                case '\t':
                case '\r':
                case '\n':
                    // Tags are often split across lines; any whitespace ends the name.
                    TryCaptureTagName(empty);
                    empty = false;
                    break;
                case '/':
                    if (empty)
                    {
                        endIndex = _tag.content.Length;
                        _tag.type = TagType.TagEnd;
                    }
                    break;
                default:
                    empty = false;
                    break;
            }
            if (accept) break;
        }

        // Tags without attributes only get a name when one is needed below.
        if (isScript)
            _tag.name = SCRIPT;
        else if (_tag.type == TagType.TagEnd && endIndex < _tag.content.Length && (_hiddenTags.Count > 0 || _scriptTags.Count > 0))
            _tag.name = _tag.content.ToString(endIndex, _tag.content.Length - 1 - endIndex).ToLowerInvariant();

        if (_tag.content.Length > 2 && _tag.content[_tag.content.Length - 2] == '/' && _tag.content[_tag.content.Length - 1] == '>')
            _tag.type = TagType.TagClosure;

        if (_tag.name != null) ProcessTag();
    }

    // Takes the text between '<' and the whitespace just appended as the tag
    // name, unless a name is already known, no name character has been seen
    // yet, or the scanner is inside a script.
    void TryCaptureTagName(bool empty)
    {
        if (_tag.name == null && !empty && _scriptTags.Count == 0)
            _tag.name = _tag.content.ToString(1, _tag.content.Length - 2).ToLowerInvariant();
    }

    // Applies the per-tag rules: script nesting, hidden inputs, link and img.
    void ProcessTag()
    {
        if (_tag.name == SCRIPT)
        {
            if (_tag.type == TagType.TagEnd)
            {
                // A stray </script> must not throw on the empty stack.
                if (_scriptTags.Count > 0)
                    _scriptTags.Pop();
            }
            else
            {
                _scriptTags.Push(SCRIPT);
            }
            return;
        }

        if (_tag.type == TagType.TagBegin || _tag.type == TagType.TagClosure)
        {
            switch (_tag.name)
            {
                case "input":
                    // input has no content, so it is always handled as self-closed.
                    _tag.type = TagType.TagClosure;
                    if (GetTagAttributeValue(_typeAttribute, "type") == "hidden")
                        _builder.Remove(_remove, _builder.Length - _remove);
                    break;
                case "link":
                    EmbedReference(_hrefAttribute, "href", LinkType.Text);
                    break;
                case "img":
                    EmbedReference(_srcAttribute, "src", LinkType.Binary);
                    break;
            }
        }
        else if (_tag.type == TagType.TagEnd)
        {
            if (_hiddenTags.Count > 0)
            {
                string s = _hiddenTags.Peek();
                if (s == _tag.name)
                {
                    _hiddenTags.Pop();
                    if (_hiddenTags.Count == 0)
                        _builder.Remove(_remove, _builder.Length - _remove);
                }
            }
        }
    }

    // Replaces the attribute value of the current tag in the output with an id
    // (reusing the id of an already known URI) and records the resource.
    void EmbedReference(Regex attribute, string attrName, LinkType type)
    {
        if (_hiddenTags.Count > 0) return;

        int p;
        string v = GetTagAttributeValue(attribute, attrName, out p);
        // An empty value has nothing to embed, and StringBuilder.Replace
        // rejects an empty search string.
        if (p < 0 || v.Length == 0) return;

        string uid;
        if (!_hash.TryGetValue(v, out uid))
            uid = Guid.NewGuid().ToString();
        _builder.Replace(v, uid, p + _builderTageStartPos, v.Length);

        LinkInfo info = new LinkInfo();
        info.uri = v;
        info.id = uid;
        info.type = type;
        AddLink(info);
    }

    // Records a resource once per URI.
    void AddLink(LinkInfo link)
    {
        if (_hash.ContainsKey(link.uri)) return;
        _hash.Add(link.uri, link.id);
        _links.Add(link);
    }

    // Returns the lower-cased attribute value of the current tag, or "" if absent.
    string GetTagAttributeValue(Regex regex, string attr)
    {
        if (_tag.name == null) return string.Empty;
        Match m = regex.Match(_tag.content.ToString());
        if (m.Success)
            return m.Groups["AttrValue"].Value.ToLowerInvariant();
        return string.Empty;
    }

    // Returns the attribute value of the current tag and its offset inside
    // the tag text; pos is -1 and the result "" if the attribute is absent.
    string GetTagAttributeValue(Regex regex, string attr, out int pos)
    {
        pos = -1;
        if (_tag.name == null) return string.Empty;
        Match m = regex.Match(_tag.content.ToString());
        if (m.Success)
        {
            Group g = m.Groups["AttrValue"];
            if (g.Success)
            {
                pos = g.Index;
                return g.Value;
            }
        }
        return string.Empty;
    }
}
/// <summary>Kind of tag recognised by the HTML scanner.</summary>
public enum TagType
{
    /// <summary>Opening tag; the kind every tag starts out as.</summary>
    TagBegin = 0,

    /// <summary>Closing tag, i.e. a '/' directly follows the '&lt;'.</summary>
    TagEnd = 1,

    /// <summary>Self-closed tag, i.e. the tag text ends with "/&gt;".</summary>
    TagClosure = 2
}
/// <summary>
/// Scratch record describing the tag currently being scanned.
/// </summary>
public class Tag
{
/// <summary>Lower-cased tag name, or null when no name was determined.</summary>
public string name;
/// <summary>Whether the tag opens, closes or is self-closed.</summary>
public TagType type;
/// <summary>Raw text of the tag; not created here, the owner must assign it.</summary>
public StringBuilder content;
}
/// <summary>How a linked resource is downloaded and embedded.</summary>
public enum LinkType
{
    /// <summary>Downloaded as text (used for link tags).</summary>
    Text = 0,

    /// <summary>Downloaded as raw bytes (used for img tags).</summary>
    Binary = 1
}
/// <summary>
/// A resource referenced by the HTML document that is to be embedded as a
/// separate MIME part.
/// </summary>
public class LinkInfo
{
/// <summary>Original URI of the resource as found in the markup.</summary>
public string uri;
/// <summary>Identifier that replaces the URI in the markup and is used as the part's Content-Location.</summary>
public string id;
/// <summary>Whether the resource is fetched as text or as binary data.</summary>
public LinkType type;
}
/// <summary>
/// Quoted-printable encoder/decoder. Encoding converts the text to bytes with
/// the requested charset and escapes every byte that is not printable ASCII
/// as "=XX"; CR and LF are passed through and no soft line breaks are
/// inserted. Decoding accepts soft line breaks and tolerates malformed input.
/// </summary>
public class QuotedPrintableEncoding
{
    private const byte EQUALS = 61;
    private const byte CR = 13;
    private const byte LF = 10;
    private const byte SPACE = 32;
    private const string HEX_DIGITS = "0123456789ABCDEF";

    /// <summary>Encodes the builder's text as UTF-8 quoted-printable.</summary>
    public static string Encode(StringBuilder builder)
    {
        return Encode(builder, "utf-8");
    }

    /// <summary>Encodes <paramref name="str"/> as UTF-8 quoted-printable.</summary>
    public static string Encode(string str)
    {
        return Encode(str, "utf-8");
    }

    /// <summary>
    /// Encodes <paramref name="str"/> as quoted-printable in the given charset.
    /// </summary>
    /// <param name="str">Text to encode; null or empty yields an empty string.</param>
    /// <param name="encodingType">Charset name understood by <c>Encoding.GetEncoding</c>.</param>
    /// <returns>ASCII text in which '=', control and non-ASCII bytes appear as "=XX".</returns>
    public static string Encode(string str, string encodingType)
    {
        Encoding encoding = Encoding.GetEncoding(encodingType);
        if (string.IsNullOrEmpty(str)) return string.Empty;
        // The text is converted in one piece: converting fixed-size chunks
        // split surrogate pairs at chunk boundaries and corrupted them.
        return EncodeBytes(encoding.GetBytes(str));
    }

    /// <summary>
    /// Encodes the builder's text as quoted-printable in the given charset.
    /// </summary>
    /// <param name="builder">Text to encode; null or empty yields an empty string.</param>
    /// <param name="encodingType">Charset name understood by <c>Encoding.GetEncoding</c>.</param>
    public static string Encode(StringBuilder builder, string encodingType)
    {
        return Encode(builder == null ? null : builder.ToString(), encodingType);
    }

    /// <summary>Decodes UTF-8 quoted-printable text.</summary>
    public static string Decode(string chars)
    {
        return Decode(chars, "utf-8");
    }

    /// <summary>
    /// Decodes quoted-printable text whose bytes are in the given charset.
    /// </summary>
    /// <param name="chars">Encoded text; null or empty yields an empty string.</param>
    /// <param name="encodingType">Charset name understood by <c>Encoding.GetEncoding</c>.</param>
    /// <returns>
    /// The decoded text. "=" followed by a line break is a soft line break and
    /// is removed; an "=" not followed by two hex digits is kept literally.
    /// </returns>
    public static string Decode(string chars, string encodingType)
    {
        Encoding encoding = Encoding.GetEncoding(encodingType);
        if (string.IsNullOrEmpty(chars)) return string.Empty;

        using (MemoryStream bytes = new MemoryStream(chars.Length))
        {
            int i = 0;
            while (i < chars.Length)
            {
                char c = chars[i];
                if (c == '=')
                {
                    int hi = i + 1 < chars.Length ? HexValue(chars[i + 1]) : -1;
                    int lo = i + 2 < chars.Length ? HexValue(chars[i + 2]) : -1;
                    if (hi >= 0 && lo >= 0)
                    {
                        bytes.WriteByte((byte)((hi << 4) | lo));
                        i += 3;
                        continue;
                    }
                    if (i + 1 < chars.Length && (chars[i + 1] == '\r' || chars[i + 1] == '\n'))
                    {
                        // Soft line break: skip "=", the line break, and the LF of a CRLF.
                        bool crlf = chars[i + 1] == '\r' && i + 2 < chars.Length && chars[i + 2] == '\n';
                        i += crlf ? 3 : 2;
                        continue;
                    }
                    // Malformed escape: keep the '=' as ordinary text.
                    bytes.WriteByte(EQUALS);
                    i++;
                    continue;
                }

                if (c <= 255)
                {
                    bytes.WriteByte((byte)c);
                    i++;
                }
                else
                {
                    // Not valid in quoted-printable, but convert it instead of
                    // failing; a surrogate pair is converted as one unit.
                    int count = char.IsHighSurrogate(c) && i + 1 < chars.Length && char.IsLowSurrogate(chars[i + 1]) ? 2 : 1;
                    byte[] raw = encoding.GetBytes(chars.Substring(i, count));
                    bytes.Write(raw, 0, raw.Length);
                    i += count;
                }
            }
            return encoding.GetString(bytes.GetBuffer(), 0, (int)bytes.Length);
        }
    }

    // Escapes a byte sequence. CR and LF are copied; a space is escaped only
    // when a line break follows it; every other byte outside '!'..'~', and
    // '=' itself, is written as "=XX".
    private static string EncodeBytes(byte[] bytes)
    {
        StringBuilder encoded = new StringBuilder(bytes.Length + bytes.Length / 4);
        for (int i = 0; i < bytes.Length; i++)
        {
            byte b = bytes[i];
            bool escape;
            if (b == CR || b == LF)
            {
                escape = false;
            }
            else if (b == SPACE)
            {
                // Trailing white space on a line would be lost in transport.
                escape = i + 1 < bytes.Length && (bytes[i + 1] == CR || bytes[i + 1] == LF);
            }
            else
            {
                escape = b < 33 || b > 126 || b == EQUALS;
            }

            if (escape)
            {
                encoded.Append('=');
                encoded.Append(HEX_DIGITS[b >> 4]);
                encoded.Append(HEX_DIGITS[b & 0x0F]);
            }
            else
            {
                encoded.Append((char)b);
            }
        }
        return encoded.ToString();
    }

    // Value of a hexadecimal digit (either case), or -1 if c is not one.
    private static int HexValue(char c)
    {
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'A' && c <= 'F') return c - 'A' + 10;
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        return -1;
    }
}
使用方法:
MHtmlDocument doc=new MHtmlDocument();
doc.LoadFromUrl("http://www.baidu.com");
doc.ExportWord();
//doc.ExportExcel();