我遇到了一个问题。我编写了一个调查问卷生成器,支持不同类型的题目(列表、复选框、下拉列表),另外还有一种需要用户填写的表格。现在的问题出在表格的存储上:我在数据库里为此用了 3 张不同的表,之后再创建表格就比较复杂。因此我尝试改用 JavaScript 编辑器,让用户可以像在 Word 中那样创建表格。在后台代码(code-behind)中,我以字符串形式收到表格的 HTML 标记,并希望把它作为 JSON 存入数据库。标题:在 ASP.NET C# 中将 HTML 表格解析为 JSON
我这里有一段代码,可以先把表格转换为 DataSet 对象,再由 DataSet 转换为 XML,最后使用 JSON.NET 库把 XML 转换为 JSON。这一切都能正常工作,但问题是:当表格带有 “colspan” 和 “rowspan” 属性时,这段代码无法正确处理。你能帮我完善这段代码吗?这样大家都可以用这个示例在 C# 中把 HTML 表格解析为 JSON。
/// <summary>
/// Demonstrates the conversion chain: table markup -> DataSet -> XML -> JSON.
/// </summary>
/// <param name="sender">The object raising the page load event (unused).</param>
/// <param name="e">Event data (unused).</param>
// NOTE(review): the sample literal below no longer contains any <table>/<tr>/<td> markup
// (it appears to have been flattened to pipe-separated text), so ParseDataSet will find
// no tables in it. Replace it with the real HTML table markup before relying on the output.
protected void Page_Load(object sender, EventArgs e)
{
string table = @"
Project | Country | Header 1 | Header 2 | |||
---|---|---|---|---|---|---|
Child Header 1 | Child Header 2 | Child Header 3 | Child Header 4 | |||
Child Child Header 1 | tee | ssss | ||||
aas | ||||||
EUS | ES | |||||
ARP | IE | |||||
ARM | UK | |||||
SMRT | US | |||||
CM |";

    // Step 1: HTML -> DataSet (one DataTable per <table> element found).
    DataSet dataSet = HtmlTableParser.ParseDataSet(table);

    // Step 2: DataSet -> XML text. The writer is disposed as soon as the text is captured.
    string xml;
    using (StringWriter sw = new StringWriter())
    {
        dataSet.WriteXml(sw, XmlWriteMode.IgnoreSchema);
        xml = sw.ToString();
    }

    // Step 3: XML -> JSON via JSON.NET. "_x0020_" is the XML-name encoding of a space
    // (column names such as "Column 0" become "Column_x0020_0"), so it is turned back
    // into a plain space in the resulting JSON text.
    XmlDocument xd = new XmlDocument();
    xd.LoadXml(xml);
    string jsonText = JsonConvert.SerializeXmlNode(xd).Replace("_x0020_", " ");
}
/// <summary>
/// HtmlTableParser parses the contents of an HTML string into a System.Data DataSet or DataTable.
/// </summary>
/// <remarks>
/// Parsing is regular-expression based, so nested tables are not supported. The
/// <c>colspan</c> and <c>rowspan</c> attributes are not interpreted: every <c>td</c> cell
/// simply fills the next column of its row.
/// </remarks>
public class HtmlTableParser
{
    private const RegexOptions ExpressionOptions = RegexOptions.Singleline | RegexOptions.Multiline | RegexOptions.IgnoreCase;

    // The \b after each tag name keeps "<th" from matching "<thead", "<tr" from matching
    // "<track", and so on.
    private const string CommentPattern = "<!--(.*?)-->";
    private const string TablePattern = @"<table\b[^>]*>(.*?)</table>";
    private const string HeaderPattern = @"<th\b[^>]*>(.*?)</th>";
    private const string RowPattern = @"<tr\b[^>]*>(.*?)</tr>";
    private const string CellPattern = @"<td\b[^>]*>(.*?)</td>";

    // Matches the opening of a header cell; used only to detect that header cells exist.
    private const string HeaderOpenPattern = @"<th\b";

    /// <summary>
    /// Given an HTML string containing n tables, parse them into a DataSet containing n DataTables.
    /// </summary>
    /// <param name="html">An HTML string containing n HTML tables</param>
    /// <returns>A DataSet containing a DataTable for each HTML table in the input HTML</returns>
    /// <exception cref="ArgumentNullException"><paramref name="html"/> is null.</exception>
    public static DataSet ParseDataSet(string html)
    {
        if (html == null)
        {
            throw new ArgumentNullException("html");
        }

        DataSet dataSet = new DataSet();
        MatchCollection tableMatches = Regex.Matches(
            WithoutComments(html),
            TablePattern,
            ExpressionOptions);

        foreach (Match tableMatch in tableMatches)
        {
            dataSet.Tables.Add(ParseTable(tableMatch.Value));
        }

        return dataSet;
    }

    /// <summary>
    /// Given an HTML string containing a single table, parse that table to form a DataTable.
    /// </summary>
    /// <param name="tableHtml">An HTML string containing a single HTML table</param>
    /// <returns>A DataTable which matches the input HTML table</returns>
    /// <exception cref="ArgumentNullException"><paramref name="tableHtml"/> is null.</exception>
    public static DataTable ParseTable(string tableHtml)
    {
        if (tableHtml == null)
        {
            throw new ArgumentNullException("tableHtml");
        }

        string tableHtmlWithoutComments = WithoutComments(tableHtml);
        DataTable dataTable = new DataTable();
        MatchCollection rowMatches = Regex.Matches(
            tableHtmlWithoutComments,
            RowPattern,
            ExpressionOptions);

        // Detect header cells with the same case-insensitive rules the other patterns use,
        // and read them from the comment-free markup so commented-out headers are ignored.
        bool hasHeaderCells = Regex.IsMatch(
            tableHtmlWithoutComments,
            HeaderOpenPattern,
            ExpressionOptions);

        dataTable.Columns.AddRange(hasHeaderCells
            ? ParseColumns(tableHtmlWithoutComments)
            : GenerateColumns(rowMatches));

        ParseRows(rowMatches, dataTable);
        return dataTable;
    }

    /// <summary>
    /// Strip comments from an HTML string.
    /// </summary>
    /// <param name="html">An HTML string potentially containing comments</param>
    /// <returns>The input HTML string with comments removed</returns>
    private static string WithoutComments(string html)
    {
        return Regex.Replace(html, CommentPattern, string.Empty, ExpressionOptions);
    }

    /// <summary>
    /// Add a row to the input DataTable for each non-header row match in the input MatchCollection.
    /// </summary>
    /// <param name="rowMatches">A collection of all the rows to add to the DataTable</param>
    /// <param name="dataTable">The DataTable to which we add rows</param>
    private static void ParseRows(MatchCollection rowMatches, DataTable dataTable)
    {
        foreach (Match rowMatch in rowMatches)
        {
            // A row containing header cells is a header, not a data row.
            if (Regex.IsMatch(rowMatch.Value, HeaderOpenPattern, ExpressionOptions))
            {
                continue;
            }

            MatchCollection cellMatches = Regex.Matches(
                rowMatch.Value,
                CellPattern,
                ExpressionOptions);

            // A row may hold more cells than there are columns (for example when the
            // header row used colspan). Add default-named columns instead of failing;
            // this is done before NewRow so the new row has a slot for every cell.
            while (dataTable.Columns.Count < cellMatches.Count)
            {
                dataTable.Columns.Add();
            }

            DataRow dataRow = dataTable.NewRow();
            for (int columnIndex = 0; columnIndex < cellMatches.Count; columnIndex++)
            {
                dataRow[columnIndex] = cellMatches[columnIndex].Groups[1].Value;
            }

            dataTable.Rows.Add(dataRow);
        }
    }

    /// <summary>
    /// Given a string containing an HTML table, parse the header cells to create a set of DataColumns
    /// which define the columns in a DataTable.
    /// </summary>
    /// <param name="tableHtml">An HTML string containing a single HTML table</param>
    /// <returns>A set of DataColumns based on the HTML table header cells</returns>
    private static DataColumn[] ParseColumns(string tableHtml)
    {
        MatchCollection headerMatches = Regex.Matches(
            tableHtml,
            HeaderPattern,
            ExpressionOptions);

        // DataTable rejects duplicate column names, so repeated header texts get a
        // numeric suffix. Names are tracked case-insensitively.
        System.Collections.Generic.HashSet<string> usedNames =
            new System.Collections.Generic.HashSet<string>(StringComparer.OrdinalIgnoreCase);

        DataColumn[] columns = new DataColumn[headerMatches.Count];
        for (int index = 0; index < headerMatches.Count; index++)
        {
            string headerText = headerMatches[index].Groups[1].Value;
            columns[index] = new DataColumn(MakeUniqueName(headerText, usedNames));
        }

        return columns;
    }

    /// <summary>
    /// Return <paramref name="name"/> unchanged when it has not been used yet, otherwise the
    /// first of name_2, name_3, ... that is still free. The returned name is recorded as used.
    /// </summary>
    /// <param name="name">The desired column name</param>
    /// <param name="usedNames">The names already handed out for this table</param>
    /// <returns>A name not previously present in <paramref name="usedNames"/></returns>
    private static string MakeUniqueName(string name, System.Collections.Generic.HashSet<string> usedNames)
    {
        // An empty name is passed through; the DataTable assigns its own default name.
        if (name.Length == 0)
        {
            return name;
        }

        string candidate = name;
        int suffix = 2;
        while (!usedNames.Add(candidate))
        {
            candidate = name + "_" + suffix.ToString(System.Globalization.CultureInfo.InvariantCulture);
            suffix++;
        }

        return candidate;
    }

    /// <summary>
    /// For tables which do not specify header cells we must generate DataColumns based on the number
    /// of cells in the widest row, so that rows of differing lengths all fit.
    /// </summary>
    /// <param name="rowMatches">A collection of all the rows in the HTML table we wish to generate columns for</param>
    /// <returns>
    /// A set of DataColumns named "Column 0", "Column 1", ...; empty when the table has no rows
    /// </returns>
    private static DataColumn[] GenerateColumns(MatchCollection rowMatches)
    {
        int columnCount = 0;
        foreach (Match rowMatch in rowMatches)
        {
            int cellCount = Regex.Matches(
                rowMatch.Value,
                CellPattern,
                ExpressionOptions).Count;

            if (cellCount > columnCount)
            {
                columnCount = cellCount;
            }
        }

        return (from index in Enumerable.Range(0, columnCount)
                select new DataColumn("Column " + Convert.ToString(index))).ToArray();
    }
}
+0
我用来把 DataSet 转换为 XML 的代码来自:http://kw-share2learn.blogspot.com/2011/01/convert-dataset-to-json-string-and-vice.html –
+0
直接把 HTML 标记存储在数据库中有什么问题? –
+0
那样不如我想要的干净。存成 JSON 的话,以后我可以把它当作对象来使用,处理起来也更方便。 –