用正则表达式解析BT网页到本地数据集


教育网的5Q网站有很多资源,其中包括很多讲座和学习资料。但每天都要上网站查看很不方便,于是我写了一个小软件来解析它的BT页面,并加入XtraGrid控件以方便查看。希望大家多提意见!
waxwork3@163.com

界面

正则解析源码:
// Downloads the BT listing page, extracts one record per listing with a regular
// expression, and merges the records into the local typed DataSet (m_dsBT).
//
// Listings already present (matched on the hash code of their title text) only get
// their Seed/Connection counters refreshed; unseen listings are staged in `tbl`
// and merged at the end.
//
// NOTE(review): `tbl` and `strTemp` are declared outside this fragment.
// NOTE(review): String.GetHashCode() is used as the row key; it is not guaranteed
// to be stable across runtimes or collision-free - confirm this is acceptable
// before persisting the data set.
try
{
    this.m_WaitDialogForm.Caption = "Geting page...";
    // Presumably fills m_SubjectString with the page HTML - verify in GetPage().
    this.GetPage();

    this.m_WaitDialogForm.Caption = "Parsing data...";

    // The pattern is laid out over several lines for readability; this relies on
    // RegexOptions.IgnorePatternWhitespace, so the line breaks and indentation
    // inside the literal are not part of the pattern. A doubled quote ("") is a
    // single literal quote inside a verbatim string.
    string regex = @"(?<PublishTime>\d\d-\d\d\s\d\d:\d\d)
                     .*\n.*0066CC>
                     (?<Catalog>[^<]*)
                     .*\n.*red>
                     (?<Seed>[^<]*)
                     .*\n.*green>
                     (?<Connection>[^<]*)
                     .*\n.*?""
                     (?<SeedLink>[^""]*)
                     .*?
                     (?<Content>【.*?)
                     </.*\n.*?""
                     (?<Seed_1>[^""]*)
                     "".*?""
                     (?<Seed_2>[^""]*)
                     "".*?""
                     (?<Seed_3>[^""]*)
                     "".*?""
                     (?<Detail>[^""]*)
                     .*\n.*>
                     (?<Capacity>.*\..*?)
                     <.*\n.*>
                     (?<Completed>
                     .*)<
                     ";
    System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnorePatternWhitespace
        | System.Text.RegularExpressions.RegexOptions.Multiline
        | System.Text.RegularExpressions.RegexOptions.IgnoreCase;
    System.Text.RegularExpressions.Regex reg =
        new System.Text.RegularExpressions.Regex(regex, options);

    for (Match match = reg.Match(this.m_SubjectString); match.Success; match = match.NextMatch())
    {
        DataRow existing = this.m_dsBT.BT.Rows.Find(match.Groups["Content"].Value.GetHashCode());
        if (existing != null)
        {
            // Known listing: only the live counters change between refreshes.
            existing["Seed"] = Convert.ToInt32(match.Groups["Seed"].ToString());
            existing["Connection"] = Convert.ToInt32(match.Groups["Connection"].ToString());
            continue;
        }

        dsBT.BTRow row = tbl.NewBTRow();
        try
        {
            row.BeginEdit();

            row.Content = match.Groups["Content"].ToString();
            row.ContentHashCode = match.Groups["Content"].Value.GetHashCode();
            // The page only carries "MM-dd HH:mm", so the current year is prepended.
            // NOTE(review): listings from late December read in January would get
            // a date in the future - confirm whether that matters.
            row.PublishTime = Convert.ToDateTime(DateTime.Now.Year + "-" + match.Groups["PublishTime"].ToString());
            row.Catalog = match.Groups["Catalog"].ToString();
            row.Seed = Convert.ToInt32(match.Groups["Seed"].ToString());
            row.Connection = Convert.ToInt32(match.Groups["Connection"].ToString());
            row.SeedLink = match.Groups["SeedLink"].ToString();
            row.Detail = match.Groups["Detail"].ToString();
            row.Completed = Convert.ToInt32(match.Groups["Completed"].ToString());
            row.History = 0;

            // Capacity text is a number followed by a 3-character suffix.
            // Presumably " MB" / " GB" - verify against the page. Values carrying an
            // 'M' past index 1 are stored as-is; everything else is multiplied by 1024.
            strTemp = match.Groups["Capacity"].ToString();
            bool hasMegabyteMarker = strTemp.IndexOf("M") - 1 > 0;
            strTemp = strTemp.Substring(0, Math.Max(strTemp.Length - 3, 0));
            if (hasMegabyteMarker)
            {
                row.Capacity = (int)Convert.ToSingle(strTemp);
            }
            else
            {
                row.Capacity = (int)(Convert.ToSingle(strTemp) * 1024);
            }

            row.EndEdit();
            tbl.Rows.Add(row);
        }
        catch (Exception ex)
        {
            // A single malformed listing must not abort the whole import: advance
            // the combo box selection (wrapping after index 4), report, and go on.
            this.comboBox1.SelectedIndex = this.comboBox1.SelectedIndex < 4 ? this.comboBox1.SelectedIndex + 1 : 0;
            frmMessage message = new frmMessage(ex.Message);
            message.Show();
        }
    }

    if (tbl.Rows.Count > 0)
    {
        // Bump the History counter of every row already in the data set before
        // the new rows (History = 0) are merged in.
        foreach (DataRow historyRow in this.m_dsBT.BT.Rows)
        {
            historyRow["History"] = (int)historyRow["History"] + 1;
        }

        this.Text = "上次更新时间:" + DateTime.Now.ToString() + " | 导入" + tbl.Rows.Count + "条数据";
        this.m_WaitDialogForm.Close();
        this.m_dsBT.Merge(tbl);
        this.gridView1.BestFitColumns();
    }
    else
    {
        this.Text = "上次更新时间:" + DateTime.Now.ToString();
        this.m_WaitDialogForm.Close();
    }
}
catch (Exception ex)
{
    // Download/parse failure: advance the combo box selection (wrapping after
    // index 4), make sure the wait dialog is dismissed, then show the error.
    this.comboBox1.SelectedIndex = this.comboBox1.SelectedIndex < 4 ? this.comboBox1.SelectedIndex + 1 : 0;
    this.m_WaitDialogForm.Close();
    frmMessage message = new frmMessage(ex.Message);
    message.Show();
}

程序下载

转载于:https://www.cnblogs.com/waxwork3/archive/2006/02/24/336649.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值