/// <summary>
/// Click handler: downloads the page whose URL is typed into TextBox1, strips the
/// HTML markup, extracts the labelled company-registration fields from the remaining
/// text and appends them as one row to Sheet1 of the Excel template workbook.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event arguments (not used).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    string url = this.TextBox1.Text.Trim();

    // Download the page and decode it as UTF-8. The using blocks release the
    // response, stream and reader even when reading throws.
    string strResult;
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream streamReceive = response.GetResponseStream())
    using (StreamReader streamReader = new StreamReader(streamReceive, Encoding.UTF8))
    {
        strResult = streamReader.ReadToEnd();
    }

    // Remove the markup and the line feeds so every "label: value" pair sits on a
    // single line; cutstr's pattern does not match across line feeds.
    string aa = stripHtml(strResult).Replace("\n", "");

    // Only the fields that are written to the sheet are extracted. Each value is the
    // text between its own label and the label that follows it on the page.
    string strNO = cutstr(aa, "注册号:", "企业类型");
    string strType = cutstr(aa, "企业类型:", "企业分类");
    string strMain = cutstr(aa, "主体名称:", "法定代表人");
    string strPople = cutstr(aa, "法定代表人/负责人:", "行政区划");
    string strDate = cutstr(aa, "成立日期:", "注册资本");
    string strMoney = cutstr(aa, "注册资本:", "经营期限自");
    string strRange = cutstr(aa, "经营范围:", "许可经营范围");

    // Export to Excel through the ACE OLE DB provider.
    string fPath = @"E:\website\模板.xlsx";
    string mystring = "Provider=Microsoft.ACE.OLEDB.12.0;Extended Properties='Excel 12.0;HDR=YES';data source=" + fPath;

    // The scraped values are passed as parameters instead of being concatenated into
    // the statement, so a quote inside the page text can neither break nor alter the
    // INSERT. OLE DB parameters are positional: the order of the AddWithValue calls
    // must follow the order of the ? markers. The column whose name contains
    // parentheses is bracketed so it is read as a single identifier.
    string sql = "insert into [Sheet1$] (功能区编号,街道编号,企业名称,法人代表,工商注册号,企业性质,[注册资金(万元)],币种,注册时间,所属行业) values ('1','1',?,?,?,?,?,'人民币',?,?)";

    using (OleDbConnection cnnxls = new OleDbConnection(mystring))
    using (OleDbCommand command = new OleDbCommand(sql, cnnxls))
    {
        command.Parameters.AddWithValue("@name", strMain);
        command.Parameters.AddWithValue("@legalPerson", strPople);
        command.Parameters.AddWithValue("@regNo", strNO);
        command.Parameters.AddWithValue("@type", strType);
        command.Parameters.AddWithValue("@capital", strMoney);
        command.Parameters.AddWithValue("@regDate", strDate);
        command.Parameters.AddWithValue("@scope", strRange);

        cnnxls.Open();
        command.ExecuteNonQuery();
    }
}
/// <summary>
/// Removes every HTML tag from the given markup and then decodes the
/// &amp;lt; and &amp;gt; entities back into literal angle brackets.
/// </summary>
/// <param name="strHtml">HTML text to clean.</param>
/// <returns>The text with tags removed and &amp;lt;/&amp;gt; decoded.</returns>
private string stripHtml(string strHtml)
{
    // "(.|\n)" lets a single tag span several lines; the lazy "+?" stops at the
    // first closing '>' so neighbouring tags are removed one by one.
    Regex objRegExp = new Regex("<(.|\n)+?>");
    string strOutput = objRegExp.Replace(strHtml, "");

    // Decode the escaped angle brackets only after the tags are gone, so the decoded
    // characters are never mistaken for markup. (Replacing "<" with "<" was a no-op.)
    strOutput = strOutput.Replace("&lt;", "<");
    strOutput = strOutput.Replace("&gt;", ">");
    return strOutput;
}
/// <summary>
/// Extracts the text located between a start marker and an end marker.
/// </summary>
/// <param name="str">Text to search.</param>
/// <param name="bs">Start marker, matched literally.</param>
/// <param name="es">End marker, matched literally.</param>
/// <returns>
/// The shortest non-empty run of characters (line feeds excluded) that follows the
/// first usable occurrence of <paramref name="bs"/> and precedes
/// <paramref name="es"/>; the literal "采集失败!!!" when there is no such run.
/// </returns>
protected string cutstr(string str, string bs, string es)
{
    // Requires System.Text.RegularExpressions.
    // Both markers are escaped so characters such as '(', '[' or '.' are matched
    // literally instead of being interpreted as regex syntax. The capture group
    // returns exactly the text between the markers, so no length arithmetic on the
    // whole match is needed.
    Match match = Regex.Match(str, Regex.Escape(bs) + "(.+?)" + Regex.Escape(es));
    if (!match.Success)
    {
        return "采集失败!!!";
    }

    return match.Groups[1].Value;
}
// Reposted from: https://www.cnblogs.com/VirtualLive/archive/2009/08/27/1555004.html