因为需要,自己写了个批量查询qs的小软件。从网站中抓出需要的数据,格式化显示:
对字符串进行检测处理,先用Replace函数去掉字符串的空格,再用正则表达式匹配,返回匹配的字符串,如果没有匹配,则返回空字符串:
// Validates user input: removes every space, then returns the first qs number
// found ("qs"/"QS" in any letter case followed by 12 digits), or "" when there is none.
private string CheckText(string waitregexstr)
{
    const string pattern = "[qQ][sS][0-9]{12}";

    string compact = waitregexstr.Replace(" ", "");
    Match hit = Regex.Match(compact, pattern);

    // An unsuccessful Match already carries an empty Value; the branch just makes that explicit.
    return hit.Success ? hit.Value : string.Empty;
}
获取网页内容。这部分我还不太熟悉,借用了别人的代码:它用HttpWebRequest和HttpWebResponse的方法,配合Stream来读取网页内容。
#region GetWebContent: download the page at a URL and return its text
// NOTE(review): GetWebContent no longer reads or writes the fields below. They are
// kept only because members outside this excerpt might still reference them;
// delete them if nothing else does.
HttpWebRequest httpReq;
HttpWebResponse httpResp;
string strBuff = "";
char[] cbuffer = new char[256];
int byteRead = 0;

/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns the response body
/// decoded as UTF-8 text.
/// </summary>
/// <param name="url">Absolute http/https address; any "\r\n" sequences in it are removed first.</param>
/// <returns>The body of this one response only.</returns>
/// <exception cref="UriFormatException">The cleaned-up string is not a valid absolute URI.</exception>
/// <exception cref="WebException">The request failed.</exception>
/// <exception cref="InvalidCastException">The URI scheme does not produce an HTTP request.</exception>
public string GetWebContent(string url)
{
    // Text taken from a multi-line TextBox can still carry its line break.
    Uri address = new Uri(url.Replace("\r\n", ""));

    // HttpWebRequest has no public constructor; WebRequest.Create is the factory for it.
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(address);

    // Everything is local and disposed on every path. The previous version appended to a
    // shared field that was never cleared, so each call also returned all earlier pages.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream body = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}
#endregion
定义从网页内容中获取特定字符串的函数。先调用GetWebContent获取网页源码,再对字符串进行多次处理:首先用正则表达式(Regex.Match)匹配出需要的片段,再用Replace函数去掉HTML标签,得到自己所要显示的字符串。这部分如果有大牛有更好的方案,请指教,感谢!
/// <summary>
/// Looks up one qs number: downloads its page via GetWebContent, takes the first
/// fragment running from an h3 opening tag to the next closing table tag, and strips
/// the markup so that plain text remains.
/// </summary>
/// <param name="textboxtext">The qs number; it is upper-cased and used as the page name in the URL.</param>
/// <returns>
/// The cleaned-up text, or "" when the page contains no such fragment or the lookup throws.
/// </returns>
private string operastr( string textboxtext)
{
    try
    {
        string url = "避嫌,这里去掉了网址" + textboxtext.ToUpper() + ".htm";
        string webcontent = GetWebContent(url);

        // Match once and test Success instead of running the same regex twice.
        Match fragment = Regex.Match(webcontent, @"<h3>[\s\S]*?</table>");
        if (!fragment.Success)
        {
            return "";
        }

        // Each field cell starts a new line and each value cell is prefixed with ":".
        // NOTE(review): the Replace("\n", "") step also removes the '\n' of the "\r\n"
        // inserted by the first step, leaving a bare '\r' as separator. Kept as in the
        // original; confirm that this is the intended output.
        return fragment.Value
            .Replace("<td class=\"info-field\"><div>", "\r\n")
            .Replace("</div></td>", "")
            .Replace("<td class=\"info-value\"><div style=\"\">", ":")
            .Replace("\n", "")
            .Replace("</tr>", "")
            .Replace("<tr>", "")
            .Replace("</tbody></table>", "")
            .Replace("<table class=\"info-table\"><tbody>", "")
            .Replace("<h3>", "")
            .Replace("</h3>", "")
            .Replace(" ", "");
    }
    catch (Exception)
    {
        // Best effort by design: a failed lookup is reported to the caller as empty text.
        return "";
    }
    finally
    {
        // Close the loading indicator on every path. Previously a failed request
        // skipped this line and left the indicator open.
        loadingGrid.IsOpen = false;
    }
}
定义多行字符串的处理方法。根据TextBox的LineCount属性与GetLineText方法,逐行对字符串进行处理:
/// <summary>
/// Handles multi-line input: validates each line of <paramref name="tb"/> as a qs number,
/// looks it up with operastr and writes the results into <paramref name="lb"/>,
/// separated by a dashed line.
/// </summary>
/// <param name="tb">TextBox holding one qs number per line.</param>
/// <param name="lb">Label whose Content receives the results; existing content is appended to.</param>
/// <remarks>
/// Stops at the first line that fails validation, after reporting it through wrong(...).
/// Results of earlier lines stay in the label.
/// </remarks>
private void MutilpleStr(TextBox tb,Label lb)
{
    for (int i = 0; i < tb.LineCount; i++)
    {
        string qs = CheckText(tb.GetLineText(i));
        if (qs == "")
        {
            wrong("qs格式错误!");
            return;
        }

        // Look up the validated number, not the raw line: the raw text can still contain
        // spaces, the line break or surrounding characters, which would end up in the URL.
        string result = operastr(qs);

        if (lb.Content == null)
        {
            lb.Content = result;
        }
        else
        {
            lb.Content += "\r\n------------------------------------";
            lb.Content += "\r\n" + result;
        }
    }
}
主体。根据用户输入的qs编号行数,分为单行与多行两种情况处理:
// Entry point of a query: choose the handling by how many lines the user typed.
if (mutipleText.Text == "")
{
    wrong("文本框不能为空!");
}
else if (this.mutipleText.LineCount > 1)
{
    // Several qs numbers, one per line: MutilpleStr validates and looks up each line.
    loadingGrid.IsOpen = true;
    mutipleText.BorderBrush = Brushes.White;
    MutilpleStr(mutipleText, my);
}
else if (this.mutipleText.LineCount == 1)
{
    // Validate once and reuse the result, so the lookup receives exactly the number
    // that passed validation rather than the raw text with only its spaces removed.
    string qs = CheckText(mutipleText.Text);
    if (qs.Length < 1)
    {
        wrong("请输入正确的qs!");
        return;
    }

    mutipleText.BorderBrush = Brushes.White;
    my.Content = operastr(qs);
}
最新测试:批量查询时,返回的qs结果全部是同一个。起初以为是抓取的速度太快导致的,但更可能的原因是GetWebContent里的strBuff是成员字段,每次调用前没有清空,新页面的内容被追加在旧内容之后,而正则只取第一个匹配,所以总是得到第一次查询的数据。有空我改善下。
项目已经上传: http://files.cnblogs.com/files/ssvip/qs.rar