/// <summary>
/// Helpers that download a Taobao item page and extract the item's image
/// address, title and price from the HTML using regular expressions.
/// </summary>
public static class taobao_message
{
    // Built once and reused: Regex instances are immutable after construction,
    // so there is no need to re-parse the same pattern on every call.

    // Captures (as "imgUrl") the src value that follows the J_ImgBooth marker inside one tag.
    private static readonly Regex ImageRegex = new Regex(@"J_ImgBooth\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>");

    // Captures (group 2) the text of an h3 element, optionally wrapped in an anchor.
    private static readonly Regex TitleRegex = new Regex("<h3>(<a[^>]*>)?([^<]*)(</a>)?</h3>");

    // Captures (group 1) the text of the element carrying the J_StrPrice marker.
    private static readonly Regex PriceRegex = new Regex("J_StrPrice[^>]*>([^<>]*)(</)");

    /// <summary>
    /// Downloads a page as raw bytes and decodes them to a string using GB2312.
    /// </summary>
    /// <param name="url">
    /// Page address. A leading "http://" or "https://" is kept as given; when
    /// neither is present "http://" is prepended.
    /// </param>
    /// <returns>The decoded page content.</returns>
    /// <exception cref="System.ArgumentNullException"><paramref name="url"/> is null.</exception>
    public static string webclinet_content(string url)
    {
        if (url == null)
        {
            throw new System.ArgumentNullException("url");
        }

        // Only look at the prefix: removing every "http://" in the string would
        // corrupt addresses that embed another URL (e.g. in a query string) and
        // would turn "https://..." into the invalid "http://https://...".
        bool hasScheme =
            url.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase) ||
            url.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase);
        string address = hasScheme ? url : "http://" + url;

        // WebClient is IDisposable; release it deterministically.
        using (WebClient client = new WebClient())
        {
            byte[] page = client.DownloadData(address);

            // Taobao pages are encoded as GB2312.
            // NOTE(review): on .NET Core / .NET 5+ this code page may require
            // registering CodePagesEncodingProvider - confirm for the target framework.
            return System.Text.Encoding.GetEncoding("GB2312").GetString(page);
        }
    }

    /// <summary>
    /// Reads the information of a Taobao item.
    /// Array content: { item image address, item title, item price }.
    /// </summary>
    /// <param name="url">Item page address.</param>
    /// <returns>A three-element array: image address, title, price.</returns>
    public static string[] baobei_mess(string url)
    {
        string content = webclinet_content(url);
        string baobei_img = get_taobao(content, 1);
        string baobei_title = get_taobao(content, 2);
        string baobei_price = get_taobao(content, 3);
        return new string[] { baobei_img, baobei_title, baobei_price };
    }

    /// <summary>
    /// Extracts the content of a specific tag from page HTML.
    /// </summary>
    /// <param name="content">The string to extract from.</param>
    /// <param name="type">
    /// What to extract - 0: nothing; 1: item image address; 2: item title; 3: item price.
    /// </param>
    /// <returns>
    /// The captured text of every match, concatenated in document order; an
    /// empty string when nothing matches or <paramref name="type"/> is 0 or
    /// not one of the values above.
    /// </returns>
    public static string get_taobao(string content, int type)
    {
        switch (type)
        {
            case 1:
                return JoinGroup(ImageRegex.Matches(content), "imgUrl");
            case 2:
                return JoinGroup(TitleRegex.Matches(content), "2");
            case 3:
                return JoinGroup(PriceRegex.Matches(content), "1");
            default:
                // Type 0 and unknown types yield nothing; return before touching
                // content instead of scanning it with an empty pattern.
                return "";
        }
    }

    // Concatenates the named (or numbered, given as a numeric string) group of every match.
    private static string JoinGroup(MatchCollection matches, string groupName)
    {
        System.Text.StringBuilder joined = new System.Text.StringBuilder();
        foreach (Match match in matches)
        {
            joined.Append(match.Groups[groupName].Value);
        }
        return joined.ToString();
    }
}
{
/// <summary>
/// Downloads a page as raw bytes and decodes them to a string using GB2312.
/// </summary>
/// <param name="url">
/// Page address. A leading "http://" or "https://" is kept as given; when
/// neither is present "http://" is prepended.
/// </param>
/// <returns>The decoded page content.</returns>
/// <exception cref="System.ArgumentNullException"><paramref name="url"/> is null.</exception>
public static string webclinet_content(string url)
{
    if (url == null)
    {
        throw new System.ArgumentNullException("url");
    }

    // Inspect only the prefix: stripping every "http://" occurrence would
    // mangle addresses that embed another URL and would turn "https://..."
    // into the invalid "http://https://...".
    bool hasScheme =
        url.StartsWith("http://", System.StringComparison.OrdinalIgnoreCase) ||
        url.StartsWith("https://", System.StringComparison.OrdinalIgnoreCase);
    string address = hasScheme ? url : "http://" + url;

    // WebClient is IDisposable; release it deterministically.
    using (WebClient client = new WebClient())
    {
        byte[] page = client.DownloadData(address);

        // Taobao pages are encoded as GB2312.
        return System.Text.Encoding.GetEncoding("GB2312").GetString(page);
    }
}
/// <summary>
/// Reads the information of a Taobao item.
/// Array content: { item image address, item title, item price }.
/// </summary>
/// <param name="url">Item page address.</param>
/// <returns>A three-element array: image address, title, price.</returns>
public static string[] baobei_mess(string url)
{
    string html = webclinet_content(url);

    // Slots are filled in the same order the fields are extracted:
    // 1 = image, 2 = title, 3 = price.
    string[] info = new string[3];
    info[0] = get_taobao(html, 1);
    info[1] = get_taobao(html, 2);
    info[2] = get_taobao(html, 3);
    return info;
}
/// <summary>
/// Extracts the content of a specific tag from page HTML.
/// </summary>
/// <param name="content">The string to extract from.</param>
/// <param name="type">
/// What to extract - 0: nothing; 1: item image address; 2: item title; 3: item price.
/// </param>
/// <returns>
/// The captured text of every match, concatenated in document order; an
/// empty string when nothing matches or <paramref name="type"/> is 0 or
/// not one of the values above.
/// </returns>
public static string get_taobao(string content, int type)
{
    string pattern;
    string groupName; // a group name, or a group number written as a string

    switch (type)
    {
        case 1:
            pattern = @"J_ImgBooth\b[^<>]*?\bsrc[\s\t\r\n]*=[\s\t\r\n]*[""']?[\s\t\r\n]*(?<imgUrl>[^\s\t\r\n""'<>]*)[^<>]*?/?[\s\t\r\n]*>";
            groupName = "imgUrl";
            break;
        case 2:
            pattern = "<h3>(<a[^>]*>)?([^<]*)(</a>)?</h3>";
            groupName = "2";
            break;
        case 3:
            pattern = "J_StrPrice[^>]*>([^<>]*)(</)";
            groupName = "1";
            break;
        default:
            // Type 0 and unknown types yield nothing; return before touching
            // content instead of scanning it with an empty pattern.
            return "";
    }

    // The static Regex.Matches goes through the framework's pattern cache, so
    // the same pattern is not re-parsed on every call.
    System.Text.StringBuilder result = new System.Text.StringBuilder();
    foreach (Match match in Regex.Matches(content, pattern))
    {
        result.Append(match.Groups[groupName].Value);
    }
    return result.ToString();
}
}