由于返回的 Stream 只能 Read,转换为字节数组要麻烦一些;又考虑到 AMF 中流结束的标记为 00 00 09,所以写了如下方法来得到返回的二进制数据:
/// <summary>
/// Reads <paramref name="stream"/> byte by byte into an array, stopping once the AMF
/// end marker (00 00 09) has been read or the stream is exhausted.
/// </summary>
/// <param name="stream">Stream to read; only <c>ReadByte</c> is used, so it does not need to be seekable.</param>
/// <returns>Every byte read, including the end marker when one was found.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public static byte[] StreamToBytes(Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    List<byte> bytes = new List<byte>();
    while (true)
    {
        // ReadByte returns -1 at end of stream. Keep the value as an int so it is not
        // mistaken for a real 0xFF byte, and stop instead of looping forever when the
        // marker never arrives.
        int next = stream.ReadByte();
        if (next < 0)
        {
            break;
        }

        bytes.Add((byte)next);

        // Test the marker right after appending, so no byte beyond it is requested from
        // the stream, and only bytes that were really read can form the marker.
        int count = bytes.Count;
        if (count >= 3 && bytes[count - 3] == 0 && bytes[count - 2] == 0 && bytes[count - 1] == 9)
        {
            break;
        }
    }

    return bytes.ToArray();
}
/// <summary>
/// Reads <paramref name="stream"/> byte by byte into an array, stopping once the AMF
/// end marker (00 00 09) has been read or the stream is exhausted.
/// </summary>
/// <param name="stream">Stream to read; only <c>ReadByte</c> is used, so it does not need to be seekable.</param>
/// <returns>Every byte read, including the end marker when one was found.</returns>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
public static byte[] StreamToBytes(Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    List<byte> bytes = new List<byte>();
    while (true)
    {
        // ReadByte returns -1 at end of stream. Keep the value as an int so it is not
        // mistaken for a real 0xFF byte, and stop instead of looping forever when the
        // marker never arrives.
        int next = stream.ReadByte();
        if (next < 0)
        {
            break;
        }

        bytes.Add((byte)next);

        // Test the marker right after appending, so no byte beyond it is requested from
        // the stream, and only bytes that were really read can form the marker.
        int count = bytes.Count;
        if (count >= 3 && bytes[count - 3] == 0 && bytes[count - 2] == 0 && bytes[count - 1] == 9)
        {
            break;
        }
    }

    return bytes.ToArray();
}
对返回数据的分析在第一篇中已经说过了。考虑到分析二进制数据比较麻烦,而实际上我们需要的数据就在 userIndexes 和 mediaIndexes 之间,所以我用了如下的方法返回最后 7 天的百度指数:
/// <summary>
/// Pulls the most recent index values out of a response payload by decoding it as UTF-8
/// text and splitting the comma-separated run found between the "userIndexes" and
/// "mediaIndexes" markers.
/// </summary>
/// <param name="data">Raw response bytes; may be null.</param>
/// <returns>
/// Null when <paramref name="data"/> is null. Otherwise up to seven values, latest first;
/// the list is empty when either marker is missing. Fields that do not parse as an
/// integer are reported as 0.
/// </returns>
private static List<int> GetIndexs(byte[] data)
{
    if (data == null)
    {
        return null;
    }

    List<int> result = new List<int>();
    string str = Encoding.UTF8.GetString(data);
    string start = "userIndexes";
    string end = "mediaIndexes";

    // Ordinal search: the text is decoded binary data, so culture-sensitive matching
    // rules have no business here.
    int startMarker = str.IndexOf(start, StringComparison.Ordinal);
    if (startMarker < 0)
    {
        return result;
    }

    int startIndex = startMarker + start.Length;

    // Look for the closing marker only after the opening one, so the slice length
    // computed below can never be negative.
    int endIndex = str.IndexOf(end, startIndex, StringComparison.Ordinal);
    if (endIndex < 0)
    {
        return result;
    }

    string[] temp = str.Substring(startIndex, endIndex - startIndex).Split(',');

    // The last field runs into the bytes that precede "mediaIndexes", so only its
    // leading digits are taken.
    result.Add(StringToInt(temp[temp.Length - 1]));

    // Up to six more fields, walking backwards; the lower bound protects payloads that
    // carry fewer than seven fields.
    int number;
    for (int index = temp.Length - 2; index >= 0 && index > temp.Length - 8; index--)
    {
        int.TryParse(temp[index], out number);
        result.Add(number);
    }

    return result;
}
// The last day's value is followed by the type/length bytes that sit in front of
// mediaIndexes, so it is handled specially: only the leading run of digits is parsed.
/// <summary>
/// Converts the leading decimal digits of <paramref name="str"/> to an integer,
/// stopping at the first character that is not '0'..'9'.
/// </summary>
/// <param name="str">Text that starts with zero or more ASCII digits.</param>
/// <returns>The value of the leading digits, or 0 when there are none.</returns>
public static int StringToInt(string str)
{
    int value = 0;
    foreach (char ch in str)
    {
        int digit = ch - '0';
        if (digit < 0 || digit > 9)
        {
            // First non-digit ends the number.
            return value;
        }

        value = value * 10 + digit;
    }

    return value;
}
/// <summary>
/// Pulls the most recent index values out of a response payload by decoding it as UTF-8
/// text and splitting everything after the "userIndexes" marker on commas.
/// </summary>
/// <param name="data">Raw response bytes; may be null.</param>
/// <returns>
/// Null when <paramref name="data"/> is null. Otherwise up to seven values, latest first;
/// the list is empty when the marker is missing. Fields that do not parse as an integer
/// are reported as 0.
/// </returns>
public static List<int> GetIndexs(byte[] data)
{
    if (data == null)
    {
        return null;
    }

    List<int> result = new List<int>();
    string str = Encoding.UTF8.GetString(data);
    string start = "userIndexes";

    // Ordinal search: the text is decoded binary data, so culture-sensitive matching
    // rules have no business here.
    int startMarker = str.IndexOf(start, StringComparison.Ordinal);
    if (startMarker < 0)
    {
        return result;
    }

    int startIndex = startMarker + start.Length;

    // Take the rest of the decoded string. Its length is counted in characters, which
    // is smaller than data.Length as soon as the payload holds a multi-byte UTF-8
    // character, so the byte count must not be used as the slice bound.
    string[] temp = str.Substring(startIndex).Split(',');

    // The last field carries whatever follows the final value; StringToInt keeps only
    // its leading digits.
    result.Add(StringToInt(temp[temp.Length - 1]));

    // Up to six more fields, walking backwards; the lower bound protects payloads that
    // carry fewer than seven fields.
    int number;
    for (int index = temp.Length - 2; index >= 0 && index > temp.Length - 8; index--)
    {
        int.TryParse(temp[index], out number);
        result.Add(number);
    }

    return result;
}
下面是主函数的调用
/// <summary>
/// Click handler that runs a query with a fixed keyword and date range and shows each
/// returned value in its own message box.
/// </summary>
private void button1_Click(object sender, EventArgs e)
{
    string keyword = "李刚";
    string start = "2010-10-20";
    string end = "2010-10-25";

    List<int> result = Run(keyword, start, end);
    if (result == null)
    {
        // Run returns null when no response data came back; there is nothing to show,
        // and enumerating null would throw.
        return;
    }

    foreach (int item in result)
    {
        MessageBox.Show(item + " ");
    }
}
/// <summary>
/// Passes the bytes produced by <c>Post.GetData</c> for the keyword and date range to
/// <c>Post.GetFlashData</c> together with the gateway URL, then hands a non-null
/// response to <c>Post.GetIndexs</c>.
/// </summary>
/// <param name="keyword">Keyword to query.</param>
/// <param name="start">Start date string, forwarded unchanged to <c>Post.GetData</c>.</param>
/// <param name="end">End date string, forwarded unchanged to <c>Post.GetData</c>.</param>
/// <returns>The result of <c>Post.GetIndexs</c>, or null when no response data was returned.</returns>
public static List<int> Run(string keyword, string start, string end)
{
    const string gatewayUrl = "http://index.baidu.com/gateway.php";

    byte[] requestBytes = Post.GetData(keyword, start, end);
    byte[] responseBytes = Post.GetFlashData(gatewayUrl, requestBytes);

    return responseBytes == null ? null : Post.GetIndexs(responseBytes);
}
如果还是无法得到数据,可以留言,我会提供代码样例。
最后,实际上,以上的算法效率并不高,百度指数可以同时查询三个关键词,有兴趣的朋友可以研究下处理的方式。