紧接着上一篇博客:通常我们的爬虫并不知道对方的 IP,只知道它的 URL,因此需要先从 URL 中取出主机名,再通过 DNS 解析出对应的 IP。先把客户端代码修改如下:
public string Get(string url, params Encoding[] encoding)
{
string responseText = string.Empty;
_statu = HttpRequestStatus.Busy;
try
{
Regex reg = new Regex("(http://)?(?<name>[^/?]+)");
Match m = reg.Match(url);
var hostName = m.Groups["name"].Value;
IPHostEntry hosts = Dns.GetHostByName(hostName);
if (!url.StartsWith("http://"))
url = "http://" + hostName + "/";
if (!url.EndsWith("/"))
url += "/";
IPEndPoint ipPoint = new IPEndPoint(hosts.AddressList[0], 80);
Socket socket = new Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp);//创建Socket
socket.Connect(ipPoint);
///向服务器发送信息
//{GET /index.php HTTP/1.0Content-Type: application/x-www-form-urlencoded
StringBuilder bufRequest = new StringBuilder();
bufRequest.Append("GET ").Append(url).Append(" HTTP/1.0\r\n");
bufRequest.Append("Content-Type: application/x-www-form-urlencoded\r\n");
bufRequest.Append("\r\n");
string requestContent = bufRequest.ToString();
byte[] bs = Encoding.ASCII.GetBytes(requestContent);
socket.Send(bs);//发送信息