Imports System.Text.RegularExpressions
Imports System.Text
Imports System.Net.Sockets
Module Module1
''' <summary>
''' Demo entry point: fetches a page with <c>HtmlHelp.Get</c>, prints the raw
''' response text and waits for a key press before exiting.
''' </summary>
Sub Main()
    ' Pass a fully qualified http:// URL: the URL parser only extracts a host
    ' when the scheme is present, so a bare "www.baidu.com" cannot be reached.
    Dim response As String = HtmlHelp.Get("http://www.baidu.com/", System.Text.Encoding.UTF8)
    Console.WriteLine(response)
    Console.ReadKey()
End Sub
Public Class HtmlHelp
''' <summary>
''' Opens a TCP connection to <paramref name="host"/>, sends the request text and
''' reads the response until the remote side closes the connection.
''' </summary>
''' <param name="host">Host name or IP address to connect to.</param>
''' <param name="port">TCP port to connect to.</param>
''' <param name="body">Complete request text (request line, headers and optional payload); it is sent as ASCII bytes.</param>
''' <param name="encode">Encoding used to decode the received bytes.</param>
''' <returns>
''' The decoded response text. If an error occurs, its message is written to the
''' console and whatever was received so far (possibly an empty string) is returned.
''' </returns>
Private Shared Function GetResponse(ByVal host As String, ByVal port As Integer, ByVal body As String, ByVal encode As Encoding) As String
    Dim response As New StringBuilder()
    Dim requestBytes As Byte() = Encoding.ASCII.GetBytes(body)
    Dim receiveBuffer As Byte() = New Byte(1023) {}
    Using client As New Socket(AddressFamily.InterNetwork, SocketType.Stream, ProtocolType.Tcp)
        Try
            client.ReceiveTimeout = 5000 ' receive timeout in milliseconds
            ' Connect throws on failure, so no separate Connected check is needed.
            client.Connect(host, port)
            client.Send(requestBytes, requestBytes.Length, SocketFlags.None)

            ' A stateful decoder keeps the trailing bytes of a multi-byte character
            ' that is split across two reads; decoding each chunk independently
            ' would turn such a character into replacement characters.
            Dim chunkDecoder As Decoder = encode.GetDecoder()
            Dim chars As Char() = New Char(encode.GetMaxCharCount(receiveBuffer.Length) - 1) {}

            Dim received As Integer = client.Receive(receiveBuffer, receiveBuffer.Length, SocketFlags.None)
            While received > 0
                Dim charCount As Integer = chunkDecoder.GetChars(receiveBuffer, 0, received, chars, 0)
                response.Append(chars, 0, charCount)
                received = client.Receive(receiveBuffer, receiveBuffer.Length, SocketFlags.None)
            End While

            ' Flush any bytes still buffered inside the decoder.
            Dim tailCount As Integer = chunkDecoder.GetChars(receiveBuffer, 0, 0, chars, 0, True)
            response.Append(chars, 0, tailCount)
        Catch ex As Exception
            ' Best effort: report the problem and return what has been received.
            Console.WriteLine(ex.Message)
        End Try
        ' End Using disposes (and thereby closes) the socket.
    End Using
    Return response.ToString()
End Function
''' <summary>
''' Splits a URL into host, port, path and query string.
''' </summary>
''' <param name="url">
''' URL to parse, with or without a leading <c>http://</c> (matched case-insensitively).
''' No other scheme is recognised.
''' </param>
''' <returns>
''' A <c>UrlInfo</c> whose fields default to Host = "", Port = 80, File = "/" and
''' Body = "" for every part that is missing from <paramref name="url"/>.
''' </returns>
Private Shared Function ParseURL(ByVal url As String) As UrlInfo
    Dim info As New UrlInfo()
    info.Host = ""
    info.Port = 80
    info.File = "/"
    info.Body = ""
    If String.IsNullOrEmpty(url) Then
        Return info
    End If

    ' Strip the scheme only when it really is a prefix; a URL without a scheme
    ' is treated as starting directly with the host.
    Const scheme As String = "http://"
    Dim rest As String = url
    If rest.StartsWith(scheme, StringComparison.OrdinalIgnoreCase) Then
        rest = rest.Substring(scheme.Length)
    End If

    ' The host[:port] part ends at the first "/" or "?", whichever comes first.
    Dim authority As String = rest
    Dim target As String = ""
    Dim authorityEnd As Integer = rest.IndexOfAny(New Char() {"/"c, "?"c})
    If authorityEnd <> -1 Then
        authority = rest.Substring(0, authorityEnd)
        target = rest.Substring(authorityEnd)
    End If

    ' Split off an explicit port regardless of whether a path follows.
    Dim colonIndex As Integer = authority.IndexOf(":"c)
    If colonIndex = -1 Then
        info.Host = authority
    Else
        info.Host = authority.Substring(0, colonIndex)
        Dim parsedPort As Integer
        ' Keep the default port when the text is not a valid TCP port number.
        If Integer.TryParse(authority.Substring(colonIndex + 1), parsedPort) AndAlso parsedPort > 0 AndAlso parsedPort <= 65535 Then
            info.Port = parsedPort
        End If
    End If

    ' Everything after the first "?" is the query string, including any later "?".
    Dim queryIndex As Integer = target.IndexOf("?"c)
    If queryIndex = -1 Then
        If target.Length > 0 Then
            info.File = target
        End If
    Else
        If queryIndex > 0 Then
            info.File = target.Substring(0, queryIndex)
        End If
        info.Body = target.Substring(queryIndex + 1)
    End If
    Return info
End Function
''' <summary>
''' Sends an HTTP/1.1 GET request for <paramref name="url"/> and returns the raw response text.
''' </summary>
''' <param name="url">URL to request; its query string, if any, is sent as part of the request target.</param>
''' <param name="encode">Encoding used to decode the response.</param>
''' <returns>The text returned by <c>GetResponse</c> for the generated request.</returns>
Public Shared Function [Get](ByVal url As String, ByVal encode As Encoding) As String
    Dim info As UrlInfo = ParseURL(url)

    ' Append "?query" only when there is a query string, so that a URL without
    ' one is requested as "GET /path" rather than "GET /path?".
    Dim requestTarget As String = info.File
    If Not String.IsNullOrEmpty(info.Body) Then
        requestTarget &= "?" & info.Body
    End If

    Dim strRequest As String = String.Format("GET {0} HTTP/1.1" & vbCrLf & "Host:{1}:{2}" & vbCrLf & "Connection:Close" & vbCrLf & vbCrLf, requestTarget, info.Host, info.Port.ToString())
    Return GetResponse(info.Host, info.Port, strRequest, encode)
End Function
''' <summary>
''' Sends an HTTP/1.1 POST request to <paramref name="url"/> and returns the raw response text.
''' The query string of the URL is sent as the form-urlencoded request payload.
''' </summary>
''' <param name="url">URL to post to; the part after "?" becomes the payload.</param>
''' <param name="encode">Encoding used to decode the response.</param>
''' <returns>The text returned by <c>GetResponse</c> for the generated request.</returns>
Public Shared Function Post(ByVal url As String, ByVal encode As Encoding) As String
    Dim target As UrlInfo = ParseURL(url)

    ' Assemble the request line by line: request line, headers, blank line, payload.
    Dim request As New StringBuilder()
    request.Append("POST ").Append(target.File).Append(" HTTP/1.1").Append(vbCrLf)
    request.Append("Host:").Append(target.Host).Append(":").Append(target.Port.ToString()).Append(vbCrLf)
    request.Append("Content-Length:").Append(target.Body.Length.ToString()).Append(vbCrLf)
    request.Append("Content-Type:application/x-www-form-urlencoded").Append(vbCrLf)
    request.Append("Connection:Close").Append(vbCrLf)
    request.Append(vbCrLf)
    request.Append(target.Body)

    Return GetResponse(target.Host, target.Port, request.ToString(), encode)
End Function
''' <summary>
''' Stores <paramref name="value"/> in <paramref name="target"/> and returns the stored value,
''' so that an assignment can be used inside an expression such as a loop condition.
''' </summary>
''' <param name="target">Variable that receives the value.</param>
''' <param name="value">Value to store and return.</param>
''' <returns>The value that was assigned.</returns>
Private Shared Function InlineAssignHelper(Of T)(ByRef target As T, ByVal value As T) As T
    target = value
    Return target
End Function
''' <summary>
''' Parts of a URL as produced by ParseURL and consumed by Get and Post.
''' </summary>
Private Structure UrlInfo
''' <summary>Host to connect to; also written into the Host request header.</summary>
Public Host As String
''' <summary>TCP port to connect to; ParseURL initialises it to 80.</summary>
Public Port As Integer
''' <summary>Path part of the URL; ParseURL initialises it to "/".</summary>
Public File As String
''' <summary>
''' Text after "?" in the URL; ParseURL initialises it to "". Get sends it as the
''' query string, Post sends it as the request payload.
''' </summary>
Public Body As String
End Structure
End Class
End Module