用MASM32编程读取网站首页代码
记得在某个汇编论坛上有网友讨论如何获取一个网站的首页文件名,是index.htm,index.html,index.asp,还是……
于是动手写了这个程序读取网站的返回信息来做测试,发现有些网站的返回信息中的Content-Location值包含了首页文件名,如www.hcny.gov.cn:
/---
HTTP/1.1 200 OK
Content-Length: 34432
Content-Type: text/html
Content-Location: http://www.hcny.gov.cn/index.htm
Last-Modified: Wed, 05 Dec 2007 02:59:18 GMT
Accept-Ranges: bytes
ETag: "244d2d3ea36c81:80a"
Server: Microsoft-IIS/6.0
Date: Wed, 05 Dec 2007 15:05:19 GMT
---/
有些网站则没有,如www.163.com:
/---
HTTP/1.0 200 OK
Date: Wed, 05 Dec 2007 15:01:13 GMT
Server: Apache/2.0.59 (Unix)
Accept-Ranges: bytes
Vary: Accept-Encoding
Content-Length: 127476
Content-Type: text/html; charset=GB2312
Age: 265
X-Cache: HIT from www.163.com
Connection: keep-alive
---/
另外 IE 从 SP2 开始不支持 view-source 了,为了使用 view-source 而装一个 FireFox 似乎有点麻烦,于是加了读取网页代码的功能。目前是读取网站的首页的代码。
其中的一些代码参考了MASM32官方论坛的贴子:
http://www.masm32.com/board/index.php?topic=8197.0
增加了SearchStr()来搜索网页代码结束位置后,程序运行时间要多一些,改进的方法是参考网站的返回信息中的Content-Length的值,只在最后一次从Sock读取数据时进行搜索……
; FileName: WebHome.asm
; Author: Purple Endurer
; Function: Read a web homepage content
; DevEnv: Win XP SP2 + MASM32 v8
; log
; ------------------------------
; 2007-12-05 Added SearchStr() to search the webpage code end mark string
; 2007-12-04 Can read web homepage content
; 2007-12-03 Created!
;<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
; Target: 32-bit x86, flat memory model, stdcall calling convention (Win32).
; Syntax: MASM (Intel operand order), case-sensitive symbols.
.386
.model flat, stdcall
option casemap: none

; MASM32 SDK headers and import libraries.
; The paths use the SDK's default install location on the current drive.
include     \masm32\include\windows.inc
include     \masm32\include\kernel32.inc
includelib  \masm32\lib\kernel32.lib
include     \masm32\include\user32.inc
includelib  \masm32\lib\user32.lib
include     \masm32\include\shell32.inc
includelib  \masm32\lib\shell32.lib
include     \masm32\include\wsock32.inc
includelib  \masm32\lib\wsock32.lib
; Forward declarations so that `invoke` can type-check calls to procedures
; that are defined further down in this file.
WinMain         PROTO   :DWORD, :DWORD, :DWORD, :DWORD
btnShow_Click   PROTO
fInitInternet   PROTO   :DWORD
fConnect        PROTO   lpszHostName:DWORD, nPortNumber:DWORD
SendHttpHead    PROTO   :LPSTR, :LPSTR
ReadSockData    PROTO   :LPSTR, :DWORD
ResizeConctrol  PROTO   :DWORD
SearchStr       PROTO   :LPSTR, :LPSTR
; Helper macros that drive the output edit control (g_hEditVerInfo).
; All of them expand to SendMessage calls, so EAX/ECX/EDX are clobbered.

; m_GotoEnd: sends EM_SETSEL (0, -1) followed by EM_SETSEL (-1, 0).
; NOTE(review): presumably meant to drop the selection and park the caret
; at the end of the text - confirm against the EM_SETSEL documentation.
m_GotoEnd MACRO
    invoke  SendMessage, g_hEditVerInfo, EM_SETSEL, 0, -1
    invoke  SendMessage, g_hEditVerInfo, EM_SETSEL, -1, 0
ENDM

; m_InsStr: replace the current selection with the string at lpszStr
; (EM_REPLACESEL, wParam = FALSE).
m_InsStr MACRO lpszStr: REQ
    invoke  SendMessage, g_hEditVerInfo, EM_REPLACESEL, FALSE, lpszStr
ENDM

; m_GoNextLine: insert the CR/LF string g_szCR.
m_GoNextLine MACRO
    invoke  SendMessage, g_hEditVerInfo, EM_REPLACESEL, FALSE, addr g_szCR
ENDM

; m_InsCrStr: insert CR/LF first, then the string at lpszStr.
m_InsCrStr MACRO lpszStr: REQ
    invoke  SendMessage, g_hEditVerInfo, EM_REPLACESEL, FALSE, addr g_szCR
    invoke  SendMessage, g_hEditVerInfo, EM_REPLACESEL, FALSE, lpszStr
ENDM
; ---- Private window message (only referenced by a commented-out
;      WSAAsyncSelect call in fInitInternet) ----
WM_FSOCKET          equ WM_USER + 0fh

; ---- Child control identifiers ----
IDC_BtnRead         equ 103
IDC_EdtURL          equ 105
IDC_EdtHTML         equ 107

; ---- Size in bytes of the shared network/text buffer g_buf ----
c_BufLen            equ 5000h

; ---- Build switches (1 = enabled) ----
c_Resize            equ 1       ; assemble the WM_SIZE handling and ResizeConctrol
d_TestData          equ 1       ; pre-fill the URL box with g_szTestURL

; ---- Initial layout: URL edit box ----
c_EditURLLeft       equ 2
c_EditURLTop        equ 5
c_EditURLWidth      equ 500
c_EditURLHeight     equ 20

; ---- Initial layout: read button, placed to the right of the URL box ----
c_BtnReadTop        equ c_EditURLTop
c_BtnReadLeft       equ (c_EditURLLeft+c_EditURLWidth+10)
c_BtnReadWidth      equ 70
c_BtnReadHeight     equ c_EditURLHeight

; ---- Initial layout: output rich edit, below the URL box and button ----
c_EdtHTMLLeft       equ 2
c_EdtHTMLTop        equ 30
c_EdtHTMLWidth      equ c_BtnReadLeft+c_BtnReadWidth+2
c_EdtHTMLHeight     equ 300

; ---- Initial main window size, derived from the control layout ----
c_WinWidth          equ c_EdtHTMLLeft+c_EdtHTMLWidth+10
c_WinHeight         equ c_EdtHTMLTop+c_EdtHTMLHeight+30

; ---- Window style passed to CreateWindowEx for the output rich edit ----
c_RichEditStyle     equ WS_CHILD or WS_VISIBLE or ES_MULTILINE or WS_VSCROLL or ES_AUTOVSCROLL or WS_HSCROLL or ES_NOHIDESEL or ES_SAVESEL or ES_SELECTIONBAR

; ---- Not referenced anywhere in this file ----
MAX_STRING_LEN      equ 8192 ; 2000h
SYSINFO_RET_OK      equ 1
SYSINFO_RET_FAIL    equ 2
;sssssssssss
.data
;sssssssssss
; The window class name and the window caption are the same string:
; g_szClsName is only a label placed on g_szAppName.
g_szClsName label byte
g_szAppName db "HttpDemo", 0
if d_TestData eq 1
g_szTestURL db "www.hcny.gov.cn", 0
endif ;d_TestData

; Error texts. fInitInternet / fConnect return a pointer to one of these
; on failure (0 means success).
g_szFailIni db "Fail initialize internet connection!", 0
g_szFailGetHostName db "Fail to get host name!", 0
g_szConnect db "Fail to connect!", 0
g_szFailWSAStartup db "Fail to WSAStartup", 0
g_szEnterURL db "请先输入URL!", 0

; Window class names for the child controls.
g_szEditCls db "EDIT", 0
g_szBtnCls db "button", 0

; g_szNoRichEdit has no terminator of its own, so the message shown to the
; user continues into the DLL name that follows (presumably deliberate).
; Keep these two lines adjacent and in this order.
g_szNoRichEdit db "无法载入"
g_szRichEditDLL db "RICHED20.DLL", 0
g_szRichEditClass db "RichEdit20A", 0
g_szBtnReadText db "&R 读取", 0

; HTTP request template for wsprintf: first %s = path after "/", second
; %s = host name.
; "Connection: close" asks the server to close the socket after the
; response; the read loop in btnShow_Click only stops when recv reports
; 0 (closed) or SOCKET_ERROR, so a kept-alive connection would leave it
; waiting.
; The template now carries its own terminator instead of running into
; g_szCR, so the request ends with exactly one blank line.
g_szFmt1 db "GET /%s HTTP/1.1", 0dh, 0ah
 db "Host:%s", 0dh, 0ah
 db "Accept: */*", 0dh, 0ah
 db "User-Agent: Mozilla/4.0"
 db "(compatible; MSIE 6.00; Windows 2000)", 0dh, 0ah
 db "Connection: close", 0dh, 0ah
 db 0dh, 0ah, 0

; CR/LF string inserted into the output control by the m_GoNextLine macro.
g_szCR db 0dh, 0ah, 0

; Marker searched for in every body chunk by btnShow_Click; the chunk is
; cut off where it is found.
g_szHTTP400 db "HTTP/1.1 400 Bad Request", 0

; wsprintf template for reporting a failed WSACleanup.
g_szFmt2 db "WSACleanup failed with error %d", 0
;sssssssssss
.data?
;sssssssssss
; ---- Module / window handles ----
g_hInstance     HINSTANCE   ?           ; set at `start` from GetModuleHandle
g_hWndMain      HANDLE      ?           ; main window, stored in WM_CREATE
g_hEditURL      HANDLE      ?           ; URL edit box
g_hBtnRead      HANDLE      ?           ; read button
g_hEditVerInfo  HANDLE      ?           ; output rich edit (target of the m_Ins* macros)
g_hRichEditDLL  HANDLE      ?           ; module handle returned by LoadLibrary

; ---- Winsock state ----
wsadata         WSADATA     <>          ; filled by WSAStartup
sin             sockaddr_in <>          ; peer address built by fConnect
sock            dd          ?           ; socket created by fInitInternet
error_code      dd          ?           ; not referenced in this file

; ---- Text buffers ----
g_szURL         db          256 dup(?)  ; host name typed by the user
g_buf           byte        c_BufLen dup (?) ; shared request/response buffer
;sssssssssss
.code
;sssssssssss
; Program entry point.
; Loads the rich edit DLL (it registers the RichEdit20A class used for the
; output control), runs WinMain, and exits with WinMain's return value.
; If the DLL cannot be loaded an error box is shown and the exit code is 1.
start:
    invoke  GetModuleHandle, NULL
    mov     g_hInstance, eax

    invoke  LoadLibrary, OFFSET g_szRichEditDLL
    .if eax != 0
        mov     g_hRichEditDLL, eax
        invoke  WinMain, g_hInstance, NULL, NULL, SW_SHOWDEFAULT
        push    eax                         ; keep WinMain's result: FreeLibrary overwrites EAX
        invoke  FreeLibrary, g_hRichEditDLL
        pop     eax
    .else
        invoke  MessageBox, 0, OFFSET g_szNoRichEdit, \
                OFFSET g_szAppName, MB_OK or MB_ICONERROR
        mov     eax, 1                      ; explicit failure exit code
    .endif
    invoke  ExitProcess, eax
;-----------------------------------------------------------------------
; WinMain - register the window class, create the main window and run
;           the message loop.
; In:    hInst      module instance handle, used for CreateWindowEx
;        hPrevInst, CmdLine, CmdShow   not used
; Out:   eax = wParam of the WM_QUIT message,
;              or 0 if the main window could not be created
; Clobb: eax, ecx, edx, flags
;-----------------------------------------------------------------------
WinMain proc hInst: DWORD, hPrevInst: DWORD, CmdLine: DWORD, CmdShow: DWORD
    LOCAL wc: WNDCLASSEX
    LOCAL msg: MSG
    LOCAL hwnd: HWND

    ;--- describe the window class
    mov     wc.cbSize, SIZEOF WNDCLASSEX
    mov     wc.style, CS_HREDRAW or CS_VREDRAW
    mov     wc.lpfnWndProc, OFFSET WndProc
    mov     wc.cbClsExtra, NULL
    mov     wc.cbWndExtra, NULL
    mov     eax, g_hInstance
    mov     wc.hInstance, eax
    ; A system colour used as a class brush must be passed as index + 1;
    ; without the +1 a different system colour is selected.
    mov     wc.hbrBackground, COLOR_APPWORKSPACE+1
    mov     wc.lpszMenuName, NULL
    mov     wc.lpszClassName, OFFSET g_szClsName
    invoke  LoadIcon, NULL, IDI_APPLICATION
    mov     wc.hIcon, eax
    mov     wc.hIconSm, eax
    invoke  LoadCursor, NULL, IDC_ARROW
    mov     wc.hCursor, eax
    invoke  RegisterClassEx, addr wc

    ;--- create the main window (WS_VISIBLE shows it immediately)
    invoke  CreateWindowEx, NULL, ADDR g_szClsName, ADDR g_szAppName, \
            WS_OVERLAPPEDWINDOW + WS_VISIBLE, CW_USEDEFAULT, CW_USEDEFAULT, \
            c_WinWidth, c_WinHeight, NULL, NULL, hInst, NULL
    mov     hwnd, eax
    .if eax == NULL
        ret                                 ; no window: nothing would ever post WM_QUIT
    .endif

    ;--- message loop
    .while TRUE
        invoke  GetMessage, ADDR msg, NULL, 0, 0
        .BREAK .IF (! eax)

        ;--- Alt+R shortcut, handled directly in the loop: it is forwarded
        ;    as a WM_COMMAND for the read button. lParam only has to be
        ;    non-zero for WndProc to accept it; the high word of wParam is
        ;    0, which equals BN_CLICKED.
        .if msg.message == WM_SYSKEYUP
            .if msg.wParam == VK_R
                invoke  PostMessage, hwnd, WM_COMMAND, IDC_BtnRead, BM_CLICK
            .endif
        .endif

        invoke  TranslateMessage, ADDR msg
        invoke  DispatchMessage, ADDR msg
    .endw

    mov     eax, msg.wParam
    ret
WinMain endp
;-----------------------------------------------------------------------
; WndProc - window procedure of the main window.
;   WM_CREATE   creates the URL edit box, the read button and the output
;               rich edit control
;   WM_COMMAND  a click on the read button fetches the page (btnShow_Click)
;   WM_DESTROY  posts WM_QUIT
;   WM_SIZE     re-lays-out the controls (only when c_Resize eq 1), then
;               falls through to DefWindowProc like every other message
; Out:   eax = 0 for WM_CREATE / WM_COMMAND / WM_DESTROY,
;              otherwise the result of DefWindowProc
; ESI/EDI are saved here because a window procedure must return to the
; system with them intact, whatever the helpers it calls do with them.
;-----------------------------------------------------------------------
WndProc proc uses esi edi hWnd: HWND, uMsg: UINT, wParam: WPARAM, lParam: LPARAM

    .if uMsg == WM_CREATE
        mov     eax, hWnd
        mov     g_hWndMain, eax

        ;--- URL edit box
        invoke  CreateWindowEx, NULL, offset g_szEditCls, NULL, \
                WS_CHILD+WS_VISIBLE+ES_AUTOHSCROLL+WS_BORDER, \
                c_EditURLLeft, c_EditURLTop, c_EditURLWidth, c_EditURLHeight, \
                hWnd, IDC_EdtURL, g_hInstance, NULL
        mov     g_hEditURL, eax

        ;--- read button
        invoke  CreateWindowEx, NULL, offset g_szBtnCls, offset g_szBtnReadText, \
                WS_CHILD+WS_VISIBLE, c_BtnReadLeft, c_BtnReadTop, c_BtnReadWidth, c_BtnReadHeight, \
                g_hWndMain, IDC_BtnRead, g_hInstance, NULL
        mov     g_hBtnRead, eax

        ;--- output control (rich edit)
        invoke  CreateWindowEx, WS_EX_CLIENTEDGE, OFFSET g_szRichEditClass, \
                NULL, c_RichEditStyle, c_EdtHTMLLeft, c_EdtHTMLTop, c_EdtHTMLWidth, c_EdtHTMLHeight, \
                hWnd, IDC_EdtHTML, g_hInstance, 0
        mov     g_hEditVerInfo, eax

        ;--- raise the text limit (the default is 64K)
        invoke  PostMessage, g_hEditVerInfo, EM_LIMITTEXT, -1, 0

if d_TestData eq 1
        invoke  SetWindowText, g_hEditURL, OFFSET g_szTestURL
endif

    .elseif uMsg == WM_COMMAND
        ; lParam != 0 means the message comes from a control (or from the
        ; Alt+R shortcut posted by the message loop in WinMain).
        .if lParam
            mov     edx, wParam
            mov     eax, edx                ; ax = control id
            shr     edx, 16                 ; dx = notification code
            .if dx == BN_CLICKED
                .if ax == IDC_BtnRead
                    invoke  GetWindowText, g_hEditURL, addr g_szURL, sizeof g_szURL
                    test    eax, eax
                    .if ZERO?
                        ; nothing typed yet
                        invoke  MessageBox, g_hWndMain, addr g_szEnterURL, addr g_szAppName, MB_ICONQUESTION
                    .else
                        invoke  btnShow_Click
                    .endif
                .endif
            .endif
        .endif

    .elseif uMsg == WM_DESTROY
        invoke  PostQuitMessage, NULL

    .else
if c_Resize eq 1
        .if uMsg == WM_SIZE
            invoke  ResizeConctrol, lParam  ; lParam = client width | height << 16
        .endif
endif
        invoke  DefWindowProc, hWnd, uMsg, wParam, lParam
        ret
    .endif

    xor     eax, eax
    ret
WndProc endp
;-----------------------------------------------------------------------
; btnShow_Click - fetch the home page of the host in g_szURL and append
;                 the response to the output control.
; Steps: start Winsock and create the socket (fInitInternet), connect to
;        port 80 (fConnect), send the request (SendHttpHead), read the
;        first buffer, split it at the blank line that ends the header,
;        then keep reading body chunks until recv reports 0 or
;        SOCKET_ERROR. The socket and Winsock are released afterwards,
;        including when the connect step fails.
; In:    g_szURL  host name (zero-terminated)
; Out:   nothing meaningful in eax
; Clobb: eax, ecx, edx, flags (EDI is saved and restored)
;-----------------------------------------------------------------------
btnShow_Click proc uses edi
    LOCAL lpBody: DWORD     ; start of the body inside the first buffer, or NULL
    LOCAL nRecv: DWORD      ; last return value of ReadSockData

    ;--- start Winsock and create the socket
    invoke  fInitInternet, g_hWndMain
    test    eax, eax
    .if !ZERO?
        ; eax points to the error text
        invoke  MessageBox, g_hWndMain, eax, addr g_szAppName, 0
        ret
    .endif

    ;--- connect the socket
    invoke  fConnect, addr g_szURL, 80
    test    eax, eax
    .if !ZERO?
        invoke  MessageBox, g_hWndMain, eax, addr g_szAppName, 0
        jmp     @btnShow_ClickCleanup       ; do not leak the socket / Winsock reference
    .endif

    invoke  SendHttpHead, addr g_szURL, NULL

    ;--- read the response header (plus whatever body arrives with it)
    invoke  ReadSockData, addr g_buf, c_BufLen

    ;--- look for CR LF CR LF. The scan stops 4 bytes before the end of
    ;    the buffer so that the dword compare never reads past g_buf.
    mov     eax, offset g_buf
    mov     edi, offset g_buf + c_BufLen - 4
    .while (eax < edi) && (dword ptr [eax] != 0a0d0a0dh)
        inc     eax
    .endw
    .if eax < edi
        add     eax, 3
        mov     byte ptr [eax], 0           ; overwrite the final LF: terminates the header text
        inc     eax                         ; eax -> first byte of the body
    .else
        xor     eax, eax                    ; no header end found
    .endif
    mov     lpBody, eax

    m_InsCrStr addr g_buf                   ; header, on a new line
    .if lpBody != NULL
        m_InsStr lpBody                     ; body part that arrived with the header
    .endif

    ;--- read the rest of the body
    .repeat
        invoke  ReadSockData, addr g_buf, c_BufLen
        mov     nRecv, eax

        ;--- cut the chunk at the end mark string, if present
        invoke  SearchStr, addr g_buf, addr g_szHTTP400
        .if eax != -1
            mov     byte ptr g_buf[eax], 0
        .endif

        ; Chunks are consecutive pieces of one stream, so they are
        ; appended without a CR/LF in between; an inserted line break
        ; would land in the middle of the page source.
        m_InsStr addr g_buf

        mov     eax, nRecv
    .until (eax == 0) || (eax == SOCKET_ERROR)

@btnShow_ClickCleanup:
    invoke  closesocket, sock
    invoke  WSACleanup
    .if eax == SOCKET_ERROR
        invoke  WSAGetLastError
        invoke  wsprintf, addr g_buf, addr g_szFmt2, eax
        m_InsCrStr addr g_buf
    .endif
    ret
btnShow_Click endp
;-----------------------------------------------------------------------
; fInitInternet - start Winsock (version 1.1 requested) and create the
;                 TCP socket stored in `sock`.
; In:    hWnd   not used (only referenced by a disabled WSAAsyncSelect call)
; Out:   eax = 0 on success,
;        otherwise a pointer to a zero-terminated error message
; Note:  on success the caller owns one WSAStartup reference and the
;        socket; on failure nothing is left allocated.
; Clobb: eax, ecx, edx, flags
;-----------------------------------------------------------------------
fInitInternet proc hWnd: dword
    invoke  WSAStartup, 101h, addr wsadata
    test    eax, eax
    .if !ZERO?
        mov     eax, offset g_szFailWSAStartup
        ret
    .endif

    ;--- create a stream socket for internet use
    invoke  socket, AF_INET, SOCK_STREAM, 0
    .if eax == INVALID_SOCKET
        invoke  WSACleanup                  ; balance the successful WSAStartup above
        mov     eax, offset g_szFailIni
        ret
    .endif

    mov     sock, eax
    ;invoke WSAAsyncSelect, sock, hWnd, WM_FSOCKET, FD_CONNECT+FD_READ+FD_CLOSE+FD_WRITE+FD_ACCEPT
    xor     eax, eax
    ret
fInitInternet endp
;-----------------------------------------------------------------------
; fConnect - resolve a host name and connect the global socket to it.
; In:    lpszHostName  zero-terminated host name, passed to gethostbyname
;        nPortNumber   TCP port in host byte order
; Out:   eax = 0 on success,
;        otherwise a pointer to a zero-terminated error message
; Uses:  sock (must already be created), sin (filled in here)
; Clobb: eax, ecx, edx, flags
;-----------------------------------------------------------------------
fConnect proc uses esi edi lpszHostName: dword, nPortNumber: dword
    invoke  gethostbyname, lpszHostName
    test    eax, eax
    .if ZERO?
        mov     eax, offset g_szFailGetHostName
        ret
    .endif

    ;--- take the first entry of the address list
    mov     edx, (hostent ptr [eax]).h_list ; edx -> array of address pointers
    mov     edx, [edx]                      ; edx -> first address
    mov     edx, [edx]                      ; edx  = the address value itself
    mov     sin.sin_addr.S_un.S_addr, edx
    mov     sin.sin_family, AF_INET

    invoke  htons, nPortNumber              ; port to network byte order
    mov     sin.sin_port, ax

    invoke  connect, sock, addr sin, sizeof sin
    .if eax == SOCKET_ERROR
        invoke  WSAGetLastError             ; result is not used; kept as a debugging hook
        mov     eax, offset g_szConnect
    .endif
    ret
fConnect endp
;-----------------------------------------------------------------------
; SendHttpHead - format the HTTP request into g_buf, echo it to the
;                output control and send it over `sock`.
; In:    lpszHostName  host name for the second %s of g_szFmt1
;        lpParam       path after the leading "/" for the first %s of
;                      g_szFmt1; NULL is treated as an empty path
; Out:   eax = return value of send (bytes sent, or SOCKET_ERROR)
; Clobb: eax, ecx, edx, flags
;-----------------------------------------------------------------------
SendHttpHead proc lpszHostName: LPSTR, lpParam: LPSTR
    LOCAL cbRequest: DWORD  ; length of the formatted request in bytes
    LOCAL chEmpty: BYTE     ; one-byte empty string used when lpParam is NULL

    ; Never hand a NULL pointer to %s: substitute an empty string so the
    ; request line is always well defined.
    .if lpParam == NULL
        mov     chEmpty, 0
        lea     eax, chEmpty
        mov     lpParam, eax
    .endif

    invoke  wsprintf, addr g_buf, addr g_szFmt1, lpParam, lpszHostName
    mov     cbRequest, eax                  ; characters written, terminator not counted

    m_InsStr addr g_buf                     ; show the request in the output control

    ; Send only the request itself. Sending `sizeof g_buf` would also
    ; transmit the terminator and everything behind it in the buffer.
    invoke  send, sock, addr g_buf, cbRequest, 0
    ret
SendHttpHead endp
;-----------------------------------------------------------------------
; ReadSockData - zero the buffer, then receive from `sock` until the
;                buffer is full, the peer closes, or an error occurs.
;                At most dwMax_buf_len-1 bytes are stored, so the data is
;                always followed by at least one zero byte.
; In:    lpszBuffer     destination buffer
;        dwMax_buf_len  size of the buffer in bytes
; Out:   eax = result of the last recv call:
;              > 0           buffer filled completely, more data may follow
;              0             connection closed by the peer
;                            (also returned when dwMax_buf_len < 2)
;              SOCKET_ERROR  receive failed
; Clobb: eax, ecx, edx, flags (ESI/EDI are saved and restored)
;-----------------------------------------------------------------------
ReadSockData proc uses esi edi lpszBuffer: LPSTR, dwMax_buf_len: dword
    invoke  RtlZeroMemory, lpszBuffer, dwMax_buf_len

    xor     eax, eax
    mov     esi, dwMax_buf_len
    cmp     esi, 1
    jbe     @ReadSockDataRet                ; no room for data plus terminator
    dec     esi                             ; esi = bytes still free (last byte stays 0)
    mov     edi, lpszBuffer                 ; edi = write position

@ReadSockDataNext:
    invoke  recv, sock, edi, esi, 0
    test    eax, eax
    jle     @ReadSockDataRet                ; 0 = closed, negative = SOCKET_ERROR
    add     edi, eax
    sub     esi, eax
    jnz     @ReadSockDataNext               ; loop while there is space left

@ReadSockDataRet:
    ret
ReadSockData endp
;-----------------------------------------------------------------------
; SearchStr - find the first occurrence of one zero-terminated string
;             inside another (case-sensitive byte comparison).
; In:    lpszOrgStr  string to search in
;        lpszSubStr  string to search for
; Out:   eax = zero-based offset of the first match inside lpszOrgStr,
;              or -1 if there is no match or lpszSubStr is empty
; Clobb: eax, ecx, edx, flags (ESI/EDI are saved and restored, as stdcall
;        requires of a callee)
;-----------------------------------------------------------------------
SearchStr proc uses esi edi lpszOrgStr: LPSTR, lpszSubStr: LPSTR
    mov     edx, lpszSubStr                 ; edx = start of the pattern
    cmp     byte ptr [edx], 0
    je      @NoFound                        ; an empty pattern never matches

    mov     ecx, lpszOrgStr                 ; ecx = current candidate position

@NextCandidate:
    mov     esi, ecx                        ; esi walks the text
    mov     edi, edx                        ; edi walks the pattern

@CompareChar:
    mov     ah, byte ptr [edi]
    test    ah, ah
    jz      @Found                          ; whole pattern matched
    mov     al, byte ptr [esi]
    test    al, al
    jz      @NoFound                        ; text ended: no later match is possible
    inc     esi
    inc     edi
    cmp     al, ah
    je      @CompareChar

    inc     ecx                             ; mismatch: try the next start position
    jmp     @NextCandidate

@NoFound:
    or      eax, -1
    ret

@Found:
    mov     eax, ecx
    sub     eax, lpszOrgStr                 ; pointer -> offset
    ret
SearchStr endp
if c_Resize eq 1
;-----------------------------------------------------------------------
; ResizeConctrol - lay the three child controls out again for a new
;                  client size.
; In:    wh   low word = client width, high word = client height
;             (WndProc passes the lParam of WM_SIZE)
; Out:   eax = result of the last MoveWindow call
; Clobb: eax, ecx, edx, flags
;-----------------------------------------------------------------------
ResizeConctrol PROC wh: DWORD
    LOCAL xRight: DWORD     ; running x value, starts as the client width

    movzx   eax, word ptr wh                ; eax = client width
    mov     xRight, eax
    mov     ecx, wh
    shr     ecx, 16                         ; ecx = client height

    ;--- output control: nearly full width, everything below the top row
    sub     eax, 4
    sub     ecx, c_EditURLHeight+10
    invoke  MoveWindow, g_hEditVerInfo, 2, c_EdtHTMLTop, eax, ecx, FALSE

    ;--- read button: keeps its size, 5 px away from the right edge
    mov     eax, xRight
    sub     eax, 5+c_BtnReadWidth           ; eax = left edge of the button
    mov     xRight, eax
    invoke  MoveWindow, g_hBtnRead, eax, c_BtnReadTop, c_BtnReadWidth, c_BtnReadHeight, TRUE

    ;--- URL box: width is the button's left edge minus 10
    mov     eax, xRight
    sub     eax, 10
    invoke  MoveWindow, g_hEditURL, c_EditURLLeft, c_EditURLTop, eax, c_EditURLHeight, TRUE
    ret
ResizeConctrol ENDP
endif ; c_Resize
end start