总结下,Python 下载网页的几种方法
1
fd=urllib2.urlopen(url_link)
data=fd.read()
这是最简洁的一种,当然也是Get的方法
2
通过GET的方法
def GetHtmlSource(url):
try:
htmSource = ''
req = urllib2.Request(url)
fd = urllib2.urlopen(req,"")
while 1:
data = fd.read(1024)
if not len(data):
break
htmSource += data
fd.close()
del fd
del req
htmSource = htmSource.decode('cp936')
htmSource = formatStr(htmSource)
return htmSource
except socket.error, err:
str_err = "%s" % err
return ""
3
通过GET的方法
def GetHtmlSource_Get(htmurl):
htmSource = ""
try:
urlx = httplib.urlsplit(htmurl)
conn = httplib.HTTPConn