今天在公开课里看到直接用get_page函数来获取网页的代码
可自己尝试了一下，发现无论是在python2还是python3里头，get_page都并不是预先设定好的函数
解决方案如下（以下代码基于 Python 2；在 Python 3 中 urllib2 已拆分为 urllib.request 和 urllib.error）:
import contextlib
import urllib2
def get_page(url):
    """Fetch *url* and return the raw body of the response.

    Args:
        url: Address to open with ``urllib2.urlopen``.

    Returns:
        Whatever ``read()`` on the opened response yields (the full body).

    Raises:
        Any error raised by ``urllib2.urlopen`` or ``read()`` propagates to
        the caller unchanged.
    """
    # The response object holds an open connection.  Python 2 responses are
    # not context managers, so contextlib.closing guarantees close() runs
    # even when read() raises.
    with contextlib.closing(urllib2.urlopen(url)) as response:
        return response.read()
def get_next_target(page):
    """Locate the first double-quoted ``<a href=`` target in *page*.

    Only double-quoted values are recognised: the URL is the text between
    the first two ``"`` characters found at or after the ``<a href=`` marker.

    Args:
        page: HTML text to scan.

    Returns:
        A ``(url, end_quote)`` tuple, where *url* is the quoted text and
        *end_quote* is the index of the closing quote within *page*.
        ``(None, 0)`` is returned when *page* has no ``<a href=`` marker, or
        when the marker is not followed by a complete pair of double quotes.
    """
    start_link = page.find('<a href=')
    if start_link == -1:
        return None, 0

    start_quote = page.find('"', start_link)
    if start_quote == -1:
        # No opening quote after the marker: using -1 as a slice bound would
        # yield unrelated text, so report "no link" instead.
        return None, 0

    end_quote = page.find('"', start_quote + 1)
    if end_quote == -1:
        # Unterminated value: there is no well-formed URL to return.
        return None, 0

    return page[start_quote + 1:end_quote], end_quote
def print_all_links(page):
    """Print each non-empty link target that get_next_target finds in *page*.

    The page is scanned front to back: after every match the text before the
    match's end position is dropped and the remainder is scanned again.

    Args:
        page: HTML text to scan.
    """
    while True:
        url, endpos = get_next_target(page)
        if url is None:
            # None is the "no further link" sentinel.
            break
        # An empty href ("") is still a match; it must not end the scan the
        # way a plain truthiness test on url would.  There is just nothing
        # to print for it.
        if url:
            # Parenthesised single-argument form prints the same text under
            # the Python 2 print statement and the print function.
            print(url)
        if endpos <= 0:
            # A non-positive end position cannot move the scan forward;
            # stop rather than rescan the same text forever.
            break
        page = page[endpos:]
# Demo run: download one page, then list the links found in it.
page_source = get_page('http://xkcd.com/353')
print_all_links(page_source)