抓取网页内容
# Download a web page and keep its raw body in `html`.
try:
    import urllib2  # Python 2
except ImportError:
    # Python 3 moved urlopen into urllib.request; alias it so the code
    # below reads the same on either interpreter.
    import urllib.request as urllib2

url = "http://www.w3cschool.cc/python/python-tutorial.html"
urlfile = urllib2.urlopen(url)
try:
    # read() returns the undecoded response body.
    html = urlfile.read()
finally:
    # The response wraps an open network connection; release it even if
    # read() raises.
    urlfile.close()
获取 >>> 和 # 之间的内容:
import re


def getlist(filename):
    """Return every piece of text found between ``>>>`` and the next ``#``.

    The file is read in full and scanned with a non-greedy pattern, so each
    match stops at the first ``#`` that follows a ``>>>`` marker. Because the
    pattern is compiled with ``re.DOTALL``, a match may span several lines.
    The markers themselves are not part of the returned strings.

    Args:
        filename: Path of the text file to scan.

    Returns:
        A list of the matched substrings, in file order; empty when the
        file contains no ``>>> ... #`` pair.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    # `with` guarantees the handle is closed even if reading fails.
    with open(filename) as source:
        contents = source.read()
    # Look-behind / look-ahead keep ">>>" and "#" out of the results.
    return re.findall(r"(?<=>>>).*?(?=#)", contents, re.DOTALL)