#EE论坛爬虫 #by JerryFang #2013.11.13 import re import urllib2 import urllib import cookielib def visitpage(url): nr = urllib2.urlopen(url).read().decode('gbk') print nr z = re.compile('''<a href=.*?class="xi2">(.*?)</a> ''', re.S|re.MULTILINE) t = re.compile('''<em id=".*?">(.*?)</em> ''', re.S|re.MULTILINE) c = re.compile(ur'''<table.*?>(.*?)</table> ''', re.DOTALL|re.MULTILINE) ## author = z.findall(nr) ## time = t.findall(nr) cont = c.findall(nr) ## print author ## print time print cont raw_input('press any key') loginurl =