# 代码不全 (NOTE: the code below is incomplete.)
# coding=utf-8
import urllib,urllib2
import re
class QSBK:
    """Small scraper for the "hot" listing pages of qiushibaike.com.

    Attributes:
        pageIndex: Page counter, initialised to 1. No method visible in this
            file reads it yet.
        user_agent: Value sent as the ``User-Agent`` request header.
        headers: Header dict handed to every ``urllib2.Request``.
        stories: List created empty by ``__init__``. No method visible in
            this file fills it yet.
        item_pattern: Regular expression (string or compiled) with at least
            four capture groups, used by ``getpageItems``.
    """

    # NOTE(review): the original source called ``re.compile(,re.S)`` with the
    # pattern missing, so the real expression is unknown. Set this attribute
    # (or pass ``pattern=`` to getpageItems) before parsing pages.
    item_pattern = None

    def __init__(self):
        """Set up paging state and the request headers."""
        self.pageIndex = 1
        self.user_agent = ''
        self.headers = {'User-Agent': self.user_agent}
        self.stories = []

    # The original spelled the constructor ``_init_``, so it never ran on
    # instantiation. The old name is kept as an alias for explicit callers.
    _init_ = __init__

    def getpage(self, pageIndex):
        """Fetch the raw source of one hot-listing page.

        Args:
            pageIndex: Page number appended to the listing URL.

        Returns:
            The response body as returned by ``read()``, or ``None`` when any
            exception occurred (the exception is printed, not re-raised).
        """
        try:
            url = 'http://www.qiushibaike.com/hot/page/' + str(pageIndex)
            request = urllib2.Request(url, headers=self.headers)
            respons = urllib2.urlopen(request)
            return respons.read()
        except Exception as e:
            # Best-effort fetch: report the failure and let callers see None.
            print(e)
            return None

    def getpageItems(self, pageIndex, pattern=None):
        """Fetch one page and extract its stories.

        Args:
            pageIndex: Page number forwarded to ``getpage``.
            pattern: Optional regular expression (string or compiled pattern)
                with at least four capture groups. Defaults to
                ``self.item_pattern``. Strings are compiled with ``re.S``.

        Returns:
            A list with one ``[author, content, reads, comments]`` list per
            match (field names follow the printed labels), or ``None`` when
            the page could not be loaded.

        Raises:
            ValueError: If no pattern was given and ``item_pattern`` is unset.
        """
        if pattern is None:
            pattern = self.item_pattern
        if pattern is None:
            # Fail before doing any network I/O: without a pattern nothing
            # can be extracted from the page.
            raise ValueError('no item pattern configured for getpageItems')

        pagecode = self.getpage(pageIndex)
        if not pagecode:
            print('页面加载失败')
            return None

        # re.compile() rejects flags on an already compiled pattern, so only
        # compile when a plain string was supplied.
        if hasattr(pattern, 'findall'):
            regex = pattern
        else:
            regex = re.compile(pattern, re.S)

        pageStories = []
        for item in regex.findall(pagecode):
            # list.append takes exactly one argument; store the four captured
            # fields together as a single record.
            story = [item[0], item[1], item[2], item[3]]
            pageStories.append(story)
            print('作者: %s' % story[0])
            print('内容: %s' % story[1])
            print('阅读: %s' % story[2])
            print('评论: %s' % story[3])
        return pageStories

    def loadpage(self):
        """Placeholder for the page-loading step.

        NOTE(review): in the original source this method consisted of a bare
        ``if`` with no condition or body, so its intended behaviour cannot be
        recovered from this file.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError('loadpage is incomplete in the original source')
# Script entry: build the scraper and request page 1 of the hot listing.
# The value returned by getpage is not used here.
spider = QSBK()
spider.getpage(pageIndex=1)