#encoding=utf-8
import re
import requests
import urllib2
import datetime
import MySQLdb
from bs4 import BeautifulSoup
import sys
# Python 2 only: switch the interpreter-wide default codec used for implicit
# str <-> unicode conversions to UTF-8. reload(sys) comes first because
# sys.setdefaultencoding is normally removed from the module at start-up
# (by site.py in CPython 2) and reloading makes it available again.
# NOTE(review): this affects every module in the process -- confirm that is intended.
reload(sys)
sys.setdefaultencoding("utf-8")
class Splider(object):
def __init__(self):
    """Announce on standard output that crawling is starting."""
    # Parenthesised form prints the same text under the Python 2 print statement.
    print(u'开始爬取内容...')
##用来获取网页源代码
def getsource(self,url):
    """Download ``url`` and return the raw response body.

    The request is sent with a desktop Chrome User-Agent header instead of
    the library's default one.

    :param url: address of the page to fetch.
    :return: the body exactly as returned by the response's ``read()``;
        no decoding is done here.

    Errors raised by ``urllib2.urlopen`` or by ``read()`` are not caught and
    propagate to the caller.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2652.0 Safari/537.36'}
    req = urllib2.Request(url=url, headers=headers)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Close the handle even when read() raises, so a failed download
        # does not leak the underlying connection.
        response.close()
##changepage用来生产不同页数的链接
def changepage(self,url,total_page):
now_page = int(re.search('page/(\d+)',url,re.S).group(1))
page