1-首先安装pyspider：
pip install pyspider
2-在任意一个文件夹下执行以下命令，启动pyspider：
pyspider 或者 pyspider all
3-执行命令后，在浏览器中打开 WebUI（默认地址 http://localhost:5000），点击 Create 创建项目
4-创建项目后，进入项目的编辑页面，编写如下代码：
from pyspider.libs.base_handler import *
from fake_useragent import UserAgent
# Module-level fake_useragent instance; Handler.crawl_config reads ua.random from it.
ua=UserAgent()
from pymongo import MongoClient
class Handler(BaseHandler):
# Crawl settings declared on the handler. ua.random is evaluated a single
# time, when this class body executes, so every later read of crawl_config
# sees the same User-Agent string.
# NOTE(review): pyspider presumably applies these as defaults to each
# self.crawl() call - confirm against the pyspider docs.
crawl_config = {'headers': {'User-Agent': ua.random}}
@every(minutes=4 * 60)
def on_start(self):
    """Queue the listing index page and route its response to index_page.

    The request is issued with fetch_type='js'; per the pyspider docs this
    asks for the page to be fetched through its JavaScript-rendering
    fetcher. The method is decorated with every(minutes=240).
    """
    start_url = 'https://cc.lianjia.com/ershoufang/nanguanqu/co32/'
    self.crawl(start_url, callback=self.index_page, fetch_type='js')
@config(age=60)
def index_page(self, response):
    """Schedule one crawl per result page listed in the pagination bar.

    The page count is taken from the text of the second-to-last ``<a>``
    inside the ``page-box house-lst-page-box`` div. Each page URL is built
    by replacing ``co32`` in ``response.url`` with ``pg<N>co32`` (N running
    from 1 to the page count, inclusive) and is handed to ``self.page``.

    Args:
        response: the fetched index page; its ``etree`` and ``url``
            attributes are read.

    Raises:
        IndexError: if the pagination XPath query matches nothing.
        ValueError: if the matched link text is not an integer.
    """
    last_page_text = response.etree.xpath(
        '//div[@class="page-box house-lst-page-box"]/a[last()-1]/text()'
    )[0]
    maxpage = int(last_page_text)
    for index in range(1, maxpage + 1):
        # range() supplies each page number; the loop variable is only read.
        page_url = response.url.replace('co32', 'pg%dco32' % index)
        self.crawl(page_url, callback=self.page)
@config(priority=4)
def page(self,response):
for ele in response