# a. Add the corresponding loader class to items.py
import scrapy
from scrapy.loader import ItemLoader
#导入预处理器
from scrapy.loader.processors import MapCompose,TakeFirst
def complete_url(origin_url):
    """Return the absolute Tieba URL for a link extracted from a page.

    This function is used as the ``MapCompose`` input processor of the
    ``url`` item field, so it is called once per extracted value.

    Args:
        origin_url: The href as scraped, normally a site-relative path
            such as ``"/p/123"``.

    Returns:
        The link resolved against ``https://tieba.baidu.com``. Relative
        paths (with or without a leading slash) are joined onto the site
        root; already-absolute URLs are returned unchanged.
    """
    # Imported locally so this function is self-contained; the lookup is a
    # cheap sys.modules hit after the first call.
    from urllib.parse import urljoin

    # urljoin (rather than string concatenation) avoids producing
    # "https://tieba.baidu.comp/123" for "p/123" and avoids double-prefixing
    # links that are already absolute.
    return urljoin("https://tieba.baidu.com", origin_url)
class TiebaItemLoader(ItemLoader):
    """Item loader holding the parsing rules for the Tieba spider.

    Fields that do not declare their own output processor fall back to
    ``default_output_processor`` below.
    """

    # Extracted values are collected as lists; TakeFirst() makes each field
    # output a single value (the first non-empty one) instead of the list.
    default_output_processor = TakeFirst()
class Lab1Item(scrapy.Item):
    """Item with the fields scraped for one entry: its title and its URL."""

    # Title text, stored as extracted (no field-specific processors).
    title = scrapy.Field()
    # Link to the entry. Every extracted href is passed through
    # complete_url, which prefixes it with the Tieba site root.
    url = scrapy.Field(
        input_processor=MapCompose(complete_url),
    )