修改settings文件
# Enable the project's item pipeline. Per the Scrapy docs the integer
# (conventionally 0-1000) sets the run order: lower values run first.
ITEM_PIPELINES = {
    'mypjt.pipelines.MypjtPipeline': 300,
}
编写pipelines文件
import codecs
class MypjtPipeline(object):
    """Item pipeline that writes every scraped item to ``mydata1.txt``.

    Each item is written as its ``str()`` form followed by a newline,
    encoded as UTF-8. The file is opened (and truncated) when the
    pipeline object is created and closed in ``close_spider``.
    """

    def __init__(self):
        # codecs.open with an encoding transparently encodes written text.
        self.file = codecs.open("mydata1.txt", "wb", encoding="utf-8")

    def process_item(self, item, spider):
        """Echo *item* to stdout, write it as one line, and return it.

        Args:
            item: The scraped item; only its ``str()`` form is used.
            spider: The spider that produced the item (unused).

        Returns:
            The same *item*, unchanged.
        """
        line = str(item) + '\n'
        print(line)
        self.file.write(line)
        return item

    def close_spider(self, spider):
        """Close the output file; *spider* is unused."""
        self.file.close()
spider代码
import scrapy
from mypjt.items import MypjtItem
class MyspdSpider(scrapy.Spider):
    """Spider that scrapes the ``<title>`` text of one Sina tech article."""

    name = 'myspd'
    allowed_domains = ['sina.com.cn']
    start_urls = ['http://tech.sina.com.cn/d/s/2016-09-17/doc-ifxvyqwa3324638.shtml']

    def parse(self, response):
        """Yield a single ``MypjtItem`` whose ``title`` holds the page title.

        Args:
            response: The downloaded page for one of ``start_urls``.

        Yields:
            A ``MypjtItem`` with ``title`` set to the list of strings
            matched by the XPath ``/html/head/title/text()``.
        """
        item = MypjtItem()
        # extract() converts the SelectorList into plain strings, so the item
        # carries the scraped text rather than Selector objects.
        item["title"] = response.xpath("/html/head/title/text()").extract()
        print(item["title"])
        yield item