scrapy爬虫创建文件
当爬取的页面带有分类目录时,需要按照对应的标题创建不同的文件夹,将爬取的数据分门别类地保存。
# -*- coding: utf-8 -*-
# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html
import os
class ZuoWenPipeline(object):
    """Item pipeline that stores each scraped essay under a per-title folder.

    Every item is appended to
    ``<base_dir>/<item['title']>/<item['title_text']>.txt``.

    Paths are always built explicitly from ``base_dir``; the pipeline never
    changes the process working directory, so other components that rely on
    relative paths are unaffected.
    """

    # Output root used when the pipeline is constructed without arguments.
    DEFAULT_BASE_DIR = r'F:\scrapy\xunlian_b\zuo_wen\zuo'

    def __init__(self, base_dir=None):
        """Remember the output root and make sure it exists.

        Args:
            base_dir: Directory under which the per-title folders are
                created. Defaults to ``DEFAULT_BASE_DIR`` when omitted, so
                the no-argument construction keeps working.
        """
        self.base_dir = self.DEFAULT_BASE_DIR if base_dir is None else base_dir
        # makedirs also creates missing parent directories and does not fail
        # when the directory is already there.
        os.makedirs(self.base_dir, exist_ok=True)

    def process_item(self, item, spider):
        """Append one item's heading and body to its text file.

        Reads four keys from ``item``: ``title`` (folder name),
        ``title_text`` (file name without extension), ``biao_ti`` (first
        line written) and ``nei_rong`` (second line written).

        Args:
            item: Mapping that provides the four keys listed above.
            spider: Unused; accepted because it is part of the signature.

        Returns:
            The same ``item`` object, unchanged.
        """
        # NOTE(review): title / title_text are used verbatim as path
        # components; presumably they come from scraped pages, so confirm
        # they cannot contain path separators or characters the target
        # filesystem rejects.
        title_dir = os.path.join(self.base_dir, item['title'])

        # Create the folder on first sight of a title and then fall through
        # to the write below, so the first item of a new title is stored too.
        os.makedirs(title_dir, exist_ok=True)

        file_path = os.path.join(title_dir, item['title_text'] + '.txt')
        with open(file_path, 'a', encoding='utf-8') as f:
            f.write(item['biao_ti'] + '\n')
            f.write(item['nei_rong'] + '\n')
        return item
其中用到的 os 模块函数如下:
os.getcwd() # 获取当前所在的路径
os.path.exists() # 判断这个目录文件是否存在
os.chdir() # 改变当前工作路径
os.mkdir() # 创建文件夹