# 数据库的存储 (Database storage):
import pymysql
class mysqlPipeline(object):
    """Scrapy-style item pipeline that inserts each item into a MySQL table.

    Every processed item contributes one row built from its ``name`` and
    ``sex`` fields to the ``test`` table, and the insert is committed
    immediately.
    """

    def __init__(self):
        """Open the database connection and create the cursor used for inserts."""
        # NOTE(review): connection settings are hard-coded here; consider
        # reading them from project settings instead.
        self.connect = pymysql.connect(
            host="127.0.0.1",  # database server address (usually localhost)
            user="root",       # database user name
            password="root",   # database password
            database="test",   # database (schema) name
        )
        self.cursor = self.connect.cursor()

    def process_item(self, item, spider):
        """Insert one item and return it so later pipelines still receive it.

        Args:
            item: Mapping-like object providing ``name`` and ``sex``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` that was passed in.
        """
        # Placeholders must NOT be wrapped in quotes: the driver escapes and
        # quotes the bound parameters itself.
        sql = "insert into test values (%s, %s)"
        self.cursor.execute(sql, (item["name"], item["sex"]))
        self.connect.commit()
        return item

    def close_spider(self, spider=None):
        """Release the cursor and the connection.

        Args:
            spider: The spider being closed (unused). Optional so that
                existing zero-argument calls keep working.
        """
        self.cursor.close()
        self.connect.close()
# json格式的写入 (Writing items as JSON):
import json
class jsonPipleline(object):
    """Scrapy-style item pipeline that writes each item as one JSON line.

    Output goes to ``test.json`` in the current working directory, one JSON
    object per line, encoded as UTF-8.
    """

    def __init__(self):
        """Create (or truncate) the output file, encoded as UTF-8."""
        self.fb = open('test.json', 'w', encoding='utf-8')

    def process_item(self, item, spider):
        """Serialize one item to a JSON line and return the item unchanged.

        Args:
            item: Mapping-like object; converted with ``dict(item)`` before
                serialization.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` that was passed in.
        """
        # json.dumps returns a string (json.dump needs a file argument and
        # returns None). ensure_ascii=False keeps non-ASCII text readable in
        # the UTF-8 file instead of emitting \uXXXX escapes.
        item_json = json.dumps(dict(item), ensure_ascii=False)
        self.fb.write(item_json + '\n')
        return item

    def close_spider(self, spider):
        """Close the output file when the spider finishes."""
        self.fb.close()
# csv格式的写入 (Writing items as CSV):
import csv
class csvPipleline(object):
    """Scrapy-style item pipeline that appends each item as a CSV row.

    Output goes to ``test.csv`` in the current working directory. A header
    row is written once when the pipeline is created.
    """

    def __init__(self):
        """Create (or truncate) the CSV file and write the header row."""
        # Explicit UTF-8: the header is non-ASCII, so relying on the platform
        # default encoding makes the output locale-dependent and can fail.
        # newline='' lets the csv module control line endings itself.
        self.file = open('test.csv', 'w', newline='', encoding='utf-8')
        self.csvwriter = csv.writer(self.file)
        self.csvwriter.writerow(['名字', '性别'])

    def process_item(self, item, spider):
        """Write one row with the item's name and sex, then return the item.

        Args:
            item: Mapping-like object providing ``name`` and ``sex``.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` that was passed in.
        """
        self.csvwriter.writerow([item["name"], item["sex"]])
        return item

    def close_spider(self, spider):
        """Close the CSV file when the spider finishes."""
        self.file.close()