Scrapy 存储到 MongoDB:如果存在则忽略,不存在就插入
def process_item(self, item, spider):
    """Store the item in MongoDB only if no document with its URL exists yet.

    The item is matched on its ``a_source_url`` field. Because the write uses
    ``$setOnInsert`` together with ``upsert=True``, the item's fields are
    written only when the upsert creates a new document; a document that
    already matches is left untouched.

    Args:
        item: The scraped item; it is converted with ``dict(item)``.
        spider: The spider that produced the item (not used here).

    Returns:
        The same ``item`` object, whether or not the write succeeded.
    """
    dict_item = dict(item)
    try:
        # NOTE(review): if self.post is a PyMongo collection, update() was
        # removed in PyMongo 4 and update_one() is the replacement --
        # confirm the driver version before switching.
        self.post.update(
            {'a_source_url': dict_item['a_source_url']},
            {"$setOnInsert": dict_item},
            upsert=True,
        )
    except Exception as exc:
        # Best-effort write: a failure (including a missing 'a_source_url'
        # key) is logged with its cause and the item is still returned.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        log.msg("Failed to store %r: %r" % (dict_item, exc), logging.ERROR)
    return item
Scrapy 存储到 MongoDB:如果存在则更新,不存在就插入
def process_item(self, item, spider):
    """Store the item in MongoDB, updating the document with the same URL.

    The item is matched on its ``a_source_url`` field. The write uses
    ``$set`` together with ``upsert=True``: a matching document has the
    item's fields set on it, and a new document is inserted when nothing
    matches.

    Args:
        item: The scraped item; it is converted with ``dict(item)``.
        spider: The spider that produced the item (not used here).

    Returns:
        The same ``item`` object, whether or not the write succeeded.
    """
    dict_item = dict(item)
    try:
        # NOTE(review): if self.post is a PyMongo collection, update() was
        # removed in PyMongo 4 and update_one() is the replacement --
        # confirm the driver version before switching.
        self.post.update(
            {'a_source_url': dict_item['a_source_url']},
            {"$set": dict_item},
            upsert=True,
        )
    except Exception as exc:
        # Best-effort write: a failure (including a missing 'a_source_url'
        # key) is logged with its cause and the item is still returned.
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        log.msg("Failed to store %r: %r" % (dict_item, exc), logging.ERROR)
    return item
当 update 的参数 upsert 为 true 时,如果要更新的文档不存在,就会插入一条新的记录,此时 $setOnInsert 操作符会将指定的值赋给指定的字段;如果要更新的文档已经存在,$setOnInsert 操作符不做任何处理。