scrapy中pipeline数据去重和更新

class NewsEducationPipeline(object):
    """Scrapy item pipeline that de-duplicates and upserts news items by URL.

    Each ``NewsEducationItem`` is written to the MySQL table ``new_info``:
    if a row with the same ``url`` already exists it is updated in place,
    otherwise a new row is inserted.  Items of any other class are passed
    through untouched.
    """

    def __init__(self):
        """Open the MySQL connection and cursor using the project settings."""
        self.connect = pymysql.connect(
            host=settings.MYSQL_HOST,
            db=settings.MYSQL_DBNAME,
            user=settings.MYSQL_USER,
            passwd=settings.MYSQL_PASSWD,
            charset='utf8',
            use_unicode=True)
        self.cursor = self.connect.cursor()

    def process_item(self, item, spider):
        """Stamp the item with the crawl time and upsert it into ``new_info``.

        Args:
            item: The scraped item.  Only exact ``NewsEducationItem``
                instances are persisted.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item``, always, so later pipelines keep receiving it.

        Raises:
            KeyError: If a ``NewsEducationItem`` lacks one of the expected
                fields.  Database errors are logged and rolled back, not
                raised, so a single bad row does not stop the crawl.
        """
        # Function-scope import: the file's import block is not visible here.
        import logging

        item["crawled"] = datetime.now()

        # Exact class match is kept on purpose (same as the original check),
        # so subclasses handled by other pipelines are not written here.
        if type(item) is not NewsEducationItem:
            # The original returned None for these items, which made every
            # downstream pipeline receive None instead of the item.
            return item

        url = item["url"]
        # Column order shared by both statements (url is handled separately).
        values = (
            item["new_id"],
            item["title"],
            item["intro"],
            item["img"],
            item["kl"],
            item["time"],
            item["crawled"],
            item["media"],
            item["source"],
        )

        try:
            # Existence check only; no need to fetch the whole row.
            self.cursor.execute(
                """select 1 from new_info where url = %s limit 1""", (url,))
            if self.cursor.fetchone() is not None:
                self.cursor.execute(
                    """update new_info set new_id = %s,title = %s,intro = %s,
                        img = %s,kl = %s,time = %s,crawled = %s,media = %s,
                        source = %s
                        where url = %s""",
                    values + (url,))
            else:
                self.cursor.execute(
                    """insert into new_info(new_id,title,intro,img,kl,time,
                        crawled,media,source,url)
                        values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)""",
                    values + (url,))
            # Commit after BOTH branches; previously only inserts were
            # committed, so updates were silently discarded.
            self.connect.commit()
        except Exception:
            # Best-effort persistence: report the failure with its traceback
            # and undo the partial transaction, but keep the crawl running.
            logging.getLogger(__name__).exception(
                "错误: failed to save item url=%s", url)
            try:
                self.connect.rollback()
            except Exception:
                logging.getLogger(__name__).exception(
                    "rollback failed for url=%s", url)
        return item

    def close_spider(self, spider):
        """Release the cursor and connection when the spider finishes."""
        self.cursor.close()
        self.connect.close()

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值