scrapy mysql_在Scrapy中将项目写入MySQL数据库

我是Scrapy的新手,我有spider代码

class Example_spider(BaseSpider):
    """Crawl example.com's "new books" listing and scrape each book's detail page."""

    name = "example"
    allowed_domains = ["www.example.com"]

    def start_requests(self):
        # Single seed request: the new-books listing page.
        yield self.make_requests_from_url("http://www.example.com/bookstore/new")

    def parse(self, response):
        # Pull every book-detail href off the listing page and follow it.
        sel = HtmlXPathSelector(response)
        for href in sel.select('//div[@class="bookListingBookTitle"]/a/@href').extract():
            # hrefs are site-absolute ("/..."); drop the leading slash before joining.
            yield Request(urljoin("http://www.example.com/", href[1:]),
                          callback=self.parse_url)

    def parse_url(self, response):
        # Build one Exampleitem per bookshelf container on the detail page.
        sel = HtmlXPathSelector(response)
        scraped = []
        for node in sel.select('//div[@id="bookshelf-bg"]'):
            item = Exampleitem()
            # First matching title node; extract() on price keeps the list form.
            item['book_name'] = node.select('div[@class="slickwrap full"]/div[@id="bookstore_detail"]/div[@class="book_listing clearfix"]/div[@class="bookstore_right"]/div[@class="title_and_byline"]/p[@class="book_title"]/text()')[0].extract()
            item['price'] = node.select('div[@id="book-sidebar-modules"]/div[@class="add_to_cart_wrapper slickshadow"]/div[@class="panes"]/div[@class="pane clearfix"]/div[@class="inner"]/div[@class="add_to_cart 0"]/form/div[@class="line-item"]/div[@class="line-item-price"]/text()').extract()
            scraped.append(item)
        return scraped

管道代码为:

class examplePipeline(object):
    """Store scraped book items in MySQL through Twisted's adbapi thread pool.

    Queries run on pool threads, so the reactor is never blocked by MySQL I/O.
    """

    def __init__(self):
        # Credentials are hard-coded here; move them to settings.py in real code.
        self.dbpool = adbapi.ConnectionPool('MySQLdb',
                                            db='blurb',
                                            user='root',
                                            passwd='redhat',
                                            cursorclass=MySQLdb.cursors.DictCursor,
                                            charset='utf8',
                                            use_unicode=True
                                            )

    def process_item(self, item, spider):
        # FIX: Scrapy invokes this as process_item(item, spider). The original
        # signature (self, spider, item) swapped the two, so `item` was really
        # the spider object -> "'Example_spider' object is not subscriptable".
        # (The earlier NameError came from `Exampleitem` not being imported in
        # this module; import it here if you want an isinstance check.)
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)
        return item

    def _conditional_insert(self, tx, item):
        """Insert the item unless a row with the same book_name already exists.

        Runs on an adbapi pool thread; `tx` is a transaction-wrapped cursor.
        """
        print("db connected-=========>")
        # FIX: query parameters must be a sequence; (item['book_name']) is just
        # a parenthesised string, so pass a one-element tuple instead.
        tx.execute("select * from example_book_store where book_name = %s",
                   (item['book_name'],))
        result = tx.fetchone()
        if result:
            log.msg("Item already stored in db: %s" % item, level=log.DEBUG)
        else:
            tx.execute("""INSERT INTO example_book_store (book_name,price)
                       VALUES (%s,%s)""",
                       (item['book_name'], item['price'])
                       )
            log.msg("Item stored in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        # Errback for runInteraction: log the failure instead of losing it.
        log.err(e)

运行此后,我得到以下错误

exceptions.NameError: global name 'Exampleitem' is not defined

当我在process_item方法中添加以下代码时,出现上述错误

assert isinstance(item, Exampleitem)

而没有添加这行我得到

exceptions.TypeError: 'Example_spider' object is not subscriptable

任何人都可以运行此代码,并确保所有项目都保存到数据库中吗?

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
Scrapy提供了异步的Item Pipeline机制,可以方便地将数据存储到MySQL数据库中。具体实现步骤如下:

1. 安装异步MySQL库aiomysql:`pip install aiomysql`

2. 在settings.py中配置MySQL数据库信息,并启用asyncio reactor(aiomysql基于asyncio,必须配合该reactor使用):

```
TWISTED_REACTOR = 'twisted.internet.asyncioreactor.AsyncioSelectorReactor'
MYSQL_HOST = 'localhost'
MYSQL_PORT = 3306
MYSQL_USER = 'root'
MYSQL_PASSWORD = 'password'
MYSQL_DBNAME = 'database_name'
```

3. 创建一个异步的MySQL连接池。注意:`from_crawler`是Scrapy同步调用的类方法,不能声明为`async`(否则返回的是协程对象而不是pipeline实例);连接池应在`open_spider`协程中创建:

```
import aiomysql


class MySQLPipeline(object):

    def __init__(self, mysql_host, mysql_port, mysql_user, mysql_password, mysql_dbname):
        self.mysql_host = mysql_host
        self.mysql_port = mysql_port
        self.mysql_user = mysql_user
        self.mysql_password = mysql_password
        self.mysql_dbname = mysql_dbname
        self.pool = None

    @classmethod
    def from_crawler(cls, crawler):
        return cls(
            crawler.settings.get('MYSQL_HOST', 'localhost'),
            crawler.settings.get('MYSQL_PORT', 3306),
            crawler.settings.get('MYSQL_USER', 'root'),
            crawler.settings.get('MYSQL_PASSWORD', 'password'),
            crawler.settings.get('MYSQL_DBNAME', 'database_name'),
        )

    async def open_spider(self, spider):
        self.pool = await aiomysql.create_pool(
            host=self.mysql_host,
            port=self.mysql_port,
            user=self.mysql_user,
            password=self.mysql_password,
            db=self.mysql_dbname,
            charset='utf8mb4',
            autocommit=True,
            maxsize=10,
            minsize=1
        )

    async def process_item(self, item, spider):
        async with self.pool.acquire() as conn:
            async with conn.cursor() as cur:
                sql = "INSERT INTO table_name (field1, field2) VALUES (%s, %s)"
                await cur.execute(sql, (item['field1'], item['field2']))
        return item

    async def close_spider(self, spider):
        self.pool.close()
        await self.pool.wait_closed()
```

4. 在settings.py中启用MySQLPipeline:

```
ITEM_PIPELINES = {
    'myproject.pipelines.MySQLPipeline': 300,
}
```
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值