在MySpider工程的基础上,修改pipelines.py文件,把爬取到的数据存储到SQLite数据库(rooms.db)中。
import sqlite3
class MyspiderPipeline:
    """Item pipeline that stores each scraped room listing in SQLite.

    Every item is written as one row of the ``rooms_tb`` table
    (``id``, ``room``, ``area``, ``name``) and then handed back to the
    caller unchanged.
    """

    def __init__(self, db_path="rooms.db"):
        """Open the database and make sure the ``rooms_tb`` table exists.

        Args:
            db_path: SQLite database to write to. Defaults to ``"rooms.db"``
                in the current working directory, so constructing the
                pipeline without arguments behaves as before.
        """
        self.conn = sqlite3.connect(db_path)
        self.cur = self.conn.cursor()
        self.cur.execute(
            'create table if not exists rooms_tb('
            'id integer primary key autoincrement,'
            'room,'
            'area,'
            'name)'
        )

    def process_item(self, item, spider):
        """Insert one item into ``rooms_tb`` and return it.

        Args:
            item: Mapping-like object providing the ``'room'``, ``'area'``
                and ``'name'`` fields.
            spider: The spider that produced the item (unused).

        Returns:
            The same ``item`` object that was passed in.

        Raises:
            KeyError: If a dict-like ``item`` lacks one of the three fields.
        """
        # Passing null for the id lets SQLite assign the autoincrement value.
        self.cur.execute(
            'insert into rooms_tb values(null, ?,?,?)',
            (item['room'], item['area'], item['name']),
        )
        self.conn.commit()
        # The item must be returned: without it the caller receives None, so
        # any later pipeline stage or exporter would lose the scraped data.
        return item

    def close_spider(self, spider):
        """Close the cursor and the database connection when the spider closes."""
        self.cur.close()
        self.conn.close()
新建show.py文件,代码如下:
import sqlite3
import pygal
# Count the listings per area. The rows are fetched up front so the database
# connection can be closed before any chart rendering happens, and it is
# closed even if the query fails.
conn = sqlite3.connect("rooms.db")
try:
    cur = conn.cursor()
    cur.execute("select area, count(area) from rooms_tb group by area")
    area_counts = cur.fetchall()
finally:
    conn.close()

# Build a pie chart with one slice per area, sized by its listing count.
pie = pygal.Pie()
pie.title = "南山小区房出租分布"
pie.legend_at_bottom = True
for area, count in area_counts:
    pie.add(area, count)

# render_to_file already writes the finished SVG; the extra render() call
# whose return value was discarded has been dropped.
pie.render_to_file("temp.svg")
实际效果如图: