思路如下:
- 首先在item中写入需要构造的item
- 将item的key得到,构造dataframe
- 借助dataframe构造数据表
- 最后使用sql构造可以在数据库中自增长的主键
完整代码
# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html
import scrapy
class CompanyInfoItem(scrapy.Item):
"""企业基本信息"""
company_code = scrapy.Field() # 企业代码code
company_name = scrapy.Field() # 企业名称
company_ = scrapy.Field() # 组织形式
province_location = scrapy.Field() # 地域
if __name__ == '__main__':
"""用item生成数据库"""
import pandas as pd
my_item = CompanyInfoItem()
df = pd.DataFrame(my_item.fields)
# 连接数据库
from sqlalchemy import create_engine
db_name = "test_db" # 要保存的数据库名
table_name = "my_item_table" # 要保存的表名
# engine = create_engine("mysql+pymysql://用户名:密码@127.0.0.1:3306/数据库名")
engine = create_engine('mysql+pymysql://testuser:testpassword@localhost:3306/{}'.format(db_name), encoding='utf8')
engine.connect()
# save to database
df.to_sql(table_name, engine, if_exists='replace', index=False)
# 增加主键
with engine.connect() as con:
con.execute("""ALTER TABLE `{}`.`{}` \
ADD COLUMN `id` INT NOT NULL AUTO_INCREMENT, \
ADD PRIMARY KEY (`id`);"""
.format(db_name, table_name))