二十五、python将爬取的内容保存到mysql
--------------------------------------------------------------
#sqlAlchemy的介绍
from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, DateTime, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship, backref
from sqlalchemy.dialects.mysql import LONGTEXT
import requests
from lxml import etree
# 获取段子内容、赞、分享、收藏数
def getJokeOfAllList(basurl='http://www.budejie.com/text/{0}'):
nextPage = True
pageNum = 1
while nextPage:
url = basurl.format(pageNum)
response = requests.get(url)
selector = etree.HTML(response.text)
all = selector.xpath('//*/div[@class="j-r-list"]/ul/li')
for a in all:
joke = a.xpath('div[@class="j-r-list-c"]/div[@class="j-r-list-c-desc"]/a/text()')[0]
like = a.xpath('div[@class="j-r-list-tool"]/div[@class="j-r-list-tool-l "]/ul/li[@class="j-r-list-tool-l-up"]/span/text()')[0]
down = a.xpath('div[@class="j-r-list-tool"]/div[@class="j-r-list-tool-l "]/ul/li[@class="j-r-list-tool-l-down "]/span/text()')[0]
share = a.xpath('div[@class="j-r-list-tool"]/div[@class="j-r-list-tool-ct"]/div[@class="j-r-list-tool-ct-share-c"]/span/text()')[0]
comment = a.xpath('div[@class="j-r-list-tool"]/div[@class="j-r-list-tool-r j-r-list-tool-cc"]/ul/li[@class=" f-tac j-comment j-comment-width j-comment-down-width"]/a/span[@class="comment-counts"]/text()')[0]
# print joke, '=====', like, '====', down, '====', share.replace(u"分享??",""), '=====', comment, '====='
yield joke, like, down, share.replace(u"分享??", ""), comment
hasNext = selector.xpath('//a[@class="pagenxt"]')
if hasNext:
pageNum += 1
else:
nextPage = False
# print pageNum
#大概步骤如下:
#1、创建连接
#mysql+mysqlconnector 或者 mysql+mysqldb mysql代表数据库,mysqlconnector代表连接驱动
engine = create_engine('mysql+mysqlconnector://root:root@192.168.216.7:3306/testdb?charset=utf8',echo=True) #echo是否打印sql语句
#2、定义映射
Base = declarative_base() #获取公共类,模板的顶级父类
#定义自己的实体类
class Joke(Base):
__tablename__ = 'jokes'
id = Column(Integer, nullable=True, primary_key=True, index=True)
content = Column(LONGTEXT, nullable=True)
like = Column(Integer, nullable=False)
down = Column(Integer, nullable=False)
share = Column(Integer, nullable=False)
comment = Column(Integer, nullable=False)
#3、映射实例化,创建数据库表
Base.metadata.create_all(engine)
#4、创建会话
Session = sessionmaker(engine)
session = Session()
#5、对象实例持久化
if __name__ == '__main__':
for i,(joke, like, down, share, comment) in enumerate(getJokeOfAllList()):
#print i,joke, like, down, share, comment
#print share.encode("utf-8").replace('??',"")
joke = Joke(content=joke.encode("utf-8"), like=int(like), down=int(down), share=int(share.encode("utf-8").replace('??',"")), comment=int(comment))
print joke.content, joke.like, joke.down, joke.share, joke.comment
session.add(joke)
if i % 10 == 0:
print i
session.commit()
--------------------------------------------------------------