python爬虫需要mysql_python 将爬虫内容保存到mysql中

二十五、python将爬取的内容保存到mysql

--------------------------------------------------------------

# SQLAlchemy 的介绍

from sqlalchemy import create_engine

from sqlalchemy.ext.declarative import declarative_base

from sqlalchemy import Column, Integer, String, DateTime, ForeignKey

from sqlalchemy.orm import sessionmaker, relationship, backref

from sqlalchemy.dialects.mysql import LONGTEXT

import requests

from lxml import etree

# Fetch each joke's text together with its like, dislike, share and comment counts.
def getJokeOfAllList(basurl='http://www.budejie.com/text/{0}'):
    """Crawl the paginated joke listing and yield one record per joke.

    Pages are requested one at a time, starting at page 1, by formatting the
    page number into ``basurl``.  Crawling continues for as long as the page
    just parsed contains a "next page" anchor (``<a class="pagenxt">``).

    Args:
        basurl: URL template with a ``{0}`` placeholder for the page number.

    Yields:
        Tuples ``(joke, like, down, share, comment)`` holding the text nodes
        extracted from one list item: the joke body followed by the like,
        dislike, share and comment counters.  All values are still strings;
        the share value has its label text stripped.  List items that lack
        any of the five fields are skipped instead of aborting the crawl.

    Raises:
        requests.exceptions.RequestException: if a page cannot be fetched,
            including when the request times out.
    """
    # XPath of each field relative to one <li> of the listing, in yield order.
    field_paths = (
        'div[@class="j-r-list-c"]/div[@class="j-r-list-c-desc"]/a/text()',
        'div[@class="j-r-list-tool"]/div[@class="j-r-list-tool-l "]/ul/li[@class="j-r-list-tool-l-up"]/span/text()',
        'div[@class="j-r-list-tool"]/div[@class="j-r-list-tool-l "]/ul/li[@class="j-r-list-tool-l-down "]/span/text()',
        'div[@class="j-r-list-tool"]/div[@class="j-r-list-tool-ct"]/div[@class="j-r-list-tool-ct-share-c"]/span/text()',
        'div[@class="j-r-list-tool"]/div[@class="j-r-list-tool-r j-r-list-tool-cc"]/ul/li[@class=" f-tac j-comment j-comment-width  j-comment-down-width"]/a/span[@class="comment-counts"]/text()',
    )

    pageNum = 1
    while True:
        # A timeout keeps an unresponsive server from blocking the crawl forever.
        response = requests.get(basurl.format(pageNum), timeout=10)
        selector = etree.HTML(response.text)

        for item in selector.xpath('//*/div[@class="j-r-list"]/ul/li'):
            matches = [item.xpath(path) for path in field_paths]
            if not all(matches):
                # At least one field is missing from this <li>; indexing [0]
                # would raise IndexError, so skip the item.
                continue
            joke, like, down, share, comment = (found[0] for found in matches)
            yield joke, like, down, share.replace(u"分享??", ""), comment

        # Stop once the page no longer links to a following page.
        if not selector.xpath('//a[@class="pagenxt"]'):
            break
        pageNum += 1

# Rough outline of the steps:

# 1. Create the connection.

# The URL scheme is "<database>+<driver>" (mysql+mysqlconnector or mysql+mysqldb): "mysql" names the database and "mysqlconnector" names the connection driver.
# NOTE(review): user, password and host are hard-coded in the URL below — consider loading them from configuration.

engine = create_engine('mysql+mysqlconnector://root:root@192.168.216.7:3306/testdb?charset=utf8',echo=True) # echo: whether to print the SQL statements

# 2. Define the mapping.

Base = declarative_base()  # shared declarative base class; top-level parent of the mapped classes

# Define our own entity class.
class Joke(Base):
    """ORM entity for one crawled joke, mapped to the ``jokes`` table."""

    __tablename__ = 'jokes'

    # Primary key. A primary-key column is never NULL and is already indexed
    # by the key itself, so neither nullable=True nor index=True belongs here.
    id = Column(Integer, primary_key=True)

    # Joke text; LONGTEXT so long entries are not truncated.
    content = Column(LONGTEXT, nullable=True)

    # Counters scraped alongside the joke: likes, dislikes, shares, comments.
    like = Column(Integer, nullable=False)
    down = Column(Integer, nullable=False)
    share = Column(Integer, nullable=False)
    comment = Column(Integer, nullable=False)

    def __repr__(self):
        """Return a short debugging representation with the key and counters."""
        return 'Joke(id=%r, like=%r, down=%r, share=%r, comment=%r)' % (
            self.id, self.like, self.down, self.share, self.comment)

# 3. Materialize the mapping: create the mapped tables in the database.
Base.metadata.create_all(bind=engine)

# 4. Build a session factory bound to the engine, then open one session.
Session = sessionmaker(bind=engine)
session = Session()

# 5. Persist the object instances.
if __name__ == '__main__':
    try:
        for i, (text, like, down, share, comment) in enumerate(getJokeOfAllList()):
            # The counters arrive as text. Strip the leftover '??' from the
            # share value before converting it, working on text so the call
            # does not depend on Python 2 byte-string semantics.
            # NOTE(review): content is still UTF-8 encoded before storage, as
            # in the original script — confirm that is wanted on Python 3.
            row = Joke(
                content=text.encode("utf-8"),
                like=int(like),
                down=int(down),
                share=int(share.replace(u'??', u'')),
                comment=int(comment),
            )
            # One %-formatted string prints the same space-separated line on
            # Python 2 and is also valid syntax on Python 3.
            print('%s %s %s %s %s' % (row.content, row.like, row.down, row.share, row.comment))
            session.add(row)

            # Commit in batches instead of once per row.
            if i % 10 == 0:
                print(i)
                session.commit()

        # Flush the final partial batch: rows added after the last multiple
        # of 10 would otherwise never be committed.
        session.commit()
    except Exception:
        # Discard the uncommitted batch so the session is left clean, then
        # let the error propagate.
        session.rollback()
        raise
    finally:
        session.close()

--------------------------------------------------------------

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值