python爬虫初识、python操作MySQL数据库、python对外提供接口

python爬虫初识、python操作MySQL数据库、python对外提供接口

本篇重点

python爬虫初识、python操作MySQL数据库、python对外提供接口。

具体案例:爬取小说实现小说管理系统

代码实现:

1、爬取小说 并存到mysql数据库

book_db.py

# 导入requests库
# -*- coding:utf-8 -*-
import requests
# 导入文件操作库
from bs4 import BeautifulSoup
from tornado.escape import json_encode
import sys
from mysql_DBUtils import MyPymysqlPool
import importlib
importlib.reload(sys)

# 给请求指定一个请求头来模拟chrome浏览器
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
server = 'http://www.biquge.cm'
# 小说地址
book = 'http://www.biquge.cm/2/2042/'
# 定义DB
mysql = MyPymysqlPool("dbMysql")


# 获取章节内容
def get_contents(chapter):
    req = requests.get(url=chapter)
    html = req.content
    html_doc = str(html, 'gbk')
    bf = BeautifulSoup(html_doc, 'html.parser')
    texts = bf.find_all('div', id="content")
    # 获取div标签id属性content的内容 \xa0 是不间断空白符  
    content = texts[0].text.replace('\xa0' * 4, '\n')
    return content


# 写入数据库
def write_db(chapter, content):
    sql = "INSERT INTO novel (title, content) VALUES(%(title)s, %(content)s);"
    param = {"title": chapter, "content": content}
    mysql.insert(sql, param)
# 根据章节获得项目内容
def get_message_by_chapter(chapter):
    sql = "select * from novel where title = %(title)s"
    param = {"title":chapter}
    result = mysql.getOne(sql,param)
    contentMessage = result['content'].decode()
    return contentMessage
# 获得所有章节
def get_titles():
    sql = "select title from novel"
    result = mysql.getAll(sql)
    list = []
    for i in result:
        list.append(i['title'].decode())
    return list

数据库配置

dbMysqlConfig.cnf

[dbMysql]
host = localhost
port = 3306
user = root
password = root
db_name = book

mysql_DBUtils.py

#!/usr/bin/python3
# -*- coding:utf-8 -*-
import pymysql, os, configparser
from pymysql.cursors import DictCursor
from DBUtils.PooledDB import PooledDB
import json


class Config(object):
    """
    # Config().get_content("user_information")
    配置文件里面的参数
    [dbMysql]
    host = 192.168.1.101
    port = 3306
    user = root
    password = python123
    """

    def __init__(self, config_filename="dbMysqlConfig.cnf"):
        file_path = os.path.join(os.path.dirname(__file__), config_filename)
        self.cf = configparser.ConfigParser()
        self.cf.read(file_path)

    def get_sections(self):
        return self.cf.sections()

    def get_options(self, section):
        return self.cf.options(section)

    def get_content(self, section):
        result = {}
        for option in self.get_options(section):
            value = self.cf.get(section, option)
            result[option] = int(value) if value.isdigit() else value
        return result


class BasePymysqlPool(object):
    def __init__(self, host, port, user, password, db_name):
        self.db_host = host
        self.db_port = int(port)
        self.user = user
        self.password = str(password)
        self.db = db_name
        self.conn = None
        self.cursor = None


class MyPymysqlPool(BasePymysqlPool):
    """
    MYSQL数据库对象,负责产生数据库连接 , 此类中的连接采用连接池实现
        获取连接对象:conn = Mysql.getConn()
        释放连接对象;conn.close()或del conn
    """
    # 连接池对象
    __pool = None

    def __init__(self, conf_name=None):
        self.conf = Config().get_content(conf_name)
        super(MyPymysqlPool, self).__init__(**self.conf)
        # 数据库构造函数,从连接池中取出连接,并生成操作游标
        self._conn = self.__getConn()
        self._cursor = self._conn.cursor()

    def __getConn(self):
        """
        @summary: 静态方法,从连接池中取出连接
        @return MySQLdb.connection
        """
        if MyPymysqlPool.__pool is None:
            __pool = PooledDB(creator=pymysql,
                              mincached=1,
                              maxcached=20,
                              host=self.db_host,
                              port=self.db_port,
                              user=self.user,
                              passwd=self.password,
                              db=self.db,
                              use_unicode=False,
                              charset="utf8",
                              cursorclass=DictCursor)
        return __pool.connection()

    def getAll(self, sql, param=None):
        """
        @summary: 执行查询,并取出所有结果集
        @param sql:查询SQL,如果有查询条件,请只指定条件列表,并将条件值使用参数[param]传递进来
        @param param: 可选参数,条件列表值(元组/列表)
        @return: result list(字典对象)/boolean 查询到的结果集
        """
        if param is None:
            count = self._cursor.execute(sql)
        else:
            count = self._cursor.execute(sql, param)
        if count > 0:
            result = self._cursor.fetchall()
        else:
            result = False
        return result

    def getOne(self, sql, param=None):
        """
        @summary: 执行查询,并取出第一条
        @param sql:查询SQL,如果有查询条件,请只指定条件列表,并将条件值使用参数[param]传递进来
        @param param: 可选参数,条件列表值(元组/列表)
        @return: result list/boolean 查询到的结果集
        """
        if param is None:
            count = self._cursor.execute(sql)
        else:
            count = self._cursor.execute(sql, param)
        if count > 0:
            result = self._cursor.fetchone()
        else:
            result = False
        return result

    def getMany(self, sql, num, param=None):
        """
        @summary: 执行查询,并取出num条结果
        @param sql:查询SQL,如果有查询条件,请只指定条件列表,并将条件值使用参数[param]传递进来
        @param num:取得的结果条数
        @param param: 可选参数,条件列表值(元组/列表)
        @return: result list/boolean 查询到的结果集
        """
        if param is None:
            count = self._cursor.execute(sql)
        else:
            count = self._cursor.execute(sql, param)
        if count > 0:
            result = self._cursor.fetchmany(num)
        else:
            result = False
        return result

    def insertMany(self, sql, values):
        """
        @summary: 向数据表插入多条记录
        @param sql:要插入的SQL格式
        @param values:要插入的记录数据tuple(tuple)/list[list]
        @return: count 受影响的行数
        """
        count = self._cursor.executemany(sql, values)
        return count

    def __query(self, sql, param=None):
        if param is None:
            count = self._cursor.execute(sql)
        else:
            count = self._cursor.execute(sql, param)
        return count

    def update(self, sql, param=None):
        """
        @summary: 更新数据表记录
        @param sql: SQL格式及条件,使用(%s,%s)
        @param param: 要更新的  值 tuple/list
        @return: count 受影响的行数
        """
        return self.__query(sql, param)

    def insert(self, sql, param=None):
        """
        @summary: 更新数据表记录
        @param sql: SQL格式及条件,使用(%s,%s)
        @param param: 要更新的  值 tuple/list
        @return: count 受影响的行数
        """
        return self.__query(sql, param)

    def delete(self, sql, param=None):
        """
        @summary: 删除数据表记录
        @param sql: SQL格式及条件,使用(%s,%s)
        @param param: 要删除的条件 值 tuple/list
        @return: count 受影响的行数
        """
        return self.__query(sql, param)

    def begin(self):
        """
        @summary: 开启事务
        """
        self._conn.autocommit(0)

    def end(self, option='commit'):
        """
        @summary: 结束事务
        """
        if option == 'commit':
            self._conn.commit()
        else:
            self._conn.rollback()

    def dispose(self, isEnd=1):
        """
        @summary: 释放连接池资源
        """
        if isEnd == 1:
            self.end('commit')
        else:
            self.end('rollback')
        self._cursor.close()
        self._conn.close()


# if __name__ == '__main__':
#     mysql = MyPymysqlPool("dbMysql")
#     sqlAll = "select * from novel limit 1"
#     result = mysql.getOne(sqlAll)
#     print(result)
#     # print(result['title'])
#     # print(result['title'].decode('utf-8'))
#     # print(result.decode('utf-8'))
#     # print(chardet.detect(result['title']))
#     data = {
#         'no': 1,
#         'name': 'Runoob',
#         'url': 'http://www.runoob.com'
#     }
#     json_str = json.dumps(result)
#     print(json_str)
#     # 释放资源
#     mysql.dispose()

对外提供api接口

book_api.py

import tornado.ioloop
import tornado.web
from tornado.escape import json_encode
from book_db import get_message_by_chapter,get_titles

class MainHandler(tornado.web.RequestHandler):

    """解决JS跨域请求问题"""
    def set_default_headers(self):
        self.set_header('Access-Control-Allow-Origin', '*')
        self.set_header('Access-Control-Allow-Methods', 'POST, GET')
        self.set_header('Access-Control-Max-Age', 1000)
        self.set_header('Access-Control-Allow-Headers', '*')
        self.set_header('Content-type', 'application/json')

    def get(self):
        """get请求"""

        """根据章节title获得文章内容"""
        searchMessage = self.get_argument('searchMessage')
        data = get_message_by_chapter(searchMessage)
        print(data)
        result = {
            "result": 'success',
            'status': True,
            'code': 200,
            'data': str(data)
        }
        self.write(json_encode(result))

class MainHandler1(tornado.web.RequestHandler):

    """解决JS跨域请求问题"""
    def set_default_headers(self):
        self.set_header('Access-Control-Allow-Origin', '*')
        self.set_header('Access-Control-Allow-Methods', 'POST, GET')
        self.set_header('Access-Control-Max-Age', 1000)
        self.set_header('Access-Control-Allow-Headers', '*')
        self.set_header('Content-type', 'application/json')

    def get(self):
        """get请求"""

        """获得文章title"""
        data = get_titles()
        result = {
            "result": 'success',
            'status': True,
            'code': 200,
            'data': data
        }
        self.write(json_encode(result))

"""接收请求"""
application = tornado.web.Application([(r"/getMessage", MainHandler),(r"/getTitle", MainHandler1)])


if __name__ == "__main__":
    application.listen(8868)
    tornado.ioloop.IOLoop.instance().start()

前台h5代码

index.html

<!DOCTYPE html>
<html lang="zh">
<head>
    <meta charset="UTF-8">
    <meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>test</title>
    <link rel="stylesheet" type="text/css" href="js/layui/css/layui.css">
    <script src="js/jquery-3.1.1.min.js" type="text/javascript"></script>
    <script src="layer-v3.1.1/layer/layer.js" type="text/javascript"></script>
    <script src="js/layui/layui.js" type="text/javascript"></script>
</head>
<body>
<div class="layui-container">
    <div class="layui-form-item" style="margin-top: 20px">
        <select class="layui-select" name="city" id="sel" lay-verify="required"></select>
    </div>
    <div class="layui-input-inline" style="margin-top: 10px;margin-right: 10px;margin-bottom: 10px">
        <button id="a_getmessage" type="button" class="layui-btn layui-btn-sm layui-btn-primary">查看</button>
    </div>
    <div class="layui-form-item layui-form-text">
        <div class="layui-input-block">
            <textarea placeholder="章节内容" style="height: 800px" class="layui-textarea" id="textarea-msg"></textarea>
        </div>
    </div>
</div>
<script src="js/test.js" type="text/javascript"></script>
</body>
</html>

js 部分

test.js

$(function () {
    $.ajax({
        type: 'GET',
        url: "http://localhost:8868/getTitle",
        success: function (res) {
            //获取select
            var sel = document.getElementById('sel');
//因为一会要用str来拼接,所以要先声明一下
            var str = "";
            // console.log(sel);看能否获取到select
//遍历循环,将每一项都拼接到str中
            for(var i=0;i<res.data.length;i++) {
//试着获取data里的每一个值看能否获取到
//拼接str,把data里面的所有值都放在option标签里
                str += '<option>' + res.data[i] + '</option>';
            }
            //把拼接好的str放到select标签里
            sel.innerHTML = str;
        },
        error: function (XMLHttpResponse, textStatus, errorThrown) {
            console.log(XMLHttpResponse.readyState);
            console.log(textStatus.state);
            console.log(errorThrown);
        }
    });
});
$('#a_getmessage').click(function () {
    let messag = $('#chapter').val();
    $('#textarea-msg').val('');
    // if (isNull(messag)) {
    let val = $('#sel option:selected').val();
        $.ajax({
            type: 'GET',
            url: "http://localhost:8868/getMessage",
            data: {"searchMessage": val},
            success: function (res) {
                console.log(res.data);
                $('#textarea-msg').val(res.data);
            },
            error: function (XMLHttpResponse, textStatus, errorThrown) {
                console.log(XMLHttpResponse.readyState);
                console.log(textStatus.state);
                console.log(errorThrown);
            }
        });
    // } else {
    //     layer.msg('查询内容不能为空');
    // }
});

//判断是否为空
function isNull(nameVal) {
    if (nameVal == null || nameVal == "" || nameVal == undefined) {
        return false
    }
    return true
}

最终实现效果截图

在这里插入图片描述
在这里插入图片描述

  • 2
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值