python爬虫初识、python操作MySQL数据库、python对外提供接口
本篇重点
python爬虫初识、python操作MySQL数据库、python对外提供接口。
具体案例:爬取小说实现小说管理系统
代码实现:
1、爬取小说 并存到mysql数据库
book_db.py
# 导入requests库
# -*- coding:utf-8 -*-
import requests
# 导入文件操作库
from bs4 import BeautifulSoup
from tornado.escape import json_encode
import sys
from mysql_DBUtils import MyPymysqlPool
import importlib
importlib.reload(sys)
# 给请求指定一个请求头来模拟chrome浏览器
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
server = 'http://www.biquge.cm'
# 小说地址
book = 'http://www.biquge.cm/2/2042/'
# 定义DB
mysql = MyPymysqlPool("dbMysql")
# 获取章节内容
def get_contents(chapter):
req = requests.get(url=chapter)
html = req.content
html_doc = str(html, 'gbk')
bf = BeautifulSoup(html_doc, 'html.parser')
texts = bf.find_all('div', id="content")
# 获取div标签id属性content的内容 \xa0 是不间断空白符
content = texts[0].text.replace('\xa0' * 4, '\n')
return content
# 写入数据库
def write_db(chapter, content):
sql = "INSERT INTO novel (title, content) VALUES(%(title)s, %(content)s);"
param = {"title": chapter, "content": content}
mysql.insert(sql, param)
# 根据章节获得项目内容
def get_message_by_chapter(chapter):
sql = "select * from novel where title = %(title)s"
param = {"title":chapter}
result = mysql.getOne(sql,param)
contentMessage = result['content'].decode()
return contentMessage
# 获得所有章节
def get_titles():
sql = "select title from novel"
result = mysql.getAll(sql)
list = []
for i in result:
list.append(i['title'].decode())
return list
数据库配置
dbMysqlConfig.cnf
[dbMysql]
host = localhost
port = 3306
user = root
password = root
db_name = book
mysql_DBUtils.py
#!/usr/bin/python3
# -*- coding:utf-8 -*-
import pymysql, os, configparser
from pymysql.cursors import DictCursor
from DBUtils.PooledDB import PooledDB
import json
class Config(object):
"""
# Config().get_content("user_information")
配置文件里面的参数
[dbMysql]
host = 192.168.1.101
port = 3306
user = root
password = python123
"""
def __init__(self, config_filename="dbMysqlConfig.cnf"):
file_path = os.path.join(os.path.dirname(__file__), config_filename)
self.cf = configparser.ConfigParser()
self.cf.read(file_path)
def get_sections(self):
return self.cf.sections()
def get_options(self, section):
return self.cf.options(section)
def get_content(self, section):
result = {}
for option in self.get_options(section):
value = self.cf.get(section, option)
result[option] = int(value) if value.isdigit() else value
return result
class BasePymysqlPool(object):
def __init__(self, host, port, user, password, db_name):
self.db_host = host
self.db_port = int(port)
self.user = user
self.password = str(password)
self.db = db_name
self.conn = None
self.cursor = None
class MyPymysqlPool(BasePymysqlPool):
"""
MYSQL数据库对象,负责产生数据库连接 , 此类中的连接采用连接池实现
获取连接对象:conn = Mysql.getConn()
释放连接对象;conn.close()或del conn
"""
# 连接池对象
__pool = None
def __init__(self, conf_name=None):
self.conf = Config().get_content(conf_name)
super(MyPymysqlPool, self).__init__(**self.conf)
# 数据库构造函数,从连接池中取出连接,并生成操作游标
self._conn = self.__getConn()
self._cursor = self._conn.cursor()
def __getConn(self):
"""
@summary: 静态方法,从连接池中取出连接
@return MySQLdb.connection
"""
if MyPymysqlPool.__pool is None:
__pool = PooledDB(creator=pymysql,
mincached=1,
maxcached=20,
host=self.db_host,
port=self.db_port,
user=self.user,
passwd=self.password,
db=self.db,
use_unicode=False,
charset="utf8",
cursorclass=DictCursor)
return __pool.connection()
def getAll(self, sql, param=None):
"""
@summary: 执行查询,并取出所有结果集
@param sql:查询SQL,如果有查询条件,请只指定条件列表,并将条件值使用参数[param]传递进来
@param param: 可选参数,条件列表值(元组/列表)
@return: result list(字典对象)/boolean 查询到的结果集
"""
if param is None:
count = self._cursor.execute(sql)
else:
count = self._cursor.execute(sql, param)
if count > 0:
result = self._cursor.fetchall()
else:
result = False
return result
def getOne(self, sql, param=None):
"""
@summary: 执行查询,并取出第一条
@param sql:查询SQL,如果有查询条件,请只指定条件列表,并将条件值使用参数[param]传递进来
@param param: 可选参数,条件列表值(元组/列表)
@return: result list/boolean 查询到的结果集
"""
if param is None:
count = self._cursor.execute(sql)
else:
count = self._cursor.execute(sql, param)
if count > 0:
result = self._cursor.fetchone()
else:
result = False
return result
def getMany(self, sql, num, param=None):
"""
@summary: 执行查询,并取出num条结果
@param sql:查询SQL,如果有查询条件,请只指定条件列表,并将条件值使用参数[param]传递进来
@param num:取得的结果条数
@param param: 可选参数,条件列表值(元组/列表)
@return: result list/boolean 查询到的结果集
"""
if param is None:
count = self._cursor.execute(sql)
else:
count = self._cursor.execute(sql, param)
if count > 0:
result = self._cursor.fetchmany(num)
else:
result = False
return result
def insertMany(self, sql, values):
"""
@summary: 向数据表插入多条记录
@param sql:要插入的SQL格式
@param values:要插入的记录数据tuple(tuple)/list[list]
@return: count 受影响的行数
"""
count = self._cursor.executemany(sql, values)
return count
def __query(self, sql, param=None):
if param is None:
count = self._cursor.execute(sql)
else:
count = self._cursor.execute(sql, param)
return count
def update(self, sql, param=None):
"""
@summary: 更新数据表记录
@param sql: SQL格式及条件,使用(%s,%s)
@param param: 要更新的 值 tuple/list
@return: count 受影响的行数
"""
return self.__query(sql, param)
def insert(self, sql, param=None):
"""
@summary: 更新数据表记录
@param sql: SQL格式及条件,使用(%s,%s)
@param param: 要更新的 值 tuple/list
@return: count 受影响的行数
"""
return self.__query(sql, param)
def delete(self, sql, param=None):
"""
@summary: 删除数据表记录
@param sql: SQL格式及条件,使用(%s,%s)
@param param: 要删除的条件 值 tuple/list
@return: count 受影响的行数
"""
return self.__query(sql, param)
def begin(self):
"""
@summary: 开启事务
"""
self._conn.autocommit(0)
def end(self, option='commit'):
"""
@summary: 结束事务
"""
if option == 'commit':
self._conn.commit()
else:
self._conn.rollback()
def dispose(self, isEnd=1):
"""
@summary: 释放连接池资源
"""
if isEnd == 1:
self.end('commit')
else:
self.end('rollback')
self._cursor.close()
self._conn.close()
# if __name__ == '__main__':
# mysql = MyPymysqlPool("dbMysql")
# sqlAll = "select * from novel limit 1"
# result = mysql.getOne(sqlAll)
# print(result)
# # print(result['title'])
# # print(result['title'].decode('utf-8'))
# # print(result.decode('utf-8'))
# # print(chardet.detect(result['title']))
# data = {
# 'no': 1,
# 'name': 'Runoob',
# 'url': 'http://www.runoob.com'
# }
# json_str = json.dumps(result)
# print(json_str)
# # 释放资源
# mysql.dispose()
对外提供api接口
book_api.py
import tornado.ioloop
import tornado.web
from tornado.escape import json_encode
from book_db import get_message_by_chapter,get_titles
class MainHandler(tornado.web.RequestHandler):
"""解决JS跨域请求问题"""
def set_default_headers(self):
self.set_header('Access-Control-Allow-Origin', '*')
self.set_header('Access-Control-Allow-Methods', 'POST, GET')
self.set_header('Access-Control-Max-Age', 1000)
self.set_header('Access-Control-Allow-Headers', '*')
self.set_header('Content-type', 'application/json')
def get(self):
"""get请求"""
"""根据章节title获得文章内容"""
searchMessage = self.get_argument('searchMessage')
data = get_message_by_chapter(searchMessage)
print(data)
result = {
"result": 'success',
'status': True,
'code': 200,
'data': str(data)
}
self.write(json_encode(result))
class MainHandler1(tornado.web.RequestHandler):
"""解决JS跨域请求问题"""
def set_default_headers(self):
self.set_header('Access-Control-Allow-Origin', '*')
self.set_header('Access-Control-Allow-Methods', 'POST, GET')
self.set_header('Access-Control-Max-Age', 1000)
self.set_header('Access-Control-Allow-Headers', '*')
self.set_header('Content-type', 'application/json')
def get(self):
"""get请求"""
"""获得文章title"""
data = get_titles()
result = {
"result": 'success',
'status': True,
'code': 200,
'data': data
}
self.write(json_encode(result))
"""接收请求"""
application = tornado.web.Application([(r"/getMessage", MainHandler),(r"/getTitle", MainHandler1)])
if __name__ == "__main__":
application.listen(8868)
tornado.ioloop.IOLoop.instance().start()
前台h5代码
index.html
<!DOCTYPE html>
<html lang="zh">
<head>
<meta charset="UTF-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>test</title>
<link rel="stylesheet" type="text/css" href="js/layui/css/layui.css">
<script src="js/jquery-3.1.1.min.js" type="text/javascript"></script>
<script src="layer-v3.1.1/layer/layer.js" type="text/javascript"></script>
<script src="js/layui/layui.js" type="text/javascript"></script>
</head>
<body>
<div class="layui-container">
<div class="layui-form-item" style="margin-top: 20px">
<select class="layui-select" name="city" id="sel" lay-verify="required"></select>
</div>
<div class="layui-input-inline" style="margin-top: 10px;margin-right: 10px;margin-bottom: 10px">
<button id="a_getmessage" type="button" class="layui-btn layui-btn-sm layui-btn-primary">查看</button>
</div>
<div class="layui-form-item layui-form-text">
<div class="layui-input-block">
<textarea placeholder="章节内容" style="height: 800px" class="layui-textarea" id="textarea-msg"></textarea>
</div>
</div>
</div>
<script src="js/test.js" type="text/javascript"></script>
</body>
</html>
js 部分
test.js
$(function () {
$.ajax({
type: 'GET',
url: "http://localhost:8868/getTitle",
success: function (res) {
//获取select
var sel = document.getElementById('sel');
//因为一会要用str来拼接,所以要先声明一下
var str = "";
// console.log(sel);看能否获取到select
//遍历循环,将每一项都拼接到str中
for(var i=0;i<res.data.length;i++) {
//试着获取data里的每一个值看能否获取到
//拼接str,把data里面的所有值都放在option标签里
str += '<option>' + res.data[i] + '</option>';
}
//把拼接好的str放到select标签里
sel.innerHTML = str;
},
error: function (XMLHttpResponse, textStatus, errorThrown) {
console.log(XMLHttpResponse.readyState);
console.log(textStatus.state);
console.log(errorThrown);
}
});
});
$('#a_getmessage').click(function () {
let messag = $('#chapter').val();
$('#textarea-msg').val('');
// if (isNull(messag)) {
let val = $('#sel option:selected').val();
$.ajax({
type: 'GET',
url: "http://localhost:8868/getMessage",
data: {"searchMessage": val},
success: function (res) {
console.log(res.data);
$('#textarea-msg').val(res.data);
},
error: function (XMLHttpResponse, textStatus, errorThrown) {
console.log(XMLHttpResponse.readyState);
console.log(textStatus.state);
console.log(errorThrown);
}
});
// } else {
// layer.msg('查询内容不能为空');
// }
});
//判断是否为空
function isNull(nameVal) {
if (nameVal == null || nameVal == "" || nameVal == undefined) {
return false
}
return true
}
最终实现效果截图