1. MySQL
1.1pymysql
- 建立数据库连接db =
pymysql.connect(...)
- 参数host:连接的mysql主机,如果本机是’127.0.0.1’
- 参数port:连接的mysql主机的端口,默认是3306
- 参数database:数据库的名称
- 参数user:连接的用户名
- 参数password:连接的密码
- 参数charset:通信采用的编码方式,推荐使用utf8
- 创建游标对象cur = db.cursor()
- 游标方法: cur.execute(“insert …”)
- 提交到数据库或者获取数据 : db.commit()
- 关闭游标对象 :cur.close()
- 断开数据库连接 :db.close()
# -*- coding: utf-8 -*-
import re
import requests
import pymysql
db = pymysql.connect(
host="localhost",
user="root",
password="1314",
charset="utf8",
database="xiaoyi"
)
cursor = db.cursor() # cursor为光标
db_data = '''create table if not exists movies(
title varchar(20) primary key,
roles varchar(100) null,
timer varchar(20) null
);'''
cursor.execute(db_data) # 创建表数据结构
class MaoYanSpider:
def __init__(self):
self.url = 'https://maoyan.com/board/4?offset=0'
self.headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
}
def get_html(self, url):
"""请求的功能函数"""
response = requests.get(url=url, headers=self.headers)
return response.text
def parse_html(self, html):
"""提取数据"""
regex = '<div class="movie-item-info">.*?title="(.*?)".*?<p class="star">(.*?)</p>.*?<p class="releasetime">(.*?)</p>'
r_list = re.findall(regex, html, re.S)
self.save(r_list)
def save(self, datas):
for data in datas:
li = [
data[0].strip(),
data[1].strip(),
data[2].strip()
]
print(li)
detail_info = "insert into movies values(%s,%s,%s);"
# 存入数据
cursor.execute(detail_info, li)
# 提交事务
db.commit()
def crawl(self):
"""程序的入口函数"""
html = self.get_html(url=self.url)
self.parse_html(html)
spider = MaoYanSpider()
spider.crawl()
1.2 peewee
peewee是Python编程语言下的一款ORM
框架。O是object,也就是对象的意思,R是relation,翻译成中文是关系,也就是关系数据库中数据表的意思,M是mapping,是映射的意思。在ORM
框架中,它帮我们把类和数据表进行了一个映射,可以让我们通过类和类对象就能操作它所对应的表中的数据。ORM
框架还有一个功能,它可以根据我们设计的类自动帮我们生成数据库中的表,省去了我们自己建表的过程。
安装:pip install peewee
from peewee import *
db = MySQLDatabase("spider", host="127.0.0.1", port=3306, user="root", password="123456")
class Person(Model):
name = CharField(max_length=20)
birthday = DateField(null=True)
class Meta:
database = db # This model uses the "people.db" database.
if __name__ == '__main__':
from datetime import date
db.create_tables([Person])
字段类型 | MySQL |
---|---|
BigIntegerField | bigint |
IntegerField | int |
SmallIntegerField | smallint |
FloatField | Float |
DoubleField | Double |
DecimalField | Decimal |
CharField | varchar |
FixedCharField | char |
TextField | text |
BlobField | blob |
DateTimeField | DateTime |
DateField | Date |
TimeField | Time |
2. Excel
python内置模块中是没有提供处理Excel文件的模块,想要在python中操作Excel是需要安装第三方模块openpyxl
,这个模块中集成了python操作Excel的相关功能。
pip install openpyxl
from openpyxl import load_workbook
# 拿到Excel
workbook = load_workbook("pyxl.xlsx")
# 获取sheet
# 1 获取到所有的sheet名
# print(workbook.sheetnames)
# 2. 选择sheet
# sheet = workbook["Sheet1"]
# print(sheet)
# 基于索引获取
sheet = workbook.worksheets[0]
# print(sheet)
#
#
# for name in workbook.sheetnames:
# sheet = workbook[name]
# cell = sheet.cell(1, 1)
# print(cell.value)
# 获取到单元格
# cell = sheet.cell(1,1)
# cell = sheet["c1"]
# print(cell.value)
#
# for row in sheet.rows:
# print(row[1].value)