爬虫数据存储-MySQL

数据存储-MySQL

MySQL 安装

安装与启动

安装教程:https://www.shouxieziti.cn/27044.html
下载地址:https://downloads.mysql.com/archives/installer/
版本:8.0.17

net start mysql 启动 mysql 服务器
net stop mysql 关闭 mysql 服务器

mysql -u root -p  #输入后填写密码145***189登录
mysqladmin -u root -p password #修改mysql登录密码
exit; # 退出mysql

#在shell中的命令

select version(); #查看版本 
select now(); # 查看当前时间
#端口号 3306

create database aaa; # '创建库'
show databases; #'显示所有的库'
use aaa;  # '使用库'
show tables;  # '显示当前库下所有表信息'
alert table stu engine=innodb  #修改引擎不支持事务
select database(); #查看当前使用的数据库
drop database aaa; #删除库

概念简介

E_R模型: Entity实体(表)   Relationship关系(一对一,一对多,多对多)
三范式:  列不可拆分;唯一标志;引用主键
数据的完整性:
	数据类型:int,decimal;char,varchar,text;datetime;bit
	约束:主键(primary key);非空(not null);唯一(unique);默认(default);外键(foreign)
	逻辑删除与物理删除

关系:一对一,一对多 ,多对多    不建议表与表循环闭合的关系

'约束关系'
多表约束,产生关系,若有对应关联表数据,就会报错(逻辑删除解决此问题)
constraint
cascade 级联关系(删除,所有关联的表全部删除)
restrict限制关系
set null外键制空
not action 没有

MySQL基本操作

#端口号 3306
"""数据库操作"""
create database; # 创建库aaa,
create database aaa charset utf8; # 创建库aaa,编码格式为utf8
show databases; # 显示所有的库
use aaa;  # 切换数据库
select database(); #查看当前使用的数据库
drop database aaa; #删除库

"""表操作"""
show tables;  # 显示当前库下所有表信息

create table animal(  #添加表
	id int auto_increment primary key,
    name varchar(10) not null,
    gender bit not null
); 

alter table animal add birthday datetime; #向表中添加列

rename table animal to dog; # 修改表名
show create table dog; #查看建表的命令
desc dog; #查看表列的设置

drop table dog # 删表

"""数据操作"""
insert into dog values(2,'banana',1);#向表中插入数据
insert into dog(name,gender) values('cite',1); #缺省插入数据
insert into dog values(4,'day',0),(5,'easy',1); #向表中插入多条数据
insert into dog(name,gender) values('fifth',0),('giggle',1); #缺省多条数据

select * from dog #查看表内所有数据

update dog set name = 'zigzag' where id = 4;  #修改数据

delete from dog where id = 4;# 删除数据

MySQL查询操作

""" 创建表格 """
create table stu(
    id int auto_increment primary key,
    name varchar(10) not null,
    birthday datetime,
    gender bit default 0,
    isdelete bit default 0,
    address varchar(100),
    score int
);

""" 添加数据 """
insert into stu values(1,'小妹','2018-01-01',0,0,'北京',90),
(2,'信息','2017-01-01',0,0,'上海',80),
(3,'订单','2016-01-01',0,0,'上海',90),
(4,'哈哈','2015-01-01',1,0,'上海',67),
(5,'让人','2018-01-01',0,0,'北京',56),
(6,'解决','2017-01-01',1,0,null,114),
(7,'啊啊','2017-01-01',0,0,null,98),
(8,'头条','2016-01-01',0,0,'天津',86),
(9,'色粉','2016-01-01',1,0,'山东',55);

""" 条件运算 """
# >;  <;  =;  >=;  <=;  !=;
select * from stu where id > 7;
select id,name from stu where id >= 7;
select id,name from stu where id != 7;
select id,name from stu where name <> '啊啊';  #不等于

""" 逻辑运算 """
# and; or; not in;
select id,name from stu where id > 3 and gender = 1;
select id,name from stu where id < 3 or gender = 1;

""" 模糊查询 """
# %多个字符  _一个字符
select id,name,birthday from stu where birthday like '2018%';
select id,name,birthday from stu where name like '头%' or name like '%小%';
select id,name,birthday from stu where birthday like '2_18-01-01%';

""" 范围查询 """
# in ; not in; between
select * from stu where id in(1,3,5,7);
select * from stu where id not in(1,3,5,7);
select * from stu where id between 1 and 5; #1-5间
select * from stu where id between 1 and 5 and gender=1; #1-5间
select * from stu where id in(1,3,4,5,7) and gender=1;
select * from stu where id between (1,3) and (5,7); # 不行

""" 判断空 """
# null ; not null
select * from stu where address is null;
select * from stu where address is not null and gender = 0;

""" 查询符号优先级 """
# () not 条件 and or

""" 聚合查询 """
# count max min sum avg
select count(*) from stu; # 统计个数
select count(*) from stu where gender = 0; # 统计个数
select max(score) from stu; # 最大值
select min(score) from stu; # 最小值
select sum(score) from stu where gender = 0; # 求和
select avg(score) from stu where gender = 0; # 平均值

""" 分组查询 """
# group by; having
select gender,count(*) from stu group by gender;#性别分组后的ge'shu
select gender as '性别',count(*) from stu group by gender; #起别名
select gender,count(*) from stu group by gender having gender=0; #分组后再筛选
select gender,max(score) from stu group by gender; #分组后的总分

""" 排序 """
# order by
select * from stu where gender = 0 order by score; #升序
select * from stu where gender = 0 order by score desc; #降序

""" 分页 """
#limit
select * from stu limit 0,3; #限制个数  实际:限制每页数据个数

""" 去重 """
select distinct birthday from stu; # 查看生日字段的范围

MySQL多表

使用展示-建多表

"""建表"""

create table sub(
    id int auto_increment primary key not null,
    title varchar(10) not null
);

create table stu(
    id int auto_increment primary key,
    name varchar(20) not null,
    birthday datetime,
    gender bit default 0,
    isdelete bit default 0,
    address varchar(100),
    score int(10)
);

create table scores(
    id int auto_increment primary key,
    score decimal(5,2),
    stuid int,
    subid int,
    foreign key(stuid) references stu(id),
    foreign key(subid) references sub(id)   
);


"""插入数据"""
insert into sub values(0,"语文"),(0,"数学"),(0,"英语"),(0,"科学");

insert into stu values(1,'小妹','2018-01-01',0,0,'北京',90),
(2,'信息','2017-01-01',0,0,'上海',80),
(3,'订单','2016-01-01',0,0,'上海',90),
(4,'哈哈','2015-01-01',1,0,'上海',67),
(5,'让人','2018-01-01',0,0,'北京',56),
(6,'解决','2017-01-01',1,0,null,114),
(7,'啊啊','2017-01-01',0,0,null,98),
(8,'头条','2016-01-01',0,0,'天津',86),
(9,'色粉','2016-01-01',1,0,'山东',55);

insert into scores values(0,80,9,1),(0,56,9,4),(0,90.6,9,3),(0,55,6,2),(0,44,1,1),(0,80,4,3),(0,45,5,1),(0,85,2,2);

链接查询

inner join 多表查询
left join 左链接
right join  右链接

#学生每科的成绩 inner join
select stu.name,sub.title,scores.score
from scores
inner join stu on scores.stuid = stu.id
inner join sub on scores.subid = sub.id
;
#学生每科的成绩 left join
select sub.title,scores.score
from sub
left join scores on scores.subid = sub.id
;

#学生每科的成绩 right join
select sub.title,scores.score
from sub
right join scores on scores.subid = sub.id
;

#学生的平均分 
select stu.name,avg(scores.score)
from scores
inner join stu on scores.stuid = stu.id
group by stu.name
;

#男生的总分
select stu.name,sum(scores.score)
from scores
inner join stu on scores.stuid = stu.id
where stu.gender = 1
group by stu.name
;


#科目的总分
select sub.title,sum(scores.score)
from scores
inner join sub on scores.subid = sub.id
group by sub.title
;

#部分学生的最高分
select stu.name,avg(scores.score),max(scores.score)
from scores
inner join stu on scores.stuid = stu.id
where stu.id<5
group by stu.name
;

MySQL视图

view:封装sql语句
# 创建视图(创建后相当于table)
create view abcd as
select stu.name,scores.score 
from scores
inner join stu on scores.stuid = stu.id;

select * from abcd; # 查询视图
drop view abcd # 删除视图

MySQL自关联

省--城市--区--街道   联动查找   (数据较少无需4张表)
China   id name pid  
"""创建表"""
create table china(
	id int primary key,
	name varchar(100),
	pid int,
	foreign key(pid) references china(id)
	);
	
"""导SQLIT的文件数据"""
source 10china.sql;  #  切换路径至文件所在   乱码 -->文件转为ASCII

select * from china where pid is NULL;# 查询省份

#云南有多少市
select city.* from china as city
inner join china as pro on city.pid = pro.id
where pro.name = "云南省";

#石家庄市下的三级行政区
select dis.* from china as dis
inner join china as city on dis.pid = city.id
where city.name = "石家庄市";

MySQL事物处理

- 四大特性(ACID)
    A   原子性
  	C   一致性
	I    隔离性
	D   持久性
-原理:
	begin开缓存区,commit存入数据库生效
	
alert table stu engine=innodb  #修改引擎不支持事务
# 开启事务 begin  相当于开了缓存区 仅在当前服务器生效
# 提交事务  commit
# 事务回滚 rollback 缓存中返回上一步
insert into sub values(0,'math');

MySQL效率问题

-索引查询  效率最高
-数据库的数据类型越小越好
-数据库的数据能用整型
-不适用NULL,用0/特殊符号代替
-创建索引 create index aaa on stu(name);
-查看索引  show index from stu ;
-删除所有 delete index aaa on stu
-验证时间 set Profiling=1;内容查询内容;show profiles;

MySQL与python

# 下载包
# pip install pymysql -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com

import pymysql
try:
    # 链接数据库
    conn  = pymysql.Connect(
        host='localhost',
        port=3306,
        db='aaa',
        user='root',
        password='145***189',
        charset='utf8'

    )
    # 建库,建表,数据操作
    cur = conn.cursor()  #创建游标对象
    # sql = "insert into sub values (0,'ggg')"#增加一条数据
    # sql = "update sub set title='123' where id=1" #修改数据
    # sql = 'delete from sub where id=8'#删除数据

    sql = "select * from sub" # 查询数据
    result = cur.execute(sql)
    result = cur.fetchall()
    print(result)

    #提交事务
    conn.commit()
    #关闭游标
    cur.close()
    # 取消数据库链接
    conn.close()
except Exception as e:
    print(str(e))

MySQL备份与恢复

# 备份
mysqldump -u root -p aaa scores > C:\Users\jenny\Desktop\scores.sql
# 恢复
mysql -u root -p day70 <  C:\Users\jenny\Desktop\scores.sql
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
当然可以!以下是一个简单的示例代码,用于将爬取到的数据存储MySQL 数据库中: ```python # 导入所需的库 import pymysql # 连接到 MySQL 数据库 connection = pymysql.connect( host='localhost', user='your_username', password='your_password', db='your_database_name', charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor ) # 创建表(如果不存在) create_table_query = ''' CREATE TABLE IF NOT EXISTS `spider_data` ( `id` INT AUTO_INCREMENT PRIMARY KEY, `title` VARCHAR(255), `content` TEXT ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; ''' with connection.cursor() as cursor: cursor.execute(create_table_query) connection.commit() # 将爬取到的数据插入到数据库中 def insert_data(title, content): insert_query = 'INSERT INTO `spider_data` (`title`, `content`) VALUES (%s, %s)' with connection.cursor() as cursor: cursor.execute(insert_query, (title, content)) connection.commit() # 爬虫代码部分(这里只是简单示例) def spider(): # 爬取数据的过程... title = "示例标题" content = "示例内容" # 将数据插入数据库 insert_data(title, content) # 执行爬虫代码 spider() # 关闭数据库连接 connection.close() ``` 请注意,这只是一个简单的示例,实际情况可能会根据你的具体需求而有所变化。确保你已经安装了 `pymysql` 库,并根据你的实际情况修改连接数据库的参数、创建表的语句和爬虫代码部分。希望对你有所帮助!
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值