运行环境:Windows 10
技术栈:python3,MySQL8.x
编写目的
MySQL是我们研究开发时最常用的关系型数据库。当需要向MySQL数据库中插入大量数据时,一次一条地插入显然不得劲。本文将介绍如何使用Python一键为MySQL插入万条数据。
效果展示
只需要运行一下python脚本,就能达到下面的样子。
MySQL数据库表
我的user表是这样设计的:
字段名 | 类型 | 说明 |
---|---|---|
id | int | 用户ID,自增长主键 |
username | varchar | 用户名 |
password | varchar | 密码 |
name | varchar | 真实姓名 |
varchar | 电子邮件 | |
phone | varchar | 手机号码 |
address | varchar | 地址 |
imgUrl | varchar | 头像图片URL |
create_time | timestamp | 用户创建时间 |
当然,sql语句也有:
CREATE TABLE `user` (
`id` bigint NOT NULL AUTO_INCREMENT COMMENT '主键',
`username` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '用户名',
`password` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NOT NULL COMMENT '密码',
`name` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '姓名',
`email` varchar(64) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '邮箱',
`phone` varchar(20) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '联系电话',
`address` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '住址',
`imgUrl` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci DEFAULT NULL COMMENT '头像位置',
`create_time` timestamp NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT '注册时间',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=2056 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
除了user表以外,还有一个area表,专门存放中国的地址区域的,下载地址:https://download.csdn.net/download/qq_52835677/88122670
如果下载不了,私信找我要。
Python脚本详解
1.用到的python库
xpinyin模块是一个Python第三方库,用于将汉字转换为拼音。需要安装一下:
pip install xpinyin
其他的库在python3里都自带的。总共就用到了这些:
import datetime
import pymysql
import random
import string
from xpinyin import Pinyin
2.配置MySQL连接信息
要使用pymysql库连接到MySQL数据库,我们需要配置下面的信息:
conn = pymysql.connect(
host='localhost',
port=3306,
user='root',
password='123456',
database='vs718'
)
这些信息要根据自己电脑里安装的MySQL做更改。
3.编写方法,生成随机内容
- name 真实姓名
def random_chinese_name():
chars = '赵钱孙李周吴郑王冯陈褚卫蒋沈韩杨朱秦尤许何吕施张孔曹严华金魏陶姜戚谢邹喻柏水窦章云苏潘葛奚范彭郎鲁韦昌马苗凤花方俞任袁柳酆骆高夏蔡田樊胡凌霍虞万支柯昝管卢莫经房裘缪干解应宗丁宣贲邓郁单杭洪包诸左石崔吉钮龚程嵇邢滑裴陆荣翁荀羊於惠甄魏家封芮羿储靳汲邴糜松井段富巫乌焦巴弓牧隗山谷车侯宓蓬全郗班仰秋仲伊宫宁仇栾暴甘钭厉戎祖武符刘景詹束龙叶幸司韶郜黎蓟薄印宿白怀蒲邰从鄂索咸籍赖卓蔺屠蒙池乔阳麻胥能苍双闻莘党翟谭贡劳逄姬申扶堵冉宰郦雍郤璩桑桂濮牛寿通边扈燕冀僪浦尚农温别庄晏柴瞿阎充慕连茹习宦艾鱼容向古易慎戈廖庾终暨居衡步都耿满弘匡国文寇广禄阙东欧殳沃利蔚越夔隆师巩厍聂晁勾敖融冷訾辛阚那简饶空曾毋沙乜养鞠须丰巢关蒯相查后荆红游竺权逯盖益桓公仉督晋楚闫法汝鄢涂钦岑逯赛银官伯赏南宫墨哈谯笪年爱阳佟言福百家姓终'
name = ''.join(random.choice(chars) for i in range(random.randint(2, 3)))
return name
首先我们定义一串字符变量,里面包含中文的百家姓,然后随机选择2-3个字,拼接成最后的真实姓名。
- username 登录用户名
def random_username(name):
pinyin = Pinyin()
pinyin_name = pinyin.get_pinyin(name, '')
return pinyin_name
将中文名字转换为对应的拼音,作为登录用户名
- password 登录密码
def random_password():
chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
password_length = random.randint(4, 8)
password = ''.join(random.choice(chars) for i in range(password_length))
# 如果你想生成加密后的密码,这里举例,使用bcrypt库对密码进行哈希处理。
# hashed_password = bcrypt.hashpw(password.encode('utf-8'), bcrypt.gensalt())
# return hashed_password.decode('utf-8')
return password
生成随机的4-8位数字和字母组合的密码
- email 电子邮件
email_services = [
'qq', '163', '126', 'sina', 'sohu', 'yeah', 'aliyun', 'tom',
'21cn', 'hotmail', 'live', 'outlook'
]
email_domains = {
'qq': ['qq.com', 'vip.qq.com', 'foxmail.com'],
'163': ['163.com', '126.com', 'yeah.net'],
'126': ['126.com', '163.com', 'yeah.net'],
'sina': ['sina.com', 'sina.cn'],
'sohu': ['sohu.com'],
'yeah': ['yeah.net'],
'aliyun': ['aliyun.com'],
'tom': ['tom.com'],
'21cn': ['21cn.com'],
'hotmail': ['hotmail.com', 'outlook.com', 'live.com', 'msn.com'],
'live': ['live.com', 'outlook.com'],
'outlook': ['outlook.com', 'hotmail.com']
}
def random_email(uname):
service = random.choice(email_services)
domains = email_domains.get(service)
domain = random.choice(domains)
return uname + '@' + domain
定义了一些常见的邮件服务提供商,然后根据用户名拼接成电子邮件的格式。
- phone 电话
def random_phone():
return '1' + ''.join(random.choice(string.digits) for i in range(10))
生成以1开头的11位数字,作为电话号码
- address 住址
# 从 area 表中随机选择一个省份、城市和区县,并将它们拼接成随机地址返回
def random_address():
with conn.cursor() as cursor:
# 随机选择一个省份
cursor.execute("SELECT * FROM area WHERE arealevel = 1 ORDER BY RAND() LIMIT 1")
province = cursor.fetchone()
if province:
province_name = province[1]
province_id = province[0]
# 随机选择一个城市
cursor.execute("SELECT * FROM area WHERE parentid = %s AND arealevel = 2 ORDER BY RAND() LIMIT 1",
(province_id,))
city = cursor.fetchone()
if city:
city_name = city[1]
city_id = city[0]
# 随机选择一个区县
cursor.execute("SELECT * FROM area WHERE parentid = %s AND arealevel = 3 ORDER BY RAND() LIMIT 1",
(city_id,))
district = cursor.fetchone()
if district:
district_name = district[1]
# 拼接随机地址并返回
return province_name + city_name + district_name
return None
生成的是…省…市…区
- imgUrl 用户头像url
def random_imgUrl(username):
imgUrl = '/assets/images/' + username + str(random.randint(0, 99)).zfill(2) + '.jpg'
return imgUrl
在url的用户名后面添加了两位随机数。
- create_time 用户注册时间
def random_time():
# 生成一个随机日期时间对象
year = random.randint(2000, 2023)
month = random.randint(1, 12)
day = random.randint(1, 28)
hour = random.randint(0, 23)
minute = random.randint(0, 59)
second = random.randint(0, 59)
dt = datetime.datetime(year, month, day, hour, minute, second)
# 将日期时间对象格式化为字符串并返回
return dt.strftime('%Y-%m-%d %H:%M:%S')
精确到秒了,因为数据库表设计时是timestamp类型。
4.完整代码
最后的for i in range(5):表示生成5条数据,这个数字可以任意更改
# -*- coding:utf-8 -*-
# author: YP
# date:2023/7/29
import datetime
import pymysql
import random
import string
from xpinyin import Pinyin
# 连接数据库
conn = pymysql.connect(
host='localhost',
port=3306,
user='root',
password='1234',
database='vs718'
)
# 预定义邮件服务商列表
email_services = [
'qq', '163', '126', 'sina', 'sohu', 'yeah', 'aliyun', '139',
'tom', '139', '21cn', 'hotmail', 'live', 'outlook'
]
email_domains = {
'qq': ['qq.com', 'vip.qq.com', 'foxmail.com'],
'163': ['163.com', '126.com', 'yeah.net'],
'126': ['126.com', '163.com', 'yeah.net'],
'sina': ['sina.com', 'sina.cn'],
'sohu': ['sohu.com'],
'yeah': ['yeah.net'],
'aliyun': ['aliyun.com'],
'139': ['139.com'],
'tom': ['tom.com'],
'21cn': ['21cn.com'],
'hotmail': ['hotmail.com', 'outlook.com', 'live.com', 'msn.com'],
'live': ['live.com', 'hotmail.com', 'outlook.com', 'msn.com'],
'outlook': ['outlook.com', 'hotmail.com', 'live.com', 'msn.com']
}
# 生成随机手机号
def random_phone():
return '1' + ''.join(random.choice(string.digits) for i in range(10))
# 生成随机电子邮件地址
def random_email(uname):
service = random.choice(email_services)
domains = email_domains.get(service)
domain = random.choice(domains)
return uname + '@' + domain
# 从 area 表中随机选择一个省份、城市和区县,并将它们拼接成随机地址返回
def random_address():
with conn.cursor() as cursor:
# 随机选择一个省份
cursor.execute("SELECT * FROM area WHERE arealevel = 1 ORDER BY RAND() LIMIT 1")
province = cursor.fetchone()
if province:
province_name = province[1]
province_id = province[0]
# 随机选择一个城市
cursor.execute("SELECT * FROM area WHERE parentid = %s AND arealevel = 2 ORDER BY RAND() LIMIT 1",
(province_id,))
city = cursor.fetchone()
if city:
city_name = city[1]
city_id = city[0]
# 随机选择一个区县
cursor.execute("SELECT * FROM area WHERE parentid = %s AND arealevel = 3 ORDER BY RAND() LIMIT 1",
(city_id,))
district = cursor.fetchone()
if district:
district_name = district[1]
# 拼接随机地址并返回
return province_name + city_name + district_name
return None
def random_address():
address = ChinaAddress()
return address.get_random_address()
# 生成随机创建时间
def random_time():
# 生成一个随机日期时间对象
year = random.randint(2000, 2023)
month = random.randint(1, 12)
day = random.randint(1, 28)
hour = random.randint(0, 23)
minute = random.randint(0, 59)
second = random.randint(0, 59)
dt = datetime.datetime(year, month, day, hour, minute, second)
# 将日期时间对象格式化为字符串并返回
return dt.strftime('%Y-%m-%d %H:%M:%S')
def random_chinese_name():
chars = '赵钱孙李周吴郑王冯陈褚卫蒋沈韩杨朱秦尤许何吕施张孔曹严华金魏陶姜戚谢邹喻柏水窦章云苏潘葛奚范彭郎鲁韦昌马苗凤花方俞任袁柳酆骆高夏蔡田樊胡凌霍虞万支柯昝管卢莫经房裘缪干解应宗丁宣贲邓郁单杭洪包诸左石崔吉钮龚程嵇邢滑裴陆荣翁荀羊於惠甄魏家封芮羿储靳汲邴糜松井段富巫乌焦巴弓牧隗山谷车侯宓蓬全郗班仰秋仲伊宫宁仇栾暴甘钭厉戎祖武符刘景詹束龙叶幸司韶郜黎蓟薄印宿白怀蒲邰从鄂索咸籍赖卓蔺屠蒙池乔阳麻胥能苍双闻莘党翟谭贡劳逄姬申扶堵冉宰郦雍郤璩桑桂濮牛寿通边扈燕冀僪浦尚农温别庄晏柴瞿阎充慕连茹习宦艾鱼容向古易慎戈廖庾终暨居衡步都耿满弘匡国文寇广禄阙东欧殳沃利蔚越夔隆师巩厍聂晁勾敖融冷訾辛阚那简饶空曾毋沙乜养鞠须丰巢关蒯相查后荆红游竺权逯盖益桓公仉督晋楚闫法汝鄢涂钦岑逯赛银官伯赏南宫墨哈谯笪年爱阳佟言福百家姓终'
name = ''.join(random.choice(chars) for i in range(random.randint(2, 3)))
return name
def random_username(name):
# 将中文名转换为拼音
pinyin = Pinyin()
pinyin_name = pinyin.get_pinyin(name, '')
return pinyin_name
def random_password():
# 生成随机的4-8位数字和字母组合的密码
chars = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
password_length = random.randint(4, 8)
password = ''.join(random.choice(chars) for i in range(password_length))
return password
def random_imgUrl(username):
# 在用户名后面添加两位随机数
imgUrl = '/assets/images/' + username + str(random.randint(0, 99)).zfill(2) + '.jpg'
return imgUrl
# 生成数据并插入数据库
try:
with conn.cursor() as cursor:
for i in range(5):
name = random_chinese_name()
username = random_username(name)
password = random_password()
email = random_email(username)
phone = random_phone()
address = random_address()
imgUrl = random_imgUrl(username)
create_time = random_time()
sql = "INSERT INTO user (username, password, name, email, phone, address, imgUrl, create_time) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)"
values = (username, password, name, email, phone, address, imgUrl, create_time)
cursor.execute(sql, values)
print(i, name, '插入成功')
conn.commit()
except Exception as e:
print('插入数据失败:', e)
finally:
conn.close()