有时候需要下载一些视频上传到数据库,网上很多抓取的方式用起来都不是很方便,自己写了一个能够用的抓取视频的脚本,并且自动提取视频封面,上传到七牛云,最后按照规则写到数据库。大大提高了工作效率。
需要通过 pip 安装的依赖
#http工具
pip install requests
#tqdm是一个方便且易于扩展的Python进度条
pip install tqdm
#这个用于读取视频截取视频封面
pip install opencv-python -i https://pypi.douban.com/simple/
#mysql工具类
pip install pymysql
#七牛云
pip install qiniu
引入所需要的包
import math
import requests
from tqdm import tqdm
import cv2
import os
import time
import pymysql
from qiniu import Auth, put_file
import random
初始化七牛云与数据库的配置
# region qiniu config
# Placeholder credentials: replace with your own Qiniu Access Key / Secret Key.
access_key = '自己的access_key '
secret_key = '自己的secret_key '
# Bucket (storage space) that receives the uploads.
bucket_name = '七牛云创建的空间'
# Prefix ("directory") for uploaded object keys.
directory_name = 'video/'
# Auth object built from the two keys above.
q = Auth(access_key, secret_key)
# endregion
# region mysql config
# Module-level MySQL connection; host, password and database below are
# placeholders to be replaced with real values.
mysqlConn = pymysql.connect(
    # IP address of the MySQL server
    host='xxx.xxx.xxx.xxx',
    # MySQL's default port
    port=3306,
    # User name
    user='root',
    # Password (pymysql also accepts the short form `passwd`)
    password='密码',
    # Database name (pymysql also accepts the short form `db`)
    database='db',
    # Character encoding of the connection
    charset='utf8mb4',
)
# endregion
上传七牛云
def upload(key, file):
    """Upload a local file to the configured Qiniu bucket.

    Args:
        key: Object name; ``directory_name`` is prepended to form the final key.
        file: Path of the local file to upload.

    Raises:
        RuntimeError: If Qiniu does not confirm the upload under the
            expected key.
    """
    key = directory_name + key
    # The upload token is bound to this key and expires after 3600 seconds.
    token = q.upload_token(bucket_name, key, 3600)
    ret, info = put_file(token, key, file)
    # On failure the SDK hands back ret=None, so check explicitly instead of
    # using `assert` (stripped under -O, and ret['key'] would raise TypeError).
    if not ret or ret.get('key') != key:
        raise RuntimeError(f"Qiniu upload failed for {key}: {info}")
    print("上传七牛云成功:文件名:" + key)
下载视频保存到本地
iterable:可迭代的对象 默认None
total:进度条总长度大小(int or float)默认None
desc:进度条的前缀内容(str)默认None
unit:进度条的单位(str)默认 it ,实际显示为 it/s
# Download the video
def down_video(url, is_upload):
    """Download the video at *url* into ``dir_name`` and optionally upload it.

    Stores the local path in the module-level ``video_save_path``.

    Args:
        url: Direct URL of the video file.
        is_upload: When true, pass the saved file to ``upload`` afterwards.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    global video_save_path
    # stream=True fetches only the headers up front and keeps the connection
    # open so the body can be written chunk by chunk; the with-block releases
    # the connection, and the timeout keeps a stalled server from hanging us.
    with requests.get(url, stream=True, timeout=30) as res:
        # Fail early instead of saving an error page as an .mp4 file.
        res.raise_for_status()
        # Content-Length is optional (e.g. chunked responses); tqdm accepts
        # total=None and then shows progress without a known total.
        content_length = res.headers.get('Content-Length')
        # Round up so the progress-bar total is not short by a partial chunk.
        size = math.ceil(int(content_length) / 1024) if content_length else None
        video_save_path = dir_name + '/' + video_name
        with open(video_save_path, 'wb') as file:
            if size is None:
                print("视频大小未知,开始下载...")
            else:
                print("视频大小是:", size, 'k,开始下载...')
            for data in tqdm(iterable=res.iter_content(1024), total=size, unit='k', desc=video_name):
                file.write(data)
    print("视频下载完成:" + video_name)
    # Upload only after the file has been closed so all data is flushed.
    if is_upload:
        upload(video_name, video_save_path)
视频抓取第一帧作为封面
# Grab a frame from the downloaded video to use as the cover image
def capture_img(is_upload):
    """Save an early frame of ``video_save_path`` as the cover image.

    Uses the 31st frame when the video is long enough, otherwise the last
    frame that could be decoded. The image is written to
    ``dir_name/image_name``.

    Args:
        is_upload: When true, pass the written cover to ``upload``.
    """
    # NOTE(review): presumably the opening frames are skipped to avoid a blank
    # cover - confirm the intended frame index.
    frames_to_read = 31
    cover_path = dir_name + '/' + image_name
    video = cv2.VideoCapture(video_save_path)
    image = None
    try:
        for _ in range(frames_to_read):
            success, frame = video.read()
            if not success:
                # Unopened or short video: keep whatever was decoded so far.
                break
            image = frame
    finally:
        # Always free the decoder / file handle held by OpenCV.
        video.release()
    if image is None or not cv2.imwrite(cover_path, image):
        print("封面抓取失败:" + cover_path)
        return
    print("封面抓取成功:" + cover_path)
    if is_upload:
        upload(image_name, cover_path)
写入数据库
# Write the record to the database
def insert_db(title):
    """Insert one row for the current video into ``xsp_news_info``.

    Errors are printed and the transaction is rolled back. The module-level
    connection ``mysqlConn`` is closed afterwards, so this can only be called
    once per run.

    Args:
        title: Title stored with the record.
    """
    sql = "INSERT INTO `xsp_news_info` (`picture_addr`, `video_addr`, `title`,`user_code`, `avatar`, `nickname`,is_show,lbs,create_time) values (%s,%s,%s,%s,%s,%s,0,'北京市',now())"
    # NOTE(review): these URLs omit the 'video/' prefix that upload() adds to
    # object keys - confirm the '域名' placeholder is meant to include it.
    image = 'http://域名' + image_name
    video = 'http://域名' + video_name
    user_code = '111111111'
    avatar = '默认图标'
    nickname = '大胡子叔叔'
    # Bind before the try so the finally clause cannot hit an unbound name
    # (and mask the real error) when creating the cursor itself fails.
    cursor = None
    try:
        cursor = mysqlConn.cursor()
        row = cursor.execute(sql, (image, video, title, user_code, avatar, nickname))
        mysqlConn.commit()
        print("写入数据库成功:入库" + str(row) + "条记录")
    except Exception as e:
        print(e)
        mysqlConn.rollback()
    finally:
        if cursor is not None:
            cursor.close()
        mysqlConn.close()
初始化一下保存目录
# Generate the working directory and the file names
def init():
    """Pick file names for this run and create its working directory.

    Sets the module-level ``image_name``, ``video_name`` and ``dir_name``.
    The shared base name is a timestamp plus a random digit from 1 to 9, and
    the directory is ``<cwd>/video/<base name>``.
    """
    global image_name, video_name, dir_name
    # File names
    name = time.strftime('%Y%m%d_%H%M%S_') + str(random.randint(1, 9))
    image_name = name + '.jpg'
    video_name = name + '.mp4'
    # Working directory; makedirs creates the parent 'video' folder as needed
    # and exist_ok avoids the race of a separate exists() check.
    dir_name = os.getcwd() + '/video/' + name
    os.makedirs(dir_name, exist_ok=True)
填入视频下载地址
title 是写入数据库时使用的标题
url是视频的播放地址
if __name__ == '__main__':
    # Script entry point: download the video, grab a cover, upload both to
    # Qiniu and record the result in the database.
    print('操作开始==================================')
    # Title stored in the database record.
    title = '撩裙杀合集来了'
    # Playback URL of the video to download.
    url = 'https://v2.kwaicdn.com/upic/2022/10/30/19/BMjAyMjEwMzAxOTQxNTZfOTQ0MjI3OThfODc2MTYyMTEzODBfMV8z_hd15_B6c8f7ab4de1e321d62f51d31191acaac.mp4?pkey=AAXXpUs7206A2BMeVuJrJA3-5cD9TGlV5TFRG4i6ip9sqYmgoB6QP_bNXJg8Et26BXJAhF-zMvZd2DSILK6Y2bEfkOqYuTVzwOCN2mihF4V54pdsocCG2UckjUzGODPJml4&tag=1-1669259527-unknown-0-qhb3xo0wzc-41da8a0c03768e68&clientCacheKey=3xtbqvzzqikxa9k_hd15.mp4&di=71f6f369&bp=14944&tt=hd15&ss=vp'
    init()
    # Create an empty "<title>.txt" marker file in the working directory;
    # the with-block closes the handle instead of leaking it.
    with open(dir_name + '/' + title + '.txt', 'wb'):
        pass
    down_video(url, True)
    capture_img(True)
    insert_db(title)
    print("操作结束==================================")