代码实现无水印视频抓取并根据视频按帧取图。
你喜欢的抖音美女,每一帧都是壁纸,上码:
import requests
import re
from urllib import request
import cv2
import argparse
import os
import urllib
def get_parse(url):
    """Resolve a share link and collect every URL found in the item-info reply.

    The share link is requested first so that redirects are followed; the
    numeric item id is then taken from the final URL and used to query the
    item-info endpoint.

    Args:
        url: Share link entered by the user.

    Returns:
        A ``(urls, item_id)`` tuple: ``urls`` is the list of all
        double-quoted ``http...`` strings found in the response text and
        ``item_id`` is the numeric id as a string.

    Raises:
        ValueError: If the resolved URL contains no ``/<digits>/`` segment.
    """
    resp = requests.get(url)
    web_url = resp.url
    # Raw string: '\d' in a plain literal is an invalid escape sequence.
    match = re.search(r'/(\d+)/', web_url)
    if match is None:
        raise ValueError(
            'no numeric item id found in resolved URL: {}'.format(web_url)
        )
    item_id = match.group(1)
    print(item_id)
    api = 'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={}'
    info_api = api.format(item_id)
    resp = requests.get(info_api)
    content = resp.text
    # Non-greedy match so each quoted URL is captured separately.
    urls = re.findall(r'"(http.*?)"', content)
    return urls, item_id
def get_need(urls):
    """Filter a URL list down to the entries worth downloading.

    A URL is kept when it contains ``'large'`` or ``'playwm'``; every other
    URL is reported on stdout and dropped. Input order is preserved.

    Args:
        urls: Iterable of URL strings.

    Returns:
        A new list with the kept URLs.
    """
    need_urls = []
    for url in urls:
        # 'playwm' marks the video play URL; 'large' presumably marks
        # full-size images -- TODO confirm against the API response.
        if 'large' in url or 'playwm' in url:
            need_urls.append(url)
        else:
            print('移除项:', url)
    return need_urls
def get_down(need_urls, item_id):
    """Download every video URL in ``need_urls`` and return the last file name.

    Only URLs containing ``'playwm'`` are downloaded (via ``down_video``);
    other entries, such as image URLs, are skipped. ``down_pic`` can be
    called for those if image download is wanted.

    Args:
        need_urls: URLs to consider, in order.
        item_id: Item id used to build the output file names.

    Returns:
        The file name returned by the last ``down_video`` call.

    Raises:
        ValueError: If no URL in ``need_urls`` contains ``'playwm'``.
    """
    file_name = None
    for i, url in enumerate(need_urls):
        if 'playwm' in url:
            file_name = down_video(url, i, item_id)
    # Without this guard the function would fail with UnboundLocalError.
    if file_name is None:
        raise ValueError(
            "no 'playwm' video URL found for item {}".format(item_id)
        )
    return file_name
def down_video(url, i, item_id):
    """Download one video into ``cache/`` and return its file name.

    ``'playwm'`` in the URL is replaced with ``'play'`` and the request is
    sent with a mobile User-Agent, which is how the watermark-free variant
    is obtained.

    Args:
        url: Video URL containing ``'playwm'``.
        i: Index used in the output file name.
        item_id: Item id used in the output file name.

    Returns:
        The file name (without directory), ``"<item_id>_<i>.mp4"``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If the cache directory or file cannot be written.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Mobile Safari/537.36'
    }
    # Swap the URL parameter; combined with the mobile headers this fetches
    # the video without watermark.
    url = re.sub('playwm', 'play', url)
    file_name = str(item_id) + "_" + str(i) + '.mp4'
    # open() does not create missing directories, so make sure it exists.
    os.makedirs('cache', exist_ok=True)
    requests.packages.urllib3.disable_warnings()
    # NOTE(review): verify=False disables TLS certificate validation and the
    # warning about it is silenced above. Kept for compatibility; enable
    # verification if the endpoint serves a valid certificate.
    with requests.get(url, headers=headers, stream=True, verify=False) as resp:
        # Fail loudly instead of saving an error page as an .mp4 file.
        resp.raise_for_status()
        # The header is absent for chunked responses; print None in that case.
        print(resp.headers.get('content-length'))
        with open('cache/' + file_name, 'wb') as f:
            for chunk in resp.iter_content(chunk_size=64 * 1024):
                if chunk:
                    f.write(chunk)
    print('视频下载成功', url)
    return file_name
# Image download helper; not called by the main flow at the moment.
def down_pic(url, i, item_id):
    """Best-effort download of one image into ``cache/``.

    Failures are reported on stdout and never propagated, so one bad URL
    does not abort a batch.

    Args:
        url: Image URL.
        i: Index used in the output file name.
        item_id: Item id used in the output file name.

    Returns:
        None. The file is written as ``cache/<item_id>_<i>.jpeg``.
    """
    file_name = str(item_id) + '_' + str(i) + '.jpeg'
    try:
        # urlretrieve does not create missing directories.
        os.makedirs('cache', exist_ok=True)
        request.urlretrieve(url, 'cache/' + file_name)
    except Exception as err:
        # Still best-effort, but unlike a bare ``except:`` this lets
        # KeyboardInterrupt/SystemExit through and shows the cause.
        print('下载异常', url, err)
    else:
        print('下载成功', url)
def parse_args(file_name):
    """Build the frame-extraction options for a downloaded video.

    The parser is fed a fixed argument list rather than ``sys.argv``, so the
    real command line is never consulted: input is ``cache/<file_name>``,
    output is ``cache/`` and ``skip_frame`` keeps its default.

    Args:
        file_name: Name of the video file inside ``cache/``.

    Returns:
        An ``argparse.Namespace`` with ``input``, ``output`` and
        ``skip_frame`` attributes.
    """
    parser = argparse.ArgumentParser(description='Process pic')
    parser.add_argument('--input', help='video to process', dest='input', default=None, type=str)
    parser.add_argument('--output', help='pic to store', dest='output', default=None, type=str)
    # The default is the number of frames between two saved pictures.
    parser.add_argument('--skip_frame', dest='skip_frame', help='skip number of video', default=10, type=int)
    # input is the path of the source video, output is the directory that
    # receives the extracted pictures.
    args = parser.parse_args(['--input', 'cache/' + file_name, '--output', 'cache/'])
    return args
def process_video(i_video, o_video, num):
    """Save every ``num``-th frame of a video as a numbered JPEG.

    Frames are counted from 1; frame ``num``, ``2 * num``, ... are written
    to ``o_video`` as ``1.jpg``, ``2.jpg``, ... in that order.

    Args:
        i_video: Path of the video to read.
        o_video: Directory that receives the pictures.
        num: Interval, in frames, between two saved pictures.

    Returns:
        None.

    Raises:
        ValueError: If ``num`` is not positive.
    """
    # Checked first: a zero interval would otherwise fail with
    # ZeroDivisionError inside the read loop.
    if num <= 0:
        raise ValueError('num must be a positive integer, got {!r}'.format(num))
    expand_name = '.jpg'
    cap = cv2.VideoCapture(i_video)
    try:
        if not cap.isOpened():
            print("Please check the path.")
            return
        cnt = 0
        count = 0
        while True:
            ret, frame = cap.read()
            # Check the read result before using the frame: on a failed read
            # the frame is not valid and must not be handed to imwrite.
            if not ret:
                break
            cnt += 1
            if cnt % num == 0:
                count += 1
                cv2.imwrite(os.path.join(o_video, str(count) + expand_name), frame)
    finally:
        # Always free the capture handle, even if writing a frame fails.
        cap.release()
if __name__ == '__main__':
    # Video download part.
    url = input('输入链接:')
    url = url.strip()
    urls, item_id = get_parse(url)
    need_urls = get_need(urls)
    # The download step writes into cache/, so the directory has to exist
    # before it runs, not only before frame extraction.
    os.makedirs('cache/', exist_ok=True)
    file_name = get_down(need_urls, item_id)
    # Frame extraction part.
    args = parse_args(file_name)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(args.output, exist_ok=True)
    print('执行截取:')
    print(args)
    process_video(args.input, args.output, args.skip_frame)