# -*- coding: utf-8 -*-
# Purpose: xlsx read/write, thread-pool fan-out, video/image download, video frame extraction.
# Note: frame extraction shells out to the ffmpeg tool.
import numpy as np
import os
import requests
import csv
import cv2
import threading
import threadpool
import time
import xlrd
import json
import shutil
import pdb
from PIL import Image
from openpyxl import Workbook
import random
def SaveXlsx(input_path="./bilibili_tera.txt",
             output_path="./bilibili_tera_800.xlsx",
             sample_size=800):
    """Randomly sample lines from a tab-separated text file into an xlsx workbook.

    Args:
        input_path: path to a tab-separated text file, one record per line.
        output_path: destination .xlsx file.
        sample_size: number of randomly sampled lines to keep.
    """
    # 'with' closes the input file even if a later step raises.
    with open(input_path) as f:
        lines = f.readlines()
    # One shuffle before truncating makes the kept subset a uniform sample
    # (the original shuffled twice, which adds nothing).
    random.shuffle(lines)
    lines = lines[:sample_size]
    workbook = Workbook()
    sheet = workbook.active
    # excelTitle = ['nid', 'odyssey_service_name', 'msg', 'detail']
    # sheet.append(excelTitle)
    for line in lines:
        sheet.append(line.strip().split("\t"))
    # The save was commented out (and a pdb breakpoint left in), so the
    # function previously produced no output at all; re-enabled here.
    workbook.save(output_path)
class SaveVideoBaseUrl:
    """Read media URLs and label columns out of xlrd-compatible Excel workbooks.

    Attributes:
        _path: path of the workbook to read.
        _savedir: download directory (kept for callers; unused inside this class).
        _urls: cached result of the last ReadExcel() call.
    """

    def __init__(self, path=None, savedir=None):
        self._path = path
        self._savedir = savedir

    def ReadExcel(self, excelPath=None):
        """Collect column 5 (URL) from every sheet, skipping each header row.

        Args:
            excelPath: optional workbook path overriding the one from __init__.

        Returns:
            List of URL strings with surrounding quote characters stripped.
        """
        if excelPath is not None:
            self._path = excelPath
        urls = []
        workbook = xlrd.open_workbook(self._path)
        for sheet_name in workbook.sheet_names():
            print(sheet_name)
            sheet = workbook.sheet_by_name(sheet_name)
            for i, row in enumerate(sheet.get_rows()):
                if i == 0:
                    continue  # skip header row
                url = row[5].value.strip('"').strip("'")
                urls.append(url)
                if i % 1000 == 0:
                    print(url)  # coarse progress indicator
        self._urls = urls
        return urls

    def ReadExcelGetData(self, excelPath=None):
        """Split rows of sheet 'Sheet' by the human label (col 9) vs our label (col 3).

        A leftover pdb.set_trace() that halted every call has been removed.

        Returns:
            (results, norms, porns): image names the model mislabeled as normal,
            image names whose true label is "正常", and all remaining image names.
        """
        if excelPath is not None:
            self._path = excelPath
        results = []
        norms = []
        porns = []
        workbook = xlrd.open_workbook(self._path)
        for sheet_name in workbook.sheet_names():
            print(sheet_name)
            if 'Sheet' != sheet_name:
                continue
            sheet = workbook.sheet_by_name(sheet_name)
            for i, row in enumerate(sheet.get_rows()):
                if i == 0:
                    continue  # skip header row
                url = row[5].value
                imgName = url.split('/')[-1]
                ourlabel = row[3].value
                truelabel = row[9].value
                if truelabel == "正常":
                    norms.append(imgName)
                else:
                    porns.append(imgName)
                # False negatives: human says not-normal, model says normal.
                if truelabel != "正常" and ourlabel == "正常":
                    results.append(imgName)
        print(len(results))
        return results, norms, porns

    def ReadExcelFromMicroVideoanime(self, excelPath=None):
        """Collect column 2 from sheet 'Sheet1', skipping the header row.

        A leftover pdb.set_trace() that halted every call has been removed.

        Returns:
            List of column-2 cell values (URLs).
        """
        if excelPath is not None:
            self._path = excelPath
        results = []
        workbook = xlrd.open_workbook(self._path)
        for sheet_name in workbook.sheet_names():
            print(sheet_name)
            if 'Sheet1' != sheet_name:
                continue
            sheet = workbook.sheet_by_name(sheet_name)
            for i, row in enumerate(sheet.get_rows()):
                if i == 0:
                    continue  # skip header row
                results.append(row[2].value)
        print(len(results))
        return results
def down_images(line):
    """Download one file given its URL on *line*.

    Saves under ./normal_videos/normal_anime/ using the last URL path segment
    as the file name. Failures are logged and swallowed so a thread-pool run
    over many URLs is not aborted by a single bad URL.
    """
    url = line.strip()
    # Derive the name from the *stripped* URL; the original split the raw
    # line, so a trailing newline ended up inside the file name.
    id_name = url.split('/')[-1]
    print(url)
    class_dir = './normal_videos/normal_anime/'
    video_path = os.path.join(class_dir, id_name)
    try:
        # Timeout keeps a stalled server from hanging a worker thread forever.
        r = requests.get(url, timeout=60)
        with open(video_path, 'wb') as code:
            code.write(r.content)
    except Exception as e:
        # Narrowed from a bare except (which also swallowed KeyboardInterrupt);
        # log instead of failing silently.
        print("download failed {}: {}".format(url, e))
def extract_frames(line):
    """Extract frames from one video by shelling out to ffmpeg.

    Frames are written as JPEGs to
    ./datasets/xiaoshipin_disu/images4/neg_kiss/<video_name>/image_%05d.jpg,
    scaled to 320x320 at 16 fps.

    Args:
        line: path to the video file (possibly newline-terminated).
    """
    import shlex  # local import: only this function builds shell commands

    video_path = line.strip()
    print(video_path)
    data_dir = './datasets/xiaoshipin_disu/images4/neg_kiss/'
    if not os.path.exists(data_dir):
        os.makedirs(data_dir)
    # NOTE(review): [:-5] assumes a 5-character extension (e.g. ".webm") —
    # confirm against the actual video files.
    video_name = line[line.rindex('/') + 1:-5]
    image_dir = os.path.join(data_dir, video_name)
    if not os.path.isfile(video_path):
        return
    if not os.path.isdir(image_dir):
        os.mkdir(image_dir)
    image_des = image_dir + '/image_%05d.jpg'
    # Quote both paths so names containing spaces or shell metacharacters
    # cannot break — or inject into — the shell command run by os.system.
    cmd = ('./ffmpeg -i ' + shlex.quote(video_path) +
           ' -threads 8 -vf "scale=320:320,fps=16" -q:v 2 ' +
           shlex.quote(image_des))
    print(cmd)
    os.system(cmd)
def down_images2(line):
    """Download one file described by a space-separated "name url" line.

    Saves to ./datasets/baiduwentu_seqing/20201219/test_samples/<name>.
    Failures are logged and swallowed so a batch run keeps going.
    """
    parts = line.strip().split(' ')
    id_name = parts[0]
    url = parts[1]
    print(url)
    class_dir = './datasets/baiduwentu_seqing/20201219/test_samples/'
    video_path = os.path.join(class_dir, id_name)
    try:
        # Timeout keeps a stalled server from hanging a worker thread forever.
        r = requests.get(url, timeout=60)
        with open(video_path, 'wb') as code:
            code.write(r.content)
    except Exception as e:
        # Narrowed from a bare except (which also swallowed KeyboardInterrupt);
        # log instead of failing silently.
        print("download failed {}: {}".format(url, e))
def Del_invalid_images(line):
    """Delete the image at the path on *line* if PIL cannot decode it.

    Used to purge corrupt downloads before training. The original bare
    `except:` meant a KeyboardInterrupt raised mid-decode would *delete a
    valid image*; only Exception subclasses trigger deletion now.
    """
    imgPath = line.strip()
    try:
        Image.open(imgPath).convert('RGB')
    except Exception:
        os.remove(imgPath)
        print("del {}".format(imgPath))
if __name__ == "__main__":
    # Driver: read video paths (one per line) and fan extract_frames() out
    # over a 100-worker thread pool. Other toggles below were used for the
    # download / cleanup jobs and are left commented for reference.
    # Savedata2()
    # xlsxpath= './microvideoanime_train_normals_5w.xlsx'
    # vid_handle = SaveVideoBaseUrl()
    # lines = vid_handle.ReadExcelFromMicroVideoanime(xlsxpath)
    inputfile = './datasets/xiaoshipin_disu/videos4/neg_kiss_path.txt'
    lines = open(inputfile).readlines()
    print(len(lines))
    # print(lines[0])
    # pdb.set_trace()
    # extract_frames(lines[0])
    # inputfile = './dataset/test_sousuotupian_fengkong/20210119/target_imgs'
    # lines = os.listdir(inputfile)
    # lines = [os.path.join(inputfile, line) for line in lines]
    # inputfile = './dataset/test_sousuotupian_fengkong/20210119/target_imgs.txt'
    # inputfile = './datasets/xiaoshipin_disu/shenVlouxiongtexie.txt'
    # pdb.set_trace()
    # lines = open(inputfile).readlines()
    # Del_invalid_images(lines[0])
    begin_time=time.time()
    # When a worker finishes and loops back to fetch a new task while the
    # queue is empty, it blocks waiting for new tasks; a blocked thread does
    # not touch the interpreter, so even if the main thread kills it
    # forcefully no error is raised.
    pool = threadpool.ThreadPool(100, poll_timeout=None)  # suppresses some shutdown errors
    n_requests = threadpool.makeRequests(extract_frames, lines)
    [pool.putRequest(req) for req in n_requests]
    pool.wait()
    end_time=time.time()
    print('all use time=',(end_time - begin_time),'s')
    pass
    # run: python2 down_videos.py