xlsx文件读写操作、多线程操作、视频/图像下载、视频抽帧

功能:xlsx文档读写操作,多线程操作,视频/图像下载,视频抽帧

注:视频抽帧时使用ffmpeg工具。

#-*- coding:utf-8 -*-
import numpy as np
import os
import requests
import csv
import cv2
import threading
import threadpool
import time
import xlrd
import json
import shutil
import pdb
from PIL import Image

from openpyxl import Workbook
import random
def SaveXlsx(path="./bilibili_tera.txt", limit=800, out_path=None):
    """Build a workbook from a random sample of tab-separated text lines.

    The lines of ``path`` are shuffled, the first ``limit`` of them are kept,
    and each kept line is split on tabs and appended as one worksheet row.

    Args:
        path: Text file to sample from; one tab-separated record per line.
        limit: Maximum number of rows written to the sheet.
        out_path: Where to save the workbook. With the default ``None``
            nothing is written to disk, matching the original behaviour in
            which the ``save`` call was commented out.

    Returns:
        The populated ``openpyxl.Workbook``.
    """
    # Read through a context manager so the handle is closed promptly.
    with open(path) as handle:
        lines = handle.readlines()

    # One shuffle already yields a uniformly random order.
    random.shuffle(lines)

    workbooks = Workbook()
    booksheet = workbooks.active
    for line in lines[:limit]:
        booksheet.append(line.strip().split("\t"))

    if out_path is not None:
        workbooks.save(out_path)
    return workbooks

class SaveVideoBaseUrl:
    """Read URL and label columns out of Excel workbooks via ``xlrd``.

    NOTE(review): xlrd 2.0 and later only read legacy ``.xls`` files; confirm
    the installed xlrd version if the inputs are ``.xlsx`` workbooks.
    """

    def __init__(self, path=None, savedir=None):
        """Remember the default workbook path and an output directory.

        Args:
            path: Default workbook path used when a reader method is called
                without ``excelPath``.
            savedir: Stored on the instance; not read by any method here.
        """
        self._path = path
        self._savedir = savedir

    def _iter_data_rows(self, only_sheet=None):
        """Yield ``(row_index, row)`` for every non-header row.

        Every sheet name is printed as it is visited. When ``only_sheet`` is
        given, sheets with any other name are skipped. Row 0 of each sheet is
        treated as a header and never yielded.
        """
        workbook = xlrd.open_workbook(self._path)
        for sheet_name in workbook.sheet_names():
            print(sheet_name)
            if only_sheet is not None and sheet_name != only_sheet:
                continue
            sheet = workbook.sheet_by_name(sheet_name)
            for index, row in enumerate(sheet.get_rows()):
                if index == 0:
                    continue
                yield index, row

    def ReadExcel(self, excelPath=None):
        """Collect the sixth-column value of every data row in every sheet.

        Surrounding double and single quotes are stripped from each value.
        The result is also stored on ``self._urls``.

        Args:
            excelPath: Workbook to read; replaces the stored path when given.

        Returns:
            List of the cleaned column values (presumably URLs, going by the
            variable names).
        """
        if excelPath is not None:
            self._path = excelPath
        urls = []
        for index, row in self._iter_data_rows():
            value = row[5].value.strip('"').strip("'")
            urls.append(value)
            # Progress trace: echo every 1000th row.
            if index % 1000 == 0:
                print(value)
        self._urls = urls
        return urls

    def ReadExcelGetData(self, excelPath=None):
        """Split image names by label using the sheet named ``Sheet``.

        For each data row the image name is the last ``/`` component of
        column 5, ``ourlabel`` is column 3 and ``truelabel`` is column 9.
        The label ``"正常"`` means "normal".

        Args:
            excelPath: Workbook to read; replaces the stored path when given.

        Returns:
            A tuple ``(results, norms, porns)``: ``norms`` are rows whose
            true label is normal, ``porns`` are all other rows, and
            ``results`` are the rows whose true label is not normal but
            whose ``ourlabel`` is normal.
        """
        if excelPath is not None:
            self._path = excelPath
        results = []
        norms = []
        porns = []
        for _, row in self._iter_data_rows(only_sheet='Sheet'):
            imgName = row[5].value.split('/')[-1]
            ourlabel = row[3].value
            truelabel = row[9].value
            if truelabel == "正常":
                norms.append(imgName)
            else:
                porns.append(imgName)
                if ourlabel == "正常":
                    results.append(imgName)
        print(len(results))
        return results, norms, porns

    def ReadExcelFromMicroVideoanime(self, excelPath=None):
        """Collect the third-column value of every data row in ``Sheet1``.

        Args:
            excelPath: Workbook to read; replaces the stored path when given.

        Returns:
            List of the raw column values (presumably URLs, going by the
            variable names).
        """
        if excelPath is not None:
            self._path = excelPath
        results = [
            row[2].value
            for _, row in self._iter_data_rows(only_sheet='Sheet1')
        ]
        print(len(results))
        return results


def down_images(line, class_dir='./normal_videos/normal_anime/', timeout=30):
    """Download the URL on ``line`` into ``class_dir``.

    The file is named after the last ``/`` component of the URL.

    Args:
        line: One input line holding a URL (surrounding whitespace ignored).
        class_dir: Destination directory; created when missing.
        timeout: Seconds to wait on the HTTP request, so that one stalled
            server cannot block a pool worker forever.

    Returns:
        The path of the saved file, or ``None`` when the line is empty, the
        URL has no file name, or the download failed. Failures are reported
        on stdout rather than raised, keeping the function best-effort.
    """
    url = line.strip()
    if not url:
        return None
    print(url)

    # Take the name from the stripped URL; using the raw line would leave a
    # trailing newline inside the file name.
    id_name = url.split('/')[-1]
    if not id_name:
        print("skip {}: no file name in URL".format(url))
        return None

    try:
        if not os.path.isdir(class_dir):
            os.makedirs(class_dir)
    except OSError as err:
        # Another worker thread may have created the directory first.
        if not os.path.isdir(class_dir):
            print("cannot create {}: {}".format(class_dir, err))
            return None

    video_path = os.path.join(class_dir, id_name)
    try:
        r = requests.get(url, timeout=timeout)
        # Do not save an HTTP error page as if it were the media file.
        r.raise_for_status()
        with open(video_path, 'wb') as code:
            code.write(r.content)
    except (requests.RequestException, IOError, OSError) as err:
        print("download failed {}: {}".format(url, err))
        return None
    return video_path


def _ensure_dir(directory):
    """Create ``directory`` if needed; return True when it exists afterwards."""
    if os.path.isdir(directory):
        return True
    try:
        os.makedirs(directory)
    except OSError:
        # Another worker thread may have created it between check and call.
        return os.path.isdir(directory)
    return True


def extract_frames(line, data_dir='./datasets/xiaoshipin_disu/images4/neg_kiss/',
                   ffmpeg='./ffmpeg', fps=16):
    """Extract frames from one video into ``data_dir/<video name>/``.

    Frames are scaled to 320x320, sampled at ``fps`` frames per second and
    written as ``image_%05d.jpg`` by ffmpeg.

    Args:
        line: One input line holding the video path.
        data_dir: Root output directory; created when missing.
        ffmpeg: Path of the ffmpeg executable.
        fps: Sampling rate passed to ffmpeg's ``fps`` filter.

    Returns:
        ffmpeg's exit status, or ``None`` when the video file does not exist
        or ffmpeg could not be started.
    """
    # Function-scope import keeps this block self-contained.
    import subprocess

    video_path = line.strip()
    print(video_path)
    if not _ensure_dir(data_dir):
        print("cannot create {}".format(data_dir))
        return None

    # Name the output folder after the file stem. Slicing a fixed number of
    # characters off the raw line breaks when the trailing newline is missing
    # or the extension is not exactly four characters long.
    video_name = os.path.splitext(os.path.basename(video_path))[0]
    if not video_name or not os.path.isfile(video_path):
        return None

    image_dir = os.path.join(data_dir, video_name)
    if not _ensure_dir(image_dir):
        print("cannot create {}".format(image_dir))
        return None

    image_des = os.path.join(image_dir, 'image_%05d.jpg')
    # An argument list with no shell involved: paths containing spaces or
    # shell metacharacters are passed to ffmpeg verbatim.
    cmd = [
        ffmpeg, '-i', video_path,
        '-threads', '8',
        '-vf', 'scale=320:320,fps={}'.format(fps),
        '-q:v', '2',
        image_des,
    ]
    print(' '.join(cmd))
    try:
        return subprocess.call(cmd)
    except OSError as err:
        print("cannot run {}: {}".format(ffmpeg, err))
        return None


def down_images2(line, class_dir='./datasets/baiduwentu_seqing/20201219/test_samples/',
                 timeout=30):
    """Download ``<name> <url>`` from ``line`` into ``class_dir/<name>``.

    Args:
        line: One input line with a file name and a URL separated by
            whitespace.
        class_dir: Destination directory; created when missing.
        timeout: Seconds to wait on the HTTP request, so that one stalled
            server cannot block a pool worker forever.

    Returns:
        The path of the saved file, or ``None`` when the line is malformed
        or the download failed. Failures are reported on stdout rather than
        raised, keeping the function best-effort.
    """
    fields = line.split()
    if len(fields) < 2:
        # A line without both fields used to raise IndexError.
        print("skip malformed line: {!r}".format(line))
        return None
    id_name, url = fields[0], fields[1]
    print(url)

    try:
        if not os.path.isdir(class_dir):
            os.makedirs(class_dir)
    except OSError as err:
        # Another worker thread may have created the directory first.
        if not os.path.isdir(class_dir):
            print("cannot create {}: {}".format(class_dir, err))
            return None

    video_path = os.path.join(class_dir, id_name)
    try:
        r = requests.get(url, timeout=timeout)
        # Do not save an HTTP error page as if it were the media file.
        r.raise_for_status()
        with open(video_path, 'wb') as code:
            code.write(r.content)
    except (requests.RequestException, IOError, OSError) as err:
        print("download failed {}: {}".format(url, err))
        return None
    return video_path

def Del_invalid_images(line):
    """Delete the image at ``line`` when it cannot be decoded as RGB.

    Args:
        line: One input line holding an image path.

    Returns:
        None. A path that is not an existing file is ignored.
    """
    imgPath = line.strip()
    if not os.path.isfile(imgPath):
        # Nothing to validate or delete; removing a missing path would raise.
        return

    try:
        # The context manager closes the file before any delete is attempted.
        with Image.open(imgPath) as image:
            image.convert('RGB')
    except Exception:
        # Decoders signal corrupt data with several exception types, so the
        # catch stays broad, but KeyboardInterrupt and SystemExit still
        # propagate.
        try:
            os.remove(imgPath)
        except OSError as err:
            print("cannot del {}: {}".format(imgPath, err))
            return
        print("del {}".format(imgPath))

if __name__ == "__main__":
    # Input: a text file with one video path per line. The same pool driver
    # works for the other per-line workers in this file (down_images,
    # down_images2, Del_invalid_images) by swapping the worker and the list.
    inputfile = './datasets/xiaoshipin_disu/videos4/neg_kiss_path.txt'
    with open(inputfile) as handle:
        lines = handle.readlines()
    print(len(lines))

    begin_time = time.time()
    # Original author's note on poll_timeout=None: a worker that finishes its
    # task and finds the queue empty blocks waiting for new work instead of
    # polling; while blocked it uses nothing from the Python interpreter, so
    # it does not raise even if the main thread kills it forcibly. This is
    # said to eliminate some errors.
    pool = threadpool.ThreadPool(100, poll_timeout=None)
    for req in threadpool.makeRequests(extract_frames, lines):
        pool.putRequest(req)
    pool.wait()
    end_time = time.time()
    print('all use time=',(end_time - begin_time),'s')
  

# run:python2 down_videos.py

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值