Common small utility code snippets — Python

Extract images from a video at a fixed time interval and save them, i.e. save one image every few frames

import os
import subprocess

def convert_video_images(source_path, output_path):
    fileTypes = ['.avi', '.mp4', '.flv', '.mov', '.mkv']
    if not os.path.exists(source_path):
        print("source path does not exist: %s" % source_path)
        return
    if not os.path.exists(output_path):
        os.mkdir(output_path)

    for video_file in os.listdir(source_path):
        print(video_file)
    for video_file in os.listdir(source_path):
        video_name = os.path.join(source_path, video_file)

        # skip files whose extension is not a known video type
        if os.path.splitext(video_name)[1] not in fileTypes:
            print("skip %s" % video_file)
            continue
        # skip macOS metadata files such as .DS_Store
        if video_file == ".DS_Store":
            print(video_file)
            continue

        print("video name:", video_name)

        strcmd = 'ffmpeg -i ' + video_name + ' -vf  "scale=1920:1080,fps=1/5" ' + output_path + '/' + \
                 os.path.splitext(video_file)[0] + '%5d.png'
        print(strcmd)

        subprocess.call(strcmd, shell=True)
        print('images generated for this video')


# Split a string on a specific character
# string_ = 'name_name_string'
# print(string_.split('_'))  # -> ['name', 'name', 'string']

This method requires FFmpeg to be installed; installation guides are easy to find online, so they are not repeated here. In the command, fps=1/5 means one frame is extracted every 5 seconds.
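If you literally want to save every Nth frame rather than one frame every few seconds, a minimal OpenCV sketch can do it; the paths and the frame_interval value below are placeholders.

import os
import cv2

def save_every_nth_frame(video_path, output_dir, frame_interval=25):
    # save one image every `frame_interval` frames
    os.makedirs(output_dir, exist_ok=True)
    cap = cv2.VideoCapture(video_path)
    index = 0
    saved = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break
        if index % frame_interval == 0:
            cv2.imwrite(os.path.join(output_dir, '%05d.png' % saved), frame)
            saved += 1
        index += 1
    cap.release()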

Read a video frame by frame, process each frame, and then compose the processed frames into a new video

    # requires OpenCV: import cv2
    cap = cv2.VideoCapture('input_file_path.mp4')
    fps = cap.get(cv2.CAP_PROP_FPS)
    print(fps)
    size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
            int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    print(size)
    fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', '2')
    # another codec can be used instead:
    # fourcc = cv2.VideoWriter_fourcc(*'XVID')
    outVideo = cv2.VideoWriter('output_file_path', fourcc, fps, size)
    if cap.isOpened():
        rval, frame = cap.read()
        print('True')
    else:
        rval = False
        print('False')
    while rval:
        # process the current frame to produce a new frame: my_image
        my_image = my_demo(sess, net, frame)
        cv2.imshow('test', my_image)
        outVideo.write(my_image)
        cv2.waitKey(1)
        rval, frame = cap.read()
    cap.release()
    outVideo.release()
    cv2.destroyAllWindows()

Modify the content of an XML node. In this example the content 'guai' of the name node is changed to 'Guai':

import xml.etree.ElementTree as ET

def _load_pascal_annotation(filename):
    tree = ET.parse(filename)
    objs = tree.findall('object')
    for ix, obj in enumerate(objs):
        if obj.find('name').text.lower().strip() == 'guai':
            print("found 'guai', rewriting it as 'Guai'")
            obj.find('name').text = 'Guai'
            tree.write(filename)
            print(filename)
        if obj.find('name').text.lower().strip() == 'texiao':
            print("found 'texiao', rewriting it as 'Texiao'")
            obj.find('name').text = 'Texiao'
            tree.write(filename)
            print(filename)

Write XML node content into a txt file, for a directory that itself contains sub-directories:

def nodeToTxt():
    source_path = '/media/ksyun/PROJECTDATA/810_xml/xml'
    text_file = '/home/ksyun/11/11.txt'
    writer_file = open(text_file, 'a')
    for names in os.listdir(source_path):
        xml_dir = os.path.join(source_path, names)
        print(xml_dir)
        for name in os.listdir(xml_dir):
            xml_name = os.path.join(xml_dir, name)
            name_image = os.path.splitext(name)[0]
            isNone = _load_pascal_annotation(xml_name)
            if isNone is True:
                print('is None')
            else:
                writer_file.write(name_image + '\n')
    writer_file.close()

Replace a line that contains a keyword:

def replace_txt_line_inf():
    file='ori.txt'
    file_des = 'des.txt'
    # read the old file line by line and replace any line containing the keyword
    file_data = ""
    with open(file, "r", encoding="utf-8") as f:
        for line in f.readlines():
            if line.find("des_inf") >= 0:
                line = 'new_inf\n'
            file_data += line
    with open(file_des, "w", encoding="utf-8") as f:
        f.write(file_data)

Move or copy a file from one folder to another:

import os
import shutil

def mymovefile(srcfile, dstfile):
    if not os.path.isfile(srcfile):
        print("%s not exist!" % srcfile)
    else:
        fpath, fname = os.path.split(dstfile)  # split into directory and file name
        if not os.path.exists(fpath):
            os.makedirs(fpath)  # create the directory
        shutil.move(srcfile, dstfile)  # move the file
        print("move %s -> %s" % (srcfile, dstfile))


def mycopyfile(srcfile, dstfile):
    if not os.path.isfile(srcfile):
        print("%s not exist!" % srcfile)
    else:
        fpath, fname = os.path.split(dstfile)  # split into directory and file name
        if not os.path.exists(fpath):
            os.makedirs(fpath)  # create the directory
        # note: shutil.copyfile needs dstfile to be a file path; shutil.copy also
        # accepts a directory, in which case the file keeps its original name
        shutil.copyfile(srcfile, dstfile)  # copy the file
        print("copy %s -> %s" % (srcfile, dstfile))

Converting between TensorFlow pb and pbtxt files (essentially just a matter of whether the as_text argument is True):


import tensorflow as tf
from tensorflow.python.platform import gfile
from google.protobuf import text_format
 
def convert_pb_to_pbtxt(filename):
  with gfile.FastGFile(filename,'rb') as f:
    graph_def = tf.GraphDef()
 
    graph_def.ParseFromString(f.read())
 
    tf.import_graph_def(graph_def, name='')
 
    tf.train.write_graph(graph_def, './', 'protobuf.pbtxt', as_text=True)
  return
 
def convert_pbtxt_to_pb(filename):
  """Returns a `tf.GraphDef` proto representing the data in the given pbtxt file.
  Args:
    filename: The name of a file containing a GraphDef pbtxt (text-formatted
      `tf.GraphDef` protocol buffer data).
  """
  with tf.gfile.FastGFile(filename, 'r') as f:
    graph_def = tf.GraphDef()
 
    file_content = f.read()
 
    # Merges the human-readable string in `file_content` into `graph_def`.
    text_format.Merge(file_content, graph_def)
    tf.train.write_graph( graph_def , './' , 'protobuf.pb' , as_text = False )

Draw bounding boxes on an image and save the boxed image

For example, during object detection: how to draw boxes around the detected targets on an image and save that image. This extends directly to video by processing every frame:

def draw_bounding_box_on_image_array(image,
                                     ymin,
                                     xmin,
                                     ymax,
                                     xmax,
                                     color='red',
                                     thickness=4,
                                     display_str_list=(),
                                     use_normalized_coordinates=True):

  image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
  draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
                             thickness, display_str_list,
                             use_normalized_coordinates)
  np.copyto(image, np.array(image_pil))

def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color='red',
                               thickness=5,
                               display_str_list=(),
                               use_normalized_coordinates=True):

  draw = ImageDraw.Draw(image)
  # im_width and im_height are hard-coded to 1 here, so the coordinates passed in
  # are used unchanged (the caller below passes pixel coordinates).
  im_width, im_height = 1, 1
  if use_normalized_coordinates:
    (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                  ymin * im_height, ymax * im_height)
  else:
    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
  draw.line([(left, top), (left, bottom), (right, bottom),
             (right, top), (left, top)], width=thickness, fill=color)
  try:
    font = ImageFont.truetype('arial.ttf', 24)
  except IOError:

    font = ImageFont.load_default()

  # If the total height of the display strings added to the top of the bounding
  # box exceeds the top of the image, stack the strings below the bounding box
  # instead of above.
  display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
  # Each display_str has a top and bottom margin of 0.05x.
  total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

  if top > total_display_str_height:
    text_bottom = top
  else:
    text_bottom = bottom + total_display_str_height
  # Reverse list and print from bottom to top.
  for display_str in display_str_list[::-1]:
    text_width, text_height = font.getsize(display_str)
    margin = np.ceil(0.05 * text_height)
    draw.rectangle(
        [(left, text_bottom - text_height - 2 * margin), (left + text_width,
                                                          text_bottom)],
        fill=color)
    draw.text(
        (left + margin, text_bottom - text_height - margin),
        display_str,
        fill='black',
        font=font)
    text_bottom -= text_height - 2 * margin



def my_vis_detections(im, class_name, dets, color, thresh=0.5):
    """Draw detected bounding boxes."""
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]

        ymin, xmin, ymax, xmax = bbox[1], bbox[0], bbox[3], bbox[2]
        draw_bounding_box_on_image_array(im,
                                         ymin,
                                         xmin,
                                         ymax,
                                         xmax,
                                         color=color,
                                         thickness=4,
                                         display_str_list=['{:s} {:.3f}'.format(class_name, score)],
                                         use_normalized_coordinates=True
                                         )

def my_demo(sess, net, im):

    scores, boxes = im_detect(sess, net, im)
    im = im[:, :, (2, 1, 0)]

    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        color = CLASSBBOXCOLOR[cls_ind]
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]

        my_vis_detections(im, cls, dets, color, thresh=CONF_THRESH)

    return im

In this example a Faster R-CNN model has already been trained. im_detect(sess, net, im) returns the detection result for each image, i.e. the score and box of every detected object. The final cls and dets are the objects left after filtering (NMS plus the confidence threshold). my_vis_detections() then draws a box for every object of each class (the box color and thickness are configurable).

Another approach:

def my_detect(image_path, out_boxes):

    image = Image.open(image_path)
    thickness = (image.size[0] + image.size[1]) // 900 + 1
    draw = ImageDraw.Draw(image)
    for det in out_boxes:
        box = det[0:4]
        top = box[1]
        left = box[0]
        bottom = box[3]
        right = box[2]
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

        # draw several nested rectangles to get a thicker box outline
        for t in range(thickness):
            draw.rectangle(
                [left + t, top + t, right - t, bottom - t],
                outline=CLASSBBOXCOLOR[0])
    del draw
    # image.show()
    image.save('facedata/IMG_6940_result.JPG')

Converting between PIL.Image and OpenCV image formats in Python

PIL.Image to OpenCV format:

import cv2
from PIL import Image
import numpy

image = Image.open("plane.jpg")
image.show()
img = cv2.cvtColor(numpy.asarray(image),cv2.COLOR_RGB2BGR)
cv2.imshow("OpenCV",img)
cv2.waitKey()

OpenCV to PIL.Image format:

import cv2
from PIL import Image
import numpy

img = cv2.imread("plane.jpg")
cv2.imshow("OpenCV",img)
image = Image.fromarray(cv2.cvtColor(img,cv2.COLOR_BGR2RGB))
image.show()
image.save(save_path,format='png', quality=95)
cv2.waitKey()

To check whether image data is in OpenCV format: isinstance(img, np.ndarray)
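A small illustration of that check, reusing the plane.jpg file from the snippets above:

import cv2
import numpy as np
from PIL import Image

pil_img = Image.open("plane.jpg")
cv_img = cv2.imread("plane.jpg")
print(isinstance(pil_img, np.ndarray))  # False: a PIL.Image object
print(isinstance(cv_img, np.ndarray))   # True: OpenCV images are numpy arrays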

Converting between numpy and OpenCV / PIL.Image formats

import numpy as np
from PIL import Image
import cv2

PIL:
Image -> numpy:
    img = Image.open(input_img)
    img = np.array(img)   # the result is a numpy array
    img = img.astype(np.float32)

numpy -> Image:
    re_image = Image.fromarray(numpy_image.astype(np.uint8)).convert('RGB')

The array must be of dtype uint8; an unsupported dtype such as uint16 raises an error:
in fromarray raise TypeError("Cannot handle this data type")
TypeError: Cannot handle this data type
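A minimal workaround sketch, assuming 16-bit data that should be scaled down to 8 bits before calling fromarray:

import numpy as np
from PIL import Image

img16 = (np.random.rand(64, 64, 3) * 65535).astype(np.uint16)  # dummy 16-bit data
img8 = (img16 / 256).astype(np.uint8)                          # scale 0..65535 down to 0..255
pil_img = Image.fromarray(img8).convert('RGB')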

OpenCV:
   image -> numpy
1. input_image = cv2.imread(input_img)  # print(type(input_image)) -> <class 'numpy.ndarray'>; printing input_image shows the array
   image_data = cv2.resize(input_image, (image_height, image_width))  # still <class 'numpy.ndarray'>
   img_resized = image_data.astype(np.float32)

2. input_image = cv2.imread(input_img)
   numpy_data = np.array(input_image)  # print(type(numpy_data)) -> <class 'numpy.ndarray'>

numpy -> image
# display or save the image
   cv2.imwrite(output_img, result_image.astype(np.uint8))   # save the image; result_image is a numpy array
   cv2.imshow('show-text', result_image.astype(np.uint8))   # display the image

Generate an XML annotation file from existing object coordinates (matching the format of the LabelImg object-detection annotation tool)


def savePascalVocFormat(filename, shapes, imagePath,imageShape=[1920, 1080, 3]):
    imgFolderPath = os.path.dirname(imagePath)
    imgFolderName = os.path.split(imgFolderPath)[-1]
    imgFileName = os.path.basename(imagePath)

    writer = PascalVocWriter(imgFolderName, imgFileName,
                                imageShape, localImgPath=imagePath)
    writer.verified = False
    for shape in shapes:
        label = shape['label']
        # Add Chris
        difficult = int(shape['difficult'])
        bndbox = shape['bndbox']
        writer.addBndBox(bndbox[0], bndbox[1], bndbox[2], bndbox[3], label, difficult)

    writer.save(targetFile=filename)
    return

This relies on a PascalVocWriter Python class; the original download link was: 生成pascal数据集的xml文件程序-Python代码类资源-CSDN下载
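If that class is not at hand, a minimal sketch with the same interface used above (constructor, addBndBox, save) could look like the following; it is not the original CSDN implementation, and it assumes imageShape is ordered (width, height, depth):

import xml.etree.ElementTree as ET

class PascalVocWriter:
    def __init__(self, foldername, filename, imgSize, localImgPath=None):
        self.foldername, self.filename = foldername, filename
        self.imgSize, self.localImgPath = imgSize, localImgPath
        self.boxes = []
        self.verified = False

    def addBndBox(self, xmin, ymin, xmax, ymax, name, difficult):
        self.boxes.append((xmin, ymin, xmax, ymax, name, difficult))

    def save(self, targetFile):
        root = ET.Element('annotation')
        ET.SubElement(root, 'folder').text = self.foldername
        ET.SubElement(root, 'filename').text = self.filename
        size = ET.SubElement(root, 'size')
        ET.SubElement(size, 'width').text = str(self.imgSize[0])
        ET.SubElement(size, 'height').text = str(self.imgSize[1])
        ET.SubElement(size, 'depth').text = str(self.imgSize[2])
        for xmin, ymin, xmax, ymax, name, difficult in self.boxes:
            obj = ET.SubElement(root, 'object')
            ET.SubElement(obj, 'name').text = name
            ET.SubElement(obj, 'difficult').text = str(difficult)
            bnd = ET.SubElement(obj, 'bndbox')
            for tag, val in zip(('xmin', 'ymin', 'xmax', 'ymax'), (xmin, ymin, xmax, ymax)):
                ET.SubElement(bnd, tag).text = str(val)
        ET.ElementTree(root).write(targetFile)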

Python: usage of next() and iter()

tt = iter([99,2,3,4,5])
input_1 = next(tt)
input_2 = next(tt)
input_3 = next(tt)
input_4 = next(tt)
input_5 = next(tt)
print(input_1)
print(input_2)
print(input_3)
print(input_4)
print(input_5)

Output:
99
2
3
4
5

Python: usage of [::-1], [::-2] and [:-1]

a = 'python'
b = a[::-1]
print(b)  # nohtyp
c = a[::-2]
print(c)  # nhy
# counting from the end, the last position is -1
d = a[:-1]  # from position 0 up to (but not including) position -1
print(d)  # pytho
e = a[:-2]  # from position 0 up to (but not including) position -2
print(e)  # pyth

a = [1, 2, 3, 4, 5]
print(a[2::-1])  # from the 3rd element backwards to the start: [3, 2, 1]
print(a[1::2])   # from the 2nd element, step 2: [2, 4]

Flipping an image via slicing:

    dd = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])
    print(dd)
    print(np.shape(dd))
    print(':, :, ::-1')
    print(dd[:, :, ::-1])
    print(np.shape(dd[:, :, ::-1]))
    print('::-1')
    print(dd[::-1])
    print(np.shape(dd[::-1]))
    print(':, ::-1')
    print(dd[:, ::-1])
    print(np.shape(dd[:, ::-1]))
Output:
[[[ 1  2  3]
  [ 4  5  6]]
 [[ 7  8  9]
  [10 11 12]]]
(2, 2, 3)
:, :, ::-1
[[[ 3  2  1]
  [ 6  5  4]]
 [[ 9  8  7]
  [12 11 10]]]
(2, 2, 3)
::-1
[[[ 7  8  9]
  [10 11 12]]
 [[ 1  2  3]
  [ 4  5  6]]]
(2, 2, 3)
:, ::-1
[[[ 4  5  6]
  [ 1  2  3]]
 [[10 11 12]
  [ 7  8  9]]]
(2, 2, 3)

Summary: [:, :, ::-1] flips the channel order, e.g. RGB -> BGR.

[::-1] flips the image vertically (reverses the rows, i.e. the height axis).

[:, ::-1] flips the image horizontally (reverses the columns, i.e. the width axis).

Reading or saving images whose path contains Chinese (non-ASCII) characters with OpenCV

# read
img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1)
# save
cv2.imencode('.jpg', src)[1].tofile(save_path)
# for example

cv2.imwrite("路径//my_image.jpg", frame)  # wrong: fails on non-ASCII paths
cv2.imencode('.jpg', frame)[1].tofile('路径/my_image.jpg')  # correct

Crop an image with PIL Image and compress it to JPEG

Crop box coordinates: cbox = (left, upper, right, lower)

img = Image.open(image_path)
img_data = img.crop(cbox)
# img_data_array = np.array(img_data)  # converting with numpy gives the image as a matrix
# save with JPEG compression; the default quality is 75
img_data.save(des_path, format='JPEG', quality=95)

# go from the top-left corner to the bottom-right corner, row by row
img = Image.open(image_name_big)
w = img.width
h = img.height
n_cols = w // 224
n_rows = h // 224
for i in range(n_rows):
    for j in range(n_cols):
        # tile starting from the top-left corner of the image
        cbox = [j * 224, i * 224, (j + 1) * 224, (i + 1) * 224]
        print(cbox)
        # tile = img.crop(cbox)  # crop out the 224x224 patch

Stitch several images together into one large image

def compose_image(IMAGE_SIZE=224,IMAGE_ROW=4,IMAGE_COLUMN=4):

    save_image_path = '/home/szx/Desktop/dd/total_image.png' 

    to_image = Image.new('RGB', (IMAGE_COLUMN * IMAGE_SIZE, IMAGE_ROW * IMAGE_SIZE))  
    index_num = 1
    root_path = '/home/szx/Desktop/dd/index_'
    for y in range(0, IMAGE_ROW):
        for x in range(0, IMAGE_COLUMN):
            image_path = root_path + str(index_num) + '.png'
            from_image = Image.open(image_path)
            to_image.paste(from_image, (x * IMAGE_SIZE, y * IMAGE_SIZE))
            index_num = index_num + 1
    to_image.save(save_image_path) 

Image quality assessment: computing PSNR and SSIM

You can implement them yourself from the theory, or call the relevant skimage modules or TensorFlow directly; using skimage is recommended.

import numpy as np
import math
import cv2
import skimage
import skimage.measure
import tensorflow as tf

def ssim_unit8(ori_img_path, des_image_path):

    tf_img1 = tf.image.decode_image(tf.read_file(ori_img_path))
    tf_img2 = tf.image.decode_image(tf.read_file(des_image_path))

    return tf.image.ssim(tf_img1, tf_img2, max_val=255)

def psnr_unit8(ori_img_path, des_image_path):

    tf_img1 = tf.image.decode_image(tf.read_file(ori_img_path))
    tf_img2 = tf.image.decode_image(tf.read_file(des_image_path))

    return tf.image.psnr(tf_img1, tf_img2, max_val=255)

def psnr(img1, img2):
    mse = np.mean((img1 / 255. - img2 / 255.) ** 2)
    if mse < 1.0e-10:
        return 100
    PIXEL_MAX = 1
    return 20 * math.log10(PIXEL_MAX / math.sqrt(mse))

if __name__ == '__main__':

    im1 = cv2.imread('/home/szx/Desktop/brazil_QP_Patch=23_469.png')
    im2 = cv2.imread('/home/szx/Desktop/brazil_QP_Patch=44_469.png')
    psnr_1 = skimage.measure.compare_psnr(im1, im2, 255)
    print(psnr_1)
    psnr_2 = psnr(im1, im2)
    print(psnr_2)
    # SSIM
    ssim = skimage.measure.compare_ssim(im1, im2, data_range=255, multichannel=True)

Apply a Gaussian blur to an image:

from PIL import Image
from PIL import ImageFilter

im = Image.open('source_path.png')
crop_image_file_path = 'des_path.png'
im = im.filter(ImageFilter.GaussianBlur(radius=10))
im.save(crop_image_file_path, format='png', quality=95)
im.show()

Compress an image as JPEG or JPEG 2000 (see the official Pillow documentation)

1. JPEG 2000: you may need to download openjpeg first and then call it through the Pillow framework

from PIL import Image
# JPEG 2000: first save the image in .j2k format, then open it again and save it as .png
# (mainly to inspect the effect of the compression).
# main parameters: quality_mode='dB', quality_layers=[...]
tt = Image.open(src_image_path)   # src_image_path: path of the source image
root_path = 'des_path_prefix'     # prefix used to build the output file names
list_inf = [24, 26, 28, 30, 32, 34, 37, 41, 46]
for i in range(0, 9):
    des_path_2 = root_path + '_' + str(i) + '.j2k'
    dd = list_inf[i]
    print(dd)
    tt.save(des_path_2, quality_mode='dB', quality_layers=[dd])
    tt1 = Image.open(des_path_2)
    des_path = root_path + '_' + str(i) + '.png'
    tt1.save(des_path)

# JPEG is simpler: i is the quality, from 0 to 95, where 95 is best
img = Image.open(src_image_path)
img.save(des_path, format='JPEG', quality=i)

Adding noise to an image:

skimage.util.random_noise(image, mode='gaussian', seed=None, clip=True, **kwargs)

image: the input image data, an ndarray; it is converted to float internally.
mode: a str selecting the type of noise to add. One of:
'gaussian'  additive Gaussian noise.
'localvar'  additive Gaussian noise with a specific local variance at each point.
'poisson'   Poisson-distributed noise.
'salt'      salt noise: randomly replaces pixels with 1 (high-intensity noise).
'pepper'    pepper noise: randomly replaces pixels with 0 or -1 (low-intensity noise).
's&p'       salt-and-pepper noise: both of the above, producing black and white speckles.
'speckle'   multiplicative noise, out = image + n * image, where n is noise with the specified mean and variance.

seed: int. Sets the random seed before the noise is generated, for reproducible pseudo-random comparisons.

clip: bool. If True (default), the output is clipped after adding 'speckle', 'poisson' or 'gaussian'
noise so that all values stay in [0, 1] or [-1, 1]. If False, values may fall outside this range.
mean: float, mean of the random distribution, used for 'gaussian' and 'speckle'. Default 0.
var: float, variance of the random distribution (std ** 2), used for 'gaussian' and 'speckle'. Default 0.01.
local_vars: ndarray, the local variance at every pixel; a positive float array with the same shape as the image, used for 'localvar'.
amount: float, proportion of pixels replaced by noise, in [0, 1]. Used for 'salt', 'pepper' and 's&p'. Default 0.05.
salt_vs_pepper: float, ratio of salt to pepper noise, in [0, 1]. Larger values mean more pixels are replaced with 1 (more salt). Default 0.5.

Output:
out: ndarray
A float image with values in [0, 1] or [-1, 1]. Note that skimage reads images as (height, width, channel); RGB data is in [0, 1] for floats and [0, 255] for integers.
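A short usage sketch, assuming an 8-bit input image at a placeholder path:

import numpy as np
import skimage.io
import skimage.util

img = skimage.io.imread('input.png')                                # uint8 image, values in [0, 255]
noisy = skimage.util.random_noise(img, mode='gaussian', var=0.01)   # float image in [0, 1]
noisy_u8 = (noisy * 255).astype(np.uint8)                           # back to uint8 for saving
skimage.io.imsave('noisy.png', noisy_u8)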

Reading and writing CSV and Excel files:

import csv

def writer_csv():
    with open("test.csv", "w", newline="") as csvfile:
        writer = csv.writer(csvfile)
        for i in range(10):
            writer.writerow(["index", "a_name", "b_name"])

def reader_csv():
    with open("test.csv", "r") as csvfile:
        reader = csv.reader(csvfile)
        for line in reader:
            print(line)

For Excel files, use the xlrd module to read and the xlwt module to write; both can be installed with pip install xlrd and pip install xlwt.

# read
    import xlrd
    file_path = 'video_test.xlsx'
    workbook = xlrd.open_workbook(file_path)
    # get one worksheet, here by sheet name
    table = workbook.sheet_by_name("Sheet1")
    aa = table.cell(0, 0).value
    bb = table.cell(0, 1).value
    c = table.cell(1, 0).value
    print(aa, bb, c)

# write
    import xlwt
    workbook = xlwt.Workbook(encoding='utf-8')
    # create a sheet
    workSheet = workbook.add_sheet('Sheet1')
    # write values into the sheet
    workSheet.write(0, 0, label='name')
    workSheet.write(0, 1, label=int(666))
    # save
    workbook.save('video_test.xlsx')

Image entropy:

Image entropy is a statistical feature that reflects the average amount of information in an image. The one-dimensional entropy describes the information contained in the aggregation of the gray-level distribution. Let Pi be the proportion of pixels whose gray value is i; the one-dimensional gray entropy of a grayscale image is then defined as H = -Σ Pi * log2(Pi), summed over i = 0..255.

Python code:

import math
import cv2
import numpy as np

def caculate_entropy():
    tmp = [0] * 256
    val = 0
    k = 0
    res = 0
    # read the image as grayscale (single channel)
    image = cv2.imread('/home/szx/Desktop/brazil_QP=23_306.png', 0)
    img = np.array(image)
    for i in range(len(img)):
        for j in range(len(img[i])):
            val = img[i][j]
            tmp[val] = float(tmp[val] + 1)
            k = float(k + 1)
    for i in range(len(tmp)):
        tmp[i] = float(tmp[i] / k)
    for i in range(len(tmp)):
        if tmp[i] == 0:
            res = res
        else:
            res = float(res - tmp[i] * (math.log(tmp[i]) / math.log(2.0)))
    print(res)

Reading and writing YUV video files:

def getFrameNUm(filename, bitdepth, W, H):

    bytesPerPixel = math.ceil(bitdepth / 8)
    framePixels = bytesPerPixel * H * W * 3 // 2
    fp = open(filename, 'rb')
    fp.seek(0, 2)
    maxFrameNum = fp.tell()
    fp.close()
    maxFrameNum = maxFrameNum // framePixels

    # print("bitdepth:", bitdepth, "framePix:", framePixels)
    return maxFrameNum

def readyuv420_byframe(filename, bitdepth, W, H, startframe, totalframe, show=False):
    # read totalframe frames, starting from frame startframe (inclusive, 0-based)

    uv_H = H // 2
    uv_W = W // 2

    if bitdepth == 8:
        Y = np.zeros((H, W), np.uint8)
        U = np.zeros((uv_H, uv_W), np.uint8)
        V = np.zeros((uv_H, uv_W), np.uint8)
    elif bitdepth == 10:
        Y = np.zeros((H, W), np.uint16)
        U = np.zeros((uv_H, uv_W), np.uint16)
        V = np.zeros((uv_H, uv_W), np.uint16)

    bytesPerPixel = math.ceil(bitdepth / 8)
    seekPixels = startframe * H * W * 3 // 2
    fp = open(filename, 'rb')
    fp.seek(bytesPerPixel * seekPixels)

    for i in range(totalframe):
        Y = fp.read(H * W * bytesPerPixel)
        U = fp.read(uv_H * uv_W * bytesPerPixel)
        V = fp.read(uv_H * uv_W * bytesPerPixel)
        if bitdepth == 10:
            Y = np.reshape(np.frombuffer(Y, dtype=np.uint16), (H, W))
            U = np.reshape(np.frombuffer(U, dtype=np.uint16), (uv_H, uv_W))
            V = np.reshape(np.frombuffer(V, dtype=np.uint16), (uv_H, uv_W))
            # interpret the 10-bit samples as little-endian uint16
            Y = np.array(Y).view('<u2')
            U = np.array(U).view('<u2')
            V = np.array(V).view('<u2')
        else:
            Y = np.reshape(np.frombuffer(Y, dtype=np.uint8), (H, W))
            U = np.reshape(np.frombuffer(U, dtype=np.uint8), (uv_H, uv_W))
            V = np.reshape(np.frombuffer(V, dtype=np.uint8), (uv_H, uv_W))

        yield Y, U, V


def writeyuv420p(filename, W, H, Y, U, V):
    uv_H = H // 2
    uv_W = W // 2
    fp = open(filename, 'wb')
    fp.write(Y)
    fp.write(U)
    fp.write(V)
    fp.close()

def writeyuv420p10le(filename, W, H, Y, U, V):
    uv_H = H // 2
    uv_W = W // 2
    fp = open(filename, 'wb')
    Y=Y.view('<u2')
    U=U.view('<u2')
    V=V.view('<u2')
    fp.write(Y)
    fp.write(U)
    fp.write(V)

    fp.close()

Explanation of some of the dtype character codes used above:

i: signed integers; 'i1', 'i2', 'i4', 'i8' correspond to int8, int16, int32, int64
u: unsigned integers; 'u1', 'u2', 'u4', 'u8' correspond to uint8, uint16, uint32, uint64
f: floats; 'f2', 'f4', 'f8' correspond to float16, float32, float64
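For illustration, the little-endian uint16 code '<u2' used in the YUV functions above behaves like this:

import numpy as np

raw = bytes([0x01, 0x02, 0x03, 0x04])   # 4 raw bytes
arr = np.frombuffer(raw, dtype='<u2')   # little-endian uint16
print(arr)        # [ 513 1027]  (0x0201, 0x0403)
print(arr.dtype)  # uint16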

Downloading the Fairchild HDR dataset

The Fairchild HDR dataset website provides more than 100 HDR images, but no packaged download. They can be downloaded in bulk as follows.
First download the whole page with wget:

wget -c -r -np -k -L -p http://rit-mcsl.org/fairchild//HDRPS/HDRthumbs.html

The download directory then contains a Thumbs folder whose images are LDR versions of the compressed HDR images. Extract all of the file names, change the extension, and build the download links (all HDR files live under http://www.cis.rit.edu/fairchild/HDRPS/EXRs/):

% MATLAB snippet to build the download links
files = dir('./*.jpg');
for i=1:1:length(files)
    [file_path,file_name,file_ext] = fileparts(files(i).name);
    name = strcat(file_name,'.exr');
    downloadLink = strcat('http://www.cis.rit.edu/fairchild/HDRPS/EXRs/',name,'\n');
    fprintf(downloadLink)
end

Put the resulting download links into a file link.txt:

http://www.cis.rit.edu/fairchild/HDRPS/EXRs/507.exr
http://www.cis.rit.edu/fairchild/HDRPS/EXRs/AhwahneeGreatLounge.exr
http://www.cis.rit.edu/fairchild/HDRPS/EXRs/AirBellowsGap.exr
....
....
....

Then download them with wget:

wget -i link.txt

numpy: count the elements of a matrix greater than a given value:

For example, the number of elements greater than 0.1 in a random 4x3 matrix:

    nums = np.random.rand(4, 3)
    print(nums)
    y = (nums > 0.1)
    print(y)
    z = nums[y]
    print(z.size)
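Equivalently, the count can be obtained in one line:

    print(np.count_nonzero(nums > 0.1))  # same count as z.size above
    print((nums > 0.1).sum())            # boolean arrays can also simply be summed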

Replace values smaller than a given number with another number

array([[-4, -4, -5,  2,  1],
       [-1, -2, -1,  3,  3],
       [-1, -2,  3, -5,  3],
       [ 0, -3, -5,  1, -4],
       [ 0,  3,  1,  3, -4]])
# method 1
>>> np.maximum(a, 0)
array([[0, 0, 0, 2, 1],
       [0, 0, 0, 3, 3],
       [0, 0, 3, 0, 3],
       [0, 0, 0, 1, 0],
       [0, 3, 1, 3, 0]])
# method 2
>>> (a + abs(a)) / 2
array([[0, 0, 0, 2, 1],
       [0, 0, 0, 3, 3],
       [0, 0, 3, 0, 3],
       [0, 0, 0, 1, 0],
       [0, 3, 1, 3, 0]])
# method 3
>>> b = a.copy()
>>> b[b < 0] = 0
>>> b
array([[0, 0, 0, 2, 1],
       [0, 0, 0, 3, 3],
       [0, 0, 3, 0, 3],
       [0, 0, 0, 1, 0],
       [0, 3, 1, 3, 0]])
# method 4
>>> np.where(a > 0, a, 0)
array([[0, 0, 0, 2, 1],
       [0, 0, 0, 3, 3],
       [0, 0, 3, 0, 3],
       [0, 0, 0, 1, 0],
       [0, 3, 1, 3, 0]])

Load a LUT (.cube) file for color grading

from PIL import Image
import math

LUT_3D_SIZE = 32

def cubeIndex(r, g, b):
    return int(r + g * LUT_3D_SIZE + b * LUT_3D_SIZE * LUT_3D_SIZE)

def mix(colorL, colorH, color_c):
    return colorL + (colorH - colorL) * (color_c - math.floor(color_c))

save_path = 'LTR.jpg'
img = Image.open("cat.jpg")
# img.show()
bitmap = img.load()
fd = open('LTR.cube')
lines = fd.readlines()
rgbFloatCube = []
cubeDataStart = False
for l in lines:
    if cubeDataStart:
        rgbStr = l.split(" ")
        if len(rgbStr) == 3:
            rgbFloat = (float(rgbStr[0]), float(rgbStr[1]), float(rgbStr[2]))
            rgbFloatCube.append(rgbFloat)
    if l.startswith("LUT_3D_SIZE 32"):
        cubeDataStart = True

for x in range(img.size[0]):
    for y in range(img.size[1]):
        pixelColor = bitmap[x, y]
        red = pixelColor[0] / 255.0 * (LUT_3D_SIZE - 1)
        green = pixelColor[1] / 255.0 * (LUT_3D_SIZE - 1)
        blue = pixelColor[2] / 255.0 * (LUT_3D_SIZE - 1)
        redH = math.ceil(red)
        redL = math.floor(red)
        greenH = math.ceil(green)
        greenL = math.floor(green)
        blueH = math.ceil(blue)
        blueL = math.floor(blue)
        indexH = cubeIndex(redH, greenH, blueH)
        indexL = cubeIndex(redL, greenL, blueL)
        toColorH = rgbFloatCube[indexH]
        toColorL = rgbFloatCube[indexL]
        toR = mix(toColorL[0], toColorH[0], red)
        toG = mix(toColorL[1], toColorH[1], green)
        toB = mix(toColorL[2], toColorH[2], blue)
        toColor2 = (int(toR * 255), int(toG * 255), int(toB * 255))
        bitmap[x, y] = toColor2
# img.show()
img.save(save_path)

During neural-network inference, an odd feature-map width or height can make the reconstructed image size differ from the input; one way to handle this:

h = height >> 2 << 2              # round the height down to a multiple of 4
w = width >> 2 << 2               # round the width down to a multiple of 4
img_crop = img[:h, :w, :]         # crop so the network sees an evenly divisible size
img_crop_sr = sr(img_crop)        # sr(): the reconstruction / super-resolution network
img_sr = bilinear(img)            # bilinear(): plain bilinear upscaling of the full image
img_sr[:h, :w, :] = img_crop_sr   # paste the network output back into the bilinear result

Adjust the number of bits the width and height are shifted by according to the number of stride=2 layers in the network; the example above shifts by 2.
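The same idea as a small reusable helper; the number of stride-2 layers is an assumed parameter:

def crop_to_stride_multiple(img, num_stride2_layers=2):
    # Crop height and width down to a multiple of 2 ** num_stride2_layers so that
    # repeated stride-2 downsampling and upsampling restores the same size.
    m = 1 << num_stride2_layers
    h = img.shape[0] // m * m
    w = img.shape[1] // m * m
    return img[:h, :w, ...]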

Set an absolute path in Python (add the project root to sys.path):

import sys
from os.path import dirname, abspath
path = dirname(dirname(abspath(__file__)))
sys.path.append(path)

Convert a PDF to PNG images in Python

Install:

pip install PyMuPDF   # PyMuPDF is imported under the name fitz; the separate 'fitz' package on PyPI is unrelated and should not be installed
import fitz

pdfPath: path of the PDF file
imgPath: folder where the images will be saved
zoom_x: scaling factor in the x direction
zoom_y: scaling factor in the y direction
rotation_angle: rotation angle

def pdf_image(pdfPath, imgPath, zoom_x, zoom_y, rotation_angle):

    pdf = fitz.open(pdfPath)
    # read the PDF page by page
    for pg in range(0, pdf.pageCount):
        page = pdf[pg]
        # set the zoom and rotation
        trans = fitz.Matrix(zoom_x, zoom_y).preRotate(rotation_angle)
        pm = page.getPixmap(matrix=trans, alpha=False)
        # write the image
        pm.writePNG(imgPath + str(pg) + ".png")
    pdf.close()

pdf_image(r"name.pdf", "path/", 3, 3, 0)
zoom_x and zoom_y are usually set to the same value; the larger the value, the higher the resolution of the resulting images.

Image filtering with scipy

from scipy import ndimage
from scipy import special
ndimage.filters.convolve(img_H, conv_kernel) 
def circular_lowpass_kernel(cutoff=1.5, kernel_size=21, pad_to=0):
    """2D sinc filter, ref: https://dsp.stackexchange.com/questions/58301/2-d-circularly-symmetric-low-pass-filter

    Args:
        cutoff (float): cutoff frequency in radians (pi is max)
        kernel_size (int): horizontal and vertical size, must be odd.
        pad_to (int): pad kernel size to desired size, must be odd or zero.
    """
    assert kernel_size % 2 == 1, 'Kernel size must be an odd number.'
    kernel = np.fromfunction(
        lambda x, y: cutoff * special.j1(cutoff * np.sqrt(
            (x - (kernel_size - 1) / 2)**2 + (y - (kernel_size - 1) / 2)**2)) / (2 * np.pi * np.sqrt(
                (x - (kernel_size - 1) / 2)**2 + (y - (kernel_size - 1) / 2)**2)), [kernel_size, kernel_size])
    kernel[(kernel_size - 1) // 2, (kernel_size - 1) // 2] = cutoff**2 / (4 * np.pi)
    kernel = kernel / np.sum(kernel)
    if pad_to > kernel_size:
        pad_size = (pad_to - kernel_size) // 2
        kernel = np.pad(kernel, ((pad_size, pad_size), (pad_size, pad_size)))
    return kernel

This generates the kernel of a 2D low-pass (sinc) filter. A larger cutoff gives a sharper image; a larger kernel_size makes ringing artifacts more visible. A smaller cutoff also somewhat reduces the ringing caused by kernel_size: for example, cutoff=0.75 with kernel_size=13 gives a blurry image without obvious ringing, while cutoff=1.5 with kernel_size=13 gives a sharp image but with ringing.
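A short usage sketch on a single-channel image (the file names are placeholders):

import cv2
from scipy import ndimage

gray = cv2.imread('input.png', 0).astype('float32')
kernel = circular_lowpass_kernel(cutoff=1.5, kernel_size=13)
filtered = ndimage.convolve(gray, kernel, mode='reflect')
cv2.imwrite('filtered.png', filtered.clip(0, 255).astype('uint8'))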

Add text to an image with ImageDraw and ImageFont

from PIL import Image, ImageDraw, ImageFont
img = Image.open('im_path.png')
# pick a font installed on the system; 30 is the font size
setFont = ImageFont.truetype('/usr/share/fonts/truetype/arphic/ukai.ttc', 30)
text = "亚古兽要进化"
draw = ImageDraw.Draw(img)
# (40, 200) is the text position; fill sets the text color (R, G, B)
draw.text((40,200),text,font=setFont,fill=(255, 255, 255),direction=None)
img.show()

numpy sorting

1. numpy.sort() returns a sorted copy of the input array (the original array is not modified; use ndarray.sort() to sort in place)

numpy.sort(a, axis, kind, order)

The order parameter deserves a brief explanation:

  • order: if the array contains fields, the field to sort by
    array_dtype = np.dtype([('name', 'S100'),('result',float)])
    array_List = np.array([('xiaoming', 100.0),('xiaohong', 99.5),('xiaotian', 89.7),('xiaoding', 109.5)], dtype=array_dtype)

    res = np.sort(array_List,order='result')
    for i in res:
        tt = i[0]
        tt = tt.decode('utf-8')
        print(os.path.join('res/dfsd',tt))

In np.dtype, 'S100' denotes a byte string with a maximum length of 100.

The printed strings therefore carry a b prefix. Python also has the u and f string prefixes:

u

A string prefixed with u is encoded as Unicode. It is typically placed in front of strings containing Chinese characters, to avoid garbled text caused by the source file's encoding when the string is used again.

b

A b" " prefix means the following string has type bytes. In network programming, servers and browsers only accept bytes data. In Python 3, bytes and str are converted with:

str.encode('utf-8')
bytes.decode('utf-8')

f

A string prefixed with f supports Python expressions inside curly braces:

print(f'{name} done in {time.time() - t0:.2f} s')

Output (with name = 'processing'):

processing done in 1.00 s

2. numpy.argsort() returns the indices that would sort the array from smallest to largest.
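A short example:

import numpy as np

a = np.array([30, 10, 20])
idx = np.argsort(a)
print(idx)     # [1 2 0]  indices of the sorted order
print(a[idx])  # [10 20 30]  same result as np.sort(a)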
