Extract frames from a video at a fixed time interval and save them as images, i.e. save one image every few frames/seconds.
import os
import subprocess

def convert_video_images(source_path, output_path):
    fileTypes = ['.avi', '.mp4', '.flv', '.mov', '.mkv']
    if not os.path.exists(source_path):
        print("source path does not exist: %s" % source_path)
        return
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    for video_file in os.listdir(source_path):
        video_name = os.path.join(source_path, video_file)
        if video_file == ".DS_Store":
            print(video_file)
            continue
        if os.path.splitext(video_name)[1] not in fileTypes:
            print("skip %s" % video_file)
            continue
        print("video name:", video_name)
        strcmd = 'ffmpeg -i ' + video_name + ' -vf "scale=1920:1080,fps=1/5" ' + output_path + '/' + \
                 os.path.splitext(video_file)[0] + '%5d.png'
        print(strcmd)
        subprocess.call(strcmd, shell=True)
    print('the video has generated images')
# Split a string on a specific character
# string_ = 'name_name_string'
# print(string_.split('_'))
This approach requires FFmpeg to be installed; installation guides are easy to find online and are not covered here. In the command, fps=1/5 means one frame is extracted every 5 seconds.
Read a video frame by frame, process each frame, and then assemble the processed frames into a new video:
import cv2

cap = cv2.VideoCapture('input_video.mp4')
fps = cap.get(cv2.CAP_PROP_FPS)
print(fps)
size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
        int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
print(size)
fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', '2')
# Another codec option:
# fourcc = cv2.VideoWriter_fourcc(*'XVID')
outVideo = cv2.VideoWriter('output_video.avi', fourcc, fps, size)
if cap.isOpened():
    rval, frame = cap.read()
    print('True')
else:
    rval = False
    print('False')
while rval:
    rval, frame = cap.read()
    if not rval:
        break
    # Process the frame to produce a new frame: my_image
    # (my_demo(sess, net, frame) is defined in the detection example later in these notes)
    my_image = my_demo(sess, net, frame)
    cv2.imshow('test', my_image)
    outVideo.write(my_image)
    cv2.waitKey(1)
cap.release()
outVideo.release()
cv2.destroyAllWindows()
Modify the content of an XML node. In this example, the text of the name node is changed from guai to Guai:
import xml.etree.ElementTree as ET

def _load_pascal_annotation(filename):
    tree = ET.parse(filename)
    objs = tree.findall('object')
    for ix, obj in enumerate(objs):
        if obj.find('name').text.lower().strip() == 'guai':
            print('the Guai is the key but now it is guai')
            key_word = 'Guai'
            obj.find('name').text = key_word
            tree.write(filename)
            print(filename)
        if obj.find('name').text.lower().strip() == 'texiao':
            print('the Texiao is the key but now it is texiao')
            key_word = 'Texiao'
            obj.find('name').text = key_word
            tree.write(filename)
            print(filename)
Write XML node contents into a txt file, for a directory that itself contains subdirectories:
def nodeToTxt():
    source_path = '/media/ksyun/PROJECTDATA/810_xml/xml'
    text_file = '/home/ksyun/11/11.txt'
    writer_file = open(text_file, 'a')
    for names in os.listdir(source_path):
        xml_file = os.path.join(source_path, names)
        print(xml_file)
        for name in os.listdir(xml_file):
            image_name_png = os.path.join(xml_file, name)
            name_image = os.path.splitext(name)[0]
            isNone = _load_pascal_annotation(image_name_png)
            if isNone is True:
                print('is None')
            else:
                writer_file.write(name_image + '\n')
Replace a line that contains a keyword:
def replace_txt_line_inf():
    file = 'ori.txt'
    file_des = 'des.txt'
    # Read the old file line by line
    file_data = ""
    with open(file, "r", encoding="utf-8") as f:
        for line in f.readlines():
            if line.find("des_inf") >= 0:
                line = line.replace(line, 'new_inf\n')
            file_data += line
    with open(file_des, "w", encoding="utf-8") as f:
        f.write(file_data)
Move or copy a file from one folder to another:
import os
import shutil

def mymovefile(srcfile, dstfile):
    if not os.path.isfile(srcfile):
        print("%s not exist!" % srcfile)
    else:
        fpath, fname = os.path.split(dstfile)  # split the path and the file name
        if not os.path.exists(fpath):
            os.makedirs(fpath)  # create the directory
        shutil.move(srcfile, dstfile)  # move the file
        print("move %s -> %s" % (srcfile, dstfile))

def mycopyfile(srcfile, dstfile):
    if not os.path.isfile(srcfile):
        print("%s not exist!" % srcfile)
    else:
        fpath, fname = os.path.split(dstfile)  # split the path and the file name
        if not os.path.exists(fpath):
            os.makedirs(fpath)  # create the directory
        # Note: for shutil.move, dstfile may be a directory (the moved file keeps its name);
        # shutil.copyfile requires a full file path (use shutil.copy to copy into a directory)
        shutil.copyfile(srcfile, dstfile)  # copy the file
        print("copy %s -> %s" % (srcfile, dstfile))
Converting between pb and pbtxt files in TensorFlow (essentially just whether the as_text argument is True):
import tensorflow as tf
from tensorflow.python.platform import gfile
from google.protobuf import text_format

def convert_pb_to_pbtxt(filename):
    with gfile.FastGFile(filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')
        tf.train.write_graph(graph_def, './', 'protobuf.pbtxt', as_text=True)
    return

def convert_pbtxt_to_pb(filename):
    """Returns a `tf.GraphDef` proto representing the data in the given pbtxt file.

    Args:
        filename: The name of a file containing a GraphDef pbtxt (text-formatted
            `tf.GraphDef` protocol buffer data).
    """
    with tf.gfile.FastGFile(filename, 'r') as f:
        graph_def = tf.GraphDef()
        file_content = f.read()
        # Merges the human-readable string in `file_content` into `graph_def`.
        text_format.Merge(file_content, graph_def)
        tf.train.write_graph(graph_def, './', 'protobuf.pb', as_text=False)
Drawing bounding boxes on an image and saving the result
For example, in object detection: how to draw boxes around the detected targets on an image and save it. The same idea extends to video by processing every frame:
import numpy as np
from PIL import Image, ImageDraw, ImageFont

def draw_bounding_box_on_image_array(image,
                                     ymin,
                                     xmin,
                                     ymax,
                                     xmax,
                                     color='red',
                                     thickness=4,
                                     display_str_list=(),
                                     use_normalized_coordinates=True):
    image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
    draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
                               thickness, display_str_list,
                               use_normalized_coordinates)
    np.copyto(image, np.array(image_pil))

def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color='red',
                               thickness=5,
                               display_str_list=(),
                               use_normalized_coordinates=True):
    draw = ImageDraw.Draw(image)
    # Width/height are set to 1 here so that pixel coordinates pass through
    # unchanged even when use_normalized_coordinates is True.
    im_width, im_height = 1, 1
    if use_normalized_coordinates:
        (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                      ymin * im_height, ymax * im_height)
    else:
        (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
    draw.line([(left, top), (left, bottom), (right, bottom),
               (right, top), (left, top)], width=thickness, fill=color)
    try:
        font = ImageFont.truetype('arial.ttf', 24)
    except IOError:
        font = ImageFont.load_default()
    # If the total height of the display strings added to the top of the bounding
    # box exceeds the top of the image, stack the strings below the bounding box
    # instead of above.
    display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
    # Each display_str has a top and bottom margin of 0.05x.
    total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)
    if top > total_display_str_height:
        text_bottom = top
    else:
        text_bottom = bottom + total_display_str_height
    # Reverse list and print from bottom to top.
    for display_str in display_str_list[::-1]:
        text_width, text_height = font.getsize(display_str)
        margin = np.ceil(0.05 * text_height)
        draw.rectangle(
            [(left, text_bottom - text_height - 2 * margin), (left + text_width,
                                                              text_bottom)],
            fill=color)
        draw.text(
            (left + margin, text_bottom - text_height - margin),
            display_str,
            fill='black',
            font=font)
        text_bottom -= text_height - 2 * margin

def my_vis_detections(im, class_name, dets, color, thresh=0.5):
    """Draw detected bounding boxes."""
    inds = np.where(dets[:, -1] >= thresh)[0]
    if len(inds) == 0:
        return
    for i in inds:
        bbox = dets[i, :4]
        score = dets[i, -1]
        ymin, xmin, ymax, xmax = bbox[1], bbox[0], bbox[3], bbox[2]
        draw_bounding_box_on_image_array(im,
                                         ymin,
                                         xmin,
                                         ymax,
                                         xmax,
                                         color=color,
                                         thickness=4,
                                         display_str_list=['{:s} {:.3f}'.format(class_name, score)],
                                         use_normalized_coordinates=True)

def my_demo(sess, net, im):
    scores, boxes = im_detect(sess, net, im)
    im = im[:, :, (2, 1, 0)]
    CONF_THRESH = 0.8
    NMS_THRESH = 0.3
    for cls_ind, cls in enumerate(CLASSES[1:]):
        color = CLASSBBOXCOLOR[cls_ind]
        cls_ind += 1  # because we skipped background
        cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
        cls_scores = scores[:, cls_ind]
        dets = np.hstack((cls_boxes,
                          cls_scores[:, np.newaxis])).astype(np.float32)
        keep = nms(dets, NMS_THRESH)
        dets = dets[keep, :]
        my_vis_detections(im, cls, dets, color, thresh=CONF_THRESH)
    return im
In this example I had already trained a Faster R-CNN model; im_detect(sess, net, im) returns the detection results for each image, including the score and box of every target. The final cls and dets are the targets that remain after filtering. my_vis_detections() then draws boxes for each class's targets (both the box color and the thickness are configurable).
Another approach:
def my_detect(image_path, out_boxes):
    image = Image.open(image_path)
    thickness = (image.size[0] + image.size[1]) // 900 + 1
    for det in out_boxes:
        box = det[0:4]
        draw = ImageDraw.Draw(image)
        top = box[1]
        left = box[0]
        bottom = box[3]
        right = box[2]
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        for t in range(thickness):
            draw.rectangle(
                [left + t, top + t, right - t, bottom - t],
                outline=CLASSBBOXCOLOR[0])
        del draw
    # image.show()
    image.save('facedata/IMG_6940_result.JPG')
Converting between PIL.Image and OpenCV image formats in Python
PIL.Image to OpenCV:
import cv2
from PIL import Image
import numpy
image = Image.open("plane.jpg")
image.show()
img = cv2.cvtColor(numpy.asarray(image),cv2.COLOR_RGB2BGR)
cv2.imshow("OpenCV",img)
cv2.waitKey()
OpenCV to PIL.Image:
import cv2
from PIL import Image
import numpy
img = cv2.imread("plane.jpg")
cv2.imshow("OpenCV",img)
image = Image.fromarray(cv2.cvtColor(img,cv2.COLOR_BGR2RGB))
image.show()
image.save(save_path, format='png', quality=95)  # save_path: destination file path
cv2.waitKey()
To check whether image data is in OpenCV format: isinstance(img, np.ndarray)
Converting between numpy and OpenCV / PIL.Image formats
import numpy as np
from PIL import Image
import cv2
PIL:
Image -> numpy:
img = Image.open(input_img)
img = np.array(img)  # now the image is a numpy array
img = img.astype(np.float32)
numpy -> Image:
re_image = Image.fromarray(numpy_image.astype(np.uint8)).convert('RGB')
The array must be uint8; an unsupported dtype such as uint16 raises an error:
in fromarray raise TypeError("Cannot handle this data type")
TypeError: Cannot handle this data type
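If the data really is 16-bit, one workaround (a minimal sketch; depth16 is just a made-up example array) is to rescale it to uint8 before calling fromarray:
import numpy as np
from PIL import Image

depth16 = (np.random.rand(64, 64) * 65535).astype(np.uint16)   # hypothetical 16-bit image
depth8 = (depth16 / 65535.0 * 255.0).astype(np.uint8)          # rescale to [0, 255]
img = Image.fromarray(depth8).convert('RGB')                   # fromarray now succeeds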
OpenCV:
image -> numpy
1. input_image = cv2.imread(input_img)  # type(input_image) is <class 'numpy.ndarray'>
   image_data = cv2.resize(input_image, (image_width, image_height))  # cv2.resize expects (width, height); result is still a numpy.ndarray
   img_resized = image_data.astype(np.float32)
2. input_image = cv2.imread(input_img)
   numpy_data = np.array(input_image)  # <class 'numpy.ndarray'>
numpy -> image
# Display or save the image (result_image is a numpy array)
cv2.imwrite(output_img, result_image.astype(np.uint8))    # save
cv2.imshow('show-text', result_image.astype(np.uint8))    # display
Generate XML annotation files from existing target coordinates (matching LabelImg, the object-detection labeling tool):
def savePascalVocFormat(filename, shapes, imagePath, imageShape=[1920, 1080, 3]):
    imgFolderPath = os.path.dirname(imagePath)
    imgFolderName = os.path.split(imgFolderPath)[-1]
    imgFileName = os.path.basename(imagePath)
    writer = PascalVocWriter(imgFolderName, imgFileName,
                             imageShape, localImgPath=imagePath)
    writer.verified = False
    for shape in shapes:
        label = shape['label']
        # Add Chris
        difficult = int(shape['difficult'])
        bndbox = shape['bndbox']
        writer.addBndBox(bndbox[0], bndbox[1], bndbox[2], bndbox[3], label, difficult)
    writer.save(targetFile=filename)
    return
This relies on a PascalVocWriter Python class; a reference implementation can be downloaded from CSDN ("生成pascal数据集的xml文件程序-Python代码类资源-CSDN下载"). A minimal sketch of such a class is shown below.
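The following is only a minimal sketch of what a PascalVocWriter-style class might look like, written to match the interface used above (constructor, verified, addBndBox, save); the class behind the CSDN link may differ in details such as pretty-printing or field order.
import xml.etree.ElementTree as ET

class PascalVocWriter:
    def __init__(self, foldername, filename, imgSize, localImgPath=None):
        # imgSize is assumed to be [height, width, depth], as in LabelImg
        self.foldername = foldername
        self.filename = filename
        self.imgSize = imgSize
        self.localImgPath = localImgPath
        self.verified = False
        self.boxes = []

    def addBndBox(self, xmin, ymin, xmax, ymax, name, difficult):
        self.boxes.append({'xmin': xmin, 'ymin': ymin, 'xmax': xmax,
                           'ymax': ymax, 'name': name, 'difficult': difficult})

    def save(self, targetFile):
        root = ET.Element('annotation')
        if self.verified:
            root.set('verified', 'yes')
        ET.SubElement(root, 'folder').text = self.foldername
        ET.SubElement(root, 'filename').text = self.filename
        if self.localImgPath is not None:
            ET.SubElement(root, 'path').text = self.localImgPath
        size = ET.SubElement(root, 'size')
        ET.SubElement(size, 'width').text = str(self.imgSize[1])
        ET.SubElement(size, 'height').text = str(self.imgSize[0])
        ET.SubElement(size, 'depth').text = str(self.imgSize[2])
        for box in self.boxes:
            obj = ET.SubElement(root, 'object')
            ET.SubElement(obj, 'name').text = box['name']
            ET.SubElement(obj, 'difficult').text = str(box['difficult'])
            bndbox = ET.SubElement(obj, 'bndbox')
            ET.SubElement(bndbox, 'xmin').text = str(box['xmin'])
            ET.SubElement(bndbox, 'ymin').text = str(box['ymin'])
            ET.SubElement(bndbox, 'xmax').text = str(box['xmax'])
            ET.SubElement(bndbox, 'ymax').text = str(box['ymax'])
        ET.ElementTree(root).write(targetFile, encoding='utf-8')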
Python: usage of next() and iter()
tt = iter([99,2,3,4,5])
input_1 = next(tt)
input_2 = next(tt)
input_3 = next(tt)
input_4 = next(tt)
input_5 = next(tt)
print(input_1)
print(input_2)
print(input_3)
print(input_4)
print(input_5)
Output:
99
2
3
4
5
Python: usage of [::-1], [::-2] and [:-1]
a = 'python'
b = a[::-1]
print(b)  # nohtyp
c = a[::-2]
print(c)  # nhy
# Counting from the end, the last position is -1
d = a[:-1]  # from position 0 up to (but not including) position -1
print(d)  # pytho
e = a[:-2]  # from position 0 up to (but not including) position -2
print(e)  # pyth
a = [1, 2, 3, 4, 5]
print(a[2::-1])  # from index 2 back to the start, reversed: [3, 2, 1]
print(a[1::2])   # from the 2nd element onward, step 2: [2, 4]
Flipping an image with slicing:
dd = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])
print(dd)
print(np.shape(dd))
print(':, :, ::-1')
print(dd[:, :, ::-1])
print(np.shape(dd[:, :, ::-1]))
print('::-1')
print(dd[::-1])
print(np.shape(dd[::-1]))
print(':, ::-1')
print(dd[:, ::-1])
print(np.shape(dd[:, ::-1]))
Output:
[[[ 1 2 3]
[ 4 5 6]]
[[ 7 8 9]
[10 11 12]]]
(2, 2, 3)
:, :, ::-1
[[[ 3 2 1]
[ 6 5 4]]
[[ 9 8 7]
[12 11 10]]]
(2, 2, 3)
::-1
[[[ 7 8 9]
[10 11 12]]
[[ 1 2 3]
[ 4 5 6]]]
(2, 2, 3)
:, ::-1
[[[ 4 5 6]
[ 1 2 3]]
[[10 11 12]
[ 7 8 9]]]
(2, 2, 3)
Summary: [:, :, ::-1] reverses the channel order, e.g. RGB -> BGR.
[::-1] flips the image vertically (up-down), reversing along the height axis.
[:, ::-1] flips the image horizontally (left-right), reversing along the width axis.
Reading and saving images whose paths contain Chinese characters with OpenCV
# Read
img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), 1)
# Save
cv2.imencode('.jpg', src)[1].tofile(save_path)
# For example
cv2.imwrite("路径//my_image.jpg", frame)  # wrong: cv2.imwrite fails on non-ASCII paths
cv2.imencode('.jpg', frame)[1].tofile('路径/my_image.jpg')  # correct
Cropping an image with PIL and compressing it to JPEG
# Crop box coordinates: cbox = (left, upper, right, lower)
cbox = (b1, a1, b2, a2)
img = Image.open(image_path)
img_data = img.crop(cbox)
# img_data_array = np.array(img_data): only after converting with numpy is it an array
# Save the crop with JPEG compression; the default quality is 75
img_data.save(des_path, format='JPEG', quality=95)
# Tile the image from the top-left corner to the bottom-right, row by row
img = Image.open(image_name_big)
w = img.width
h = img.height
s_h = w / 224   # number of 224-pixel tiles along the width
s_w = h / 224   # number of 224-pixel tiles along the height
for i in range(int(s_w)):
    for j in range(int(s_h)):
        # (left, upper, right, lower) of each tile, starting from the top-left corner
        cbox = [j * 224, i * 224, (j + 1) * 224, (i + 1) * 224]
        print(cbox)
Stitching multiple images into one grid image
def compose_image(IMAGE_SIZE=224, IMAGE_ROW=4, IMAGE_COLUMN=4):
    save_image_path = '/home/szx/Desktop/dd/total_image.png'
    to_image = Image.new('RGB', (IMAGE_COLUMN * IMAGE_SIZE, IMAGE_ROW * IMAGE_SIZE))
    index_num = 1
    root_path = '/home/szx/Desktop/dd/index_'
    for y in range(0, IMAGE_ROW):
        for x in range(0, IMAGE_COLUMN):
            image_path = root_path + str(index_num) + '.png'
            from_image = Image.open(image_path)
            to_image.paste(from_image, (x * IMAGE_SIZE, y * IMAGE_SIZE))
            index_num = index_num + 1
    to_image.save(save_image_path)
Image quality assessment: computing PSNR and SSIM
You can implement them yourself from the definitions, or call the corresponding skimage or TensorFlow functions; using skimage directly is the simplest option.
import math
import numpy as np
import cv2
import skimage.measure
import tensorflow as tf

def ssim_unit8(ori_img_path, des_image_path):
    tf_img1 = tf.image.decode_image(tf.read_file(ori_img_path))
    tf_img2 = tf.image.decode_image(tf.read_file(des_image_path))
    return tf.image.ssim(tf_img1, tf_img2, max_val=255)

def psnr_unit8(ori_img_path, des_image_path):
    tf_img1 = tf.image.decode_image(tf.read_file(ori_img_path))
    tf_img2 = tf.image.decode_image(tf.read_file(des_image_path))
    return tf.image.psnr(tf_img1, tf_img2, max_val=255)

def psnr(img1, img2):
    mse = np.mean((img1 / 255. - img2 / 255.) ** 2)
    if mse < 1.0e-10:
        return 100
    PIXEL_MAX = 1
    return 20 * math.log10(PIXEL_MAX / math.sqrt(mse))

if __name__ == '__main__':
    im1 = cv2.imread('/home/szx/Desktop/brazil_QP_Patch=23_469.png')
    im2 = cv2.imread('/home/szx/Desktop/brazil_QP_Patch=44_469.png')
    psnr_1 = skimage.measure.compare_psnr(im1, im2, 255)
    print(psnr_1)
    psnr_2 = psnr(im1, im2)
    print(psnr_2)
    # Computing SSIM (multichannel=True for color images)
    ssim = skimage.measure.compare_ssim(im1, im2, data_range=255, multichannel=True)
Applying Gaussian blur to an image:
from PIL import Image
from PIL import ImageFilter
im = Image.open('source_path.png')
crop_image_file_path = 'des_path.png'
im = im.filter(ImageFilter.GaussianBlur(radius=10))
im.save(crop_image_file_path, format='png', quality=95)
im.show()
Compressing an image as JPEG or JPEG 2000 (see the Pillow documentation)
1. For JPEG 2000, you may need to install OpenJPEG, which Pillow then calls into.
from PIL import Image
# JPEG 2000: first save the image as .j2k, then reopen it and save it as .png
# (mainly to inspect the effect of the compression)
# Key arguments: quality_mode='dB', quality_layers=[...]
root_path = 'source_image'            # path prefix, without extension
tt = Image.open(root_path + '.png')   # source image (the original snippet assumed tt was already opened)
list_inf = [24, 26, 28, 30, 32, 34, 37, 41, 46]
for i in range(0, 9):
    des_path_2 = root_path + '_' + str(i) + '.j2k'
    dd = list_inf[i]
    print(dd)
    tt.save(des_path_2, quality_mode='dB', quality_layers=[dd])
    tt1 = Image.open(des_path_2)
    des_path = root_path + '_' + str(i) + '.png'
    tt1.save(des_path)
# JPEG is simpler: i is the quality, from 0 to 95, with 95 being the best
img = Image.open(src_image_path)
img.save(des_path, format='JPEG', quality=i)
Adding noise to an image:
skimage.util.random_noise(image, mode='gaussian', seed=None, clip=True, **kwargs)
image: the input image as an ndarray; it is converted to float after input.
mode: str, the type of noise to add. One of:
'gaussian': additive Gaussian noise.
'localvar': additive Gaussian noise with a specific local variance at each point.
'poisson': Poisson-distributed noise.
'salt': salt noise, randomly replacing pixels with 1 (high-intensity noise).
'pepper': pepper noise, randomly replacing pixels with 0 or -1 (low-intensity noise).
's&p': salt-and-pepper noise, both kinds at once, appearing as black and white speckles.
'speckle': multiplicative noise, out = image + n * image, where n is Gaussian noise with the given mean and variance.
seed: int. Sets the random seed before generating the noise, for reproducible comparisons.
clip: bool. If True (the default), the output is clipped after adding 'speckle', 'poisson', or 'gaussian' noise so that the data stays within [0, 1] or [-1, 1]. If False, the data may leave that range.
mean: float. Mean of the random distribution, used for 'gaussian' and 'speckle'. Default 0.
var: float. Variance of the random distribution (standard deviation squared), used for 'gaussian' and 'speckle'. Default 0.01.
local_vars: ndarray. Local variance at every pixel, a matrix of positive floats with the same shape as the image; used for 'localvar'.
amount: float. Proportion of pixels to replace for salt/pepper noise, in [0, 1]; used for 'salt', 'pepper', and 'salt & pepper'. Default 0.05.
salt_vs_pepper: float. Ratio of salt noise to pepper noise, in [0, 1]; larger values mean more pixels replaced with 1 (more salt). Default 0.5.
Output
out: ndarray
Floating-point image data in [0, 1] or [-1, 1]. skimage reads images as (height, width, channel). Note that RGB data stored as float lies in [0, 1], while integer data lies in [0, 255].
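A minimal usage sketch (the file names here are placeholders):
import numpy as np
from skimage import io, util

img = io.imread('input.png')                                  # uint8, (H, W, C)
noisy = util.random_noise(img, mode='gaussian', var=0.01)     # float image in [0, 1]
noisy_sp = util.random_noise(img, mode='s&p', amount=0.05)    # salt-and-pepper noise
io.imsave('noisy.png', (noisy * 255).astype(np.uint8))        # back to uint8 for saving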
Reading and writing CSV and Excel files:
import csv

def writer_csv():
    with open("test.csv", "w") as csvfile:
        writer = csv.writer(csvfile)
        for i in range(10):
            writer.writerow(["index", "a_name", "b_name"])

def reader_csv():
    with open("test.csv", "r") as csvfile:
        reader = csv.reader(csvfile)
        for line in reader:
            print(line)
For Excel, use the xlrd module for reading and xlwt for writing; both can be installed with pip install xlrd and pip install xlwt.
# Read
import xlrd
file_path = 'video_test.xlsx'
workbook = xlrd.open_workbook(file_path)
# Get a worksheet by its sheet name
table = workbook.sheet_by_name("Sheet1")
aa = table.cell(0, 0).value
bb = table.cell(0, 1).value
c = table.cell(1, 0).value
print(aa, bb, c)

# Write
import xlwt
workbook = xlwt.Workbook(encoding='utf-8')
# Create a sheet
workSheet = workbook.add_sheet('Sheet1')
# Write values into the sheet
workSheet.write(0, 0, label='name')
workSheet.write(0, 1, label=int(666))
# Save
workbook.save('video_test.xlsx')
Image entropy:
Image entropy is a statistical feature that reflects the average amount of information in an image. The one-dimensional entropy describes the information contained in the distribution of gray levels. Let Pi denote the proportion of pixels whose gray value is i; the one-dimensional gray entropy is then defined as H = -sum_{i=0}^{255} Pi * log2(Pi).
Python code:
import math
import cv2
import numpy as np

def caculate_entropy():
    tmp = []
    for i in range(256):
        tmp.append(0)
    val = 0
    k = 0
    res = 0
    # Read the image in grayscale (single channel)
    image = cv2.imread('/home/szx/Desktop/brazil_QP=23_306.png', 0)
    img = np.array(image)
    for i in range(len(img)):
        for j in range(len(img[i])):
            val = img[i][j]
            tmp[val] = float(tmp[val] + 1)
            k = float(k + 1)
    for i in range(len(tmp)):
        tmp[i] = float(tmp[i] / k)
    for i in range(len(tmp)):
        if tmp[i] == 0:
            res = res
        else:
            res = float(res - tmp[i] * (math.log(tmp[i]) / math.log(2.0)))
    print(res)
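The double loop above is slow on large images; an equivalent vectorized sketch using np.histogram (same placeholder path as above) is:
import cv2
import numpy as np

def entropy_fast(image_path):
    img = cv2.imread(image_path, 0)                           # grayscale
    hist, _ = np.histogram(img, bins=256, range=(0, 256))     # gray-level counts
    p = hist / hist.sum()                                      # probabilities Pi
    p = p[p > 0]                                               # drop empty bins (log2(0) is undefined)
    return float(-np.sum(p * np.log2(p)))

print(entropy_fast('/home/szx/Desktop/brazil_QP=23_306.png'))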
Reading and writing YUV video files:
import math
import numpy as np

def getFrameNUm(filename, bitdepth, W, H):
    bytesPerPixel = math.ceil(bitdepth / 8)
    framePixels = bytesPerPixel * H * W * 3 // 2
    fp = open(filename, 'rb')
    fp.seek(0, 2)
    maxFrameNum = fp.tell()
    maxFrameNum = maxFrameNum // framePixels
    # print("bitdepth:", bitdepth, "framePix:", framePixels)
    return maxFrameNum

def readyuv420_byframe(filename, bitdepth, W, H, startframe, totalframe, show=False):
    # Read totalframe frames, starting from frame startframe (0-based, inclusive)
    uv_H = H // 2
    uv_W = W // 2
    if bitdepth == 8:
        Y = np.zeros((H, W), np.uint8)
        U = np.zeros((uv_H, uv_W), np.uint8)
        V = np.zeros((uv_H, uv_W), np.uint8)
    elif bitdepth == 10:
        Y = np.zeros((H, W), np.uint16)
        U = np.zeros((uv_H, uv_W), np.uint16)
        V = np.zeros((uv_H, uv_W), np.uint16)
    bytesPerPixel = math.ceil(bitdepth / 8)
    seekPixels = startframe * H * W * 3 // 2
    fp = open(filename, 'rb')
    fp.seek(bytesPerPixel * seekPixels)
    for i in range(totalframe):
        Y = fp.read(H * W * bytesPerPixel)
        U = fp.read(H // 2 * W // 2 * bytesPerPixel)
        V = fp.read(H // 2 * W // 2 * bytesPerPixel)
        if bitdepth == 10:
            Y = np.reshape(np.fromstring(Y, dtype=np.uint16), (H, W))
            U = np.reshape(np.fromstring(U, dtype=np.uint16), (H // 2, W // 2))
            V = np.reshape(np.fromstring(V, dtype=np.uint16), (H // 2, W // 2))
            Y = np.array(Y).view('<u2')
            U = np.array(U).view('<u2')
            V = np.array(V).view('<u2')
        else:
            Y = np.reshape(np.fromstring(Y, dtype=np.uint8), (H, W))
            U = np.reshape(np.fromstring(U, dtype=np.uint8), (H // 2, W // 2))
            V = np.reshape(np.fromstring(V, dtype=np.uint8), (H // 2, W // 2))
        yield Y, U, V

def writeyuv420p(filename, W, H, Y, U, V):
    fp = open(filename, 'wb')
    fp.write(Y)
    fp.write(U)
    fp.write(V)
    fp.close()

def writeyuv420p10le(filename, W, H, Y, U, V):
    fp = open(filename, 'wb')
    Y = Y.view('<u2')
    U = U.view('<u2')
    V = V.view('<u2')
    fp.write(Y)
    fp.write(U)
    fp.write(V)
    fp.close()
Explanation of some of the dtype codes used above:
i | signed integer; 'i1', 'i2', 'i4', 'i8' correspond to int8, int16, int32, int64
u | unsigned integer; 'u1', 'u2', 'u4', 'u8' correspond to uint8, uint16, uint32, uint64
f | floating point; 'f2', 'f4', 'f8' correspond to float16, float32, float64
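The '<' prefix selects little-endian byte order, so '<u2' is a little-endian unsigned 16-bit integer; a quick illustrative check:
import numpy as np

raw = bytes([0x01, 0x02, 0x03, 0x04])        # 4 raw bytes
arr = np.frombuffer(raw, dtype='<u2')        # interpreted as little-endian uint16
print(arr)                                   # [ 513 1027]
print(np.dtype('<u2').itemsize, arr.dtype)   # 2 uint16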
Downloading the Fairchild HDR dataset
The Fairchild HDR dataset website offers more than 100 HDR images, but no bundled download. They can be fetched in batch as follows:
First mirror the page with wget:
wget -c -r -np -k -L -p http://rit-mcsl.org/fairchild//HDRPS/HDRthumbs.html
The download directory then contains a Thumbs folder holding LDR previews of the HDR images. Extract all the file names, change the extension, and build the download links (all HDR files live under http://www.cis.rit.edu/fairchild/HDRPS/EXRs/):
files = dir('./*.jpg');
for i=1:1:length(files)
[file_path,file_name,file_ext] = fileparts(files(i).name);
name = strcat(file_name,'.exr');
downloadLink = strcat('http://www.cis.rit.edu/fairchild/HDRPS/EXRs/',name,'\n');
fprintf(downloadLink)
end
Put the resulting download links into a file link.txt:
http://www.cis.rit.edu/fairchild/HDRPS/EXRs/507.exr
http://www.cis.rit.edu/fairchild/HDRPS/EXRs/AhwahneeGreatLounge.exr
http://www.cis.rit.edu/fairchild/HDRPS/EXRs/AirBellowsGap.exr
....
....
....
Download them with wget:
wget -i link.txt
numpy: count how many elements of a matrix exceed a given value
For example, the number of entries greater than 0.1 in a 4x3 random matrix:
nums = np.random.rand(4, 3)
print(nums)
y = (nums > 0.1)
print(y)
z = nums[y]
print(z.size)
Replace elements smaller than a given value with another value (here, negative entries are replaced with 0):
array([[-4, -4, -5, 2, 1],
[-1, -2, -1, 3, 3],
[-1, -2, 3, -5, 3],
[ 0, -3, -5, 1, -4],
[ 0, 3, 1, 3, -4]])
# Method 1
>>> np.maximum(a, 0)
array([[0, 0, 0, 2, 1],
[0, 0, 0, 3, 3],
[0, 0, 3, 0, 3],
[0, 0, 0, 1, 0],
[0, 3, 1, 3, 0]])
# Method 2
>>> (a + abs(a)) / 2
array([[0, 0, 0, 2, 1],
[0, 0, 0, 3, 3],
[0, 0, 3, 0, 3],
[0, 0, 0, 1, 0],
[0, 3, 1, 3, 0]])
# Method 3
>>> b = a.copy()
>>> b[b < 0] = 0
>>> b
array([[0, 0, 0, 2, 1],
[0, 0, 0, 3, 3],
[0, 0, 3, 0, 3],
[0, 0, 0, 1, 0],
[0, 3, 1, 3, 0]])
# Method 4
>>> np.where(a > 0, a, 0)
array([[0, 0, 0, 2, 1],
[0, 0, 0, 3, 3],
[0, 0, 3, 0, 3],
[0, 0, 0, 1, 0],
[0, 3, 1, 3, 0]])
Loading a LUT (.cube) file for color grading
from PIL import Image
import math

LUT_3D_SIZE = 32

def cubeIndex(r, g, b):
    return int(r + g * LUT_3D_SIZE + b * LUT_3D_SIZE * LUT_3D_SIZE)

def mix(colorL, colorH, color_c):
    return colorL + (colorH - colorL) * (color_c - math.floor(color_c))

save_path = 'LTR.jpg'
img = Image.open("cat.jpg")
# img.show()
bitmap = img.load()
fd = open('LTR.cube')
lines = fd.readlines()
rgbFloatCube = []
cubeDataStart = False
for l in lines:
    if cubeDataStart:
        rgbStr = l.split(" ")
        if len(rgbStr) == 3:
            rgbFloat = (float(rgbStr[0]), float(rgbStr[1]), float(rgbStr[2]))
            rgbFloatCube.append(rgbFloat)
    if l.startswith("LUT_3D_SIZE 32"):
        cubeDataStart = True
for x in range(img.size[0]):
    for y in range(img.size[1]):
        pixelColor = bitmap[x, y]
        red = pixelColor[0] / 255.0 * (LUT_3D_SIZE - 1)
        green = pixelColor[1] / 255.0 * (LUT_3D_SIZE - 1)
        blue = pixelColor[2] / 255.0 * (LUT_3D_SIZE - 1)
        redH = math.ceil(red)
        redL = math.floor(red)
        greenH = math.ceil(green)
        greenL = math.floor(green)
        blueH = math.ceil(blue)
        blueL = math.floor(blue)
        indexH = cubeIndex(redH, greenH, blueH)
        indexL = cubeIndex(redL, greenL, blueL)
        toColorH = rgbFloatCube[indexH]
        toColorL = rgbFloatCube[indexL]
        toR = mix(toColorL[0], toColorH[0], red)
        toG = mix(toColorL[1], toColorH[1], green)
        toB = mix(toColorL[2], toColorH[2], blue)
        toColor2 = (int(toR * 255), int(toG * 255), int(toB * 255))
        bitmap[x, y] = toColor2
# img.show()
img.save(save_path)
During neural-network inference, an odd feature-map width or height can make the restored image's size differ from the input. One way to handle this:
# Sketch: crop to a multiple of 4, run super-resolution (sr) on the crop,
# upscale the full image bilinearly, then paste the SR crop back in
h = height >> 2 << 2   # round the height down to a multiple of 4
w = width >> 2 << 2    # round the width down to a multiple of 4
img_crop = img[:h, :w, :]
img_crop_sr = sr(img_crop)   # sr(): the network's inference
img_sr = bilinear(img)       # bilinear(): plain bilinear upscaling of the whole image
img_sr[:h, :w, :] = img_crop_sr
Adjust the shift amount according to the number of stride-2 layers in the network; the example above shifts by 2 (i.e. rounds to a multiple of 4). A more concrete sketch follows.
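The following sketch spells out the same idea; run_sr_model is a hypothetical x2 super-resolution inference function, and cv2.resize stands in for the bilinear upscaling:
import cv2
import numpy as np

def sr_with_size_fix(img, run_sr_model, scale=2, shift=2):
    # Crop to a multiple of 2**shift, run SR on the crop, and paste the result
    # into a bilinearly upscaled full image so the output size always matches.
    height, width = img.shape[:2]
    h = height >> shift << shift
    w = width >> shift << shift
    img_crop_sr = run_sr_model(img[:h, :w, :])      # expected shape (h * scale, w * scale, C)
    img_sr = cv2.resize(img, (width * scale, height * scale),
                        interpolation=cv2.INTER_LINEAR)
    img_sr[:h * scale, :w * scale, :] = img_crop_sr
    return img_sr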
Setting the project root as an absolute import path in Python:
import sys
from os.path import dirname, abspath
path = dirname(dirname(abspath(__file__)))
sys.path.append(path)
Converting PDF pages to PNG images in Python
Install:
pip install PyMuPDF
(PyMuPDF provides the fitz module; the separate 'fitz' package on PyPI is an unrelated project.)
import fitz
# pdfPath: path to the PDF file
# imgPath: folder in which to save the images
# zoom_x: zoom factor in the x direction
# zoom_y: zoom factor in the y direction
# rotation_angle: rotation angle
def pdf_image(pdfPath, imgPath, zoom_x, zoom_y, rotation_angle):
    pdf = fitz.open(pdfPath)
    # Read the PDF page by page
    for pg in range(0, pdf.pageCount):
        page = pdf[pg]
        # Set the zoom and rotation factors
        trans = fitz.Matrix(zoom_x, zoom_y).preRotate(rotation_angle)
        pm = page.getPixmap(matrix=trans, alpha=False)
        # Write the page image
        pm.writePNG(imgPath + str(pg) + ".png")
    pdf.close()

pdf_image(r"name.pdf", "path/", 3, 3, 0)
zoom_x and zoom_y usually take the same value; the larger the value, the higher the resolution of the output image.
Image filtering with scipy
import numpy as np
from scipy import ndimage
from scipy import special

# Apply a kernel with: filtered = ndimage.filters.convolve(img_H, conv_kernel)
def circular_lowpass_kernel(cutoff=1.5, kernel_size=21, pad_to=0):
    """2D sinc filter, ref: https://dsp.stackexchange.com/questions/58301/2-d-circularly-symmetric-low-pass-filter

    Args:
        cutoff (float): cutoff frequency in radians (pi is max)
        kernel_size (int): horizontal and vertical size, must be odd.
        pad_to (int): pad kernel size to desired size, must be odd or zero.
    """
    assert kernel_size % 2 == 1, 'Kernel size must be an odd number.'
    kernel = np.fromfunction(
        lambda x, y: cutoff * special.j1(cutoff * np.sqrt(
            (x - (kernel_size - 1) / 2)**2 + (y - (kernel_size - 1) / 2)**2)) / (2 * np.pi * np.sqrt(
            (x - (kernel_size - 1) / 2)**2 + (y - (kernel_size - 1) / 2)**2)), [kernel_size, kernel_size])
    kernel[(kernel_size - 1) // 2, (kernel_size - 1) // 2] = cutoff**2 / (4 * np.pi)
    kernel = kernel / np.sum(kernel)
    if pad_to > kernel_size:
        pad_size = (pad_to - kernel_size) // 2
        kernel = np.pad(kernel, ((pad_size, pad_size), (pad_size, pad_size)))
    return kernel
This generates the convolution kernel of a low-pass filter. A larger cutoff gives a sharper result; a larger kernel_size makes ringing artifacts more visible. A smaller cutoff also slightly affects the ringing caused by the kernel size: for example, with cutoff=0.75 and kernel_size=13 the image is blurry but ringing is barely noticeable, while with cutoff=1.5 and kernel_size=13 the image is sharp but shows ringing.
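A short usage sketch of the kernel above (the image paths are placeholders); each color channel is filtered independently with ndimage.convolve:
import cv2
import numpy as np
from scipy import ndimage

img = cv2.imread('input.png').astype(np.float32) / 255.0
kernel = circular_lowpass_kernel(cutoff=1.5, kernel_size=13)
# Convolve every channel with the 2D kernel
filtered = np.stack([ndimage.convolve(img[:, :, c], kernel, mode='reflect')
                     for c in range(img.shape[2])], axis=2)
cv2.imwrite('filtered.png', np.clip(filtered * 255.0, 0, 255).astype(np.uint8))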
Adding text to an image with ImageDraw and ImageFont
from PIL import Image, ImageDraw, ImageFont
img = Image.open('im_path.png')
# Pick a font installed on the system; 30 is the font size
setFont = ImageFont.truetype('/usr/share/fonts/truetype/arphic/ukai.ttc', 30)
text = "亚古兽要进化"
draw = ImageDraw.Draw(img)
# (40, 200) is where the text is drawn; fill is the text color (R, G, B)
draw.text((40, 200), text, font=setFont, fill=(255, 255, 255), direction=None)
img.show()
numpy sorting
1. numpy.sort() returns a sorted copy of the input array (the original array is left unchanged; use ndarray.sort() to sort in place).
numpy.sort(a, axis, kind, order)
The order parameter deserves a note:
- order: if the array contains fields, the field to sort by
import os
import numpy as np

array_dtype = np.dtype([('name', 'S100'), ('result', float)])
array_List = np.array([('xiaoming', 100.0), ('xiaohong', 99.5), ('xiaotian', 89.7), ('xiaoding', 109.5)], dtype=array_dtype)
res = np.sort(array_List, order='result')
for i in res:
    tt = i[0]
    tt = tt.decode('utf-8')
    print(os.path.join('res/dfsd', tt))
In np.dtype, S100 means a byte string with a maximum length of 100.
Strings printed from such an array carry a b prefix. Python also has the u and f prefixes:
u
A u prefix marks the string literal as Unicode. It is mostly placed in front of strings containing Chinese characters to avoid garbled text caused by the source file's encoding.
b
The b"" prefix means the string that follows is of type bytes. In network programming, servers and browsers only accept bytes data. In Python 3, bytes and str are converted with
str.encode('utf-8')
bytes.decode('utf-8')
f
An f prefix allows Python expressions inside braces within the string:
print(f'{name} done in {time.time() - t0:.2f} s')
Output:
processing done in 1.00 s
2. numpy.argsort() returns the indices that would sort the array in ascending order; see the example below.
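A quick example:
import numpy as np

scores = np.array([0.3, 0.9, 0.1, 0.5])
order = np.argsort(scores)           # indices that sort ascending: [2 0 3 1]
print(order)
print(scores[order])                 # [0.1 0.3 0.5 0.9]
print(np.argsort(scores)[::-1])      # descending order: [1 3 0 2]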