计算机视觉中处理数据的小工具
有很多代码写了一遍用的时候还需要一遍遍再写,这里整理一下,避免之后重复的去写,会持续更新。
tips:有些路径可能没改,anyway,等我有时间。。。
如果觉得有用,求一个关注,也可以一起贡献好用的小代码块。
一、数据转换
1.png格式转为numpy格式(常用于分割或图像编辑等方向产生mask)
import os
from PIL import Image
import numpy as np
# Convert LabelMe-style `label.png` masks under <dir_root>/<name>_json/ into
# .npy label maps, shifting every non-zero class id up by one, and save them
# into the mask_numpy/ directory (existing .npy files are not overwritten).
dir_root = '/Users/ligang/Desktop/FUTURE_chairs/parts/'
save_root = '/Users/ligang/Desktop/FUTURE_chairs/mask_numpy/'
for folder in os.listdir(dir_root):
    if not folder.endswith('_json'):
        continue
    for file in os.listdir(os.path.join(dir_root, folder)):
        if file != 'label.png':
            continue
        img_np = np.array(Image.open(os.path.join(dir_root, folder, file)))
        # Vectorized replacement of the original per-pixel double loop:
        # shift every non-zero label id up by one (0 stays background).
        img_np[img_np != 0] += 1
        print(np.sum(img_np == 2))
        # Compute the destination path once instead of twice.
        save_path = os.path.join(save_root, folder.split('_json')[0] + '.npy')
        if not os.path.exists(save_path):
            np.save(save_path, img_np)
        print(folder)
2.png格式转为mp4格式(图像拼接成视频)
这里展示的是从一个文件夹中将 `*_30.png` 到 `*_60.png` 的图片拼接成 `*_video.mp4`,大家可以视情况进行修改。
import cv2
from PIL import Image
def save_gif(imgs, save_root_path):
    """Stitch an ordered list of .png frame paths into an MP4 video.

    Args:
        imgs: ordered list of image file paths; entries without '.png'
            in the name are skipped.
        save_root_path: path prefix; output is written to
            `<save_root_path>video.mp4`.
    """
    fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V')
    fps = 7
    # PIL's .size is (width, height) -- exactly the frame size VideoWriter expects.
    image = Image.open(imgs[0])  # .convert('RGB')
    print(image.mode)
    media_writer = cv2.VideoWriter(save_root_path + 'video.mp4', fourcc, fps, image.size)
    for img in imgs:
        if '.png' not in img:
            continue
        im = cv2.imread(img)
        if im is None:
            # Unreadable/missing frame: skip it instead of crashing on write().
            continue
        print(img)
        media_writer.write(im)
    # BUG FIX: without release() the container is never finalized and the
    # resulting .mp4 can be truncated or unplayable.
    media_writer.release()
    print('Completed!')
import os
root_path = '/public/data0/MULT/users/zhengheliang/tmp/lg_tmp/waiting_list/'

if __name__ == '__main__':
    for name in os.listdir(root_path):
        if '30.png' not in name:
            continue
        prefix = name.split('30.png')[0]
        # Collect frames *_30.png .. *_59.png. The sequence is appended twice,
        # presumably so the clip plays through two loops -- TODO confirm intent.
        frame_paths = []
        for _ in range(2):
            for idx in range(30, 60):
                print(str(idx).zfill(2))
                frame_paths.append(root_path + prefix + str(idx) + '.png')
        save_gif(frame_paths, root_path + prefix)
3.图像RGBD格式转为RGB格式
import os
from PIL import Image
import numpy as np
import cv2
# Flatten RGBA images onto a white background and re-save them as RGB in place.
data_path = '/public/data0/MULT/users/zhengheliang/tmp/lg_tmp/choose_cases/seg_obj/'
for img in os.listdir(data_path):
    if img == '.ipynb_checkpoints':
        continue
    path = data_path + img
    pil_image = Image.open(path)  # .resize((256,256))
    pil_image.load()
    b = np.array(pil_image)
    if len(b.shape) > 2 and b.shape[2] == 4:
        # Composite over white: out = rgb*alpha + 255*(1-alpha).
        aa = b[:, :, 3:] / 255.0
        flattened = b[:, :, :3] * aa + 255 * (1 - aa)
        pil_image = Image.fromarray(np.array(flattened, dtype=np.uint8))
    pil_image = pil_image.convert("RGB")
    pil_image.save(path)
4.video格式转为png格式(视频切帧)
import cv2
# Split a video into per-frame images. Only the left-most 256 pixel columns of
# each frame are kept (the crop looks dataset-specific -- TODO confirm intent).
vidcap = cv2.VideoCapture('00.mp4')
success, image = vidcap.read()
count = 0
while success:
    # Save frame as JPEG (change the extension to .png for PNG output).
    cv2.imwrite("frame%d.jpg" % count, image[:, :256, :])
    success, image = vidcap.read()
    print('Read a new frame: ', success)
    count += 1
# BUG FIX: release the capture handle so the underlying file is closed promptly.
vidcap.release()
二、数据可视化
1.numpy文件高亮可视化(可用于可视化mask或attention map之类的图)
import matplotlib.pyplot as plt
import numpy as np
# Load a .npy array (e.g. a mask or attention map) and display it as a
# color-mapped image.
file_path = ''
arr = np.load(file_path)
plt.imshow(arr)
plt.show()
三、图像改变数值
1.使用cv2库的读写函数给原图像重新赋值并存储(改变图像的RGB像素值)
import cv2
import os
# Turn every image under image/ into an all-white mask of the same size and
# write it to the sibling mask/ directory.
dir_path = '/public/data0/MULT/users/ligang351/projects/NeuS-main/public_data/thin_rope/image/'
for file_name in os.listdir(dir_path):
    print(file_name)
    img = cv2.imread(dir_path + file_name)
    # Vectorized replacement of the original per-pixel double loop:
    # one assignment sets all three channels of every pixel to 255.
    img[:, :] = [255, 255, 255]
    cv2.imwrite(dir_path.replace('image', 'mask') + file_name, img)
四、特征提取
1.使用pytorch中模型库现有预训练模型提取特征
tips:
(1) 路径我抽空改,真的很多ddls.
(2) 分别是CLIP的image encoder(ViT)和ResNet提取特征, 将下方被注释掉的代码块(从 model = models.resnet50(...) 到 preprocess 的定义)放开即可改用 ResNet 提取特征。
import torch
import clip
from PIL import Image
from PIL import Image
import numpy as np
import os
from sklearn.cluster import KMeans
import timm
from torchvision import models, transforms
import shutil  # stdlib; replaces the shelled-out `cp` below

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)
'''
# Alternative backbone: ResNet-50 features. Uncomment this block (and the
# `model(image)` line further down) to use it instead of CLIP's image encoder.
model = models.resnet50(pretrained=True)
model.fc = torch.nn.Linear(2048, 2048)
torch.nn.init.eye_(model.fc.weight)  # eye() is deprecated in favor of eye_()
model = model.cuda()
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
'''

# Hoist the dataset root so the path is written once instead of three times.
data_root = '/public/data0/MULT/users/zhengheliang/tmp/lg_tmp/3D-FUTURE-chairs_clear-imgs/'
out_root = '/public/data0/MULT/users/zhengheliang/tmp/lg_tmp/clustering_clip_2cls/'
src_img_root = '/public/data0/MULT/datasets/web_car/data_lg/3D_FUTURE_imgs/final_chairs_images_clear_small/'

folder_list = os.listdir(data_root)
arr_list = []
for folder in folder_list:
    for file in sorted(os.listdir(data_root + folder)):
        # Only the '*2.png' view of each object is used for feature extraction.
        if '2.png' not in file:
            continue
        print(file)
        pil_image = Image.open(data_root + folder + '/' + file)
        pil_image.load()
        b = np.array(pil_image)
        if len(b.shape) > 2 and b.shape[2] == 4:
            # Composite RGBA over a mid-grey (128) background.
            aa = b[:, :, 3:] / 255.0
            pil_image = Image.fromarray(np.array(b[:, :, :3] * aa + 128 * (1 - aa), dtype=np.uint8))
        pil_image = pil_image.convert("RGB")
        image = preprocess(pil_image).unsqueeze(0).to(device)
        with torch.no_grad():
            image_features = model.encode_image(image)
            # image_features = model(image)  # use this line with the ResNet setup
        arr_list.append(image_features.squeeze(0).cpu().numpy())

arr_np = np.array(arr_list)
print(arr_np.shape)

# Cluster the per-image features with K-Means (K = nCluster).
nCluster = 10
kmCluster = KMeans(n_clusters=nCluster).fit(arr_np)
labels = kmCluster.labels_.tolist()

# NOTE(review): zip(folder_list, labels) assumes exactly one '*2.png' image
# per folder; if a folder yields 0 or >1 features the pairing goes wrong --
# verify against the dataset layout.
items = list(zip(folder_list, labels))

# Copy a representative image of each clustered object into out_root/<cluster_id>/.
for cls_id in range(nCluster):
    for k, v in items:
        if v != cls_id:
            continue
        dst = out_root + str(v)
        os.makedirs(dst, exist_ok=True)  # replaces the exists()+mkdir pair
        # shutil.copy replaces os.system('cp ...'): no shell, errors surface
        # as exceptions instead of being silently ignored.
        shutil.copy(src_img_root + k + '_30.png', dst)