导入所需模块
import numpy as np
import cv2
import pywt
from sklearn.preprocessing import MinMaxScaler
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms
from torch.nn import functional as F
import matplotlib.pyplot as plt
%matplotlib inline
Изображение
# Load the image (flag 1 = color/BGR, 0 = grayscale).
# Prefer an absolute path; an unresolvable relative path is the usual cause of
# "error: (-215:Assertion failed) !_src.empty() in function 'cv::cvtColor'".
img = cv2.imread('data/baby_yoda.jpeg', 1)
if img is None:
    # cv2.imread returns None instead of raising, so fail loudly here rather
    # than with the opaque cvtColor assertion below.
    raise FileNotFoundError("Could not read 'data/baby_yoda.jpeg' - check the path")
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; matplotlib expects RGB
plt.imshow(img_rgb)
plt.xticks([])  # hide axis ticks for a cleaner preview
plt.yticks([])
plt.show();
Wavelets小波段
def extract_wavelets(image, num_levels=4):
    """Multi-level Haar wavelet features of an RGB image.

    The image is reduced to a [0, 1] grayscale array by averaging the
    channels, then decomposed ``num_levels`` times with a 2-D Haar DWT.
    Level 1 contributes the means of all four sub-bands (cA, cH, cV, cD);
    every deeper level contributes only the mean of its approximation band.

    Returns a list of ``3 + num_levels`` floats.
    """
    gray = image.mean(axis=2) / 255.0
    approx, (horiz, vert, diag) = pywt.dwt2(gray, wavelet='haar')
    feats = [approx.mean(), horiz.mean(), vert.mean(), diag.mean()]
    for _ in range(num_levels - 1):
        approx, _details = pywt.dwt2(approx, wavelet='haar')
        feats.append(approx.mean())
    return feats
Hu矩
图像矩（image moments）是图像像素强度的加权平均。
Hu 矩常用于形状匹配。
def hu_moments(image):
    """Seven log-scaled Hu moment invariants of an RGB image.

    The image is binarized (gray threshold 128 -> 255) before the raw
    moments are computed; each Hu moment h is then mapped to log10(|h|)
    to compress its huge dynamic range.

    NOTE(review): log10(|h|) discards the moment's sign and yields -inf
    for an exactly-zero moment -- confirm this matches downstream use.

    Returns a (7, 1) numpy array.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    _, binary = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY)
    hu = cv2.HuMoments(cv2.moments(binary))
    for idx in range(hu.size):
        hu[idx] = np.log10(abs(hu[idx]))
    return hu
小波特征、Hu 矩、均值和标准差。
def get_features(image):
    """Concatenate all hand-crafted descriptors of an RGB image.

    The vector stacks, in order: per-channel means and standard deviations
    (6 values), the wavelet features from ``extract_wavelets``, and the 7
    log-scaled Hu moments from ``hu_moments``.

    Returns a flat 1-D numpy array.
    """
    means, stds = cv2.meanStdDev(image)
    parts = [
        np.ravel(np.array([means, stds])),
        np.ravel(extract_wavelets(image)),
        np.ravel(hu_moments(image)),
    ]
    return np.hstack(parts)
# Build the full feature vector for the loaded image.
# (Fixed: the original call was missing its closing parenthesis.)
all_features = get_features(img_rgb)
all_features
array([ 1.15819394e+02, 1.08684306e+02, 1.03660257e+02, 5.32942985e+01,
5.04810632e+01, 5.16965848e+01, 8.57944987e-01, 2.94698620e-05,
2.18867102e-04, -2.08666183e-06, 1.71588997e+00, 3.43095071e+00,
6.85862812e+00, -2.69858473e+00, -6.75121215e+00, -9.34366502e+00,
-9.65299522e+00, -1.91792389e+01, -1.30349660e+01, -1.96106019e+01])
自动编码器
class VAE(nn.Module):
    """Fully-connected variational autoencoder with one hidden layer.

    Architecture: input -> fc1 -> ReLU -> (fc21: mu, fc22: logvar)
    -> reparameterized sample z -> fc3 -> ReLU -> fc4 -> sigmoid.
    """

    def __init__(self, input_size, hidden_size, latent_size):
        super(VAE, self).__init__()
        self.input_size = input_size
        self.output_size = input_size  # autoencoder: reconstruct the input
        # Encoder layers.
        self.fc1 = nn.Linear(self.input_size, hidden_size)
        self.fc21 = nn.Linear(hidden_size, latent_size)  # latent mean
        self.fc22 = nn.Linear(hidden_size, latent_size)  # latent log-variance
        # Decoder layers.
        self.fc3 = nn.Linear(latent_size, hidden_size)
        self.fc4 = nn.Linear(hidden_size, self.output_size)

    def encode(self, x):
        """Map an input batch to (mu, logvar) of the latent Gaussian."""
        hidden = F.relu(self.fc1(x))
        return self.fc21(hidden), self.fc22(hidden)

    def reparameterize(self, mu, logvar):
        """Sample z ~ N(mu, sigma^2) via the reparameterization trick."""
        sigma = torch.exp(0.5 * logvar)
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def decode(self, z):
        """Map a latent sample back to input space, squashed into (0, 1)."""
        hidden = F.relu(self.fc3(z))
        return torch.sigmoid(self.fc4(hidden))

    def forward(self, x):
        """Return (mu, reconstruction, mu, logvar) for a flattened batch."""
        mu, logvar = self.encode(x.view(-1, self.output_size))
        z = self.reparameterize(mu, logvar)
        # NOTE: mu is returned twice -- first as the "encoded" representation
        # consumed by callers, then again alongside logvar for the KL term.
        return mu, self.decode(z), mu, logvar
def loss_function(recon_x, x, mu, logvar):
    """VAE objective: summed reconstruction BCE plus KL divergence.

    Both tensors are reshaped to (-1, x.shape[0]) before the element-wise
    BCE; since the reduction is a plain sum, this only requires the two
    tensors to hold the same number of elements.
    """
    flat_shape = (-1, x.shape[0])
    reconstruction = F.binary_cross_entropy(
        recon_x.view(flat_shape),
        x.view(flat_shape),
        reduction='sum',
    )
    # KL( N(mu, sigma^2) || N(0, 1) ), summed over batch and latent dims.
    divergence = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
    return reconstruction + divergence
def img_VAE(img, num_output_vars):
    """
    Train a small VAE on a 2-D image and return the learned latent features.

    input: 2-D array (m x n), e.g. a grayscale image
    output: m x num_output_vars matrix of latent means, min-max scaled

    NOTE(review): the returned features come from the *last* batch of the
    last epoch only, and the final fit_transform refits the scaler on the
    encoded output rather than reusing the pixel scaling -- confirm both
    behaviors are intended.
    """
    # Training hyper-parameters.
    num_epochs = 50
    batch_size = 5000  # larger than a typical image's first dim -> one batch
    learning_rate = 1e-3
    # Scale values into [0, 1] column-wise (required by the BCE loss).
    scaler = MinMaxScaler()
    img = scaler.fit_transform(img)
    transform = transforms.Compose([
        transforms.ToTensor(),
    ])
    # ToTensor turns the (m, n) array into a (1, m, n) float tensor; the
    # DataLoader then yields it along dim 0, and the model's forward
    # flattens it back to (m, n) rows.
    dataset = transform(img)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False)
    # One hidden layer of 50 units; latent size = num_output_vars.
    model = VAE(img.shape[1], 50, num_output_vars)
    optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate, weight_decay=1e-3)
    def train(epoch):
        # One optimization epoch; returns the encoding/reconstruction of
        # the final batch seen (epoch arg is unused beyond bookkeeping).
        model.train()
        train_loss = 0
        for batch_idx, data in enumerate(dataloader):
            data = data.float()
            optimizer.zero_grad()
            encoded_batch, recon_batch, mu, logvar = model(data)
            loss = loss_function(recon_batch, data, mu, logvar)
            loss.backward()
            train_loss += loss.item()
            optimizer.step()
        return encoded_batch, recon_batch
    for epoch in range(num_epochs):
        encoded_output, reconstructed_output = train(epoch)
    # Rescale the latent means so each output column spans [0, 1].
    return scaler.fit_transform(encoded_output.detach().numpy())
# Extract 3 VAE latent features from the channel-averaged (grayscale) image.
img_features_vae = img_VAE(img.mean(axis=2), 3)
# Plot each latent feature as a curve, with the original image alongside.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(20, 5))
ax1.plot(img_features_vae[:, 0], label='VAE_1')
ax1.legend()
ax2.plot(img_features_vae[:, 1], label='VAE_2')
ax2.legend()
ax3.plot(img_features_vae[:, 2], label='VAE_3')
ax3.legend()
ax4.imshow(img_rgb)
# Hide ticks on the image panel.
plt.xticks([])
plt.yticks([])
plt.show();
从图像中提取的特征以后可以作为各种学习算法的输入。