feature_extraction

net.py

import torch
from torchvision import models
from torch import nn


class feature_net(nn.Module):
    def __init__(self, model):
        super(feature_net, self).__init__()

        if model == 'vgg':
            vgg = models.vgg19(pretrained=True)
            # drop the fully connected classifier, keep the conv features
            self.feature = nn.Sequential(*list(vgg.children())[:-1])
            self.feature.add_module('global_average', nn.AvgPool2d(9))
        elif model == 'inceptionv3':
            inception = models.inception_v3(pretrained=True)
            self.feature = nn.Sequential(*list(inception.children())[:-1])
            # drop the auxiliary classifier (child '13' in the torchvision
            # version used here), which breaks a purely sequential forward
            self.feature._modules.pop('13')
            self.feature.add_module('global_average', nn.AvgPool2d(35))
        elif model == 'densenet121':
            densenet = models.densenet121(pretrained=True)
            self.feature = nn.Sequential(*list(densenet.children())[:-1])

    def forward(self, x):
        """
        model is one of vgg19, inception_v3, densenet121
        """
        x = self.feature(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, feature_dim)
        return x


class classifier(nn.Module):
    def __init__(self, dim, n_classes):
        super(classifier, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(dim, 1000),
            nn.ReLU(True),
            nn.Dropout(0.5),
            nn.Linear(1000, n_classes)
        )

    def forward(self, x):
        x = self.fc(x)
        return x
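
A quick way to sanity-check the two modules together is to push a dummy batch through them; a minimal sketch, assuming the 'vgg' branch with 299x299 inputs, where the pooled feature is 512-dimensional:

net = feature_net('vgg')   # downloads the pretrained VGG weights on first use
clf = classifier(512, 2)
dummy = torch.randn(4, 3, 299, 299)
features = net(dummy)      # shape (4, 512)
logits = clf(features)     # shape (4, 2)
print(features.size(), logits.size())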

feature_extraction.py

import os
from tqdm import tqdm
import h5py
import numpy as np
import argparse

import torch
from torchvision import models, transforms
from torch import optim, nn
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from net import feature_net, classifier

parse = argparse.ArgumentParser()
parse.add_argument(
    '--model', required=True, help='vgg, inceptionv3, densenet121')
parse.add_argument('--bs', type=int, default=32)
parse.add_argument('--phase', required=True, help='train, val')
opt = parse.parse_args()
print(opt)

img_transform = transforms.Compose([
    transforms.Resize(320),
    transforms.CenterCrop(299),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
import torch as t
from torch.utils import data
from PIL import Image
from torchvision import transforms as T



transform = T.Compose([
    T.Resize(320),       # scale the image, keeping aspect ratio; shortest side 320 px
    T.CenterCrop(299),   # crop a 299x299 patch from the center (the size net.py expects)
    T.ToTensor(),        # convert the PIL Image to a Tensor, scaled to [0, 1]
    # normalize with the ImageNet per-channel mean and std
    T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
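
Applying this transform to any RGB image should yield a 3x299x299 float tensor; a quick sanity check (the image path below is a placeholder):

img = Image.open('data/train/cat.0.jpg')  # placeholder path
x = transform(img)
print(x.size())  # torch.Size([3, 299, 299])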

class DogCat(data.Dataset):
    def __init__(self, root, transforms=None):
        imgs = os.listdir(root)
        self.imgs = [os.path.join(root, img) for img in imgs]
        self.transforms = transforms

    def __getitem__(self, index):
        img_path = self.imgs[index]
        # filenames look like 'dog.1234.jpg' / 'cat.5678.jpg'
        label = 0 if 'dog' in os.path.basename(img_path) else 1
        data = Image.open(img_path)
        if self.transforms:
            data = self.transforms(data)
        return data, label

    def __len__(self):
        return len(self.imgs)

#dataset = DogCat('./data/train/', transforms=transform)
#img, label = dataset[0]
#for img, label in dataset:
#    print(img.size(), label)

class NewDogCat(DogCat):  # inherit from the DogCat dataset above
    def __getitem__(self, index):
        try:
            # delegate to the parent class, i.e. DogCat.__getitem__(self, index)
            return super(NewDogCat, self).__getitem__(index)
        except Exception:
            # corrupted or unreadable images are reported as (None, None)
            return None, None
from torch.utils.data.dataloader import default_collate  # the default batching function
def my_collate_fn(batch):
    '''
    Each element of batch has the form (data, label).
    '''
    # filter out samples whose data is None
    batch = list(filter(lambda x: x[0] is not None, batch))
    if len(batch) == 0:
        return t.Tensor()
    return default_collate(batch)  # collate the filtered batch the default way
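
The effect of the filter can be checked in isolation with a toy batch; a small sketch (the tensors below are made up):

good_a = (t.randn(3, 299, 299), 0)
bad = (None, None)          # simulates a corrupted sample
good_b = (t.randn(3, 299, 299), 1)
imgs, labels = my_collate_fn([good_a, bad, good_b])
print(imgs.size(), labels)  # torch.Size([2, 3, 299, 299]) tensor([0, 1])
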
dataset = NewDogCat('/home/cc/Desktop/dj/chapter6-实战指南/data/train/', transforms=transform)
testset=NewDogCat('/home/cc/Desktop/dj/chapter6-实战指南/data/val/', transforms=transform)
#print(dataset[5])
print('*************')
batch_size = opt.bs
data_folder_train = DataLoader(dataset, shuffle=False, batch_size=batch_size,
                               collate_fn=my_collate_fn, num_workers=4)
data_folder_val = DataLoader(testset, shuffle=False, batch_size=batch_size,
                             collate_fn=my_collate_fn, num_workers=4)
'''
root = '/home/cc/Desktop/dj/chapter6-实战指南/data'
data_folder = {
    'train': ImageFolder(os.path.join(root, 'train'), transform=img_transform),
    'val': ImageFolder(os.path.join(root, 'val'), transform=img_transform)
}
# define dataloader to load images

dataloader = {
    'train':
    DataLoader(
        data_folder['train'],
        batch_size=batch_size,
        shuffle=False,
        num_workers=4),
    'val':
    DataLoader(
        data_folder['val'],
        batch_size=batch_size,
        shuffle=False,
        num_workers=4)
}
'''
# get train data size and validation data size
data_size = {
    'train': len(data_folder_train.dataset),
    'val': len(data_folder_val.dataset)
}

# check whether a GPU is available
use_gpu = torch.cuda.is_available()


def CreateFeature(model, phase, outputPath='.'):
    """
    Extract features for one data split and save them to an h5py file.

    ARGS:
        model         : backbone name ('vgg', 'inceptionv3', 'densenet121')
        phase         : which split to process ('train' or 'val')
        outputPath    : directory for the h5py output
    """
    featurenet = feature_net(model)
    if use_gpu:
        featurenet.cuda()
    featurenet.eval()  # disable dropout / freeze batch-norm statistics
    # pick the dataloader that matches the requested split
    loader = data_folder_train if phase == 'train' else data_folder_val
    feature_map = torch.FloatTensor()
    label_map = torch.LongTensor()
    with torch.no_grad():
        for data in tqdm(loader):
            img, label = data
            if use_gpu:
                img = img.cuda()
            out = featurenet(img)
            feature_map = torch.cat((feature_map, out.cpu()), 0)
            label_map = torch.cat((label_map, label), 0)
    feature_map = feature_map.numpy()
    label_map = label_map.numpy()
    file_name = '_feature_{}.hd5f'.format(model)
    h5_path = os.path.join(outputPath, phase) + file_name
    with h5py.File(h5_path, 'w') as h:
        h.create_dataset('data', data=feature_map)
        h.create_dataset('label', data=label_map)


CreateFeature(opt.model, opt.phase)
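
The resulting file can be inspected directly with h5py; a minimal sketch (the filename assumes running with --model vgg --phase train):

with h5py.File('train_feature_vgg.hd5f', 'r') as h:
    print(h['data'].shape)   # (n_samples, feature_dim)
    print(h['label'].shape)  # (n_samples,)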

dataset.py

import torch
from torch.utils.data import Dataset
import h5py
import numpy as np
'''
class h5Dataset(Dataset):

    def __init__(self, h5py_list):
        label_file = h5py.File(h5py_list[0], 'r')
        self.label = np.array(label_file['label'][:])
        self.nSamples = len(self.label)
        temp_dataset = np.empty([self.nSamples,0])
        for file in h5py_list:
            h5_file = h5py.File(file, 'r')
            dataset = np.array(h5_file['data'][:])
            temp_dataset = np.concatenate([temp_dataset, dataset], axis = 1)

        self.dataset = temp_dataset

    def __len__(self):
        return self.nSamples

    def __getitem__(self, index):
        assert index < len(self), 'index range error'
        data = self.dataset[index]
        label = self.label[index]
        return (data, label)
'''



class h5Dataset(Dataset):

    def __init__(self, h5py_list):
        # labels are identical across files, so read them from the first one
        with h5py.File(h5py_list[0], 'r') as label_file:
            self.label = torch.from_numpy(label_file['label'][()])
        self.nSamples = self.label.size(0)
        temp_dataset = torch.FloatTensor()
        # concatenate the per-model features along the feature dimension
        for file in h5py_list:
            with h5py.File(file, 'r') as h5_file:
                dataset = torch.from_numpy(h5_file['data'][()])
            temp_dataset = torch.cat((temp_dataset, dataset), 1)

        self.dataset = temp_dataset

    def __len__(self):
        return self.nSamples

    def __getitem__(self, index):
        assert index < len(self), 'index range error'
        data = self.dataset[index]
        label = self.label[index]
        return (data, label)
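
Because each backbone's features are concatenated along dimension 1, passing several files yields one fused vector per sample; a minimal sketch (the filenames assume the extraction step above was run for both models):

train_set = h5Dataset(['train_feature_vgg.hd5f',
                       'train_feature_inceptionv3.hd5f'])
feat, lab = train_set[0]
print(feat.size())  # vgg dim + inception dim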

feature_train.py

import argparse
import time
import os

import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader

from dataset import h5Dataset
from net import classifier
import warnings
warnings.filterwarnings("ignore")
parse = argparse.ArgumentParser()
parse.add_argument(
    '--model',
    nargs='+',
    help='inceptionv3, vgg, densenet121',
    default=['vgg', 'inceptionv3', 'densenet121'])
parse.add_argument('--batch_size', type=int, default=32)
parse.add_argument('--epoch', type=int, default=20)
parse.add_argument('--n_classes', default=2, type=int)
parse.add_argument('--num_workers', type=int, default=8)
opt = parse.parse_args()
print(opt)

root = '/home/cc/Desktop/kaggle_dog_vs_cat/model/'
train_list = ['train_feature_{}.hd5f'.format(i) for i in opt.model]
val_list = ['val_feature_{}.hd5f'.format(i) for i in opt.model]
print(train_list)
dataset = {'train': h5Dataset(train_list), 'val': h5Dataset(val_list)}
print(dataset['train'].dataset.size())
print(dataset['val'].dataset.size())
datasize = {
    'train': dataset['train'].dataset.size(0),
    'val': dataset['val'].dataset.size(0)
}

batch_size = opt.batch_size
epoches = opt.epoch

dataloader = {
    'train':
    DataLoader(
        dataset['train'],
        batch_size=batch_size,
        shuffle=True,
        num_workers=opt.num_workers),
    'val':
    DataLoader(
        dataset['val'],
        batch_size=batch_size,
        shuffle=False,
        num_workers=opt.num_workers)
}

dimension = dataset['train'].dataset.size(1)
print(dimension)
mynet = classifier(dimension, opt.n_classes)
mynet.cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(mynet.parameters(), lr=1e-3)
# train
for epoch in range(epoches):
    print('Epoch {}/{}'.format(epoch + 1, epoches))
    print('*' * 10)
    print('Train')
    mynet.train()
    since = time.time()

    running_loss = 0.0
    running_acc = 0.0
    for i, data in enumerate(dataloader['train'], 1):
        feature, label = data
        feature = feature.cuda()
        label = label.cuda()

        # forward
        out = mynet(feature)
        loss = criterion(out, label)
        # backward
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * label.size(0)
        _, pred = torch.max(out, 1)
        num_correct = torch.sum(pred == label)
        running_acc += num_correct.item()
        if i % 50 == 0:
            print('Loss: {:.6f}, Acc: {:.6f}'.format(running_loss / (
                i * batch_size), running_acc / (i * batch_size)))

    running_loss /= datasize['train']
    running_acc /= datasize['train']
    elapsed_time = time.time() - since
    print('Loss: {:.6f}, Acc: {:.6f}, Time: {:.0f}s'.format(
        running_loss, running_acc, elapsed_time))
    print('Validation')
    with torch.no_grad():
        mynet.eval()
        num_correct = 0.0
        eval_loss = 0.0
        for data in dataloader['val']:
            feature, label = data
            feature = feature.cuda()
            label = label.cuda()
            # forward
            out = mynet(feature)
            loss = criterion(out, label)

            _, pred = torch.max(out, 1)
            correct = torch.sum(pred == label)
            num_correct += correct.item()
            eval_loss += loss.item() * label.size(0)

    print('Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / datasize['val'],
                                             num_correct / datasize['val']))
print('Finish Training!')

save_path = os.path.join(root, 'model_save')
if not os.path.exists(save_path):
    os.makedirs(save_path)

torch.save(mynet.state_dict(), os.path.join(save_path, 'feature_model.pth'))
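
For later inference the saved weights can be restored into a fresh classifier; a minimal sketch (dimension must match the concatenated feature size used during training):

mynet = classifier(dimension, opt.n_classes)
mynet.load_state_dict(torch.load(os.path.join(save_path, 'feature_model.pth')))
mynet.eval()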