PyTorch Deep Learning Models

1. Perceptron

import numpy as np
import matplotlib.pyplot as plt
import torch
import os

basedir = os.path.dirname(os.path.realpath(__file__))

# np.genfromtxt():Load data from a text file, with missing values handled as specified.
data = np.genfromtxt(basedir+'/data/perceptron_toydata.txt', delimiter='\t')
X, y = data[:, :2], data[:,2]
y = y.astype(int)  # np.int is deprecated; use the builtin int

# np.bincount():Count number of occurrences of each value in array of non-negative ints.
print("Class label counts:", np.bincount(y))
print("X.shape", X.shape)
print("y.shape", y.shape)

# Shuffling & train/test split
shuffle_idx = np.arange(y.shape[0])
# RandomState: a container for the Mersenne Twister pseudo-random number generator.
shuffle_rng = np.random.RandomState(123)
shuffle_rng.shuffle(shuffle_idx)
X, y = X[shuffle_idx], y[shuffle_idx]

# X and y were already shuffled above, so a plain slice gives a random split
X_train, X_test = X[:70], X[70:]
y_train, y_test = y[:70], y[70:]

mu, sigma = X_train.mean(axis=0), X_train.std(axis=0)
# Normalize (standardize with the training-set mean and std)
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma
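# Sanity check (a minimal sketch): after standardization the training features
# should have roughly zero mean and unit standard deviation.
print("train mean:", X_train.mean(axis=0))
print("train std :", X_train.std(axis=0))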

# plt.scatter(X_train[y_train==0, 0], X_train[y_train==0, 1], label="class 0", marker='o')
# plt.scatter(X_train[y_train==1, 0], X_train[y_train==1, 1], label="class 1", marker='s')
# plt.xlabel("feature 1")
# plt.xlabel("feature 2")
# plt.legend()
# plt.show()

# DEFINING THE PERCEPTRON MODEL
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def custom_where(cond, x_1, x_2):
    # element-wise select: x_1 where cond is True, x_2 otherwise
    return (cond.float() * x_1) + ((1. - cond.float()) * x_2)
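# Quick check (a minimal sketch; _cond is an illustrative name): the
# element-wise select above matches the built-in torch.where.
_cond = torch.tensor([True, False, True])
print(custom_where(_cond, torch.ones(3), torch.zeros(3)))  # tensor([1., 0., 1.])
print(torch.where(_cond, torch.ones(3), torch.zeros(3)))   # tensor([1., 0., 1.])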

class Perceptron():
    def __init__(self, num_features):
        self.num_features = num_features
        self.weights = torch.zeros(num_features, 1,
                                   dtype=torch.float32, device=device)
        self.bias = torch.zeros(1, dtype=torch.float32, device=device)
        self.predictions = []

    def forward(self, x):
        linear = torch.add(torch.mm(x, self.weights), self.bias)
        # equivalent to: linear > 0. ? 1 : 0
        prediction = custom_where(linear > 0., 1, 0)
        return prediction

    # the backward pass first calls forward to obtain the predictions
    def backward(self, x, y):
        predictions = self.forward(x)
        errors = y - predictions
        return errors

    def train(self, x, y, epochs):
        for e in range(epochs):
            # y.size() is torch.Size([n]); y.size()[0] is the number of training samples
            for i in range(y.size()[0]):
                # .view(1, self.num_features) acts like reshape:
                # weights and bias are matrices, so x[i] must become a 1 x num_features matrix
                errors = self.backward(x[i].view(1, self.num_features), y[i]).view(-1)
                self.weights += (errors*x[i]).view(self.num_features, 1)
                self.bias += errors

    def evaluate(self, x, y):
        score = 0
        for i in range(y.size()[0]):
            # reshape x[i] into a 1 x num_features row matrix
            prediction_y = self.forward(x[i].view(1, self.num_features))
            self.predictions.append(prediction_y)
            score += custom_where(prediction_y == y[i], 1, 0)
        accuracy = score*1.0 / y.size()[0]

        return accuracy

# TRAINING THE PERCEPTRON
ppn = Perceptron(num_features=2)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32, device=device)

ppn.train(X_train_tensor, y_train_tensor, epochs=5)

print("Model params: ")
print(" Weights: {}".format(ppn.weights))
print(" bias: {}".format(ppn.bias))

# EVALUATING THE MODEL
X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32, device=device)

test_acc = ppn.evaluate(X_test_tensor, y_test_tensor)
print("Test Set accuracy: {}".format(test_acc*100))

2. Fully Connected

  • FC layer: y = x·Wᵀ + b. Here x has shape [1, 4] and Wᵀ has shape [4, 2], so the output has shape [1, 2] (a manual check follows the code below).
import torch
import torch.nn as nn

# input tensor of shape 1 x 1 x 2 x 2
inputs = torch.tensor([[[[1., 2.], [3., 4.]]]])

# 4 input features, 2 outputs
fc = nn.Linear(4, 2)

weights = torch.tensor([[1.1, 1.2, 1.3, 1.4],[1.5, 1.6, 1.7, 1.8]])
bias = torch.tensor([1.9, 2.0])

fc.weight.data = weights
fc.bias.data = bias

# FC: WT*x + b ->  x[1,4] * WT[4,2] + b
print(torch.relu(fc(inputs.view(-1, 4))))
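# Manual check (a minimal sketch): flattening the input to [1, 4] and computing
# x @ W^T + b by hand should reproduce the nn.Linear output above.
x_flat = inputs.view(-1, 4)                   # [1, 4]
print(torch.mm(x_flat, weights.t()) + bias)   # matches fc(x_flat)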

3. Multilayer Perceptron

import time
import numpy as np
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

random_seed = 1
learning_rate = 1e-3
num_epochs = 1
batch_size = 64

# 28*28 = 784
num_features = 784
# first hidden layer: 784 -> 128
num_hidden_1 = 128
# second hidden layer: 128 -> 256
num_hidden_2 = 256
num_classes = 10

train_dataset = datasets.MNIST(root="data", train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root="data", train=False, transform=transforms.ToTensor(), download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=True)

for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break
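# Each batch arrives as [batch_size, 1, 28, 28]; the MLP below flattens it to
# [batch_size, 784] with .view(-1, 28*28) before the first linear layer.
# A minimal check on the batch fetched above:
print('Flattened batch dimensions:', images.view(-1, 28*28).shape)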

class MultilayerPerceptron(torch.nn.Module):
    def __init__(self, num_features, num_classes):
        super(MultilayerPerceptron, self).__init__()

        self.linear_1 = torch.nn.Linear(num_features, num_hidden_1)
        self.linear_1.weight.detach().normal_(0.0, 0.1)
        self.linear_1.bias.detach().zero_()

        self.linear_2 = torch.nn.Linear(num_hidden_1, num_hidden_2)
        self.linear_2.weight.detach().normal_(0.0, 0.1)
        self.linear_2.bias.detach().zero_()

        self.linear_out = torch.nn.Linear(num_hidden_2, num_classes)
        self.linear_out.weight.detach().normal_(0.0, 0.1)
        self.linear_out.bias.detach().zero_()

    def forward(self, x):
        out = F.relu(self.linear_1(x))
        out = F.relu(self.linear_2(out))
        # logits: tensor of shape [batch_size, num_classes]
        logits = self.linear_out(out)
        probas = F.log_softmax(logits, dim=1)
        return logits, probas

# set the PyTorch random seed for reproducibility
torch.manual_seed(random_seed)
model = MultilayerPerceptron(num_features=num_features, num_classes=num_classes)

model = model.to(device)

optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

def compute_accuracy(net, data_loader):
    net.eval()
    num_examples = 0
    correct_pred = 0
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.view(-1, 28*28).to(device)

            targets = targets.to(device)
            logits, probas = net(features)
            # dim = 1
            _, predicted_labels = torch.max(probas, 1)
            num_examples = num_examples +  targets.size(0)
            correct_pred += (predicted_labels == targets).sum()

        return correct_pred / num_examples * 100


start_time = time.time()
for epoch in range(num_epochs):
    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):
        features = features.view(-1, 28*28).to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()
        optimizer.step()

        if batch_idx % 50 == 0:
            print("Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f"
                  % (num_epochs, epoch+1, batch_idx, len(train_loader), cost))

    # with torch.no_grad():
    with torch.set_grad_enabled(False):
        print("Epoch: {:03d}/{:03d} training accuracy: {:.2f}%".format(
            epoch+1, num_epochs, compute_accuracy(model, train_loader)
        ))

    print("Time elapsed: {:.2f} min".format((time.time()- start_time) / 60))

print("Total Training Time: {:.2f}".format((time.time()- start_time) / 60))

4. Multilayer Perceptron with dropout

    def forward(self, x):
        out = F.relu(self.linear_1(x))
        # During training, dropout randomly zeroes some elements of the input
        # tensor with probability p, using samples from a Bernoulli distribution.
        # Put simply: during the forward pass, each activation is switched off
        # with probability p. Passing training=self.training (instead of a
        # hard-coded True) ties dropout to model.train() / model.eval().
        out = F.dropout(out, p=dropout_prob, training=self.training)
        out = F.relu(self.linear_2(out))
        out = F.dropout(out, p=dropout_prob, training=self.training)
        logits = self.linear_out(out)
        probas = F.log_softmax(logits, dim=1)
        return logits, probas
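F.dropout uses inverted dropout: at training time the surviving activations are rescaled by 1/(1-p), so inference needs no rescaling and dropout becomes the identity. A minimal standalone sketch (x here is just an illustrative tensor):

import torch
import torch.nn.functional as F

x = torch.ones(1, 10)
print(F.dropout(x, p=0.5, training=False))  # identity: all ones
print(F.dropout(x, p=0.5, training=True))   # about half zeros, survivors scaled to 2.0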

5. Graph Neural Network (GNN) with a Gaussian Filter

from scipy.spatial.distance import cdist
from torch.utils.data.dataset import Subset
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt

# Device
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Hyperparameters
RANDOM_SEED = 1
LEARNING_RATE = 0.05
NUM_EPOCHS = 2
BATCH_SIZE = 128
IMG_SIZE = 28

# Architecture
NUM_CLASSES = 10

train_indices = torch.arange(0, 59000)
valid_indices = torch.arange(59000, 60000)

custom_transform = transforms.Compose([transforms.ToTensor()])

train_and_valid = datasets.MNIST(root='data',train=True, transform=custom_transform,download=True)

test_dataset = datasets.MNIST(root='data',train=False,transform=custom_transform,download=True)

train_dataset = Subset(train_and_valid, train_indices)
valid_dataset = Subset(train_and_valid, valid_indices)

train_loader = DataLoader(dataset=train_dataset,batch_size=BATCH_SIZE,num_workers=4,shuffle=True)

valid_loader = DataLoader(dataset=valid_dataset,batch_size=BATCH_SIZE,num_workers=4,shuffle=False)

test_loader = DataLoader(dataset=test_dataset,batch_size=BATCH_SIZE,num_workers=4,shuffle=False)

def precompute_adj_matrix(img_size):
    # col and row are each img_size x img_size:
    # col = [[0 1 2 ... n], [0 1 2 ... n], ..., [0 1 2 ... n]]
    # row = [[0 0 0 ... 0], [1 1 1 ... 1], ..., [n n n ... n]]
    col, row = np.meshgrid(np.arange(img_size), np.arange(img_size))

    # Stack the grids into a 2D coordinate array of shape (N, 2), N = img_size**2,
    # and normalize to the range [0, 1].
    # (np.stack joins arrays along a new axis, e.g. stacking ten (3, 4) arrays
    #  on axis=2 gives shape (3, 4, 10).)
    # coord: 784 x 2
    coord = np.stack((col, row), axis=2).reshape(-1, 2) / img_size
    # compute pairwise distance matrix (N x N)

    # Find the Euclidean distances between four 2-D coordinates:
    #
    #     >>> from scipy.spatial import distance
    #     >>> coords = [(35.0456, -85.2672),
    #     ...           (35.1174, -89.9711),
    #     ...           (35.9728, -83.9422),
    #     ...           (36.1667, -86.7833)]
    #     >>> distance.cdist(coords, coords, 'euclidean')
    #     array([[ 0.    ,  4.7044,  1.6172,  1.8856],
    #            [ 4.7044,  0.    ,  6.0893,  3.3561],
    #            [ 1.6172,  6.0893,  0.    ,  2.8477],
    #            [ 1.8856,  3.3561,  2.8477,  0.    ]])

    # dist: 784*784
    dist = cdist(coord, coord, metric="euclidean")
    # Apply Gaussian filter
    sigma = 0.05*np.pi
    A = np.exp(- dist / sigma**2)
    A[A < 0.01] = 0
    # A:784*784
    A = torch.from_numpy(A).float()

    # Symmetric normalization: A_hat = D^(-1/2) * A * D^(-1/2)
    # D: node degrees, shape (N,) = (784,)
    D = A.sum(1)  # nodes degree (N,)
    D_hat = (D + 1e-5) ** (-0.5)
    A_hat = D_hat.view(-1, 1) * A * D_hat.view(1, -1)  # N,N

    return A_hat
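# Quick shape check (a minimal sketch): for 28x28 images the normalized
# adjacency matrix is a dense [784, 784] tensor.
print(precompute_adj_matrix(IMG_SIZE).shape)  # torch.Size([784, 784])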

class GraphNet(nn.Module):
    def __init__(self, img_size=28, num_classes=10):
        super(GraphNet, self).__init__()

        n_rows = img_size**2
        self.fc = nn.Linear(n_rows, num_classes, bias=False)
        A = precompute_adj_matrix(img_size)
        self.register_buffer('A', A)

    def forward(self, x):
        B = x.size(0)
        # Reshape adjacency matrix: [N, N] -> [1, N, N] -> [B, N, N], with N = H*W
        A_tensor = self.A.unsqueeze(0).expand(B, -1, -1)

        # Reshape inputs: [B, C, H, W] -> [B, H*W, 1]
        x_reshape = x.view(B, -1, 1)
        # bmm = batch matrix product to sum the neighbor features
        # Input: [B, N, N] * [B, N, 1]
        # Output: [B, N, 1], flattened to [B, N] below
        avg_neighbor_features = (torch.bmm(A_tensor, x_reshape).view(B, -1))

        logits = self.fc(avg_neighbor_features)
        probas = F.softmax(logits, dim=1)
        return logits, probas

torch.manual_seed(RANDOM_SEED)
model = GraphNet(img_size=IMG_SIZE, num_classes=NUM_CLASSES)
model = model.to(DEVICE)

optimizer = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)


def compute_acc(model, data_loader, device):
    correct_pred, num_examples = 0, 0
    for features, targets in data_loader:
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float() / num_examples * 100


start_time = time.time()

cost_list = []
train_acc_list, valid_acc_list = [], []

for epoch in range(NUM_EPOCHS):

    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):

        features = features.to(DEVICE)
        targets = targets.to(DEVICE)

        ### FORWARD AND BACK PROP
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()

        cost.backward()

        ### UPDATE MODEL PARAMETERS
        optimizer.step()

        #################################################
        ### CODE ONLY FOR LOGGING BEYOND THIS POINT
        ################################################
        cost_list.append(cost.item())
        if not batch_idx % 150:
            print(f'Epoch: {epoch + 1:03d}/{NUM_EPOCHS:03d} | '
                  f'Batch {batch_idx:03d}/{len(train_loader):03d} |'
                  f' Cost: {cost:.4f}')

    model.eval()
    with torch.set_grad_enabled(False):  # save memory during inference

        train_acc = compute_acc(model, train_loader, device=DEVICE)
        valid_acc = compute_acc(model, valid_loader, device=DEVICE)

        print(f'Epoch: {epoch + 1:03d}/{NUM_EPOCHS:03d}\n'
              f'Train ACC: {train_acc:.2f} | Validation ACC: {valid_acc:.2f}')

        train_acc_list.append(train_acc)
        valid_acc_list.append(valid_acc)

    elapsed = (time.time() - start_time) / 60
    print(f'Time elapsed: {elapsed:.2f} min')

elapsed = (time.time() - start_time) / 60
print(f'Total Training Time: {elapsed:.2f} min')

plt.plot(cost_list, label='Minibatch cost')
plt.plot(np.convolve(cost_list,
                     np.ones(200,)/200, mode='valid'),
         label='Running average')

plt.ylabel('Cross Entropy')
plt.xlabel('Iteration')
plt.legend()
plt.show()

6. Multilayer Perceptron from Scratch (manual backpropagation)

  • The mathematical linear transform W·x is implemented in code as x·Wᵀ.
import time
import numpy as np
from torchvision import datasets
from torchvision import transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch
import matplotlib.pyplot as plt
import pandas as pd


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

RANDOM_SEED = 1
BATCH_SIZE = 100
NUM_EPOCHS = 1

num_classes = 10

train_dataset = datasets.MNIST(root="data", train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root="data", train=False, transform=transforms.ToTensor(), download=True)

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=True)

for images, labels in train_loader:
    print('Image batch dimensions:', images.shape)
    print('Image label dimensions:', labels.shape)
    break

class MultilayerPerceptron(torch.nn.Module):
    def __init__(self, num_features, num_hidden, num_classes):
        super(MultilayerPerceptron, self).__init__()
        self.num_classes = num_classes
        self.weight_1 = torch.zeros(num_hidden, num_features, dtype=torch.float).normal_(0.0, 0.1)
        self.bias_1 = torch.zeros(num_hidden, dtype=torch.float)
        self.weight_o = torch.zeros(self.num_classes, num_hidden,
                                    dtype=torch.float).normal_(0.0, 0.1)
        self.bias_o = torch.zeros(self.num_classes, dtype=torch.float)

    def forward(self, x):
        # input dim:  [n_examples, n_features] dot [n_hidden, n_features].T
        # output dim: [n_examples, n_hidden]
        # transpose with tensor.t()
        z_1 = torch.mm(x, self.weight_1.t()) + self.bias_1
        a_1 = torch.sigmoid(z_1)
        # input dim:   [ n_examples, n_hidden] dot [num_classes, num_hidden].T
        # output dim: [n_examples, num_classes]
        z_2 = torch.mm(a_1, self.weight_o.t()) + self.bias_o
        a_2 = torch.sigmoid(z_2)
        return a_1, a_2

    def backward(self, x, a_1, a_2, y):
        # onehot encoding
        y_onehot = torch.FloatTensor(y.size(0), self.num_classes)
        y_onehot.zero_()
        y_onehot.scatter_(1, y.view(-1, 1).long(), 1)

        # Part 1: dLoss/dOutWeights
        ## = dLoss/dOutAct * dOutAct/dOutNet * dOutNet/dOutWeight
        ## where DeltaOut = dLoss/dOutAct * dOutAct/dOutNet
        ## for convenient re-use

        # input/output dim: [n_examples, n_classes]
        dloss_da2 = 2. * (a_2 - y_onehot) / y.size(0)  # squared-error gradient, averaged over the batch
        # input/output dim: [n_examples, n_classes]
        da2_dz2 = a_2 * (1. - a_2)   # sigmoid derivative S'(x) = S(x)(1-S(x))
        # output dim: [n_examples, n_classes]
        delta_out = dloss_da2 * da2_dz2  # dL_dz2

        # gradient for output weights
        dz2__dw_out = a_1
        # output dim: [n_classlabels, n_hidden]
        dloss_dw_out = torch.mm(delta_out.t(), dz2__dw_out)
        dloss_db_out = torch.sum(delta_out, dim=0)  # dL_dz2

        # Part 2: dLoss/dHiddenWeights
        # [n_classes, n_hidden]
        dz2_da1 = self.weight_o
        # output dim: [n_examples, n_hidden]
        dloss_da1 = torch.mm(delta_out, dz2_da1)   # dL_dz2 *  weight_o
        # [n_examples, n_hidden]
        da1__dz1 = a_1 * (1. - a_1)   # sigmoid derivative
        # [n_examples, n_features]
        dz1_dw1 = x
        # output dim: [n_hidden, n_features]
        dloss_dw1 = torch.mm((dloss_da1 * da1__dz1).t(), dz1_dw1)
        dloss_db1 = torch.sum((dloss_da1 * da1__dz1), dim=0)

        return dloss_dw_out, dloss_db_out, dloss_dw1, dloss_db1

def to_onehot(y, num_classes):
    # y holds the integer class targets
    y_onehot = torch.FloatTensor(y.size(0), num_classes)
    y_onehot.zero_()
    # scatter_(dim, index, src): write 1s at the target class indices
    y_onehot.scatter_(1, y.view(-1, 1).long(), 1)
    return y_onehot
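# Example (a minimal sketch): scatter_ writes 1s at the target class indices,
# so labels [2, 0] become two one-hot rows of length num_classes.
print(to_onehot(torch.tensor([2, 0]), num_classes))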

def loss_func(targets_onehot, probas_onehot):
    return torch.mean(torch.mean((targets_onehot - probas_onehot)**2, dim=0))

def compute_mse(net, data_loader):
    curr_mse, num_examples = torch.zeros(net.num_classes).float(), 0
    with torch.no_grad():
        for features, targets in data_loader:
            features = features.view(-1, 28*28)
            logits, probas = net.forward(features)
            y_onehot = to_onehot(targets, net.num_classes)
            loss = torch.sum((y_onehot - probas)**2, dim=0)
            num_examples += targets.size(0)
            curr_mse += loss

        curr_mse = torch.mean(curr_mse / num_examples, dim = 0)
        return curr_mse

def train(model, data_loader, num_epochs, learning_rate=0.1):
    minibatch_cost = []
    epoch_cost = []
    for e in range(num_epochs):
        for batch_idx, (features, targets) in enumerate(data_loader):
            features = features.view(-1, 28*28)
            #### Compute outputs ####
            a_1, a_2 = model.forward(features)

            #### Compute gradients ####
            dloss__dw_out, dloss__db_out, dloss_dw1, dloss_db1 = \
                model.backward(features, a_1, a_2, targets)

            #### Update weights ####
            model.weight_1 -= learning_rate * dloss_dw1
            model.bias_1 -= learning_rate * dloss_db1
            model.weight_o -= learning_rate * dloss__dw_out
            model.bias_o -= learning_rate * dloss__db_out

            #### Logging ####
            curr_cost = loss_func(to_onehot(targets, model.num_classes), a_2)
            minibatch_cost.append(curr_cost)
            if not batch_idx % 50:
                print('Epoch: %03d/%03d | Batch %03d/%03d | Cost: %.4f'
                      % (e + 1, num_epochs, batch_idx,
                         len(data_loader), curr_cost))

            #### Logging ####
        curr_cost = compute_mse(model, data_loader)
        epoch_cost.append(curr_cost)
        print('Epoch: %03d/%03d |' % (e + 1, num_epochs), end="")
        print(' Train MSE: %.5f' % curr_cost)

    return minibatch_cost, epoch_cost


torch.manual_seed(RANDOM_SEED)
model = MultilayerPerceptron(num_features=28*28,
                             num_hidden=50,
                             num_classes=10)

minibatch_cost, epoch_cost = train(model,
                                   train_loader,
                                   num_epochs=NUM_EPOCHS,
                                   learning_rate=0.1)
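The minibatch cost collected above can be plotted the same way as in the GNN section (a minimal sketch; the list holds 0-dim tensors, hence .item()):

plt.plot([c.item() for c in minibatch_cost], label='Minibatch cost')
plt.ylabel('Mean Squared Error')
plt.xlabel('Minibatch')
plt.legend()
plt.show()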



