图像算法面经2

最新推荐文章于 2024-05-08 21:00:58 发布

WYXHAHAHA123

最新推荐文章于 2024-05-08 21:00:58 发布

阅读量565

点赞数

分类专栏： deep learning

本文链接：https://blog.csdn.net/WYXHAHAHA123/article/details/100677182

版权

deep learning 专栏收录该内容

20 篇文章 0 订阅

订阅专栏

CNN中pooling层的作用：对特征图进行下采样，减少后续层的计算量；增大后续层特征图中每个像素点的感受野；并在一定程度上提供对输入小幅平移、旋转和尺度变化的不变性（其中以局部平移不变性最为明显）。

logistic regression（逻辑回归）的前向传播和反向传播公式推导：

# 机器学习中通常需要对分类的预测结果进行评估
# 假设所使用的是二分类，评价指标通常包括 precision recall 和 F1 score
# P=TP/(TP+FP)  查准率
# R=TP/(TP+FN)  查全率
# F1_score=2*P*R/(P+R)

# 题目要求：能够计算出在各个阈值下的F1 score，并能够找到使得F1 score最大的阈值以及对应的F1 score

'''
首先使用sklearn中的库计算F1_score
'''
import numpy as np
from sklearn.metrics import f1_score

'''
用代码实现计算在某个阈值下的F1 score
'''
def f1_version2(y_true, y_pred, threshold):
    """Compute the binary F1 score obtained by thresholding ``y_pred``.

    A sample is predicted positive when its score is ``>= threshold`` and
    is a true positive when its label equals 1; every other label value is
    treated as negative.

    :param y_true: array-like of ground-truth labels (1 marks a positive).
    :param y_pred: array-like of scores, same length as ``y_true``.
    :param threshold: decision threshold applied to ``y_pred``.
    :return: the F1 score as a float; 0.0 whenever precision + recall is
        zero (e.g. nothing is predicted positive, or the input is empty).
    :raises ValueError: if ``y_true`` and ``y_pred`` differ in length.
    """
    # np.asarray lets plain lists/tuples through; the original required
    # numpy arrays because ``list >= float`` is a TypeError.
    labels = np.asarray(y_true).ravel() == 1
    predicted = np.asarray(y_pred).ravel() >= threshold
    if labels.shape != predicted.shape:
        raise ValueError("y_true and y_pred must have the same length")

    # Confusion-matrix counts via boolean masks instead of a per-sample loop.
    tp = int(np.count_nonzero(predicted & labels))
    fp = int(np.count_nonzero(predicted & ~labels))
    fn = int(np.count_nonzero(~predicted & labels))

    # Empty denominators are defined as 0 rather than raising.
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0

    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)

def f1_enhance(y_true, y_pred, threshold):
    """Compute the binary F1 score at ``threshold`` without a per-sample loop.

    The scores are sorted in ascending order, so every sample predicted
    positive (score ``>= threshold``) lies in one contiguous tail of the
    sorted arrays. TP, FP and FN are then read off with slice reductions.

    :param y_true: array-like of ground-truth labels (1 marks a positive).
    :param y_pred: array-like of scores, same length as ``y_true``.
    :param threshold: decision threshold applied to ``y_pred``.
    :return: the F1 score as a float; 0.0 whenever precision + recall is
        zero, including when no score reaches ``threshold`` or the input
        is empty.
    :raises ValueError: if ``y_true`` and ``y_pred`` differ in length.
    """
    labels = np.asarray(y_true).ravel() == 1
    scores = np.asarray(y_pred).ravel()
    if labels.shape != scores.shape:
        raise ValueError("y_true and y_pred must have the same length")

    # np.argsort returns the indices that sort the scores in ascending order.
    order = np.argsort(scores)
    labels = labels[order]
    scores = scores[order]

    # Index of the first sorted score that is >= threshold. When no score
    # qualifies this is len(scores), which yields empty "positive" slices;
    # the previous np.where(...)[0][0] lookup raised IndexError there.
    split = int(np.searchsorted(scores, threshold, side="left"))

    tp = int(np.count_nonzero(labels[split:]))
    fp = (scores.shape[0] - split) - tp
    fn = int(np.count_nonzero(labels[:split]))

    # Empty denominators are defined as 0 rather than raising.
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0

    if precision + recall == 0:
        return 0.0
    return 2 * precision * recall / (precision + recall)

if __name__=="__main__":
    # Toy binary-classification data: ground-truth labels and model scores.
    y_true = np.asarray([1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0])
    y_pred = np.asarray([0.6, 0.5, 0.7, 0.2, 0.1, 0.3, 0.4, 0.12, 0.64, 0.8, 0.9])

    # Optional cross-check against scikit-learn at a fixed threshold:
    # f1_version1 = f1_score(y_true, y_pred >= 0.5)
    # print("sklearn result", f1_version1)

    # Every observed score is a candidate threshold, tried from the highest
    # score down to the lowest.
    thresh_array = np.sort(y_pred)[::-1]

    # Track the best F1 seen so far and the threshold that produced it.
    # The comparison is strict, so on ties the first (highest) threshold wins.
    max_thresh = 0
    max_f1 = 0
    for candidate in thresh_array:
        candidate_f1 = f1_enhance(y_true, y_pred, candidate)
        if candidate_f1 > max_f1:
            max_f1, max_thresh = candidate_f1, candidate

    print(max_thresh, max_f1)
    # Expected output: 0.2 0.6153846153846153

    # Single-threshold spot checks of both implementations:
    # print(f1_version2(y_true, y_pred, threshold=0.5))
    # print(f1_enhance(y_true, y_pred, threshold=0.5))

import torch
import torch.nn as nn
import torch.nn.functional as F

'''
1.假设对特征图进行self attention(类似于NLP中的attention)



2.SE network中的SE block结构


'''

if __name__=="__main__":
    # 1. Self-attention demo on random feature maps.
    query=torch.rand(4,3,7,7)  # batch size 4, 3 channels, 7x7 feature map
    key=torch.rand(4,3,7,7)
    value=torch.rand(4,3,7,7)

    # Flatten the 7x7 spatial grid: (4, 3, 7, 7) -> (4, 3, 49).
    query=query.view(4,3,-1)
    key=key.view(4,3,-1)
    value=value.view(4,3,-1)

    # (4, 3, 49) x (4, 49, 3) -> (4, 3, 3): a channel-by-channel affinity.
    attn=torch.bmm(query,key.permute(0,2,1))

    # NOTE(review): the original note here claimed shape (4, 49, 49), but with
    # this operand order the printed shape is (4, 3, 3). A position-by-position
    # (49 x 49) map would need the transpose on the other operand -- confirm
    # which form of attention was intended.
    print('attn',attn.shape)

    # Normalise each row of the affinity matrix over its last dimension.
    attn=F.softmax(attn,dim=2)

    # (4, 3, 3) x (4, 3, 49) -> (4, 3, 49).
    output=torch.bmm(attn,value)

    print(output.shape)

    # 2. SE block demo: global average pooling over the spatial dimensions
    # of an [N, C, H, W] feature map (the string below is the original note).
    '''
    对于当前的输入特征图[N,C,H,W],在spatial dimension上进行global average pooling
    '''
    input_feat=torch.rand(4,3,7,7)

    # A 7x7 kernel on a 7x7 map reduces (4, 3, 7, 7) to (4, 3, 1, 1).
    input_feat=F.avg_pool2d(input_feat,(7,7))