A Fast Transformer Written in C

This code demonstrates the core pieces of a Transformer model implemented in C: multi-head attention, a position-wise feed-forward network, and residual connections. The input sequence is combined with a sinusoidal positional encoding and then passed through a stack of encoder-style blocks (decoder parameters are allocated but not used in this simplified forward pass), built from matrix multiplications, element-wise additions, and activation functions. For the demo configuration in main (seq_length = 5, embedding_size = 8), each block maps a 5x8 activation matrix to a 5x8 matrix.


#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define MAX_LENGTH 100

typedef struct {
    int embedding_size;
    int num_heads;
    int seq_length;
    int num_layers;
    float* embeddings;
    float* encoder_weights;
    float* encoder_biases;
    float* decoder_weights;
    float* decoder_biases;
    float* output_weights;
    float* output_biases;
} Transformer;
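/* All parameters are stored in flat float buffers; each layer's
   sections are located by pointer offsets (see create_transformer
   and transformer_block). */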
/* Index of element (i, j) in a row-major d1 x d2 matrix, plus a flat
   offset k. Only the column count d2 affects the stride; d1 is kept
   for the caller's readability. */
int get_index(int i, int j, int k, int d1, int d2) {
    (void)d1;
    return i * d2 + j + k;
}

float sigmoid(float x) {
    return 1 / (1 + exp(-x));
}
/* Softmax over d values, computed in place. */
void softmax(float* arr, int d) {
    float max_val = arr[0];
    for (int i = 1; i < d; i++) {
        if (arr[i] > max_val) {
            max_val = arr[i];
        }
    }
    float sum = 0;
    for (int i = 0; i < d; i++) {
        sum += exp(arr[i] - max_val);
    }
    for (int i = 0; i < d; i++) {
        arr[i] = exp(arr[i] - max_val) / sum;
    }
}
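/* Example: the softmax above maps {1, 2, 3} to roughly {0.09, 0.24, 0.67};
   subtracting the row maximum first keeps exp() from overflowing. */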
/* Multiply a [d1 x d2] matrix by a [d2 x d3] matrix; returns a new
   heap-allocated [d1 x d3] result. */
float* matmul(float* a, float* b, int d1, int d2, int d3) {
    float* result = malloc(d1 * d3 * sizeof(float));
    for (int i = 0; i < d1; i++) {
        for (int j = 0; j < d3; j++) {
            float sum = 0;
            for (int k = 0; k < d2; k++) {
                sum += a[get_index(i, k, 0, d1, d2)] * b[get_index(k, j, 0, d2, d3)];
            }
            result[get_index(i, j, 0, d1, d3)] = sum;
        }
    }
    return result;
}
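/* Note: matmul above is the textbook O(d1 * d2 * d3) triple loop with
   no blocking, SIMD, or threading, so it dominates the runtime. */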
float* add(float* a, float* b, int d) {
    float* result = malloc(d * sizeof(float));
    for (int i = 0; i < d; i++) {
        result[i] = a[i] + b[i];
    }
    return result;
}
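/* Broadcast-add a length-cols bias vector to every row of a row-major
   rows x cols matrix, in place. */
void add_bias(float* m, float* bias, int rows, int cols) {
    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            m[i * cols + j] += bias[j];
        }
    }
}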
float* multiply(float* a, float b, int d) {
    float* result = malloc(d * sizeof(float));
    for (int i = 0; i < d; i++) {
        result[i] = a[i] * b;
    }
    return result;
}
/* Sinusoidal positional encoding:
   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
   Note the float casts: integer division here would collapse every
   frequency to 10000^0 = 1. */
float* positional_encoding(int length, int embedding_size) {
    float* result = malloc(length * embedding_size * sizeof(float));
    for (int i = 0; i < length; i++) {
        for (int j = 0; j < embedding_size; j++) {
            if (j % 2 == 0) {
                result[get_index(i, j, 0, length, embedding_size)] =
                    sin(i / pow(10000, (float) j / embedding_size));
            } else {
                result[get_index(i, j, 0, length, embedding_size)] =
                    cos(i / pow(10000, (float) (j - 1) / embedding_size));
            }
        }
    }
    return result;
}
/* One encoder-style block: multi-head self-attention, add & "norm",
   feed-forward, add & "norm", then an output projection. The decoder
   parameters are accepted but unused in this simplified forward pass.
   Each weight section reserves num_heads * embedding_size^2 floats;
   this toy model uses the first embedding_size^2 of each section as a
   single projection matrix. */
float* transformer_block(float* input, int seq_length, float* encoder_weights, float* encoder_biases, float* decoder_weights, float* decoder_biases, float* output_weights, float* output_biases, int num_heads, int embedding_size) {
    (void)decoder_weights;
    (void)decoder_biases;

    // Multi-Head Attention: project the input to queries, keys, values.
    float* q_weights = encoder_weights;
    float* k_weights = encoder_weights + num_heads * embedding_size * embedding_size;
    float* v_weights = encoder_weights + 2 * num_heads * embedding_size * embedding_size;
    float* q_biases = encoder_biases;
    float* k_biases = encoder_biases + num_heads * embedding_size;
    float* v_biases = encoder_biases + 2 * num_heads * embedding_size;
    float* q = matmul(input, q_weights, seq_length, embedding_size, embedding_size);
    add_bias(q, q_biases, seq_length, embedding_size);
    float* k = matmul(input, k_weights, seq_length, embedding_size, embedding_size);
    add_bias(k, k_biases, seq_length, embedding_size);
    float* v = matmul(input, v_weights, seq_length, embedding_size, embedding_size);
    add_bias(v, v_biases, seq_length, embedding_size);

    // Scaled dot-product attention scores. Each row of q and k is split
    // into num_heads slices of head_dim values (embedding_size must be a
    // multiple of num_heads), and the per-head scores are summed.
    int head_dim = embedding_size / num_heads;
    float* attention_scores = malloc(seq_length * seq_length * sizeof(float));
    for (int i = 0; i < seq_length; i++) {
        for (int j = 0; j < seq_length; j++) {
            float score = 0;
            for (int h = 0; h < num_heads; h++) {
                float* q_h = q + i * embedding_size + h * head_dim;
                float* k_h = k + j * embedding_size + h * head_dim;
                float dot_product = 0;
                for (int d = 0; d < head_dim; d++) {
                    dot_product += q_h[d] * k_h[d];
                }
                score += dot_product / sqrt((float) head_dim);
            }
            attention_scores[get_index(i, j, 0, seq_length, seq_length)] = score;
        }
    }
    for (int i = 0; i < seq_length; i++) {
        softmax(attention_scores + i * seq_length, seq_length);
    }
    float* attention_output = matmul(attention_scores, v, seq_length, seq_length, embedding_size);

    // Add & Norm (a learned rescaling stands in for LayerNorm here).
    float* attention_norm_weights = encoder_weights + 3 * num_heads * embedding_size * embedding_size;
    float* attention_norm_biases = encoder_biases + 3 * num_heads * embedding_size;
    float* attention_norm_input = add(input, attention_output, seq_length * embedding_size);
    float* attention_scaled = multiply(attention_norm_input, sqrt(1.0 / embedding_size), seq_length * embedding_size);
    float* attention_norm_output = matmul(attention_scaled, attention_norm_weights, seq_length, embedding_size, embedding_size);
    add_bias(attention_norm_output, attention_norm_biases, seq_length, embedding_size);

    // Feed Forward: two projections with a ReLU in between.
    float* ff_weights_1 = encoder_weights + 4 * num_heads * embedding_size * embedding_size;
    float* ff_biases_1 = encoder_biases + 4 * num_heads * embedding_size;
    float* ff_weights_2 = encoder_weights + 5 * num_heads * embedding_size * embedding_size;
    float* ff_biases_2 = encoder_biases + 5 * num_heads * embedding_size;
    float* ff_hidden = matmul(attention_norm_output, ff_weights_1, seq_length, embedding_size, embedding_size);
    add_bias(ff_hidden, ff_biases_1, seq_length, embedding_size);
    for (int i = 0; i < seq_length * embedding_size; i++) {
        ff_hidden[i] = fmax(ff_hidden[i], 0);  /* ReLU */
    }
    float* ff_output = matmul(ff_hidden, ff_weights_2, seq_length, embedding_size, embedding_size);
    add_bias(ff_output, ff_biases_2, seq_length, embedding_size);

    // Add & Norm
    float* ff_norm_weights = encoder_weights + 6 * num_heads * embedding_size * embedding_size;
    float* ff_norm_biases = encoder_biases + 6 * num_heads * embedding_size;
    float* ff_norm_input = add(attention_norm_output, ff_output, seq_length * embedding_size);
    float* ff_scaled = multiply(ff_norm_input, sqrt(1.0 / embedding_size), seq_length * embedding_size);
    float* ff_norm_output = matmul(ff_scaled, ff_norm_weights, seq_length, embedding_size, embedding_size);
    add_bias(ff_norm_output, ff_norm_biases, seq_length, embedding_size);

    // Output Projection
    float* output = matmul(ff_norm_output, output_weights, seq_length, embedding_size, embedding_size);
    add_bias(output, output_biases, seq_length, embedding_size);

    free(q); free(k); free(v);
    free(attention_scores); free(attention_output);
    free(attention_norm_input); free(attention_scaled); free(attention_norm_output);
    free(ff_hidden); free(ff_output);
    free(ff_norm_input); free(ff_scaled); free(ff_norm_output);
    return output;
}
Transformer* create_transformer(int embedding_size, int num_heads, int seq_length, int num_layers) {
    Transformer* transformer = malloc(sizeof(Transformer));
    transformer->embedding_size = embedding_size;
    transformer->num_heads = num_heads;
    transformer->seq_length = seq_length;
    transformer->num_layers = num_layers;

    transformer->embeddings = positional_encoding(seq_length, embedding_size);
    // Each layer reserves 7 weight sections (q, k, v, attention norm,
    // ff1, ff2, ff norm) and 7 matching bias sections.
    transformer->encoder_weights = malloc(num_layers * 7 * num_heads * embedding_size * embedding_size * sizeof(float));
    transformer->encoder_biases = malloc(num_layers * 7 * num_heads * embedding_size * sizeof(float));
    transformer->decoder_weights = malloc(num_layers * 7 * num_heads * embedding_size * embedding_size * sizeof(float));
    transformer->decoder_biases = malloc(num_layers * 7 * num_heads * embedding_size * sizeof(float));
    // The output projection maps embedding_size to embedding_size, so
    // its weights and biases are sized accordingly.
    transformer->output_weights = malloc(embedding_size * embedding_size * sizeof(float));
    transformer->output_biases = malloc(embedding_size * sizeof(float));

    // Random initialization in [0, 1].
    for (int k = 0; k < num_layers * 7 * num_heads * embedding_size * embedding_size; k++) {
        transformer->encoder_weights[k] = (float) rand() / RAND_MAX;
        transformer->decoder_weights[k] = (float) rand() / RAND_MAX;
    }
    for (int k = 0; k < num_layers * 7 * num_heads * embedding_size; k++) {
        transformer->encoder_biases[k] = (float) rand() / RAND_MAX;
        transformer->decoder_biases[k] = (float) rand() / RAND_MAX;
    }
    for (int k = 0; k < embedding_size * embedding_size; k++) {
        transformer->output_weights[k] = (float) rand() / RAND_MAX;
    }
    for (int k = 0; k < embedding_size; k++) {
        transformer->output_biases[k] = (float) rand() / RAND_MAX;
    }
    return transformer;
}
float* transformer_forward(Transformer* transformer, float* input) {
    float* output = input;
    for (int i = 0; i < transformer->num_layers; i++) {
        float* encoder_weights = transformer->encoder_weights + i * 7 * transformer->num_heads * transformer->embedding_size * transformer->embedding_size;
        float* encoder_biases = transformer->encoder_biases + i * 7 * transformer->num_heads * transformer->embedding_size;
        float* decoder_weights = transformer->decoder_weights + i * 7 * transformer->num_heads * transformer->embedding_size * transformer->embedding_size;
        float* decoder_biases = transformer->decoder_biases + i * 7 * transformer->num_heads * transformer->embedding_size;
        float* next = transformer_block(output, transformer->seq_length, encoder_weights, encoder_biases, decoder_weights, decoder_biases, transformer->output_weights, transformer->output_biases, transformer->num_heads, transformer->embedding_size);
        if (output != input) {
            free(output);  /* free the previous layer's activations */
        }
        output = next;
    }
    return output;
}
int main() {
    int embedding_size = 8;
    int num_heads = 4;
    int seq_length = 5;
    int num_layers = 2;

    Transformer* transformer = create_transformer(embedding_size, num_heads, seq_length, num_layers);

    float* input = malloc(seq_length * embedding_size * sizeof(float));
    for (int i = 0; i < seq_length * embedding_size; i++) {
        input[i] = (float) rand() / RAND_MAX;
    }

    // Apply the sinusoidal positional encoding before the first block.
    float* encoded = add(input, transformer->embeddings, seq_length * embedding_size);
    float* output = transformer_forward(transformer, encoded);

    for (int i = 0; i < seq_length; i++) {
        printf("Output %d: ", i);
        for (int j = 0; j < embedding_size; j++) {
            printf("%.2f ", output[get_index(i, j, 0, seq_length, embedding_size)]);
        }
        printf("\n");
    }

    free(input);
    free(encoded);
    free(output);
    return 0;
}
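To build and run the demo, link the math library. With GCC or Clang the usual invocation is:

gcc -O2 -o transformer transformer.c -lm
./transformer

The program prints one row of eight values per sequence position. The weights are random and the RNG is unseeded, so the exact numbers are arbitrary; the demo only exercises the forward pass, and since the loops are naive, any speed advantage comes from C's low overhead rather than from optimized kernels.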


 
