代码目录
- InitMatrix.c
#include "InitMatrix.h"
/* Approximation of pi, stored as float; used as a coefficient when generating matrix entries. */
const float PI = 3.1415926;
/* Approximation of Euler's number e, stored as float; used as a coefficient when generating matrix entries. */
const float E = 2.7182818;
/*
 * Populate a row-major m x p matrix: the element at row `row`, column `col`
 * receives row * PI + col * E.
 *
 * matrix  destination buffer holding at least m * p floats
 * m       number of rows
 * p       number of columns
 */
void InitMatrixA(float * matrix, int m, int p)
{
    /* Elements are visited in storage order, so a running pointer replaces
     * the explicit row * p + col index. */
    float *cell = matrix;
    int row = 0;

    while (row < m) {
        int col = 0;

        while (col < p) {
            *cell = row * PI + col * E;
            ++cell;
            ++col;
        }
        ++row;
    }
}
/*
 * Populate a row-major p x n matrix: the element at row `row`, column `col`
 * receives 2.0 * row * E - col * PI.
 *
 * matrix  destination buffer holding at least p * n floats
 * p       number of rows
 * n       number of columns
 */
void InitMatrixB(float * matrix, int p, int n)
{
    /* Elements are visited in storage order, so a running pointer replaces
     * the explicit row * n + col index. */
    float *cell = matrix;
    int row = 0;

    while (row < p) {
        int col = 0;

        while (col < n) {
            *cell = 2.0 * row * E - col * PI;
            ++cell;
            ++col;
        }
        ++row;
    }
}
- InitMatrix.h
/* Fills the row-major m x p buffer `matrix`; entry (i, j) is set to i * PI + j * E. */
void InitMatrixA(float * matrix, int m, int p);
/* Fills the row-major p x n buffer `matrix`; entry (i, j) is set to 2.0 * i * E - j * PI. */
void InitMatrixB(float * matrix, int p, int n);
- main.c
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <sys/time.h>
#include "InitMatrix.h"
#include "MatrixMultiply.h"
#include "PrintMatrix.h"
#define M 512
#define P 512
#define N 512
/*
 * Benchmark driver: allocates an M x P matrix A, a P x N matrix B and an
 * M x N matrix C, fills A and B, times MatrixMultiply with gettimeofday,
 * writes C out through PrintMatrix and prints the elapsed wall-clock time.
 *
 * Returns 0 on success, EXIT_FAILURE if any matrix cannot be allocated.
 */
int main(int argc, char * argv [])
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    /* Do the element-count arithmetic in size_t so large dimensions cannot
     * overflow int before the multiplication by the element size. */
    const size_t countA = (size_t)M * P;
    const size_t countB = (size_t)P * N;
    const size_t countC = (size_t)M * N;

    float *MatrixA = malloc(countA * sizeof *MatrixA);
    float *MatrixB = malloc(countB * sizeof *MatrixB);
    /* Zero-initialised so the product is well defined even when the multiply
     * routine accumulates into its output instead of overwriting it. */
    float *MatrixC = calloc(countC, sizeof *MatrixC);

    if (MatrixA == NULL || MatrixB == NULL || MatrixC == NULL) {
        fprintf(stderr, "Failed to allocate matrices\n");
        free(MatrixA);
        free(MatrixB);
        free(MatrixC);
        return EXIT_FAILURE;
    }

    struct timeval start, end;

    InitMatrixA(MatrixA, M, P);
    InitMatrixB(MatrixB, P, N);

    /* Only the multiplication itself is timed. */
    gettimeofday(&start, NULL);
    MatrixMultiply(MatrixA, MatrixB, MatrixC, M, P, N);
    gettimeofday(&end, NULL);

    /* Elapsed time in microseconds. */
    long totaltime = 1000000 * (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec);

    PrintMatrix(MatrixC, M, N);
    printf("Total used time is %lf s\n", (double)totaltime / 1000000);

    free(MatrixA);
    free(MatrixB);
    free(MatrixC);
    return 0;
}
- Makefile
# Builds the gemm executable from every .c file in this directory.
# CXX holds the C compiler command; the variable name is kept so existing
# invocations such as `make CXX=...` keep working.
CXX = gcc
TARGET = gemm
SRC = $(wildcard *.c)
OBJ = $(patsubst %.c, %.o, $(SRC))

# Link all object files into the final executable.
$(TARGET): $(OBJ)
	$(CXX) -o $@ $^

# Compile each source file into its object file.
%.o: %.c
	$(CXX) -c $< -o $@

# The special target is ".PHONY" (with the leading dot); a plain "PHONY"
# target has no effect, so a file named "clean" would block this rule.
.PHONY: clean
clean:
	rm -f *.o $(TARGET)
- MatrixMultiply.c
#include "MatrixMultiply.h"
/*
 * Computes C = A * B for row-major single-precision matrices.
 *
 * matrixA  m x p input matrix
 * matrixB  p x n input matrix
 * matrixC  m x n output matrix; every entry is overwritten, so the buffer
 *          does not need to be initialised by the caller
 * m, p, n  matrix dimensions; nothing is written when m or n is not
 *          positive, and a non-positive p yields an all-zero C
 *
 * The output must not overlap either input.
 */
void MatrixMultiply(float * matrixA, float * matrixB, float * matrixC, int m, int p, int n)
{
    if (m <= 0 || n <= 0)
        return;
    if (p < 0)
        p = 0;

    float *rowA = matrixA;
    float *rowC = matrixC;

    for (int i = 0; i < m; i++, rowA += p, rowC += n) {
        /* Start every output row from zero instead of accumulating onto
         * whatever the caller's buffer happened to contain. */
        for (int j = 0; j < n; j++)
            rowC[j] = 0.0f;

        /* i-k-j order walks B and C along contiguous rows; each C entry
         * still receives its terms in increasing k, so the summation
         * order per element is the same as in the i-j-k form. */
        float *rowB = matrixB;
        for (int k = 0; k < p; k++, rowB += n) {
            float a = rowA[k];

            for (int j = 0; j < n; j++)
                rowC[j] += a * rowB[j];
        }
    }
}
- MatrixMultiply.h
/*
 * Forms the product of matrixA (m x p) and matrixB (p x n) in matrixC (m x n);
 * all three are row-major float buffers. Pass matrixC zero-initialised: the
 * implementation may accumulate into it rather than overwrite it.
 */
void MatrixMultiply(float * matrixA, float * matrixB, float * matrixC, int m, int p, int n);
- PrintMatrix.c
#include "PrintMatrix.h"
/*
 * Writes a row-major m x n matrix to the text file "ResultC.dat" in the
 * current directory: the first line is the element count m * n, followed by
 * one element per line with 10 digits after the decimal point.
 *
 * If the file cannot be opened, or the final flush fails, a diagnostic is
 * printed to stderr; the function has no return value to report the error.
 */
void PrintMatrix(float *matrix, int m, int n)
{
    FILE *pf = fopen("ResultC.dat", "w");

    if (pf == NULL) {
        perror("ResultC.dat");
        return;
    }

    /* "%d" rather than "%.d": a precision of zero prints nothing at all
     * for the value 0, which would leave the count line empty. */
    fprintf(pf, "%d\n", m * n);

    for (int i = 0; i < m; i++) {
        for (int j = 0; j < n; j++)
            fprintf(pf, "%.10f\n", matrix[i * n + j]);
    }

    /* fclose flushes buffered output, so a failure here means lost data. */
    if (fclose(pf) != 0)
        perror("ResultC.dat");
}
- PrintMatrix.h
#include <stdio.h>
/*
 * Writes the row-major m x n matrix to the file "ResultC.dat": first the
 * element count m * n, then one element per line with 10 decimal places.
 */
void PrintMatrix(float * matrix, int m, int n);
- yhrun.sh
#!/bin/bash
# Launches the gemm executable from the current directory through yhrun.
# NOTE(review): yhrun appears to be a Slurm srun-style launcher; presumably
# -p selects the partition "thcp1", -N 1 requests one node and -n 1 one task.
# Confirm against the cluster's documentation.
yhrun -p thcp1 -N 1 -n 1 ./gemm
运行结果:
| Dimension | Time |
| --- | --- |
| 512 | 1.985703 s |
| 1024 | 72.536384 s |