一个同步到 Lapack 算法的 Householder 变换来三对角化一个对称矩阵的实现

Householder Transformation:

任意 U(i) 是一个 n 维实数向量(或者复数向量),

P(i) = I - U(i) * U(i)' / H(i)

其中:   Hi = (1/2) * Ui' * Ui

Hi = = H(i)等;

—————————————————————

变换矩阵:

Q = P(n-3)*P(n-2)*...*P(1)*P(0)

T = Q * A * Q'

A = inv(Q) * A * inv(Q')

_____________________________________

在计算如下值得过程中,

Pi = I - Ui * Ui' / Hi

Hi = (1/2) * Ui' * Ui

,为了实现数值稳定,Hi作为一个分母,这里在选取 U[t-1] 的时候,最大化了U[t-1]的绝对值:

通过:

U[t - 1] += get_sign(U[t - 1]) * sqrt(delta);

tridiagonal_symmetirc_matrix.cpp

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <cuComplex.h>

#define NA 5

#define PRINT_ALL 1

void print_matrix(float *A, int M, int N, int lda);
void print_vector(float *A, int len);
void cpu_gemm(int M, int N, int K, float *A, int lda, float *B, int ldb, float *C, int ldc, float alpha);    // C = alpha * A*B
void cpu_gemm_NT(int M, int N, int K, float *A, int lda, float *B, int ldb, float *C, int ldc, float alpha); // C = alpha * A*B'

void tridiag_symm_mat_Householder(float *A, int n, int lda, float *Q, int ldq);
int symmetric_matrix_test();
int hermite_matrix_test();
void complex_gemm(int M, int N, int K, cuComplex *A, int lda, cuComplex *B, int ldb, cuComplex *C, int ldc, float alpha);
void complex_gemm_NH(int M, int N, int K, cuComplex *A, int lda, cuComplex *B, int ldb, cuComplex *C, int ldc, float alpha);
float get_sign(float aij);

union np_f_i
{
  float f;
  int i;
};
typedef union np_f_i np_f_i;

float get_sign(float aij)
{
  np_f_i x;
  x.f = aij;
  return (x.i >> (sizeof(float)*8 - 1)) ? -1.0 : 1.0;
}

void tridiag_symm_mat_Householder(float *A, int n, int lda, float *Q, int ldq)
{
  for (int i = 0; i < n - 2; i++) // i = 0 ~ 3;
  {
    int t;

    float H;
    float delta;

    float U[NA];
    float P[NA * NA];
    int ldp = n;

    t = n - i - 1;

    // 01. fill the first U
    memset(U, 0x00, NA * sizeof(float));
    for (int idx = 0; idx < t; idx++)
    { // U(0:t-1) = A(t, 0:t-1)
      U[idx] = A[t + idx * lda];
    }
    // void print_vector(float *A, int len)
    printf("U(%d) first =\n", i);
    print_vector(U, n);

    // 02. calculat delta:
    delta = 0.0f;
    for (int idx = 0; idx < t; idx++)
    {
      delta += U[idx] * U[idx];
    }
    printf("delta(%d)=%7.4f\n", i, delta);
    // 03. fill the second U
    U[t - 1] += get_sign(U[t - 1]) * sqrt(delta); // for numerical stable;
    printf("U(%d) second =\n", i);
    print_vector(U, n);

    // 04. calculate H, a scalar
    H = 0.0f;
    for (int idx = 0; idx < t; idx++)
    {
      H += U[idx] * U[idx];
    }
    H *= 0.5f;
    printf("H_i(%d) = %7.4f\n", i, H);

    // 05. calculate P = -U*U'/H;     P = I - U*U'/H;
    // void cpu_gemm_NT(int M, int N, int K, float *A, int lda, float *B, int ldb, float *C, int ldc, float alpha); // C = alpha * A*B'
    cpu_gemm(n, n, 1, U, n, U, 1, P, n, -1.0f / H); // not need NT

    printf("P(%d) first=\n", i);
    print_matrix(P, n, n, ldp);
    // P = I + P      = I - U*U'/H
    for (int idx = 0; idx < n; idx++)
    {
      P[idx + idx * ldp] += 1.0f;
    }

    printf("P(%d) second=\n", i);
    print_matrix(P, n, n, ldp);
    // 06. A(i+1) = P(i)*A(i)*P(i)
    cpu_gemm(n, n, n, P, ldp, A, lda, A, lda, 1.0f);
    cpu_gemm(n, n, n, A, lda, P, ldp, A, lda, 1.0f);
    printf("A(%d) =\n", i);
    print_matrix(A, n, n, lda);

    // 07. Q(i+1) = P(i)*Q(i);         Q = P(n-3)*P(n-2)*...*P(0)*I
    cpu_gemm(n, n, n, P, ldp, Q, ldq, Q, ldq, 1.0f);
    printf("Q(%d) =\n", i);
    print_matrix(Q, n, n, ldq);
  }
}

int main()
{
  printf("\nN-1 to 2  tridiaganal symm ...\n");

  static float A_h[NA * NA] = {
      10.0, 1.0, 2.0, 3.0, 4.0,
      1.0, 9.0, -1.0, 2.0, -3.0,
      2.0, -1.0, 7.0, 3.0, -5.0,
      3.0, 2.0, 3.0, 12.0, -1.0,
      4.0, -3.0, -5.0, -1.0, 15.0};

  ///
  // i=0,1,...,n-3;   t=n-i-1;
  float *A = nullptr;
  float *Q = nullptr;
  int n = NA;
  int lda = n;
  int ldq = n;
  // alloc A
  A = (float *)malloc(n * n * sizeof(float));
  memcpy(A, A_h, n * n * sizeof(float));
  printf("A(0) =\n");
  print_matrix(A, n, n, lda);
  // alloc Q
  Q = (float *)malloc(ldq * n * sizeof(float));
  memset(Q, 0x00, ldq * n * sizeof(float));
  for (int idx = 0; idx < n; idx++)
  {
    Q[idx + idx * ldq] = 1.0f;
  }

  // void tridiag_symm_mat_Householder(float *A, int n, int lda, float *Q, int ldq)
  tridiag_symm_mat_Householder(A, n, lda, Q, ldq);

  /**
    for (int i = 0; i < n - 2; i++)// i = 0 ~ 3;
    {
      int t;

      float H;
      float delta;

      float U[NA];
      float P[NA * NA];
      int ldp = n;

      t = n - i - 1;

      // 01. fill the first U
      memset(U, 0x00, NA * sizeof(float));
      for (int idx = 0; idx < t; idx++)
      { // U(0:t-1) = A(t, 0:t-1)
        U[idx] = A[t + idx * lda];
      }
      // void print_vector(float *A, int len)
      printf("U(%d) first =\n", i);
      print_vector(U, n);

      // 02. calculat delta:
      delta = 0.0f;
      for (int idx = 0; idx < t; idx++)
      {
        delta += U[idx] * U[idx];
      }
      printf("delta(%d)=%7.4f\n", i, delta);
      // 03. fill the second U
      U[t - 1] += get_sign(U[t - 1]) * sqrt(delta); // for numerical stable;
      printf("U(%d) second =\n", i);
      print_vector(U, n);

      // 04. calculate H, a scalar
      H = 0.0f;
      for (int idx = 0; idx < t; idx++)
      {
        H += U[idx] * U[idx];
      }
      H *= 0.5f;
      printf("H_i(%d) = %7.4f\n", i, H);

      // 05. calculate P = -U*U'/H;     P = I - U*U'/H;
      // void cpu_gemm_NT(int M, int N, int K, float *A, int lda, float *B, int ldb, float *C, int ldc, float alpha); // C = alpha * A*B'
      cpu_gemm(n, n, 1, U, n, U, 1, P, n, -1.0f / H);// not need NT

      printf("P(%d) first=\n", i);
      print_matrix(P, n, n, ldp);
      // P = I + P      = I - U*U'/H
      for (int idx = 0; idx < n; idx++)
      {
        P[idx + idx * ldp] += 1.0f;
      }

      printf("P(%d) second=\n", i);
      print_matrix(P, n, n, ldp);
      // 06. A(i+1) = P(i)*A(i)*P(i)
      cpu_gemm(n, n, n, P, ldp, A, lda, A, lda, 1.0f);
      cpu_gemm(n, n, n, A, lda, P, ldp, A, lda, 1.0f);
      printf("A(%d) =\n", i);
      print_matrix(A, n, n, lda);

      // 07. Q(i+1) = P(i)*Q(i);         Q = P(n-3)*P(n-2)*...*P(0)*I
      cpu_gemm(n, n, n, P, ldp, Q, ldq, Q, ldq, 1.0f);
      printf("Q(%d) =\n", i);
      print_matrix(Q, n, n, ldq);

    }

  */

  printf("T=[ ...\n");
  print_matrix(A, n, n, lda);
  printf("]\nQ=[ ...\n");
  print_matrix(Q, n, n, ldq);
  printf("]\n");

  return 0;
}

void print_matrix(float *A, int M, int N, int lda)
{
  for (int i = 0; i < M; i++)
  {
    for (int j = 0; j < N; j++)
    {
      printf("%7.3f ", A[i + j * lda]);
    }
    printf("; ...\n");
  }
  // printf("\n");
}

void print_vector(float *A, int len)
{
  for (int idx = 0; idx < len; idx++)
  {
    printf("%7.4f ", A[idx]);
  }
  printf("\n");
}

void cpu_gemm(int M, int N, int K, float *A_h, int lda, float *B_h, int ldb, float *C, int ldc, float alpha)
{
  float *A = nullptr;
  float *B = nullptr;

  A = (float *)malloc(lda * K * sizeof(float));
  B = (float *)malloc(ldb * N * sizeof(float));

  memcpy(A, A_h, lda * K * sizeof(float));
  memcpy(B, B_h, ldb * N * sizeof(float));

  for (int i = 0; i < M; i++)
  {
    for (int j = 0; j < N; j++)
    {
      float sigma;

      sigma = 0.0f;
      for (int k = 0; k < K; k++)
      {
        sigma += A[i + k * lda] * B[k + j * ldb];
      }
      C[i + j * ldc] = alpha * sigma;
    }
  }

  free(A);
  free(B);
}

void cpu_gemm_NT(int M, int N, int K, float *A_h, int lda, float *B_h, int ldb, float *C, int ldc, float alpha)
{
  float *A = nullptr;
  float *B = nullptr;

  A = (float *)malloc(lda * K * sizeof(float));
  B = (float *)malloc(ldb * N * sizeof(float));

  memcpy(A, A_h, lda * K * sizeof(float));
  memcpy(B, B_h, ldb * N * sizeof(float));

  for (int i = 0; i < M; i++)
  {
    for (int j = 0; j < N; j++)
    {
      float sigma;

      sigma = 0.0f;
      for (int k = 0; k < K; k++)
      {
        sigma += A[i + k * lda] * B[k * ldb + j];
      }
      C[i + j * ldc] = alpha * sigma;
    }
  }

  free(A);
  free(B);
}

void complex_gemm_NH(int M, int N, int K, cuComplex *A, int lda, cuComplex *B, int ldb, cuComplex *C, int ldc, float alpha)
{
  cuComplex zero_;

  zero_ = make_cuFloatComplex(0.0f, 0.0f);
  for (int i = 0; i < M; i++)
  {
    for (int j = 0; j < N; j++)
    {
      cuComplex sigma;
      sigma = zero_;
      for (int k = 0; k < K; k++)
      {
        // sigma += A[i + k*lda]*B[k+j*ldb];
        sigma = cuCaddf(sigma, cuCmulf(A[i + k * lda], cuConjf(B[k * ldb + j])));
      }
      C[i + j * ldc] = make_cuFloatComplex(alpha * cuCrealf(sigma), alpha * cuCimagf(sigma));
    }
  }
}

void complex_gemm(int M, int N, int K, cuComplex *A, int lda, cuComplex *B, int ldb, cuComplex *C, int ldc, float alpha)
{
  cuComplex zero_;

  zero_ = make_cuFloatComplex(0.0f, 0.0f);
  for (int i = 0; i < M; i++)
  {
    for (int j = 0; j < N; j++)
    {
      cuComplex sigma;
      sigma = zero_;
      for (int k = 0; k < K; k++)
      {
        // sigma += A[i + k*lda]*B[k+j*ldb];
        sigma = cuCaddf(sigma, cuCmulf(A[i + k * lda], B[k + j * ldb]));
      }
      C[i + j * ldc] = make_cuFloatComplex(alpha * cuCrealf(sigma), alpha * cuCimagf(sigma));
    }
  }
}

/**
#include <stdio.h>
#include <math.h>



void strq(double *a, int n, double *q, double* b, double* c)//int n;  double a[],q[],b[],c[];
{
  int i,j,k,u,v;
    double h,f,g,h2;
    for (i=0; i<=n-1; i++)
    for (j=0; j<=n-1; j++)
      { u=i*n+j; q[u]=a[u];}
    for (i=n-1; i>=1; i--)
      { h=0.0;
        if (i>1)
          for (k=0; k<=i-1; k++)
            { u=i*n+k; h=h+q[u]*q[u];}
        if (h+1.0==1.0)
          { c[i]=0.0;
            if (i==1) c[i]=q[i*n+i-1];
            b[i]=0.0;
          }
        else
          { c[i]=sqrt(h);
            u=i*n+i-1;
            if (q[u]>0.0) c[i]=-c[i];
            h=h-q[u]*c[i];
            q[u]=q[u]-c[i];
            f=0.0;
            for (j=0; j<=i-1; j++)
              { q[j*n+i]=q[i*n+j]/h;
                g=0.0;
                for (k=0; k<=j; k++)
                  g=g+q[j*n+k]*q[i*n+k];
                if (j+1<=i-1)
                  for (k=j+1; k<=i-1; k++)
                    g=g+q[k*n+j]*q[i*n+k];
                c[j]=g/h;
                f=f+g*q[j*n+i];
              }
            h2=f/(h+h);
            for (j=0; j<=i-1; j++)
              { f=q[i*n+j];
                g=c[j]-h2*f;
                c[j]=g;
                for (k=0; k<=j; k++)
                  { u=j*n+k;
                    q[u]=q[u]-f*c[k]-g*q[i*n+k];
                  }
              }
            b[i]=h;
          }
      }
    for (i=0; i<=n-2; i++) c[i]=c[i+1];
    c[n-1]=0.0;
    b[0]=0.0;
    for (i=0; i<=n-1; i++)
      { if ((b[i]!=0.0)&&(i-1>=0))
          for (j=0; j<=i-1; j++)
            { g=0.0;
              for (k=0; k<=i-1; k++)
                g=g+q[i*n+k]*q[k*n+j];
              for (k=0; k<=i-1; k++)
                { u=k*n+j;
                  q[u]=q[u]-g*q[k*n+i];
                }
            }
        u=i*n+i;
        b[i]=q[u]; q[u]=1.0;
        if (i-1>=0)
          for (j=0; j<=i-1; j++)
            { q[i*n+j]=0.0; q[j*n+i]=0.0;}
      }
    return;
  }


int  main()
{
  int i,j;
    static double b[5],c[5],q[5*5];
    static double a[5*5]={
  10.0,  1.0,  2.0,  3.0,  4.0,
   1.0,  9.0, -1.0,  2.0, -3.0,
   2.0, -1.0,  7.0,  3.0, -5.0,
   3.0,  2.0,  3.0, 12.0, -1.0,
   4.0, -3.0, -5.0, -1.0, 15.0
  };

  int n = 5;

    strq(a,n,q,b,c);
    printf("MAT A IS:\n");

  int lda = n;
  int ldq = n;

    for (i=0; i<=4; i++)
    {
    for (j=0; j<=4; j++)
      printf("%7.3f ",a[i*lda + j]);
        printf("\n");
    }

    printf("\n");
    printf("MAT Q IS:\n");
    for (i=0; i<=4; i++)
    {
    for (j=0; j<=4; j++)
      printf("%7.3f ",q[i*ldq + j]);
        printf("\n");
    }

    printf("\n");
    printf("MAT B IS:\n");
    for (i=0; i<=4; i++)
    printf("%7.3f ",b[i]);

    printf("\n\n");
    printf("MAT C IS:\n");
    for (i=0; i<=4; i++)
    printf("%7.3f ",c[i]);
    printf("\n\n");

  return 0;
}

***/

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值