cuda加速的头文件_api - 哪个是CUDA Math Library的头文件？ - 堆栈内存溢出

最新推荐文章于 2023-03-26 22:03:19 发布

weixin_39621669

最新推荐文章于 2023-03-26 22:03:19 发布

阅读量444

点赞数

文章标签： cuda加速的头文件

版权声明：本文为博主原创文章，遵循 CC 4.0 BY-SA 版权协议，转载请附上原文出处链接和本声明。

本文链接：https://blog.csdn.net/weixin_39621669/article/details/111499100

版权

以前，我实现了这样的代码：

// This implementation follows the code from

// https://github.com/erwincoumans/experiments/blob/master/opencl/primitives/AdlPrimitives/Math/MathCL.h

#ifndef UNIFIED_MATH_CUDA_H

#define UNIFIED_MATH_CUDA_H

#include "vector_functions.h"

/*****************************************

Vector

*****************************************/

// Divides `numerator` by `denominator` using the __fdividef device intrinsic,
// which trades some accuracy for speed compared with the plain `/` operator.
// Returns the approximate quotient.
__device__ float fastDiv(float numerator, float denominator)
{
    const float quotient = __fdividef(numerator, denominator);
    return quotient;
}

// Returns the single-precision square root of `f2` (forwards to sqrtf).
__device__ float getSqrtf(float f2)
{
    const float root = sqrtf(f2);
    return root;
}

// Returns the reciprocal square root of `f2`, i.e. 1 / sqrt(f2),
// computed with the rsqrtf math function.
__device__ float getReverseSqrt(float f2)
{
    const float inverseRoot = rsqrtf(f2);
    return inverseRoot;
}

// Computes the cross product a x b of two 3-component vectors.
// Each output component is written separately:
//   x = a.y*b.z - a.z*b.y
//   y = a.z*b.x - a.x*b.z
//   z = a.x*b.y - a.y*b.x
__device__ float3 getCrossProduct(float3 a, float3 b)
{
    float3 cross;
    cross.x = a.y*b.z - a.z*b.y;
    cross.y = a.z*b.x - a.x*b.z;
    cross.z = a.x*b.y - a.y*b.x;
    return cross;
}

// Computes the 3D cross product of the xyz parts of two float4 vectors.
//
// Only the x, y and z components of `a` and `b` are read; their w components
// are ignored. The w component of the result is always 0.0f.
//
// Returns (a.xyz x b.xyz, 0).
__device__ float4 getCrossProduct(float4 a, float4 b)
{
    // The result is built directly: the earlier float3 copies of the inputs
    // were never read, so they have been dropped.
    return make_float4(a.y*b.z - a.z*b.y,
                       a.z*b.x - a.x*b.z,
                       a.x*b.y - a.y*b.x,
                       0.0f);
}

// Returns the dot product of two 3-component vectors:
// a.x*b.x + a.y*b.y + a.z*b.z, accumulated in x, y, z order.
__device__ float getDotProduct(float3 a, float3 b)
{
    float sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    return sum;
}

// Returns the dot product of two 4-component vectors, including w:
// a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w, accumulated in x, y, z, w order.
__device__ float getDotProduct(float4 a, float4 b)
{
    float sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    sum += a.w * b.w;
    return sum;
}

// Scales the 3-component vector `v` by the reciprocal of its Euclidean length
// and returns the result.
//
// The length is sqrtf(dot(v, v)). There is no guard for a zero-length input:
// in that case the reciprocal is taken of 0.
__device__ float3 getNormalizedVec(const float3 v)
{
    const float lengthSquared = getDotProduct(v, v);
    const float scale = 1.0f / sqrtf(lengthSquared);

    float3 unit;
    unit.x = v.x * scale;
    unit.y = v.y * scale;
    unit.z = v.z * scale;
    return unit;
}

// Scales the 4-component vector `v` by the reciprocal of its Euclidean length
// and returns the result. All four components (including w) contribute to the
// length and are scaled.
//
// The length is sqrtf(dot(v, v)). There is no guard for a zero-length input:
// in that case the reciprocal is taken of 0.
__device__ float4 getNormalizedVec(const float4 v)
{
    const float lengthSquared = getDotProduct(v, v);
    const float scale = 1.0f / sqrtf(lengthSquared);

    float4 unit;
    unit.x = v.x * scale;
    unit.y = v.y * scale;
    unit.z = v.z * scale;
    unit.w = v.w * scale;
    return unit;
}

// Returns the dot product of the xyz parts of two float4 vectors.
//
// The w components are forced to zero before delegating to the 4-component
// getDotProduct, so they do not contribute to the result.
__device__ float dot3F4(float4 a, float4 b)
{
    // `a` and `b` are by-value copies, so clearing w here does not affect the caller.
    a.w = 0.f;
    b.w = 0.f;
    return getDotProduct(a, b);
}

__device__

float getLength(float3 a)

{

return sqrtf(getDotProduct(a, a))

最低0.47元/天解锁文章

weixin_39621669

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
cuda加速的头文件_api - 哪个是CUDA Math Library的头文件？ - 堆栈内存溢出

以前，我实现了这样的代码：// This implementation follows the code from// https://github.com/erwincoumans/experiments/blob/master/opencl/primitives/AdlPrimitives/Math/MathCL.h#ifndef UNIFIED_MATH_CUDA_H#define UNIF...
复制链接

扫一扫

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。