/** Copyright © 2020 LEI Hongfaan. Distributed under the MIT License.*/
// Sample code, for reference only.
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Arm;
using System.Runtime.Intrinsics.X86;
namespace UltimateOrb.Numerics {
/// <summary>
/// A 4x4 matrix of <see cref="double"/> elements. Field <c>Mrc</c> holds the
/// element at row <c>r</c>, column <c>c</c>.
/// </summary>
/// <remarks>
/// The SIMD paths of the multiplication operator read and write several
/// consecutive fields through raw pointers (for example, a 256-bit load at
/// <c>&amp;M11</c> is expected to cover M11..M14). They therefore rely on the
/// fields being laid out in declaration order; do not reorder the fields.
/// </remarks>
public struct Matrix4x4 {
    public double M11;
    public double M12;
    public double M13;
    public double M14;
    public double M21;
    public double M22;
    public double M23;
    public double M24;
    public double M31;
    public double M32;
    public double M33;
    public double M34;
    public double M41;
    public double M42;
    public double M43;
    public double M44;

    /// <summary>
    /// Computes the matrix product <paramref name="first"/> × <paramref name="second"/>.
    /// </summary>
    /// <param name="first">The left-hand matrix.</param>
    /// <param name="second">The right-hand matrix.</param>
    /// <returns>
    /// The matrix whose element (r, c) is the sum over k of
    /// <c>first[r, k] * second[k, c]</c>.
    /// </returns>
    /// <remarks>
    /// Three implementations are selected at run time: AdvSimd (Arm64), AVX, and a
    /// scalar fallback. The SIMD paths overwrite the by-value copy of
    /// <paramref name="first"/> row by row and return it; the caller's matrices are
    /// not modified. The paths accumulate in different ways (fused multiply-add on
    /// Arm64, pairwise sums on AVX, left-to-right sums in the fallback), so results
    /// may differ in the last bits between platforms.
    /// </remarks>
    public static unsafe Matrix4x4 operator *(Matrix4x4 first, Matrix4x4 second) {
        const int Dimension = 4;

        if (AdvSimd.Arm64.IsSupported) {
            // Caution: this path may run hot.
            // Each row of `second` is held as two 128-bit halves (columns 1-2 and 3-4).
            // `second` is never written, so the loads are done once up front.
            var s1L = AdvSimd.LoadVector128(&second.M11);
            var s1H = AdvSimd.LoadVector128(&second.M13);
            var s2L = AdvSimd.LoadVector128(&second.M21);
            var s2H = AdvSimd.LoadVector128(&second.M23);
            var s3L = AdvSimd.LoadVector128(&second.M31);
            var s3H = AdvSimd.LoadVector128(&second.M33);
            var s4L = AdvSimd.LoadVector128(&second.M41);
            var s4H = AdvSimd.LoadVector128(&second.M43);

            double* row = &first.M11;
            for (int r = 0; r < Dimension; ++r, row += Dimension) {
                // The whole row is loaded before it is overwritten below, and a result
                // row depends only on the same row of `first`, so updating in place is safe.
                var rowL = AdvSimd.LoadVector128(row);
                var rowH = AdvSimd.LoadVector128(row + 2);

                // acc = row[0] * second.row1
                var accL = AdvSimd.Arm64.MultiplyBySelectedScalar(s1L, rowL, 0);
                var accH = AdvSimd.Arm64.MultiplyBySelectedScalar(s1H, rowL, 0);
                // acc += row[1] * second.row2
                accL = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(accL, s2L, rowL, 1);
                accH = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(accH, s2H, rowL, 1);
                // acc += row[2] * second.row3
                accL = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(accL, s3L, rowH, 0);
                accH = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(accH, s3H, rowH, 0);
                // acc += row[3] * second.row4
                accL = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(accL, s4L, rowH, 1);
                accH = AdvSimd.Arm64.FusedMultiplyAddBySelectedScalar(accH, s4H, rowH, 1);

                AdvSimd.Store(row, accL);
                AdvSimd.Store(row + 2, accH);
            }
            return first;
        }

        if (Avx.IsSupported) {
            // One 256-bit vector per row of `second`.
            var sRow1 = Avx.LoadVector256(&second.M11);
            var sRow2 = Avx.LoadVector256(&second.M21);
            var sRow3 = Avx.LoadVector256(&second.M31);
            var sRow4 = Avx.LoadVector256(&second.M41);

            double* row = &first.M11;
            for (int r = 0; r < Dimension; ++r, row += Dimension) {
                // All four broadcasts read the row before the store overwrites it.
                var p1 = Avx.Multiply(Avx.BroadcastScalarToVector256(row), sRow1);
                var p2 = Avx.Multiply(Avx.BroadcastScalarToVector256(row + 1), sRow2);
                var p3 = Avx.Multiply(Avx.BroadcastScalarToVector256(row + 2), sRow3);
                var p4 = Avx.Multiply(Avx.BroadcastScalarToVector256(row + 3), sRow4);
                Avx.Store(row, Avx.Add(Avx.Add(p1, p2), Avx.Add(p3, p4)));
            }
            return first;
        }

        // Scalar fallback: plain row-by-column dot products.
        Matrix4x4 result;
        result.M11 = first.M11 * second.M11 + first.M12 * second.M21 + first.M13 * second.M31 + first.M14 * second.M41;
        result.M12 = first.M11 * second.M12 + first.M12 * second.M22 + first.M13 * second.M32 + first.M14 * second.M42;
        result.M13 = first.M11 * second.M13 + first.M12 * second.M23 + first.M13 * second.M33 + first.M14 * second.M43;
        result.M14 = first.M11 * second.M14 + first.M12 * second.M24 + first.M13 * second.M34 + first.M14 * second.M44;
        result.M21 = first.M21 * second.M11 + first.M22 * second.M21 + first.M23 * second.M31 + first.M24 * second.M41;
        result.M22 = first.M21 * second.M12 + first.M22 * second.M22 + first.M23 * second.M32 + first.M24 * second.M42;
        result.M23 = first.M21 * second.M13 + first.M22 * second.M23 + first.M23 * second.M33 + first.M24 * second.M43;
        result.M24 = first.M21 * second.M14 + first.M22 * second.M24 + first.M23 * second.M34 + first.M24 * second.M44;
        result.M31 = first.M31 * second.M11 + first.M32 * second.M21 + first.M33 * second.M31 + first.M34 * second.M41;
        result.M32 = first.M31 * second.M12 + first.M32 * second.M22 + first.M33 * second.M32 + first.M34 * second.M42;
        result.M33 = first.M31 * second.M13 + first.M32 * second.M23 + first.M33 * second.M33 + first.M34 * second.M43;
        result.M34 = first.M31 * second.M14 + first.M32 * second.M24 + first.M33 * second.M34 + first.M34 * second.M44;
        result.M41 = first.M41 * second.M11 + first.M42 * second.M21 + first.M43 * second.M31 + first.M44 * second.M41;
        result.M42 = first.M41 * second.M12 + first.M42 * second.M22 + first.M43 * second.M32 + first.M44 * second.M42;
        result.M43 = first.M41 * second.M13 + first.M42 * second.M23 + first.M43 * second.M33 + first.M44 * second.M43;
        result.M44 = first.M41 * second.M14 + first.M42 * second.M24 + first.M43 * second.M34 + first.M44 * second.M44;
        return result;
    }
}
}