官方直播已经说明了思路和步骤
https://www.bilibili.com/video/BV1Q741177Jd
知乎也有相关文章
https://zhuanlan.zhihu.com/p/113367281
核心思想就是利用多线程分散物理骨骼的运算部分,从而达到优化效果
这里记录一下我自己的实现步骤
- 修改DynamicBone.Particle定义,目的是支持Blittable结构,便于NativeArray存储
public struct Particle
{
// The original `public Transform m_Transform = null;` field was removed here:
// reference types are not blittable, and a blittable layout is required to
// store this struct in a NativeArray. The Transform for particle i now lives
// in a parallel list (m_particleTransformArr[i]).
public int m_ParentIndex;
public float m_Damping;
public float m_Elasticity;
public float m_Stiffness;
public float m_Inert;
public float m_Radius;
public float m_BoneLength;
public Vector3 m_Position;
public Vector3 m_PrevPosition;
public Vector3 m_EndOffset;
public Vector3 m_InitLocalPosition;
public Quaternion m_InitLocalRotation;
// Extension fields: scratch space for intermediate world/local transform
// values captured by the jobs, so job code never needs to touch a Transform.
public Vector3 m_tmpWorldPos;
public Quaternion m_tmpWorldRot;
public Vector3 m_tmpLocalPos;
public Quaternion m_tmpLocalRot;
public int m_ChildCount;
}
public List<Particle> m_Particles = new List<Particle>();
// Transform list replacing the removed Particle.m_Transform field; entry i
// corresponds to m_Particles[i].
public List<Transform> m_particleTransformArr = new List<Transform>();
- 修改DynamicBone中各个方法,确保所有逻辑可行,将其中的p.m_Transform修改为m_particleTransformArr[i],p0.m_Transform修改为m_particleTransformArr[p.m_ParentIndex]。
这里还要注意,由于Particle被定义为struct,所以Particle p = m_Particle[i]实际上是值传递,对p的修改不会应用到原数组,所以修改完还要再赋值回去
Particle p = m_Particles[i];
p.xx = xx;
m_Particles[i] = p;
测试修改,并做性能分析,会发现运行效率比以前慢了
- 实现DynamicBoneManager类,并将其声明为单例(Singleton)模式,在 Edit | Project Settings | Script Execution Order 中配置优先级在所有obj对象之前,便于DynamicBone随时可以访问到
并在其中声明Head类型,用来存储并行DynamicBone对象数据
public class DynamicBoneManager : MonoBehaviour
{
// Lazily-resolved singleton accessor: finds the scene's manager on first use.
// NOTE(review): the backing field `m_instance` is not shown in this excerpt —
// confirm it is declared as `private static DynamicBoneManager m_instance;`
// in the full class, and that a manager exists in the scene before any
// DynamicBoneFast component enables itself.
public static DynamicBoneManager Instance
{
get
{
if (null == m_instance)
{
m_instance = GameObject.FindObjectOfType<DynamicBoneManager>();
}
return m_instance;
}
}
private List<DynamicBoneFast> m_dynamicBoneList; // managed DynamicBone components; not itself used by the parallel jobs
// Per-bone-chain context handed to the jobs: one Head per managed
// DynamicBoneFast, pointing into the shared `particles` buffer.
public struct Head
{
public int particleIndex; // start offset of this chain's particles in `particles`
public int particleLength; // number of valid particles (<= SINGLE_HEAD_PARTICLE_SIZE)
// Parameters needed by the DynamicBone simulation.
public Vector3 m_Gravity;
public Vector3 m_LocalGravity;
public float m_ObjectScale;
public Vector3 m_Force;
public Vector3 m_ObjectMove;
public float m_Weight;
}
private NativeArray<Head> heads; // job data: per-DynamicBone context
private NativeArray<DynamicBoneFast.Particle> particles; // job data: all particles of all bones, fixed-stride (padded) layout
private TransformAccessArray m_particleTransformArr; // job data: the Transform behind each particle slot
这里把heads、particles声明为定长Array,而不是NativeList,目的是简化代码,避开额外Entity包的引用
在数据存储结构中,每个Head(DynamicBone)所对应的Particle长度可能不一样,但在内存中长度是一致的(冗余结构)
这里要注意TransformAccessArray(和NativeList一样)删除时也会对最后一个元素进行位置对换,在Unity里称之为 RemoveAtSwapBack 方法
实现DynamicBoneManager.m_dynamicBoneList添加、删除操作,同时确保heads、particles、m_particleTransformArr中元素顺序一致
// Registers a DynamicBoneFast with the manager and copies its particle data
// into the fixed-stride job buffers (heads / particles / m_particleTransformArr).
// Slot layout: bone i owns indices [i * SINGLE_HEAD_PARTICLE_SIZE,
// (i + 1) * SINGLE_HEAD_PARTICLE_SIZE); unused slots are padded with null
// transforms so all three arrays stay index-aligned.
public void OnEnter(DynamicBoneFast target)
{
    if (m_dynamicBoneList.Count >= MAX_HEAD_LENGTH)
    {
        Debug.LogWarning("bone list out of range");
        return;
    }
    int idx = m_dynamicBoneList.IndexOf(target);
    if (idx != -1)
    {
        Debug.LogError("target already exists");
        return;
    }

    // The job chain reads/writes these buffers; finish it before mutating them.
    m_lastJobHandle.Complete();

    idx = m_dynamicBoneList.Count;
    m_dynamicBoneList.Add(target);

    Head hd = new Head();
    hd.particleIndex = idx * SINGLE_HEAD_PARTICLE_SIZE;
    hd.particleLength = Mathf.Min(SINGLE_HEAD_PARTICLE_SIZE, target.m_Particles.Count);
    // Particles beyond the fixed stride are dropped; warn so the truncation is
    // visible instead of manifesting as silently broken bones.
    if (target.m_Particles.Count > SINGLE_HEAD_PARTICLE_SIZE)
        Debug.LogWarning("particle count exceeds SINGLE_HEAD_PARTICLE_SIZE, extra particles ignored");
    hd.m_Gravity = target.m_Gravity;
    hd.m_LocalGravity = target.m_LocalGravity;
    hd.m_ObjectScale = target.m_ObjectScale;
    hd.m_Force = target.m_Force;
    hd.m_ObjectMove = target.m_ObjectMove;
    hd.m_Weight = target.m_Weight;
    heads[idx] = hd;

    // Fixed: the element type must match the `particles` buffer
    // (DynamicBoneFast.Particle); `DynamicBoneNode.Particle` was a typo.
    NativeArray<DynamicBoneFast.Particle>.Copy(target.m_Particles.ToArray(), 0, particles, hd.particleIndex, hd.particleLength);

    for (int i = 0; i < hd.particleLength; i++)
    {
        int trans_idx = idx * SINGLE_HEAD_PARTICLE_SIZE + i;
        // Fixed: when appending in order the next write slot must equal the
        // current length exactly; the original `>=` assert was trivially true.
        Debug.Assert(trans_idx == m_particleTransformArr.length);
        m_particleTransformArr.Add(target.m_particleTransformArr[i]);
    }
    // Pad unused slots with null so every head occupies exactly
    // SINGLE_HEAD_PARTICLE_SIZE transform entries.
    for (int i = hd.particleLength; i < SINGLE_HEAD_PARTICLE_SIZE; i++)
    {
        m_particleTransformArr.Add(null);
    }
    Debug.Assert(m_particleTransformArr.length == m_dynamicBoneList.Count * SINGLE_HEAD_PARTICLE_SIZE);
}
// Unregisters a DynamicBoneFast. Removal uses swap-back: the last bone's data
// moves into the freed slot so the buffers stay densely packed, and the moved
// head's particle offset is recomputed for its new position.
public void OnExit(DynamicBoneFast target)
{
    int idx = m_dynamicBoneList.IndexOf(target);
    if (idx == -1)
    {
        Debug.LogWarning("target does not exist");
        return;
    }

    // The job chain reads/writes these buffers; finish it before mutating them.
    m_lastJobHandle.Complete();

    // Manual RemoveAtSwapBack on the managed list.
    int swap_back_idx = m_dynamicBoneList.Count - 1;
    m_dynamicBoneList[idx] = m_dynamicBoneList[swap_back_idx];
    m_dynamicBoneList.RemoveAt(swap_back_idx);

    // Move the last head into the freed slot and rebase its particle offset.
    Head hd = heads[swap_back_idx];
    {
        Debug.Assert(hd.particleIndex == swap_back_idx * SINGLE_HEAD_PARTICLE_SIZE);
        hd.particleIndex = idx * SINGLE_HEAD_PARTICLE_SIZE;
        heads[idx] = hd;
    }
    // Fixed: the element type must match the `particles` buffer
    // (DynamicBoneFast.Particle); `DynamicBoneNode.Particle` was a typo.
    NativeArray<DynamicBoneFast.Particle>.Copy(particles, swap_back_idx * SINGLE_HEAD_PARTICLE_SIZE, particles, idx * SINGLE_HEAD_PARTICLE_SIZE, hd.particleLength);
    // Remove the freed transform slots in reverse order: each RemoveAtSwapBack
    // pulls the current last element forward, so iterating from the high end of
    // the slot keeps the surviving transforms in the same order as `particles`.
    for (int i = (idx + 1) * SINGLE_HEAD_PARTICLE_SIZE - 1; i >= idx * SINGLE_HEAD_PARTICLE_SIZE; i--)
    {
        m_particleTransformArr.RemoveAtSwapBack(i);
    }
    Debug.Assert(m_particleTransformArr.length == m_dynamicBoneList.Count * SINGLE_HEAD_PARTICLE_SIZE);
}
在DynamicBone的OnEnable、OnDisable将自己加入到Manager
void OnEnable()
{
// ResetParticlesPosition();
// Rebuild this bone's particle data, then hand it to the manager, which
// copies the data into its shared job buffers.
SetupParticles();
DynamicBoneManager.Instance.OnEnter(this);
}
void OnDisable()
{
// InitTransforms();
// Withdraw this bone from the manager so the job buffers no longer touch it.
DynamicBoneManager.Instance.OnExit(this);
}
- 实现DynamicBoneManager.OnDrawGizmosSelected便于查看物理骨骼数据是否正确
// Editor visualization: draws a line from every particle to its parent and a
// wire sphere for particles that carry a collision radius, so the managed
// bone data can be inspected directly in the Scene view.
void OnDrawGizmosSelected()
{
    if (!enabled || m_dynamicBoneList == null)
        return;
    Gizmos.color = Color.white;
    int headCount = m_dynamicBoneList.Count;
    for (int headIdx = 0; headIdx < headCount; headIdx++)
    {
        Head head = heads[headIdx];
        int baseIdx = head.particleIndex;
        for (int particleIdx = 0; particleIdx < head.particleLength; ++particleIdx)
        {
            DynamicBoneFast.Particle particle = particles[baseIdx + particleIdx];
            bool hasParent = particle.m_ParentIndex >= 0;
            if (hasParent)
            {
                Vector3 parentPos = particles[baseIdx + particle.m_ParentIndex].m_Position;
                Gizmos.DrawLine(particle.m_Position, parentPos);
            }
            if (particle.m_Radius > 0)
            {
                Gizmos.DrawWireSphere(particle.m_Position, particle.m_Radius * head.m_ObjectScale);
            }
        }
    }
}
- 移除原来DynamicBone.LateUpdate,改由DynamicBoneManager.LateUpdate并行更新所有骨骼,里边启用 Unity IJob派生类来实现对heads、particles、m_particleTransformArr操作
经过实践,我这边总结了5个Job
// Drives one simulation step for all managed bones by chaining five jobs:
// snapshot transforms -> integrate -> constrain -> orient parents -> write back.
private void LateUpdate()
{
// If last frame's chain has not finished yet, skip this frame instead of
// stalling the main thread waiting for it.
if (!m_lastJobHandle.IsCompleted)
{
return;
}
// IsCompleted is true, but Complete() is still required to sync the
// dependency before the buffers are reused this frame.
m_lastJobHandle.Complete();
// 1) Snapshot each particle transform's world/local pos+rot into `particles`.
PrepareParticleJob prepareParticleJob = new PrepareParticleJob();
prepareParticleJob.heads = heads;
prepareParticleJob.particles = particles;
JobHandle prepareParticleJobHandle = prepareParticleJob.Schedule(m_particleTransformArr);
// 2) Verlet integration (gravity / force / inertia).
// Batch size SINGLE_HEAD_PARTICLE_SIZE = one head per work batch.
UpdateParticle1Job particle1Job = new UpdateParticle1Job();
particle1Job.heads = heads;
particle1Job.particles = particles;
JobHandle particle1JobHandle = particle1Job.Schedule(m_dynamicBoneList.Count * SINGLE_HEAD_PARTICLE_SIZE, SINGLE_HEAD_PARTICLE_SIZE, prepareParticleJobHandle);
// 3) Shape/length constraints.
// NOTE(review): `colliders` is assigned here but the UpdateParticle2Job
// struct shown in this excerpt declares no such field — confirm the field
// exists in the full source.
UpdateParticle2Job particle2Job = new UpdateParticle2Job();
particle2Job.colliders = colliders;
particle2Job.heads = heads;
particle2Job.particles = particles;
JobHandle particle2JobHandle = particle2Job.Schedule(m_dynamicBoneList.Count * SINGLE_HEAD_PARTICLE_SIZE, SINGLE_HEAD_PARTICLE_SIZE, particle1JobHandle);
// 4) Re-orient parent particles toward their simulated child.
UpdateParticle3Job particle3Job = new UpdateParticle3Job();
particle3Job.heads = heads;
particle3Job.particles = particles;
JobHandle particle3JobHandle = particle3Job.Schedule(m_dynamicBoneList.Count * SINGLE_HEAD_PARTICLE_SIZE, SINGLE_HEAD_PARTICLE_SIZE, particle2JobHandle);
// 5) Write the simulated positions/rotations back to the Transforms.
ApplyParticleToTransformJob applyToTransformJob = new ApplyParticleToTransformJob();
applyToTransformJob.heads = heads;
applyToTransformJob.particles = particles;
m_lastJobHandle = applyToTransformJob.Schedule(m_particleTransformArr, particle3JobHandle);
// Flush the scheduled batch to the worker threads immediately.
JobHandle.ScheduleBatchedJobs();
}
每个Job具体实现如下,其实就是DynamicBone.UpdateDynamicBones中各个方法转移过来
// Job step 1: snapshot job. Copies each particle transform's current world
// and local position/rotation into the particle buffer so the later jobs can
// run without touching any Transform.
struct PrepareParticleJob : IJobParallelForTransform
{
    [ReadOnly]
    public NativeArray<Head> heads;
    public NativeArray<DynamicBoneFast.Particle> particles;

    public void Execute(int i, TransformAccess transform)
    {
        int headIdx = i / SINGLE_HEAD_PARTICLE_SIZE;
        int localIdx = i % SINGLE_HEAD_PARTICLE_SIZE;
        Head hd = heads[headIdx];
        if (localIdx >= hd.particleLength)
        {
            return; // padded slot — no particle behind it
        }
        int particleIdx = hd.particleIndex + localIdx;
        DynamicBoneFast.Particle p = particles[particleIdx];
        p.m_tmpWorldPos = transform.position;
        p.m_tmpWorldRot = transform.rotation;
        p.m_tmpLocalPos = transform.localPosition;
        p.m_tmpLocalRot = transform.localRotation;
        particles[particleIdx] = p; // struct copy — must write back
    }
}
// Job step 2: Verlet integration. Moves every non-root particle by its damped
// velocity plus external force; root particles are pinned to the transform
// world position captured in PrepareParticleJob.
public struct UpdateParticle1Job : IJobParallelFor
{
[ReadOnly]
public NativeArray<Head> heads;
public NativeArray<DynamicBoneFast.Particle> particles;
public void Execute(int i)
{
Head hd = heads[i / SINGLE_HEAD_PARTICLE_SIZE];
int idx = i % SINGLE_HEAD_PARTICLE_SIZE;
if (idx >= hd.particleLength)
{
return; // padded slot — no particle behind it
}
Vector3 force = hd.m_Gravity;
// The original DynamicBone removes the gravity component already expressed
// in the rest pose; that code reads m_Root (a Transform), which is not
// available inside a job, so it is left disabled here:
// Vector3 fdir = hd.m_Gravity.normalized;
// Vector3 rf = m_Root.TransformDirection(hd.m_LocalGravity);
// Vector3 pf = fdir * Mathf.Max(Vector3.Dot(rf, fdir), 0); // project current gravity to rest gravity
// force -= pf; // remove projected gravity
force = (force + hd.m_Force) * hd.m_ObjectScale;
DynamicBoneFast.Particle p = particles[hd.particleIndex + idx];
if (p.m_ParentIndex >= 0)
{
// verlet integration
Vector3 v = p.m_Position - p.m_PrevPosition;
Vector3 rmove = hd.m_ObjectMove * p.m_Inert;
p.m_PrevPosition = p.m_Position + rmove;
p.m_Position += v * (1 - p.m_Damping) + force + rmove;
}
else
{
// Root particle: follow the captured transform position directly.
p.m_PrevPosition = p.m_Position;
p.m_Position = p.m_tmpWorldPos;//p.m_Position = m_particleTransformArr[hd.particleIndex + idx].position;
}
particles[hd.particleIndex + idx] = p; // struct copy — write back
}
}
// Job step 3: shape constraints. Pulls each non-root particle toward its rest
// pose (elasticity), clamps how far it may drift from it (stiffness), and
// finally restores the rest length of the bone segment.
// NOTE(review): LateUpdate assigns `particle2Job.colliders`, but no
// `colliders` field is declared in this struct as shown — confirm the field
// exists in the full source (the collider logic below is commented out).
public struct UpdateParticle2Job : IJobParallelFor
{
[ReadOnly]
public NativeArray<Head> heads;
public NativeArray<DynamicBoneFast.Particle> particles;
public void Execute(int i)
{
Head hd = heads[i / SINGLE_HEAD_PARTICLE_SIZE];
int idx = i % SINGLE_HEAD_PARTICLE_SIZE;
if (idx <= 0 || idx >= hd.particleLength)
{
return; // roots (idx 0) are pinned; padded slots hold no particle
}
DynamicBoneFast.Particle p = particles[hd.particleIndex + idx];
DynamicBoneFast.Particle p0 = particles[hd.particleIndex + p.m_ParentIndex];
// Parent's captured world transform (uniform unit scale).
Matrix4x4 m0 = Matrix4x4.TRS(p0.m_tmpWorldPos, p0.m_tmpWorldRot, Vector3.one);
float restLen;
if (idx < hd.particleLength - 1)
restLen = (p0.m_tmpWorldPos - p.m_tmpWorldPos).magnitude;
else
restLen = m0.MultiplyVector(p.m_EndOffset).magnitude; // tail segment: length comes from the end offset
// keep shape
float stiffness = Mathf.Lerp(1.0f, p.m_Stiffness, hd.m_Weight);
if (stiffness > 0 || p.m_Elasticity > 0)
{
// Rebase the parent matrix onto the parent's simulated position so the
// rest pose is evaluated in the simulated frame.
m0.SetColumn(3, p0.m_Position);
Vector3 restPos;
if (idx < hd.particleLength - 1)
restPos = m0.MultiplyPoint3x4(p.m_tmpLocalPos);
else
restPos = m0.MultiplyPoint3x4(p.m_EndOffset);
Vector3 d = restPos - p.m_Position;
p.m_Position += d * p.m_Elasticity;
if (stiffness > 0)
{
d = restPos - p.m_Position;
float len = d.magnitude;
float maxlen = restLen * (1 - stiffness) * 2;
if (len > maxlen)
p.m_Position += d * ((len - maxlen) / len);
}
}
// Collider support needs extra bookkeeping and is omitted here for brevity.
//if (hd.colliderIndex >= 0)
//{
// SphereCollider sphereCollider = colliders[hd.colliderIndex];
// if (sphereCollider.h <= 0)
// {
// if (sphereCollider.m_Bound == (int)DynamicBoneColliderBase.Bound.Outside)
// DynamicBoneCollider.OutsideSphere(ref p.m_Position, p.m_Radius, sphereCollider.c0, sphereCollider.radius);
// else
// DynamicBoneCollider.InsideSphere(ref p.m_Position, p.m_Radius, sphereCollider.c0, sphereCollider.radius);
// }
// else
// {
// if (sphereCollider.m_Bound == (int)DynamicBoneColliderBase.Bound.Outside)
// DynamicBoneCollider.OutsideCapsule(ref p.m_Position, p.m_Radius, sphereCollider.c0, sphereCollider.c1, sphereCollider.radius);
// else
// DynamicBoneCollider.InsideCapsule(ref p.m_Position, p.m_Radius, sphereCollider.c0, sphereCollider.c1, sphereCollider.radius);
// }
//}
// keep length
Vector3 dd = p0.m_Position - p.m_Position;
float leng = dd.magnitude;
if (leng > 0)
p.m_Position += dd * ((leng - restLen) / leng);
particles[hd.particleIndex + idx] = p; // struct copy — write back
}
}
// Job step 4: rotate each parent particle's cached world rotation so its bone
// axis points at the simulated position of its child (only when the parent
// has at most one child).
public struct UpdateParticle3Job : IJobParallelFor
{
[ReadOnly]
public NativeArray<Head> heads;
public NativeArray<DynamicBoneFast.Particle> particles;
public void Execute(int i)
{
Head hd = heads[i / SINGLE_HEAD_PARTICLE_SIZE];
int idx = i % SINGLE_HEAD_PARTICLE_SIZE;
if (idx <= 0 || idx >= hd.particleLength)
{
return; // roots have no parent to orient; padded slots hold no particle
}
DynamicBoneFast.Particle p = particles[hd.particleIndex + idx];
DynamicBoneFast.Particle p0 = particles[hd.particleIndex + p.m_ParentIndex];
if (p0.m_ChildCount <= 1) // do not modify bone orientation if it has more than one child
{
Vector3 v;
if (idx < hd.particleLength - 1)
v = p.m_tmpLocalPos;
else
v = p.m_EndOffset; // tail particle: use its end offset as the bone axis
Vector3 v2 = p.m_Position - p0.m_Position;
Matrix4x4 m0 = Matrix4x4.TRS(p0.m_tmpWorldPos, p0.m_tmpWorldRot, Vector3.one);
Quaternion rot = Quaternion.FromToRotation(m0.MultiplyVector(v), v2);
p0.m_tmpWorldRot = rot * p0.m_tmpWorldRot;
}
// NOTE(review): this writes to index p.m_ParentIndex, not i. If a parent has
// more than one child, multiple iterations write the same element, and
// IJobParallelFor normally restricts writes to index i — confirm the
// parallel-write safety here (e.g. [NativeDisableParallelForRestriction]
// and the guarantee that shared parents are only written once).
particles[hd.particleIndex + p.m_ParentIndex] = p0;
}
}
// Job step 5: writes each particle's simulated position and cached world
// rotation back to its Transform.
public struct ApplyParticleToTransformJob : IJobParallelForTransform
{
    [ReadOnly]
    public NativeArray<Head> heads;
    [ReadOnly]
    public NativeArray<DynamicBoneFast.Particle> particles;

    public void Execute(int i, TransformAccess transform)
    {
        Head hd = heads[i / SINGLE_HEAD_PARTICLE_SIZE];
        int idx = i % SINGLE_HEAD_PARTICLE_SIZE;
        // Fixed: guard the padded (null-transform) slots with an early return,
        // consistent with PrepareParticleJob, instead of only asserting —
        // Debug.Assert is compiled out in release builds and would not stop
        // an out-of-range particle read.
        if (idx >= hd.particleLength)
        {
            return;
        }
        DynamicBoneFast.Particle p = particles[hd.particleIndex + idx];
        transform.position = p.m_Position;
        transform.rotation = p.m_tmpWorldRot;
    }
}
- 当obj之间有父子关系时,实际运行时会发生Transform相互等待,导致Job分配不均衡,这时可以在运行时用脚本将所有对象平铺
foreach (var obj in objs)
obj.SetParent(null, true);
实际测试100个对象,平铺后的Job分配
- 使用 BurstCompile 编译选项优化,需要通过 Unity Package Manager 安装 Entity 包,并修改代码属性
[BurstCompile(CompileSynchronously = true)]
struct PrepareParticleJob : IJobParallelForTransform
{
...
}
[BurstCompile(CompileSynchronously = true)]
public struct UpdateParticle1Job : IJobParallelFor
{
...
}
[BurstCompile(CompileSynchronously = true)]
public struct UpdateParticle2Job : IJobParallelFor
{
...
}
[BurstCompile(CompileSynchronously = true)]
public struct UpdateParticle3Job : IJobParallelFor
{
...
}
[BurstCompile(CompileSynchronously = true)]
public struct ApplyParticleToTransformJob : IJobParallelForTransform
{
...
}
最终测试结果
总结:
(平台:win7、i5四核3.3GHz、GTX650 2G)
- 将数据并行化,会导致原有的效率下降
- 采用并行线程分散运算,可再次提升性能
- 平铺角色对象,可避免Transform相互依赖,均衡分配Job
- 使用Burst编译可大幅度提升性能
- 还可以使用Unity.Mathematics进一步优化性能,如float3、float4、float4x4等的SIMD支持