Optimizing Dynamic Bone with the Unity Job System

The official livestream already covers the idea and the steps:
https://www.bilibili.com/video/BV1Q741177Jd
There is also a related article on Zhihu:
https://zhuanlan.zhihu.com/p/113367281

The core idea is to spread the physics-bone computation across worker threads, which is where the speed-up comes from.
Below I record the steps of my own implementation.

  1. Modify the DynamicBone.Particle definition so the struct becomes blittable and can be stored in a NativeArray (a quick blittability check is sketched after the code below).
public struct Particle
{
	// removed: public Transform m_Transform = null;
	public int m_ParentIndex;
	public float m_Damping;
	public float m_Elasticity;
	public float m_Stiffness;
	public float m_Inert;
	public float m_Radius;
	public float m_BoneLength;
	public Vector3 m_Position;
	public Vector3 m_PrevPosition;
	public Vector3 m_EndOffset;
	public Vector3 m_InitLocalPosition;
	public Quaternion m_InitLocalRotation;

	// Extra fields: temporary per-frame results written by the jobs
	public Vector3 m_tmpWorldPos;
	public Quaternion m_tmpWorldRot;
	public Vector3 m_tmpLocalPos;
	public Quaternion m_tmpLocalRot;
	public int m_ChildCount;
}
public List<Particle> m_Particles = new List<Particle>();
// Add a Transform list that replaces the removed Particle.m_Transform
public List<Transform> m_particleTransformArr = new List<Transform>();
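To double-check that the struct really is blittable (and therefore legal inside a NativeArray), a one-off assertion can be dropped into setup code, e.g.:

using Unity.Collections.LowLevel.Unsafe;

// Sanity check (my addition, not from the original post): a blittable struct contains
// no reference-type fields, which is what NativeArray<T> requires.
Debug.Assert(UnsafeUtility.IsBlittable<Particle>());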
  2. Go through every method in DynamicBone and keep the logic working, replacing p.m_Transform with m_particleTransformArr[i] and p0.m_Transform with m_particleTransformArr[p.m_ParentIndex].
    Also note that since Particle is now defined as a struct, Particle p = m_Particles[i] copies by value, so changes to p never reach the list element; the modified copy has to be written back (a short sketch of the pattern follows the snippet below):
Particle p = m_Particles[i];
p.xx = xx;
m_Particles[i] = p;
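For illustration, here is a minimal sketch of that rewrite applied to a typical per-particle loop (the loop body is representative, not the exact original method):

// Index the Transform list instead of the particle, and copy the struct back
// because Particle is now a value type.
for (int i = 0; i < m_Particles.Count; ++i)
{
    Particle p = m_Particles[i];              // value copy of the struct
    Transform t = m_particleTransformArr[i];  // replaces the removed p.m_Transform
    if (t != null)
    {
        p.m_InitLocalPosition = t.localPosition;
        p.m_InitLocalRotation = t.localRotation;
    }
    m_Particles[i] = p;                       // write the modified copy back
}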

Test the changes and profile them: at this point the code actually runs slower than before.

  3. Implement a DynamicBoneManager class as a singleton, and declare a Head type inside it that stores the per-DynamicBone data used by the parallel update.
    In Edit | Project Settings | Script Execution Order, give the manager a priority earlier than all other scripts so every DynamicBone can reach it at any time.
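As a side note (my own addition, not part of the original workflow), the same ordering can also be enforced in code with the DefaultExecutionOrder attribute instead of the settings UI:

// Hypothetical alternative: a negative order makes this script run before default-order
// scripts, so DynamicBoneManager.Instance is ready when DynamicBone components enable themselves.
[DefaultExecutionOrder(-100)]
public class DynamicBoneManager : MonoBehaviour { /* ... */ }

The manager class itself: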
public class DynamicBoneManager : MonoBehaviour
{
    public static DynamicBoneManager Instance
    {
        get
        {
            if (null == m_instance)
            {
                m_instance = GameObject.FindObjectOfType<DynamicBoneManager>();
            }
            return m_instance;
        }
    }

    private static DynamicBoneManager m_instance;

    private List<DynamicBoneFast> m_dynamicBoneList; // the managed DynamicBoneFast components; this list itself is never touched by the jobs
    
    public struct Head
    {
        public int particleIndex;
        public int particleLength;
        
        // Parameters needed by the DynamicBone update, copied from the component
        public Vector3 m_Gravity;
        public Vector3 m_LocalGravity;
        public float m_ObjectScale;
        public Vector3 m_Force;
        public Vector3 m_ObjectMove;
        public float m_Weight;
    }
    private NativeArray<Head> heads; // job data: per-DynamicBone context
    private NativeArray<DynamicBoneFast.Particle> particles; // job data: all particles of all DynamicBones, stored in fixed-size (padded) slots
    private TransformAccessArray m_particleTransformArr; // job data: the Transform of every particle of every DynamicBone

heads and particles are declared as fixed-length arrays rather than NativeList; this keeps the code simple and avoids referencing the extra Entities package.
In this layout each Head (one DynamicBone) may own a different number of particles, but every head occupies a slot of the same length in memory, so the arrays are padded.
Also note that TransformAccessArray (just like NativeList) swaps the last element into the removed slot on deletion; Unity calls this RemoveAtSwapBack.
Next, implement the add/remove operations on DynamicBoneManager.m_dynamicBoneList while keeping the element order of heads, particles and m_particleTransformArr consistent. A minimal allocation sketch comes first:
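The fixed-size containers are allocated once and released when the manager goes away. The original post does not show this part, so the following is a sketch only; the capacity values are assumptions, and the field and constant names follow the snippets in this article:

// Allocation/teardown sketch (assumed, not from the original post).
const int MAX_HEAD_LENGTH = 100;           // assumed maximum number of managed DynamicBones
const int SINGLE_HEAD_PARTICLE_SIZE = 20;  // assumed particle slot count per DynamicBone

void Awake()
{
    m_dynamicBoneList = new List<DynamicBoneFast>(MAX_HEAD_LENGTH);
    heads = new NativeArray<Head>(MAX_HEAD_LENGTH, Allocator.Persistent);
    particles = new NativeArray<DynamicBoneFast.Particle>(
        MAX_HEAD_LENGTH * SINGLE_HEAD_PARTICLE_SIZE, Allocator.Persistent);
    m_particleTransformArr = new TransformAccessArray(MAX_HEAD_LENGTH * SINGLE_HEAD_PARTICLE_SIZE);
}

void OnDestroy()
{
    m_lastJobHandle.Complete(); // make sure no job is still touching the buffers
    if (heads.IsCreated) heads.Dispose();
    if (particles.IsCreated) particles.Dispose();
    if (m_particleTransformArr.isCreated) m_particleTransformArr.Dispose();
}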

public void OnEnter(DynamicBoneFast target)
{
    if (m_dynamicBoneList.Count >= MAX_HEAD_LENGTH)
    {
        Debug.LogWarning("bone list out of range");
        return;
    }
    int idx = m_dynamicBoneList.IndexOf(target);
    if (idx != -1)
    {
        Debug.LogError("target already existed");
        return;
    }
    m_lastJobHandle.Complete(); // wait for the in-flight jobs to finish
    idx = m_dynamicBoneList.Count;
    m_dynamicBoneList.Add(target);
    Head hd = new Head();
    hd.particleIndex = idx * SINGLE_HEAD_PARTICLE_SIZE;
    hd.particleLength = Mathf.Min(SINGLE_HEAD_PARTICLE_SIZE, target.m_Particles.Count);
    hd.m_Gravity = target.m_Gravity;
    hd.m_LocalGravity = target.m_LocalGravity;
    hd.m_ObjectScale = target.m_ObjectScale;
    hd.m_Force = target.m_Force;
    hd.m_ObjectMove = target.m_ObjectMove;
    hd.m_Weight = target.m_Weight;
    heads[idx] = hd;
    NativeArray<DynamicBoneFast.Particle>.Copy(target.m_Particles.ToArray(), 0, particles, hd.particleIndex, hd.particleLength);
    for (int i = 0; i < hd.particleLength; i++)
    {
        int trans_idx = idx * SINGLE_HEAD_PARTICLE_SIZE + i;
        Debug.Assert(trans_idx == m_particleTransformArr.length); // the new transform must be appended at exactly this index
        m_particleTransformArr.Add(target.m_particleTransformArr[i]);
    }
    // Pad the unused particle transform slots so the array length stays aligned with the padded particle slots of every head
    for (int i = hd.particleLength; i < SINGLE_HEAD_PARTICLE_SIZE; i++)
    {
        m_particleTransformArr.Add(null);
    }
    Debug.Assert(m_particleTransformArr.length == m_dynamicBoneList.Count * SINGLE_HEAD_PARTICLE_SIZE);
}

public void OnExit(DynamicBoneFast target)
{
	int idx = m_dynamicBoneList.IndexOf(target);
	if (idx == -1)
    {
		Debug.LogWarning("target dose not existed");
		return;
    }
	m_lastJobHandle.Complete(); // wait for the in-flight jobs to finish
	int swap_back_idx = m_dynamicBoneList.Count - 1;
	m_dynamicBoneList[idx] = m_dynamicBoneList[swap_back_idx]; // manual RemoveAtSwapBack
    m_dynamicBoneList.RemoveAt(swap_back_idx);
	Head hd = heads[swap_back_idx];
	{
		Debug.Assert(hd.particleIndex == swap_back_idx * SINGLE_HEAD_PARTICLE_SIZE);
		hd.particleIndex = idx * SINGLE_HEAD_PARTICLE_SIZE; // recompute particleIndex for the slot it now occupies
		heads[idx] = hd;
	}
	NativeArray<DynamicBoneFast.Particle>.Copy(particles, swap_back_idx * SINGLE_HEAD_PARTICLE_SIZE, particles, idx * SINGLE_HEAD_PARTICLE_SIZE, hd.particleLength);
	// RemoveAtSwapBack in reverse order so the transform order stays consistent with particles
	for (int i = (idx + 1) * SINGLE_HEAD_PARTICLE_SIZE - 1; i >= idx * SINGLE_HEAD_PARTICLE_SIZE; i--)
    {
        m_particleTransformArr.RemoveAtSwapBack(i);
    }
    Debug.Assert(m_particleTransformArr.length == m_dynamicBoneList.Count * SINGLE_HEAD_PARTICLE_SIZE);
}

In DynamicBone's OnEnable / OnDisable, register with and unregister from the manager:

void OnEnable()
{
	// ResetParticlesPosition();
	SetupParticles();
	DynamicBoneManager.Instance.OnEnter(this);
}

void OnDisable()
{
    // InitTransforms();
    DynamicBoneManager.Instance.OnExit(this);
}
  4. Implement DynamicBoneManager.OnDrawGizmosSelected to make it easy to check that the physics bone data is correct.
void OnDrawGizmosSelected()
{
    if (!enabled || m_dynamicBoneList == null)
        return;

    Gizmos.color = Color.white;
    for (int i = 0; i < m_dynamicBoneList.Count; i++)
    {
        Head hd = heads[i];
        for (int j = 0; j < hd.particleLength; ++j)
        {
            DynamicBoneFast.Particle p = particles[hd.particleIndex + j];
            if (p.m_ParentIndex >= 0)
            {
                DynamicBoneFast.Particle p0 = particles[hd.particleIndex + p.m_ParentIndex];
                Gizmos.DrawLine(p.m_Position, p0.m_Position);
            }
            if (p.m_Radius > 0)
                Gizmos.DrawWireSphere(p.m_Position, p.m_Radius * hd.m_ObjectScale);
        }
    }
}
  5. Remove the original DynamicBone.LateUpdate and let DynamicBoneManager.LateUpdate update all bones in parallel instead, using Unity job structs to operate on heads, particles and m_particleTransformArr.
    In practice I ended up with 5 jobs:
private void LateUpdate()
{
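    // If last frame's jobs have not finished yet, skip this frame's simulation
    // instead of blocking the main thread; otherwise Complete() syncs the handle
    // before the new jobs are scheduled.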
    if (!m_lastJobHandle.IsCompleted)
    {
        return;
    }

    m_lastJobHandle.Complete();

    PrepareParticleJob prepareParticleJob = new PrepareParticleJob();
    prepareParticleJob.heads = heads;
    prepareParticleJob.particles = particles;
    JobHandle prepareParticleJobHandle = prepareParticleJob.Schedule(m_particleTransformArr);

	UpdateParticle1Job particle1Job = new UpdateParticle1Job();
    particle1Job.heads = heads;
    particle1Job.particles = particles;
    JobHandle particle1JobHandle = particle1Job.Schedule(m_dynamicBoneList.Count * SINGLE_HEAD_PARTICLE_SIZE, SINGLE_HEAD_PARTICLE_SIZE, prepareParticleJobHandle);

    UpdateParticle2Job particle2Job = new UpdateParticle2Job();
    // particle2Job.colliders = colliders;  // collider support is omitted in this simplified version (see UpdateParticle2Job)
    particle2Job.heads = heads;
    particle2Job.particles = particles;
    JobHandle particle2JobHandle = particle2Job.Schedule(m_dynamicBoneList.Count * SINGLE_HEAD_PARTICLE_SIZE, SINGLE_HEAD_PARTICLE_SIZE, particle1JobHandle);

    UpdateParticle3Job particle3Job = new UpdateParticle3Job();
    particle3Job.heads = heads;
    particle3Job.particles = particles;
    JobHandle particle3JobHandle = particle3Job.Schedule(m_dynamicBoneList.Count * SINGLE_HEAD_PARTICLE_SIZE, SINGLE_HEAD_PARTICLE_SIZE, particle2JobHandle);

    ApplyParticleToTransformJob applyToTransformJob = new ApplyParticleToTransformJob();
    applyToTransformJob.heads = heads;
    applyToTransformJob.particles = particles;
    m_lastJobHandle = applyToTransformJob.Schedule(m_particleTransformArr, particle3JobHandle);

    JobHandle.ScheduleBatchedJobs();
}

Each job is implemented as follows; the bodies are essentially the individual methods called from DynamicBone.UpdateDynamicBones, moved across.

struct PrepareParticleJob : IJobParallelForTransform
{
	[ReadOnly]
	public NativeArray<Head> heads;
	public NativeArray<DynamicBoneFast.Particle> particles;
	public void Execute(int i, TransformAccess transform)
	{
		Head hd = heads[i / SINGLE_HEAD_PARTICLE_SIZE];
		int idx = i % SINGLE_HEAD_PARTICLE_SIZE;
		if (idx >= hd.particleLength)
		{
			return;
		}
		DynamicBoneFast.Particle p = particles[hd.particleIndex + idx];
		p.m_tmpWorldPos = transform.position;
		p.m_tmpWorldRot = transform.rotation;
		p.m_tmpLocalPos = transform.localPosition;
		p.m_tmpLocalRot = transform.localRotation;
		particles[hd.particleIndex + idx] = p;
	}
}

public struct UpdateParticle1Job : IJobParallelFor
{
	[ReadOnly]
	public NativeArray<Head> heads;
	public NativeArray<DynamicBoneFast.Particle> particles;
	public void Execute(int i)
	{
		Head hd = heads[i / SINGLE_HEAD_PARTICLE_SIZE];
		int idx = i % SINGLE_HEAD_PARTICLE_SIZE;
		if (idx >= hd.particleLength)
		{
			return;
		}
		Vector3 force = hd.m_Gravity;
		// Vector3 fdir = hd.m_Gravity.normalized;
		// Vector3 rf = m_Root.TransformDirection(hd.m_LocalGravity);
		// Vector3 pf = fdir * Mathf.Max(Vector3.Dot(rf, fdir), 0);	// project current gravity to rest gravity
		// force -= pf;	// remove projected gravity
		force = (force + hd.m_Force) * hd.m_ObjectScale;
		DynamicBoneFast.Particle p = particles[hd.particleIndex + idx];
		if (p.m_ParentIndex >= 0)
		{
			// verlet integration
			Vector3 v = p.m_Position - p.m_PrevPosition;
			Vector3 rmove = hd.m_ObjectMove * p.m_Inert;
			p.m_PrevPosition = p.m_Position + rmove;
			p.m_Position += v * (1 - p.m_Damping) + force + rmove;
		}
		else
		{
			p.m_PrevPosition = p.m_Position;
			p.m_Position = p.m_tmpWorldPos;//p.m_Position = m_particleTransformArr[hd.particleIndex + idx].position;
		}
		particles[hd.particleIndex + idx] = p;
	}
}

public struct UpdateParticle2Job : IJobParallelFor
{
	[ReadOnly]
	public NativeArray<Head> heads;
	// This job reads the parent particle at an index other than the one passed to Execute,
	// so the ParallelFor safety restriction has to be lifted for this array.
	[NativeDisableParallelForRestriction]
	public NativeArray<DynamicBoneFast.Particle> particles;
	public void Execute(int i)
	{
		Head hd = heads[i / SINGLE_HEAD_PARTICLE_SIZE];
		int idx = i % SINGLE_HEAD_PARTICLE_SIZE;
		if (idx <= 0 || idx >= hd.particleLength)
		{
			return;
		}
		DynamicBoneFast.Particle p = particles[hd.particleIndex + idx];
		DynamicBoneFast.Particle p0 = particles[hd.particleIndex + p.m_ParentIndex];
		Matrix4x4 m0 = Matrix4x4.TRS(p0.m_tmpWorldPos, p0.m_tmpWorldRot, Vector3.one);
		float restLen;
		if (idx < hd.particleLength - 1)
			restLen = (p0.m_tmpWorldPos - p.m_tmpWorldPos).magnitude;
		else
			restLen = m0.MultiplyVector(p.m_EndOffset).magnitude;
		// keep shape
		float stiffness = Mathf.Lerp(1.0f, p.m_Stiffness, hd.m_Weight);
		if (stiffness > 0 || p.m_Elasticity > 0)
		{
			m0.SetColumn(3, p0.m_Position);
			Vector3 restPos;
			if (idx < hd.particleLength - 1)
				restPos = m0.MultiplyPoint3x4(p.m_tmpLocalPos);
			else
				restPos = m0.MultiplyPoint3x4(p.m_EndOffset);
			Vector3 d = restPos - p.m_Position;
			p.m_Position += d * p.m_Elasticity;
			if (stiffness > 0)
			{
				d = restPos - p.m_Position;
				float len = d.magnitude;
				float maxlen = restLen * (1 - stiffness) * 2;
				if (len > maxlen)
					p.m_Position += d * ((len - maxlen) / len);
			}
		}
		// The collider code needs extra bookkeeping to keep in sync, so it is left out here
		//if (hd.colliderIndex >= 0)
		//{
		//	SphereCollider sphereCollider = colliders[hd.colliderIndex];
		//	if (sphereCollider.h <= 0)
		//	{
		//		if (sphereCollider.m_Bound == (int)DynamicBoneColliderBase.Bound.Outside)
		//			DynamicBoneCollider.OutsideSphere(ref p.m_Position, p.m_Radius, sphereCollider.c0, sphereCollider.radius);
		//		else
		//			DynamicBoneCollider.InsideSphere(ref p.m_Position, p.m_Radius, sphereCollider.c0, sphereCollider.radius);
		//	}
		//	else
		//	{
		//		if (sphereCollider.m_Bound == (int)DynamicBoneColliderBase.Bound.Outside)
		//			DynamicBoneCollider.OutsideCapsule(ref p.m_Position, p.m_Radius, sphereCollider.c0, sphereCollider.c1, sphereCollider.radius);
		//		else
		//			DynamicBoneCollider.InsideCapsule(ref p.m_Position, p.m_Radius, sphereCollider.c0, sphereCollider.c1, sphereCollider.radius);
		//	}
		//}
		// keep length
		Vector3 dd = p0.m_Position - p.m_Position;
		float leng = dd.magnitude;
		if (leng > 0)
			p.m_Position += dd * ((leng - restLen) / leng);
		particles[hd.particleIndex + idx] = p;
	}
}

public struct UpdateParticle3Job : IJobParallelFor
{
	[ReadOnly]
	public NativeArray<Head> heads;
	// This job writes the parent particle at an index other than the one passed to Execute,
	// so the ParallelFor safety restriction has to be lifted for this array.
	[NativeDisableParallelForRestriction]
	public NativeArray<DynamicBoneFast.Particle> particles;
	public void Execute(int i)
	{
		Head hd = heads[i / SINGLE_HEAD_PARTICLE_SIZE];
		int idx = i % SINGLE_HEAD_PARTICLE_SIZE;
		if (idx <= 0 || idx >= hd.particleLength)
		{
			return;
		}
		DynamicBoneFast.Particle p = particles[hd.particleIndex + idx];
		DynamicBoneFast.Particle p0 = particles[hd.particleIndex + p.m_ParentIndex];
		if (p0.m_ChildCount <= 1)        // do not modify the bone orientation if the parent has more than one child
		{
			Vector3 v;
			if (idx < hd.particleLength - 1)
				v = p.m_tmpLocalPos;
			else
				v = p.m_EndOffset;
			Vector3 v2 = p.m_Position - p0.m_Position;
			Matrix4x4 m0 = Matrix4x4.TRS(p0.m_tmpWorldPos, p0.m_tmpWorldRot, Vector3.one);
			Quaternion rot = Quaternion.FromToRotation(m0.MultiplyVector(v), v2);
			p0.m_tmpWorldRot = rot * p0.m_tmpWorldRot;
		}
		particles[hd.particleIndex + p.m_ParentIndex] = p0;
	}
}

public struct ApplyParticleToTransformJob : IJobParallelForTransform
{
	[ReadOnly]
	public NativeArray<Head> heads;
	[ReadOnly]
	public NativeArray<DynamicBoneFast.Particle> particles;
	public void Execute(int i, TransformAccess transform)
	{
		Head hd = heads[i / SINGLE_HEAD_PARTICLE_SIZE];
		int idx = i % SINGLE_HEAD_PARTICLE_SIZE;
		Debug.Assert(idx < hd.particleLength);
		DynamicBoneFast.Particle p = particles[hd.particleIndex + idx];
		transform.position = p.m_Position;
		transform.rotation = p.m_tmpWorldRot;
	}
}
  6. When the objects have parent-child relationships, their Transforms end up waiting on each other at runtime and the jobs are distributed unevenly. In that case all objects can be flattened with a script at runtime (a fuller sketch follows the snippet below):
foreach (var obj in objs)
    obj.SetParent(null, true);
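A fuller sketch of that idea (my own helper, not from the original post), collecting the child roots first so re-parenting does not disturb the iteration:

using System.Collections.Generic;
using UnityEngine;

// Hypothetical startup helper: detach every child of this object so the character
// Transforms no longer depend on a shared parent and the transform jobs spread evenly.
public class FlattenHierarchy : MonoBehaviour
{
    void Start()
    {
        var objs = new List<Transform>();
        foreach (Transform child in transform)  // snapshot first; SetParent mutates the hierarchy
            objs.Add(child);
        foreach (var obj in objs)
            obj.SetParent(null, true);          // worldPositionStays = true keeps the world pose
    }
}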

In an actual test with 100 objects, the job distribution after flattening looked much more balanced.
  7. Optimize by compiling the jobs with Burst. Install the Entities package through the Unity Package Manager and add the [BurstCompile] attribute to each job:

[BurstCompile(CompileSynchronously = true)]
struct PrepareParticleJob : IJobParallelForTransform
{
	...
}

[BurstCompile(CompileSynchronously = true)]
public struct UpdateParticle1Job : IJobParallelFor
{
	...
}

[BurstCompile(CompileSynchronously = true)]
public struct UpdateParticle2Job : IJobParallelFor
{
	...
}

[BurstCompile(CompileSynchronously = true)]
public struct UpdateParticle3Job : IJobParallelFor
{
	...
}

[BurstCompile(CompileSynchronously = true)]
public struct ApplyParticleToTransformJob : IJobParallelForTransform
{
	...
}

Final test results
(platform: Win7, quad-core i5 @ 3.3 GHz, GTX 650 2 GB)

Summary:

  1. Restructuring the data for parallel processing makes the original single-threaded path slower.
  2. Spreading the computation across worker threads then raises performance again.
  3. Flattening the character objects removes the Transform dependencies, so jobs are balanced evenly across workers.
  4. Burst compilation gives a further large performance boost.
  5. Unity.Mathematics (float3, float4, float4x4 and friends, with SIMD support) can be used to optimize further; a sketch follows below.
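For point 5, a minimal sketch of what that migration looks like (assuming the Unity.Mathematics package; the field names mirror the Particle struct above):

using Unity.Mathematics;

// Sketch only: moving the hot-path fields to math types gives Burst SIMD-friendly data.
public struct ParticleSimd
{
    public float3 m_Position;
    public float3 m_PrevPosition;
    public quaternion m_tmpWorldRot;
}

// The Vector3/Quaternion math maps almost one-to-one, e.g. the Verlet step stays
//   float3 v = p.m_Position - p.m_PrevPosition;
//   p.m_Position += v * (1 - p.m_Damping) + force + rmove;
// while rotation composition becomes math.mul(rot, p0.m_tmpWorldRot) instead of rot * p0.m_tmpWorldRot.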