(5) Multi-Object Resource Binding
http://github.com/Orillusion/orillusion-webgpu-samples
/src/cubes.ts
/src/cubesOffsets.ts
/src/cubesDynamicOffsets.ts
/src/cubesInstance.ts
demo1:rotatingCube.ts
import basicVert from './shaders/basic.vert.wgsl?raw'
import positionFrag from './shaders/position.frag.wgsl?raw'
import * as cube from './util/cube'
import { getMvpMatrix } from './util/math'
// initialize webgpu device & config canvas context
async function initWebGPU(canvas: HTMLCanvasElement) {
if(!navigator.gpu)
throw new Error('Not Support WebGPU')
const adapter = await navigator.gpu.requestAdapter()
if (!adapter)
throw new Error('No Adapter Found')
const device = await adapter.requestDevice()
const context = canvas.getContext('webgpu') as GPUCanvasContext
const format = navigator.gpu.getPreferredCanvasFormat ? navigator.gpu.getPreferredCanvasFormat() : context.getPreferredFormat(adapter)
const devicePixelRatio = window.devicePixelRatio || 1
canvas.width = canvas.clientWidth * devicePixelRatio
canvas.height = canvas.clientHeight * devicePixelRatio
const size = {width: canvas.width, height: canvas.height}
context.configure({
device, format,
// prevent chrome warning after v102
alphaMode: 'opaque'
})
return {device, context, format, size}
}
// create pipeline & buffers
async function initPipeline(device: GPUDevice, format: GPUTextureFormat, size: {width:number, height:number}) {
const pipeline = await device.createRenderPipelineAsync({
label: 'Basic Pipline',
layout: 'auto',
vertex: {
module: device.createShaderModule({
code: basicVert,
}),
entryPoint: 'main',
buffers: [{
arrayStride: 5 * 4, // 3 position 2 uv,
attributes: [
{
// position
shaderLocation: 0,
offset: 0,
format: 'float32x3',
},
{
// uv
shaderLocation: 1,
offset: 3 * 4,
format: 'float32x2',
}
]
}]
},
fragment: {
module: device.createShaderModule({
code: positionFrag,
}),
entryPoint: 'main',
targets: [
{
format: format
}
]
},
primitive: {
topology: 'triangle-list',
// Culling backfaces pointing away from the camera
cullMode: 'back',
frontFace: 'ccw'
},
// Enable depth testing since we have z-level positions
// Fragment closest to the camera is rendered in front
depthStencil: {
depthWriteEnabled: true,
depthCompare: 'less',
format: 'depth24plus',
}
} as GPURenderPipelineDescriptor)
// create depthTexture for renderPass
const depthTexture = device.createTexture({
size, format: 'depth24plus',
usage: GPUTextureUsage.RENDER_ATTACHMENT,
})
const depthView = depthTexture.createView()
// create vertex buffer
const vertexBuffer = device.createBuffer({
label: 'GPUBuffer store vertex',
size: cube.vertex.byteLength,
usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST,
})
device.queue.writeBuffer(vertexBuffer, 0, cube.vertex)
// create a mvp matrix buffer
const mvpBuffer = device.createBuffer({
label: 'GPUBuffer store 4x4 matrix',
size: 4 * 4 * 4, // 4 x 4 x float32
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
})
// create a uniform group for Matrix
const uniformGroup = device.createBindGroup({
label: 'Uniform Group with Matrix',
layout: pipeline.getBindGroupLayout(0),
entries: [
{
binding: 0,
resource: {
buffer: mvpBuffer
}
}
]
})
// return all vars
return { pipeline, vertexBuffer, mvpBuffer, uniformGroup, depthTexture, depthView }
}
// create & submit device commands
function draw(
device: GPUDevice,
context: GPUCanvasContext,
pipelineObj: {
pipeline: GPURenderPipeline
vertexBuffer: GPUBuffer
mvpBuffer: GPUBuffer
uniformGroup: GPUBindGroup
depthView: GPUTextureView
}
) {
// start encoder
const commandEncoder = device.createCommandEncoder()
const renderPassDescriptor: GPURenderPassDescriptor = {
colorAttachments: [
{
view: context.getCurrentTexture().createView(),
clearValue: { r: 0, g: 0, b: 0, a: 1.0 },
loadOp: 'clear',
storeOp: 'store'
}
],
depthStencilAttachment: {
view: pipelineObj.depthView,
depthClearValue: 1.0,
depthLoadOp: 'clear',
depthStoreOp: 'store',
}
}
const passEncoder = commandEncoder.beginRenderPass(renderPassDescriptor)
passEncoder.setPipeline(pipelineObj.pipeline)
// set vertex
passEncoder.setVertexBuffer(0, pipelineObj.vertexBuffer)
// set uniformGroup
passEncoder.setBindGroup(0, pipelineObj.uniformGroup)
// draw vertex count of cube
passEncoder.draw(cube.vertexCount)
passEncoder.end()
// WebGPU runs in a separate process; all commands are executed only after submit()
device.queue.submit([commandEncoder.finish()])
}
async function run(){
const canvas = document.querySelector('canvas')
if (!canvas)
throw new Error('No Canvas')
const {device, context, format, size} = await initWebGPU(canvas)
const pipelineObj = await initPipeline(device, format, size)
// default state
let aspect = size.width/ size.height
const position = {x:0, y:0, z: -5}
const scale = {x:1, y:1, z:1}
const rotation = {x: 0, y: 0, z:0}
// start loop
function frame(){
// rotate by time, and update transform matrix
const now = Date.now() / 1000
rotation.x = Math.sin(now)
rotation.y = Math.cos(now)
const mvpMatrix = getMvpMatrix(aspect, position, rotation, scale)
device.queue.writeBuffer(
pipelineObj.mvpBuffer,
0,
mvpMatrix.buffer
)
// then draw
draw(device, context, pipelineObj)
requestAnimationFrame(frame)
}
frame()
// re-configure context on resize
window.addEventListener('resize', ()=>{
size.width = canvas.width = canvas.clientWidth * devicePixelRatio
size.height = canvas.height = canvas.clientHeight * devicePixelRatio
// don't need to recall context.configure() after v104
// re-create depth texture
pipelineObj.depthTexture.destroy()
pipelineObj.depthTexture = device.createTexture({
size, format: 'depth24plus',
usage: GPUTextureUsage.RENDER_ATTACHMENT,
})
pipelineObj.depthView = pipelineObj.depthTexture.createView()
// update aspect
aspect = size.width/ size.height
})
}
run()
For a single object:
We create one mvpBuffer and one matching bindGroup that binds it. In the render pass, setBindGroup passes that mvpBuffer into the vertex shader, and the cube is drawn with it.
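The shader side of this binding lives in basic.vert.wgsl, which is not reproduced above. A minimal sketch of what it declares (entry point and attribute locations match the pipeline descriptor; treat the exact struct and field names as assumptions), written as a raw WGSL string the way the demos import '?raw' files:
// Illustrative WGSL for demo1/demo2: a single 4x4 MVP matrix bound as a uniform.
const basicVertSketch = /* wgsl */ `
@group(0) @binding(0) var<uniform> mvpMatrix : mat4x4<f32>;

struct VertexOutput {
    @builtin(position) position : vec4<f32>,
    @location(0) fragUV : vec2<f32>,
    @location(1) fragPosition : vec4<f32>
};

@vertex
fn main(@location(0) position : vec3<f32>, @location(1) uv : vec2<f32>) -> VertexOutput {
    var output : VertexOutput;
    output.position = mvpMatrix * vec4<f32>(position, 1.0); // apply the MVP matrix from the uniform buffer
    output.fragUV = uv;
    output.fragPosition = 0.5 * (vec4<f32>(position, 1.0) + vec4<f32>(1.0));
    return output;
}`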
demo2:cube.ts
import basicVert from './shaders/basic.vert.wgsl?raw'
import positionFrag from './shaders/position.frag.wgsl?raw'
import * as cube from './util/cube'
import { getMvpMatrix } from './util/math'
// initialize webgpu device & config canvas context
async function initWebGPU(canvas: HTMLCanvasElement) {
if(!navigator.gpu)
throw new Error('Not Support WebGPU')
const adapter = await navigator.gpu.requestAdapter()
if (!adapter)
throw new Error('No Adapter Found')
const device = await adapter.requestDevice()
const context = canvas.getContext('webgpu') as GPUCanvasContext
const format = navigator.gpu.getPreferredCanvasFormat ? navigator.gpu.getPreferredCanvasFormat() : context.getPreferredFormat(adapter)
const devicePixelRatio = window.devicePixelRatio || 1
canvas.width = canvas.clientWidth * devicePixelRatio
canvas.height = canvas.clientHeight * devicePixelRatio
const size = {width: canvas.width, height: canvas.height}
context.configure({
device, format,
// prevent chrome warning after v102
alphaMode: 'opaque'
})
return {device, context, format, size}
}
// create pipeline & buffers
async function initPipeline(device: GPUDevice, format: GPUTextureFormat, size:{width:number, height:number}) {
const pipeline = await device.createRenderPipelineAsync({
label: 'Basic Pipline',
layout: 'auto',
vertex: {
module: device.createShaderModule({
code: basicVert,
}),
entryPoint: 'main',
buffers: [{
arrayStride: 5 * 4, // 3 position 2 uv,
attributes: [
{
// position
shaderLocation: 0,
offset: 0,
format: 'float32x3',
},
{
// uv
shaderLocation: 1,
offset: 3 * 4,
format: 'float32x2',
}
]
}]
},
fragment: {
module: device.createShaderModule({
code: positionFrag,
}),
entryPoint: 'main',
targets: [
{
format: format
}
]
},
primitive: {
topology: 'triangle-list',
// Culling backfaces pointing away from the camera
cullMode: 'back'
},
// Enable depth testing since we have z-level positions
// Fragment closest to the camera is rendered in front
depthStencil: {
depthWriteEnabled: true,
depthCompare: 'less',
format: 'depth24plus',
}
} as GPURenderPipelineDescriptor)
// create depthTexture for renderPass
const depthTexture = device.createTexture({
size, format: 'depth24plus',
usage: GPUTextureUsage.RENDER_ATTACHMENT,
})
const depthView = depthTexture.createView()
// create vertex buffer
const vertexBuffer = device.createBuffer({
label: 'GPUBuffer store vertex',
size: cube.vertex.byteLength,
usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST,
})
device.queue.writeBuffer(vertexBuffer, 0, cube.vertex)
// create a 4x4 mvp matrix1
const mvpBuffer1 = device.createBuffer({
label: 'GPUBuffer store 4x4 matrix1',
size: 4 * 4 * 4, // 4 x 4 x float32
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
})
// create a uniform group for Matrix1
const group1 = device.createBindGroup({
label: 'Uniform Group with matrix1',
layout: pipeline.getBindGroupLayout(0),
entries: [
{
binding: 0,
resource: {
buffer: mvpBuffer1
}
}
]
})
// create a 4x4 mvp matrix2
const mvpBuffer2 = device.createBuffer({
label: 'GPUBuffer store 4x4 matrix2',
size: 4 * 4 * 4, // 4 x 4 x float32
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
})
// create a uniform group for Matrix2
const group2 = device.createBindGroup({
label: 'Uniform Group with matrix2',
layout: pipeline.getBindGroupLayout(0),
entries: [
{
binding: 0,
resource: {
buffer: mvpBuffer2
}
}
]
})
// return all vars
return {pipeline, depthTexture, depthView, vertexBuffer, mvpBuffer1, mvpBuffer2, group1, group2}
}
// create & submit device commands
function draw(
device: GPUDevice,
context: GPUCanvasContext,
pipelineObj: {
pipeline: GPURenderPipeline,
vertexBuffer: GPUBuffer,
mvpBuffer1: GPUBuffer,
mvpBuffer2: GPUBuffer,
group1: GPUBindGroup,
group2: GPUBindGroup,
depthView: GPUTextureView
}
) {
// start encoder
const commandEncoder = device.createCommandEncoder()
const renderPassDescriptor: GPURenderPassDescriptor = {
colorAttachments: [
{
view: context.getCurrentTexture().createView(),
clearValue: { r: 0, g: 0, b: 0, a: 1.0 },
loadOp: 'clear',
storeOp: 'store'
}
],
depthStencilAttachment: {
view: pipelineObj.depthView,
depthClearValue: 1.0,
depthLoadOp: 'clear',
depthStoreOp: 'store',
}
}
const passEncoder = commandEncoder.beginRenderPass(renderPassDescriptor)
passEncoder.setPipeline(pipelineObj.pipeline)
// set vertex
passEncoder.setVertexBuffer(0, pipelineObj.vertexBuffer)
{
// draw first cube
passEncoder.setBindGroup(0, pipelineObj.group1)
passEncoder.draw(cube.vertexCount)
// draw second cube
passEncoder.setBindGroup(0, pipelineObj.group2)
passEncoder.draw(cube.vertexCount)
}
passEncoder.end()
// WebGPU runs in a separate process; all commands are executed only after submit()
device.queue.submit([commandEncoder.finish()])
}
async function run(){
const canvas = document.querySelector('canvas')
if (!canvas)
throw new Error('No Canvas')
const {device, context, format, size} = await initWebGPU(canvas)
const pipelineObj = await initPipeline(device, format, size)
// default state
let aspect = size.width/ size.height
const position1 = {x:2, y:0, z: -8}
const rotation1 = {x: 0, y: 0, z:0}
const scale1 = {x:1, y:1, z: 1}
const position2 = {x:-2, y:0, z: -8}
const rotation2 = {x: 0, y: 0, z:0}
const scale2 = {x:1, y:1, z: 1}
// start loop
function frame(){
// first, update both transform matrices
const now = Date.now() / 1000
{
// first cube
rotation1.x = Math.sin(now)
rotation1.y = Math.cos(now)
const mvpMatrix1 = getMvpMatrix(aspect, position1, rotation1, scale1)
device.queue.writeBuffer(
pipelineObj.mvpBuffer1,
0,
mvpMatrix1
)
}
{
// second cube
rotation2.x = Math.cos(now)
rotation2.y = Math.sin(now)
const mvpMatrix2 = getMvpMatrix(aspect, position2, rotation2, scale2)
device.queue.writeBuffer(
pipelineObj.mvpBuffer2,
0,
mvpMatrix2
)
}
// then draw
draw(device, context, pipelineObj)
requestAnimationFrame(frame)
}
frame()
// re-configure context on resize
window.addEventListener('resize', ()=>{
size.width = canvas.width = canvas.clientWidth * devicePixelRatio
size.height = canvas.height = canvas.clientHeight * devicePixelRatio
// don't need to recall context.configure() after v104
// re-create depth texture
pipelineObj.depthTexture.destroy()
pipelineObj.depthTexture = device.createTexture({
size, format: 'depth24plus',
usage: GPUTextureUsage.RENDER_ATTACHMENT,
})
pipelineObj.depthView = pipelineObj.depthTexture.createView()
// update aspect
aspect = size.width/ size.height
})
}
run()
For two objects, the single-object setup is simply repeated twice:
In initPipeline we create two mvpBuffers (same size) and two corresponding bind groups (identical structure), and return them all.
Accordingly, the user-facing logic in run() keeps a position, rotation, and scale for each of the two objects, computes each object's MVP matrix every frame, and writes it into the matching mvpBuffer. To keep the code tidy, the MVP computation is factored out into the getMvpMatrix helper.
In draw(), both cubes share the same vertex data, so setVertexBuffer is called only once; we then set each object's bindGroup in turn and issue draw() twice.
Why does this produce two cubes in different states?
In the vertex shader, an object's final state depends only on the incoming vertex data and the mvpMatrix that is bound. Re-setting a different bindGroup effectively swaps in a different mvpMatrix input, so when the pipeline runs again it naturally renders the state described by the other matrix.
In real scenes, if the models differ we can also switch vertex buffers (vertexBuffer1, vertexBuffer2); if even the pipelines differ, we can likewise switch between pipeline1 and pipeline2.
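The same pattern generalizes to N objects. A minimal sketch (the helper name and shape are illustrative, not part of the samples) that keeps one uniform buffer and one bind group per object, exactly as demo2 does for two:
// Hypothetical helper generalizing demo2: one 4x4-matrix uniform buffer plus one bind group per object.
function createObjectResources(device: GPUDevice, pipeline: GPURenderPipeline, count: number) {
    const objects: { mvpBuffer: GPUBuffer; group: GPUBindGroup }[] = []
    for (let i = 0; i < count; i++) {
        const mvpBuffer = device.createBuffer({
            size: 4 * 4 * 4, // one 4x4 float32 matrix
            usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
        })
        const group = device.createBindGroup({
            layout: pipeline.getBindGroupLayout(0),
            entries: [{ binding: 0, resource: { buffer: mvpBuffer } }],
        })
        objects.push({ mvpBuffer, group })
    }
    return objects
}
// In draw(): set the shared vertex buffer once, then for each object call setBindGroup(0, obj.group) and draw().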
Switching pipeline configuration or data through the API is still relatively expensive, and the different kinds of switches do not cost the same:
setPipeline is the most expensive. It swaps the entire vertex shader and fragment shader plus the depth-test, primitive-assembly, and blending configuration, so pipelines should be reused as much as possible.
setVertexBuffer comes next. The vertex data has to be interpreted and converted according to the pipeline's vertex layout, and the shader also has to set up corresponding local variables.
setBindGroup is the cheapest. A bind group only brings in external or global resources and does not touch the pipeline configuration itself; in most cases it amounts to copying or redirecting a memory pointer, which is very fast.
So when drawing with a given pipeline, it is best to draw every object that uses that pipeline before switching to the next one. Likewise, when drawing a given vertex buffer, draw all objects that share it before switching to the next model. Bind groups ultimately point at different buffers, and although switching them is cheap, each switch still re-targets a memory pointer, so the same principle applies: reuse them where you can.
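A minimal sketch of that ordering, assuming a hypothetical list of drawables that each carry a pipeline, vertex buffer, and bind group (none of these names come from the samples):
// Hypothetical drawable shape, for illustration only.
interface Drawable {
    pipeline: GPURenderPipeline
    vertexBuffer: GPUBuffer
    group: GPUBindGroup
    vertexCount: number
}
// Issue draws so the expensive state changes happen as rarely as possible.
function drawSorted(pass: GPURenderPassEncoder, drawables: Drawable[]) {
    let lastPipeline: GPURenderPipeline | null = null
    let lastVertexBuffer: GPUBuffer | null = null
    for (const d of drawables) { // assume drawables is pre-sorted by pipeline, then vertexBuffer
        if (d.pipeline !== lastPipeline) {
            pass.setPipeline(d.pipeline)            // most expensive switch
            lastPipeline = d.pipeline
        }
        if (d.vertexBuffer !== lastVertexBuffer) {
            pass.setVertexBuffer(0, d.vertexBuffer) // medium cost
            lastVertexBuffer = d.vertexBuffer
        }
        pass.setBindGroup(0, d.group)               // cheapest switch
        pass.draw(d.vertexCount)
    }
}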
demo3:cubesOffsets.ts
import basicVert from './shaders/basic.vert.wgsl?raw'
import positionFrag from './shaders/position.frag.wgsl?raw'
import * as cube from './util/cube'
import { getMvpMatrix } from './util/math'
// initialize webgpu device & config canvas context
async function initWebGPU(canvas: HTMLCanvasElement) {
if(!navigator.gpu)
throw new Error('Not Support WebGPU')
const adapter = await navigator.gpu.requestAdapter()
if (!adapter)
throw new Error('No Adapter Found')
const device = await adapter.requestDevice()
const context = canvas.getContext('webgpu') as GPUCanvasContext
const format = navigator.gpu.getPreferredCanvasFormat ? navigator.gpu.getPreferredCanvasFormat() : context.getPreferredFormat(adapter)
const devicePixelRatio = window.devicePixelRatio || 1
canvas.width = canvas.clientWidth * devicePixelRatio
canvas.height = canvas.clientHeight * devicePixelRatio
const size = {width: canvas.width, height: canvas.height}
context.configure({
device, format,
// prevent chrome warning after v102
alphaMode: 'opaque'
})
return {device, context, format, size}
}
// create pipeline & buffers
async function initPipeline(device: GPUDevice, format: GPUTextureFormat, size:{width:number, height:number}) {
const pipeline = await device.createRenderPipelineAsync({
label: 'Basic Pipline',
layout: 'auto',
vertex: {
module: device.createShaderModule({
code: basicVert,
}),
entryPoint: 'main',
buffers: [{
arrayStride: 5 * 4, // 3 position 2 uv,
attributes: [
{
// position
shaderLocation: 0,
offset: 0,
format: 'float32x3',
},
{
// uv
shaderLocation: 1,
offset: 3 * 4,
format: 'float32x2',
}
]
}]
},
fragment: {
module: device.createShaderModule({
code: positionFrag,
}),
entryPoint: 'main',
targets: [
{
format: format
}
]
},
primitive: {
topology: 'triangle-list',
// Culling backfaces pointing away from the camera
cullMode: 'back'
},
// Enable depth testing since we have z-level positions
// Fragment closest to the camera is rendered in front
depthStencil: {
depthWriteEnabled: true,
depthCompare: 'less',
format: 'depth24plus',
}
} as GPURenderPipelineDescriptor)
// create depthTexture for renderPass
const depthTexture = device.createTexture({
size, format: 'depth24plus',
usage: GPUTextureUsage.RENDER_ATTACHMENT,
})
const depthView = depthTexture.createView()
// create vertex buffer
const vertexBuffer = device.createBuffer({
label: 'GPUBuffer store vertex',
size: cube.vertex.byteLength,
usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST,
})
device.queue.writeBuffer(vertexBuffer, 0, cube.vertex)
// create a buffer for 2 mvp matrix
const mvpBuffer = device.createBuffer({
label: 'GPUBuffer store 2 4*4 matrix',
size: 256 * 2, // 2 matrix with 256-byte aligned, or 256 + 64
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
})
// create two groups pointing at different offsets within the same buffer
const group1 = device.createBindGroup({
layout: pipeline.getBindGroupLayout(0),
entries: [
{
binding: 0,
resource: {
buffer: mvpBuffer,
offset: 0,
size: 4 * 16
}
}
]
})
// group with 256-byte offset
const group2 = device.createBindGroup({
layout: pipeline.getBindGroupLayout(0),
entries: [
{
binding: 0,
resource: {
buffer: mvpBuffer,
offset: 256, // must be 256-byte aligned
size: 4 * 16
}
}
]
})
// return all vars
return {pipeline, vertexBuffer, mvpBuffer, group1, group2, depthTexture, depthView}
}
// create & submit device commands
function draw(
device: GPUDevice,
context: GPUCanvasContext,
pipelineObj: {
pipeline: GPURenderPipeline,
vertexBuffer: GPUBuffer,
mvpBuffer: GPUBuffer,
group1: GPUBindGroup,
group2: GPUBindGroup,
depthView: GPUTextureView
}
) {
// start encoder
const commandEncoder = device.createCommandEncoder()
const renderPassDescriptor: GPURenderPassDescriptor = {
colorAttachments: [
{
view: context.getCurrentTexture().createView(),
clearValue: { r: 0, g: 0, b: 0, a: 1.0 },
loadOp: 'clear',
storeOp: 'store'
}
],
depthStencilAttachment: {
view: pipelineObj.depthView,
depthClearValue: 1.0,
depthLoadOp: 'clear',
depthStoreOp: 'store',
}
}
const passEncoder = commandEncoder.beginRenderPass(renderPassDescriptor)
passEncoder.setPipeline(pipelineObj.pipeline)
// set vertex
passEncoder.setVertexBuffer(0, pipelineObj.vertexBuffer)
{
// draw first cube
passEncoder.setBindGroup(0, pipelineObj.group1)
passEncoder.draw(cube.vertexCount)
// draw second cube
passEncoder.setBindGroup(0, pipelineObj.group2)
passEncoder.draw(cube.vertexCount)
}
passEncoder.end()
// WebGPU runs in a separate process; all commands are executed only after submit()
device.queue.submit([commandEncoder.finish()])
}
async function run(){
const canvas = document.querySelector('canvas')
if (!canvas)
throw new Error('No Canvas')
const {device, context, format, size} = await initWebGPU(canvas)
const pipelineObj = await initPipeline(device, format, size)
// default state
let aspect = size.width/ size.height
const position1 = {x:2, y:0, z: -8}
const rotation1 = {x: 0, y: 0, z:0}
const scale1 = {x:1, y:1, z: 1}
const position2 = {x:-2, y:0, z: -8}
const rotation2 = {x: 0, y: 0, z:0}
const scale2 = {x:1, y:1, z: 1}
// start loop
function frame(){
// first, update both transform matrices
const now = Date.now() / 1000
{
// first cube
rotation1.x = Math.sin(now)
rotation1.y = Math.cos(now)
const mvpMatrix1 = getMvpMatrix(aspect, position1, rotation1, scale1)
device.queue.writeBuffer(
pipelineObj.mvpBuffer,
0,
mvpMatrix1
)
}
{
// second cube with 256-byte offset
rotation2.x = Math.cos(now)
rotation2.y = Math.sin(now)
const mvpMatrix2 = getMvpMatrix(aspect, position2, rotation2, scale2)
device.queue.writeBuffer(
pipelineObj.mvpBuffer,
256, // aligned at 256-byte
mvpMatrix2
)
}
draw(device, context, pipelineObj)
requestAnimationFrame(frame)
}
frame()
// re-configure context on resize
window.addEventListener('resize', ()=>{
size.width = canvas.width = canvas.clientWidth * devicePixelRatio
size.height = canvas.height = canvas.clientHeight * devicePixelRatio
// don't need to recall context.configure() after v104
// re-create depth texture
pipelineObj.depthTexture.destroy()
pipelineObj.depthTexture = device.createTexture({
size, format: 'depth24plus',
usage: GPUTextureUsage.RENDER_ATTACHMENT,
})
pipelineObj.depthView = pipelineObj.depthTexture.createView()
// update aspect
aspect = size.width/ size.height
})
}
run()
Instead of two separate mvpBuffers, this demo uses one large buffer that stores both 4x4 matrices. Two bind groups are still created, but they bind the same mvpBuffer; each uses an offset and size to read its own slice, so the two groups point at different locations inside one buffer. When setBindGroup runs in the render pass, the GPU therefore never has to locate a different mvpBuffer: it just moves a pointer by the offset recorded in each group.
The difference from the previous demo: several buffers are merged into one contiguous buffer. Creating the groups is no cheaper or simpler; they merely point at different offsets of the same buffer. And because of WebGPU's alignment rules, the memory actually used goes up: for a 4x4 MVP matrix we consume more than 4x the video memory, and for something as small as a 1x4 color it would be 16x. When JS writes data into the GPU buffer it must also respect the 256-byte alignment, so the JS side becomes somewhat more complex as well.
Chrome's built-in frame-rate overlay: DevTools → Rendering → Frame Rendering Stats.
Notes on this style of setBindGroup:
(size: 256 * 2, or 256 + 64) When several groups bind into the same buffer, alignment space must be left between them. The exact value can be queried from the adapter/device limits; in the current WebGPU version the minimum alignment for both uniform and storage buffers is 256 bytes. So although a 4x4 matrix is only 64 bytes, the mvpBuffer must reserve at least 256 bytes per slot; the last slot can be smaller, since no group follows it.
(offset: 0, 256) Any further groups are placed at successive offsets, each aligned to 256 bytes.
(device.queue.writeBuffer … 256 …) Correspondingly, writes into the buffer must also land on those 256-byte-aligned offsets.
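Rather than hard-coding 256, the alignment can be read from the device limits. A minimal sketch (the limit name comes from the WebGPU spec; the helper itself is illustrative):
// Illustrative helper: pack `numObjects` 4x4 matrices into one uniform buffer at aligned offsets.
function createAlignedMvpBuffer(device: GPUDevice, numObjects: number) {
    const align = device.limits.minUniformBufferOffsetAlignment // typically 256
    const matrixSize = 4 * 4 * 4                                // 64 bytes per 4x4 float32 matrix
    const stride = Math.ceil(matrixSize / align) * align        // round up to the required alignment
    const buffer = device.createBuffer({
        size: stride * (numObjects - 1) + matrixSize,           // e.g. 256 + 64 for two objects
        usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
    })
    return { buffer, stride } // object i binds and writes at offset i * stride
}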
demo4:cubesDynamicOffsets.ts
import basicVert from './shaders/basic.vert.wgsl?raw'
import positionFrag from './shaders/position.frag.wgsl?raw'
import * as cube from './util/cube'
import { getMvpMatrix } from './util/math'
// initialize webgpu device & config canvas context
async function initWebGPU(canvas: HTMLCanvasElement) {
if(!navigator.gpu)
throw new Error('Not Support WebGPU')
const adapter = await navigator.gpu.requestAdapter()
if (!adapter)
throw new Error('No Adapter Found')
const device = await adapter.requestDevice()
const context = canvas.getContext('webgpu') as GPUCanvasContext
const format = navigator.gpu.getPreferredCanvasFormat ? navigator.gpu.getPreferredCanvasFormat() : context.getPreferredFormat(adapter)
const devicePixelRatio = window.devicePixelRatio || 1
canvas.width = canvas.clientWidth * devicePixelRatio
canvas.height = canvas.clientHeight * devicePixelRatio
const size = {width: canvas.width, height: canvas.height}
context.configure({
device, format,
// prevent chrome warning after v102
alphaMode: 'opaque'
})
return {device, context, format, size}
}
// create pipeline & buffers
async function initPipeline(device: GPUDevice, format: GPUTextureFormat, size:{width:number, height:number}) {
// create group layout for dynamicOffset
const dynamicBindGroupLayout = device.createBindGroupLayout({
entries: [
{
binding: 0,
visibility: GPUShaderStage.VERTEX,
buffer: {
type: 'uniform',
hasDynamicOffset: true,
minBindingSize: 0
}
}
]
})
// create pipline layout for dynamicOffset
const dynamicPipelineLayout = device.createPipelineLayout({
bindGroupLayouts: [dynamicBindGroupLayout]
});
const pipeline = await device.createRenderPipelineAsync({
label: 'Basic Pipline',
layout: dynamicPipelineLayout,
vertex: {
module: device.createShaderModule({
code: basicVert,
}),
entryPoint: 'main',
buffers: [{
arrayStride: 5 * 4, // 3 position 2 uv,
attributes: [
{
// position
shaderLocation: 0,
offset: 0,
format: 'float32x3',
},
{
// uv
shaderLocation: 1,
offset: 3 * 4,
format: 'float32x2',
}
]
}]
},
fragment: {
module: device.createShaderModule({
code: positionFrag,
}),
entryPoint: 'main',
targets: [
{
format: format
}
]
},
primitive: {
topology: 'triangle-list',
// Culling backfaces pointing away from the camera
cullMode: 'back'
},
// Enable depth testing since we have z-level positions
// Fragment closest to the camera is rendered in front
depthStencil: {
depthWriteEnabled: true,
depthCompare: 'less',
format: 'depth24plus',
}
} as GPURenderPipelineDescriptor)
// create depthTexture for renderPass
const depthTexture = device.createTexture({
size, format: 'depth24plus',
usage: GPUTextureUsage.RENDER_ATTACHMENT,
})
const depthView = depthTexture.createView()
// create vertex buffer
const vertexBuffer = device.createBuffer({
label: 'GPUBuffer store vertex',
size: cube.vertex.byteLength,
usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST,
})
device.queue.writeBuffer(vertexBuffer, 0, cube.vertex)
// create a buffer with 2 mvp matrix
const mvpBuffer = device.createBuffer({
label: 'GPUBuffer store 2 4*4 matrix',
size: 256 * 2, // 2 matrix with 256-byte aligned, or 256 + 64
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
})
// create a uniform group with dynamicOffsets
const group = device.createBindGroup({
layout: dynamicBindGroupLayout,
entries: [
{
binding: 0,
resource: {
buffer: mvpBuffer,
size: 4 * 16
}
}
]
})
// return all vars
return {pipeline, vertexBuffer, mvpBuffer, group, depthTexture, depthView}
}
// create & submit device commands
function draw(
device: GPUDevice,
context: GPUCanvasContext,
pipelineObj: {
pipeline: GPURenderPipeline,
vertexBuffer: GPUBuffer,
mvpBuffer: GPUBuffer,
group: GPUBindGroup,
depthView: GPUTextureView
}
) {
// start encoder
const commandEncoder = device.createCommandEncoder()
const renderPassDescriptor: GPURenderPassDescriptor = {
colorAttachments: [
{
view: context.getCurrentTexture().createView(),
clearValue: { r: 0, g: 0, b: 0, a: 1.0 },
loadOp: 'clear',
storeOp: 'store'
}
],
depthStencilAttachment: {
view: pipelineObj.depthView,
depthClearValue: 1.0,
depthLoadOp: 'clear',
depthStoreOp: 'store',
}
}
const passEncoder = commandEncoder.beginRenderPass(renderPassDescriptor)
passEncoder.setPipeline(pipelineObj.pipeline)
// set vertex
passEncoder.setVertexBuffer(0, pipelineObj.vertexBuffer)
const offset = new Uint32Array([0, 256])
{
// draw first cube with dynamicOffset 0
passEncoder.setBindGroup(0, pipelineObj.group, offset, 0, 1)
passEncoder.draw(cube.vertexCount)
// draw second cube with dynamicOffset 256
passEncoder.setBindGroup(0, pipelineObj.group, offset, 1, 1)
passEncoder.draw(cube.vertexCount)
}
passEncoder.end()
// WebGPU runs in a separate process; all commands are executed only after submit()
device.queue.submit([commandEncoder.finish()])
}
async function run(){
const canvas = document.querySelector('canvas')
if (!canvas)
throw new Error('No Canvas')
const {device, context, format, size} = await initWebGPU(canvas)
const pipelineObj = await initPipeline(device, format, size)
// default state
let aspect = size.width/ size.height
const position1 = {x:2, y:0, z: -8}
const rotation1 = {x: 0, y: 0, z:0}
const scale1 = {x:1, y:1, z: 1}
const position2 = {x:-2, y:0, z: -8}
const rotation2 = {x: 0, y: 0, z:0}
const scale2 = {x:1, y:1, z: 1}
// start loop
function frame(){
// first, update both transform matrices
const now = Date.now() / 1000
{
// first cube
rotation1.x = Math.sin(now)
rotation1.y = Math.cos(now)
const mvpMatrix1 = getMvpMatrix(aspect, position1, rotation1, scale1)
device.queue.writeBuffer(
pipelineObj.mvpBuffer,
0,
mvpMatrix1
)
}
{
// second cube
rotation2.x = Math.cos(now)
rotation2.y = Math.sin(now)
const mvpMatrix2 = getMvpMatrix(aspect, position2, rotation2, scale2)
device.queue.writeBuffer(
pipelineObj.mvpBuffer,
256,
mvpMatrix2
)
}
// then draw
draw(device, context, pipelineObj)
requestAnimationFrame(frame)
}
frame()
// re-configure context on resize
window.addEventListener('resize', ()=>{
size.width = canvas.width = canvas.clientWidth * devicePixelRatio
size.height = canvas.height = canvas.clientHeight * devicePixelRatio
// don't need to recall context.configure() after v104
// re-create depth texture
pipelineObj.depthTexture.destroy()
pipelineObj.depthTexture = device.createTexture({
size, format: 'depth24plus',
usage: GPUTextureUsage.RENDER_ATTACHMENT,
})
pipelineObj.depthView = pipelineObj.depthTexture.createView()
// update aspect
aspect = size.width/ size.height
})
}
run()
This demo also stores the MVP matrices in one contiguous, 256-byte-aligned buffer, but it creates only one bind group and bakes no offset into it; the offset is supplied dynamically later to switch between the data slices. This relies on the bind group's dynamicOffset feature, which is off by default, so we must create a bind group layout by hand and describe the bound buffer ourselves: which stage it is visible to (vertex or fragment shader), whether the buffer type is uniform or storage, and whether dynamic offsets are allowed.
Correspondingly, the pipeline's default bind-group configuration must change as well: we create an explicit pipeline layout and pass it in when creating the pipeline, overriding the default layout. In the render pass we can then call setBindGroup with the same group every time, adding an extra array argument that supplies the dynamic offset (again stepping in 256-byte increments) to switch the data.
The difference from demo3 is that there the offsets are fixed when the groups are created, whereas here the offset is chosen dynamically inside the render pass. That makes group management simpler (fewer groups to create) and more flexible, at the cost of setting up the group and pipeline layouts by hand. The buffer-switching cost is the same as in demo3 and the memory waste remains; moreover, because the extra JS API work involves the CPU, overall performance can actually end up noticeably worse than demo3. Its advantage shows mainly when bind groups would otherwise have to be created frequently.
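Note that setBindGroup also accepts a plain array of dynamic offsets, which is often easier to read than the Uint32Array/start/length form used in demo4's draw(). A minimal sketch using the variables from the demo above:
// One dynamic offset must be supplied per binding declared with hasDynamicOffset: true.
passEncoder.setBindGroup(0, pipelineObj.group, [0])   // first cube: read the matrix at byte 0
passEncoder.draw(cube.vertexCount)
passEncoder.setBindGroup(0, pipelineObj.group, [256]) // second cube: read the matrix at byte 256
passEncoder.draw(cube.vertexCount)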
demo5:cubesInstance.ts
import basicInstanced from './shaders/basic.instanced.vert.wgsl?raw'
import positionFrag from './shaders/position.frag.wgsl?raw'
import * as cube from './util/cube'
import { getMvpMatrix } from './util/math'
// initialize webgpu device & config canvas context
async function initWebGPU(canvas: HTMLCanvasElement) {
if(!navigator.gpu)
throw new Error('Not Support WebGPU')
const adapter = await navigator.gpu.requestAdapter()
if (!adapter)
throw new Error('No Adapter Found')
const device = await adapter.requestDevice()
const context = canvas.getContext('webgpu') as GPUCanvasContext
const format = navigator.gpu.getPreferredCanvasFormat ? navigator.gpu.getPreferredCanvasFormat() : context.getPreferredFormat(adapter)
const devicePixelRatio = window.devicePixelRatio || 1
canvas.width = canvas.clientWidth * devicePixelRatio
canvas.height = canvas.clientHeight * devicePixelRatio
const size = {width: canvas.width, height: canvas.height}
context.configure({
device, format,
// prevent chrome warning after v102
alphaMode: 'opaque'
})
return {device, context, format, size}
}
// create pipeline & buffers
async function initPipeline(device: GPUDevice, format: GPUTextureFormat, size:{width:number, height:number}) {
const pipeline = await device.createRenderPipelineAsync({
label: 'Basic Pipline',
layout: 'auto',
vertex: {
module: device.createShaderModule({
code: basicInstanced,
}),
entryPoint: 'main',
buffers: [{
arrayStride: 5 * 4, // 3 position 2 uv,
attributes: [
{
// position
shaderLocation: 0,
offset: 0,
format: 'float32x3',
},
{
// uv
shaderLocation: 1,
offset: 3 * 4,
format: 'float32x2',
}
]
}]
},
fragment: {
module: device.createShaderModule({
code: positionFrag,
}),
entryPoint: 'main',
targets: [
{
format: format
}
]
},
primitive: {
topology: 'triangle-list',
// Culling backfaces pointing away from the camera
cullMode: 'back'
},
// Enable depth testing since we have z-level positions
// Fragment closest to the camera is rendered in front
depthStencil: {
depthWriteEnabled: true,
depthCompare: 'less',
format: 'depth24plus',
}
} as GPURenderPipelineDescriptor)
// create depthTexture for renderPass
const depthTexture = device.createTexture({
size, format: 'depth24plus',
usage: GPUTextureUsage.RENDER_ATTACHMENT,
})
const depthView = depthTexture.createView()
// create vertex buffer
const vertexBuffer = device.createBuffer({
label: 'GPUBuffer store vertex',
size: cube.vertex.byteLength,
usage: GPUBufferUsage.VERTEX | GPUBufferUsage.COPY_DST,
})
device.queue.writeBuffer(vertexBuffer, 0, cube.vertex)
// create a storage buffer large enough for NUM 4x4 matrices
const mvpBuffer = device.createBuffer({
label: 'GPUBuffer store n*4x4 matrix',
size: 4 * 4 * 4 * NUM, // 4 x 4 x float32 x NUM
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_DST
})
// create a uniform group for Matrix
const group = device.createBindGroup({
label: 'Uniform Group with matrix',
layout: pipeline.getBindGroupLayout(0),
entries: [
{
binding: 0,
resource: {
buffer: mvpBuffer
}
}
]
})
// return all vars
return {pipeline, vertexBuffer, mvpBuffer, group, depthTexture, depthView}
}
// create & submit device commands
function draw(
device: GPUDevice,
context: GPUCanvasContext,
pipelineObj: {
pipeline: GPURenderPipeline,
vertexBuffer: GPUBuffer,
mvpBuffer: GPUBuffer,
group: GPUBindGroup,
depthView: GPUTextureView
}
) {
const commandEncoder = device.createCommandEncoder()
const renderPassDescriptor: GPURenderPassDescriptor = {
colorAttachments: [
{
view: context.getCurrentTexture().createView(),
clearValue: { r: 0, g: 0, b: 0, a: 1.0 },
loadOp: 'clear',
storeOp: 'store'
}
],
depthStencilAttachment: {
view: pipelineObj.depthView,
depthClearValue: 1.0,
depthLoadOp: 'clear',
depthStoreOp: 'store',
}
}
const passEncoder = commandEncoder.beginRenderPass(renderPassDescriptor)
passEncoder.setPipeline(pipelineObj.pipeline)
// set vertex
passEncoder.setVertexBuffer(0, pipelineObj.vertexBuffer)
{
// draw NUM cubes in one draw()
passEncoder.setBindGroup(0, pipelineObj.group)
passEncoder.draw(cube.vertexCount, NUM)
}
passEncoder.end()
// WebGPU runs in a separate process; all commands are executed only after submit()
device.queue.submit([commandEncoder.finish()])
}
// total objects
const NUM = 500 // 10000
async function run(){
const canvas = document.querySelector('canvas')
if (!canvas)
throw new Error('No Canvas')
const {device, context, format, size} = await initWebGPU(canvas)
const pipelineObj = await initPipeline(device, format, size)
// create objects
let aspect = size.width / size.height
const scene:any[] = []
const mvpBuffer = new Float32Array(NUM * 4 * 4)
for(let i = 0; i < NUM; i++){
// create a simple object
const position = {x: Math.random() * 40 - 20, y: Math.random() * 40 - 20, z: - 50 - Math.random() * 50}
const rotation = {x: 0, y: 0, z: 0}
const scale = {x:1, y:1, z:1}
scene.push({position, rotation, scale})
}
// start loop
function frame(){
// update rotation for each object
for(let i = 0; i < scene.length; i++){
const obj = scene[i]
const now = Date.now() / 1000
obj.rotation.x = Math.sin(now + i)
obj.rotation.y = Math.cos(now + i)
const mvpMatrix = getMvpMatrix(aspect, obj.position, obj.rotation, obj.scale)
// update buffer based on offset
// device.queue.writeBuffer(
// pipelineObj.mvpBuffer,
// i * 4 * 4 * 4, // offset for each object, no need to 256-byte aligned
// mvpMatrix
// )
// or save to mvpBuffer first
mvpBuffer.set(mvpMatrix, i * 4 * 4)
}
// the better way is update buffer in one write after loop
device.queue.writeBuffer(pipelineObj.mvpBuffer, 0, mvpBuffer)
draw(device, context, pipelineObj)
requestAnimationFrame(frame)
}
frame()
// re-configure context on resize
window.addEventListener('resize', ()=>{
size.width = canvas.width = canvas.clientWidth * devicePixelRatio
size.height = canvas.height = canvas.clientHeight * devicePixelRatio
// don't need to recall context.configure() after v104
// re-create depth texture
pipelineObj.depthTexture.destroy()
pipelineObj.depthTexture = device.createTexture({
size, format: 'depth24plus',
usage: GPUTextureUsage.RENDER_ATTACHMENT,
})
pipelineObj.depthView = pipelineObj.depthTexture.createView()
// update aspect
aspect = size.width/ size.height
})
}
run()
Only one mvpBuffer and one bind group are created. Unlike demo3, this mvpBuffer is not padded to 256-byte alignment; its size simply matches the number of matrices (size: 4 * 4 * 4 * NUM).
The buffer is created as a storage buffer. A uniform buffer is capped at 64 KB, enough for roughly 1,000 matrices at most, whereas a storage buffer can by default be up to 128 MB, enough for over two million matrices. In general, storage buffers are the recommended choice for this kind of data.
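Both caps can be queried from the device instead of being assumed; a small sketch using the device returned by initWebGPU (the limit names are part of the WebGPU spec):
// Default limits: 64 KiB for a uniform binding, 128 MiB for a storage binding.
const maxUniform = device.limits.maxUniformBufferBindingSize  // 65536 by default
const maxStorage = device.limits.maxStorageBufferBindingSize  // 134217728 by default
const matrixBytes = 4 * 4 * 4                                 // 64 bytes per 4x4 float32 matrix
console.log('uniform binding fits', Math.floor(maxUniform / matrixBytes), 'matrices')
console.log('storage binding fits', Math.floor(maxStorage / matrixBytes), 'matrices')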
Accordingly, the bind group needs no offsets at all; it simply binds the whole buffer. Computing the MVP matrices works exactly as before, except that a for loop now generates NUM cubes automatically. Each frame we also loop over every object in the scene, update its rotation, compute its MVP matrix, and write it into the one big mvpBuffer. Because there is no 256-byte alignment, each matrix is written immediately after the previous one.
In draw(), setBindGroup is called once and draw() is issued once; there is not even a for loop. Instead, draw() gets a second argument, NUM, matching the number of objects in the scene. The first argument of passEncoder.draw(), vertexCount, is how many parallel invocations of the vertex shader to launch; the second, instanceCount, treats that many vertices as one group and says how many such groups to launch at once (default 1). In this scene vertexCount is 36, so by default 36 invocations run as a single group, once. Write 100 there and 100 such groups run at the same time, so the pipeline processes 3,600 vertices in total. Put simply: with ordinary draw calls, drawing 100 cubes means drawing them one after another (assemble and shade one cube's vertices, then the next). The GPU parallelizes within a single object, but across the whole scene the work is still sequential, which makes it hard to keep the GPU fully busy. Instanced drawing instead processes the vertices of all 100 cubes together and then assembles and shades them together, so the objects are effectively drawn in parallel and the pipeline's overall throughput is much higher.
Relationship to resource switching
With this approach the 100 cubes are all drawn in parallel and share the same bound resources, so there is no opportunity to switch bind groups between them, and no buffers or offsets to manage per object. In every earlier variant, whichever grouping scheme was used, the vertex shader received a single 4x4 mvpMatrix, and it was JS that decided which matrix was bound. In demo5, JS can no longer do that switching: the entire mvpBuffer is handed to the vertex shader at once.
The shader handles it instead: in basic.instanced.vert.wgsl the binding is declared as mvpMatrix : array<mat4x4<f32>>, which means the shader automatically slices the big storage buffer into mat4x4-sized elements. Thanks to var<storage>, we can bind a very large array without declaring its length in advance.
How to obtain a single MVP inside the shader
Besides the vertex data passed in from outside, the vertex shader has a number of built-in inputs of its own. In the earlier triangle example (triangle.vert.wgsl), @builtin(vertex_index) identified which of the vertexCount vertices was being processed.
Similarly, instanced drawing (basic.instanced.vert.wgsl) has a built-in @builtin(instance_index) that says which instance is currently running, which for us is exactly which cube's slot in mvpBuffer to use. So we can index the big mvpMatrix array with [index], just like an ordinary array, and read that cube's own MVP matrix. That solves the resource-addressing problem (see the sketch below).
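A sketch of what that instanced vertex shader might look like (the real basic.instanced.vert.wgsl ships with the sample repo, so the exact struct and field names here are assumptions), again written as a raw WGSL string the way the demos import shaders:
// Illustrative WGSL for the instanced path: a storage array of matrices indexed by instance_index.
const basicInstancedSketch = /* wgsl */ `
@group(0) @binding(0) var<storage, read> mvpMatrix : array<mat4x4<f32>>;

struct VertexOutput {
    @builtin(position) position : vec4<f32>,
    @location(0) fragUV : vec2<f32>,
    @location(1) fragPosition : vec4<f32>
};

@vertex
fn main(
    @builtin(instance_index) index : u32,   // which instance (cube) this invocation belongs to
    @location(0) position : vec3<f32>,
    @location(1) uv : vec2<f32>
) -> VertexOutput {
    var output : VertexOutput;
    output.position = mvpMatrix[index] * vec4<f32>(position, 1.0); // pick this instance's matrix
    output.fragUV = uv;
    output.fragPosition = 0.5 * (vec4<f32>(position, 1.0) + vec4<f32>(1.0));
    return output;
}`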
Optimization
In the earlier demos, JS wrote the updated data into the GPUBuffer once per object; they simply did not need a for loop because there were only one or two objects. The snippet below batches those writes instead:
// first create a Float32Array with room for all the matrices
const allMatrix = new Float32Array(NUM * 4 * 4)
// start loop
function frame(){
// update rotation for each object
for(let i = 0; i < scene.length; i++){
const obj = scene[i]
const now = Date.now() / 1000
obj.rotation.x = Math.sin(now + i)
obj.rotation.y = Math.cos(now + i)
const mvpMatrix = getMvpMatrix(aspect, obj.position, obj.rotation, obj.scale)
// update buffer based on offset
// device.queue.writeBuffer(
// pipelineObj.mvpBuffer,
// i * 4 * 4 * 4, // offset for each object, no need to 256-byte aligned
// mvpMatrix
// )
// or collect everything into allMatrix first
// write each object's matrix into this array
allMatrix.set(mvpMatrix, i * 4 * 4)
}
// the better way: update the GPU buffer with one write after the loop
// finally copy the whole JS-side array into mvpBuffer in a single writeBuffer call
device.queue.writeBuffer(pipelineObj.mvpBuffer, 0, allMatrix)
With this optimization the per-frame for loop takes less time and the frame rate essentially maxes out. The matrix math itself can be optimized later, and all JS-side state can be laid out contiguously in the same way. Every exchange between the CPU and the GPU carries extra communication overhead, so the number of CPU-GPU transfers should be kept to a minimum: ideally, prepare all the data in JS (on the CPU) first and write it to the GPU in one go, rather than scattering many small writes. WebGPU also has other APIs that help with this, which later lessons will cover.
This shows another advantage of the instanced buffer in demo5. The buffers in demo3 and demo4 are much slower if written piecemeal, and even when written in one shot they are 4x (or even 16x) larger than demo5's tightly packed buffer because of the offset padding, so several times more data has to be written and the write is correspondingly slower. The drawback is that all instances must share the same vertexBuffer and the same bind-group information: the group side can be switched via the built-in index, but there is no way to switch vertex data per instance, so this technique is generally limited to objects with identical vertex data. Instanced drawing suits scenes with large numbers of repeated objects very well, but it is not a universal solution; in practice we combine ordinary bind groups with this storage-buffer-array approach to optimize overall performance.
Summary
Instanced draw (demo5): its biggest advantage is that the render pipeline runs extremely efficiently and makes maximum use of the GPU's parallelism. Resource management is essentially moved into the GPU/shader: the group and buffer switching happens there. JS no longer creates multiple buffers or groups, and no memory is wasted on alignment offsets.
In terms of buffer reads, demo3 and demo4 also use a contiguous buffer, but because of the 256-byte alignment every pointer switch moves at least 256 bytes; demo5 moves the index inside the shader, stepping only 64 bytes per matrix (or 16 bytes for small data such as a color), so in practice it is faster.
In terms of API calls, an ordinary draw loop issues its calls once per object, so with a huge number of objects both the JS and native API overhead become significant. With demo5, however many objects the scene contains, we call setBindGroup once and draw once, saving a great deal of API-call time.
Comparison of the four resource-management approaches

| | Buffer | Buffer with Offsets | Dynamic Offsets | Instance |
|---|---|---|---|---|
| Complexity (code) | Easy: multiple buffers and groups have to be created, but it matches an object-oriented style and the logic is easy to follow | Harder: one contiguous buffer takes some adjustment for developers used to discrete JS objects, and reads and writes must respect the aligned offsets; the groups themselves stay simple | Hardest: the group looks simpler, but custom bind-group and pipeline layouts must be created and the setBindGroup call changes | Easy for JS, harder for the vertex shader: the JS logic is very simple because all the management moves into the shader, which becomes harder to write |
| Flexibility (control over buffers) | Good | ~Good: the 256-byte alignment makes group setup slightly fiddly and a little less flexible | ~Good: the same 256-byte alignment constraints apply | Best for identical vertex data: hard to control for objects with different vertices, which need extra variables or groups |
| Read speed | Slow: scattered buffers and groups are the slowest | Fast | Fast, but setBindGroup is slower: the extra JS API work costs time; suited to scenes that must create groups dynamically | Fastest: best both for buffer access and for API-call count, but only for switching between identical models |
| Write speed | Slow: besides locating memory, every write pays the extra cost of a separate CPU-GPU transfer | Fast: the last three approaches all use one contiguous buffer, and with a single batched write they are all fast | Fast: but the offset padding adds extra data, so demo3 and demo4 write somewhat more slowly | Fastest |
| Memory use | N buffers, N groups: no redundant data (one buffer per object), but one group per object is also needed | > N × 4 buffer, N groups: the aligned offsets require extra memory | > N × 4 buffer, 1 group | N-sized buffer, 1 group: the most memory-efficient option |
Each approach has its trade-offs, and the resource-management scheme should be designed around the needs of the scene. As a rule of thumb, prefer contiguous buffers and prefer instanced drawing where it applies; in practice these approaches are usually combined to balance performance and development effort.