一般来说, 使用GPU Skinning已经能得到很不错的性能了, 那么能不能再快一点呢?
答案当然是肯定的, 这一次我们使用ECS来榨干CPU这部分的性能
先上性能对比图
1万个蒙皮角色,每个角色472面,带有uv0,uv1
测试设备硬件 win10, Intel i7-7700, GPU GTX-1060 6G
可以看到Entity(ECS)方案的帧数在110帧以上, 而传统GPU Skinning方案的帧数为29帧
这个Demo使用的GPU蒙皮方案是: 将骨骼矩阵数据以双四元数的方式存储在纹理上。具体实现方法不是这个Demo的重点, 大家可以参考这篇文章:
GPU Skinning 加速骨骼动画
https://github.com/chengkehan/GPUSkinning
接下来一步一步开始分解这个Demo
首先实现Shader Include
Skinning.hlsl
#ifndef __AOI_GPUSKINNING
#define __AOI_GPUSKINNING

// Animation texture and its sampler. The helpers below receive the texture and
// sampler as explicit parameters and do not reference these globals directly.
TEXTURE2D(_AnimTex);
SAMPLER(sampler_AnimTex);

// Converts a linear texel index into a UV coordinate in the animation texture.
//   index : linear texel index.
//   param : layout vector; row = index / param.y, column = index % param.x,
//           and both are scaled by param.w to produce the UV.
// NOTE(review): presumably param.x / param.y are the texture width / height and
// param.w is the reciprocal texture size -- confirm against the code that fills
// this vector. The row is derived from param.y while the column is derived from
// param.x, so the result is only a consistent row-major mapping when both agree.
inline float2 BoneIndexToTexUV(float index, float4 param)
{
    int row = (int)(index / param.y);
    int col = index % param.x;
    return float2(col * param.w, row * param.w);
}

// Applies the quaternion `rotation` (xyz = vector part, w = scalar part) to the
// vector `rhs` using rhs + 2 * (w * (q x rhs) + q x (q x rhs)).
// NOTE(review): the vector part is cast through half3 before use; on targets
// where half is a 16-bit type this reduces precision -- confirm this is intended.
inline float3 QuatMulPos(float4 rotation, float3 rhs)
{
    float3 qVec = half3(rotation.xyz);
    float3 c1 = cross(qVec, rhs);
    float3 c2 = cross(qVec, c1);
    return rhs + 2 * (c1 * rotation.w + c2);
}

// Transforms `rhs` by a (real, dual) pair: rotates rhs.xyz by `real` and adds
// dual.xyz scaled by rhs.w.
inline float3 QuatMulPos(float4 real, float4 dual, float4 rhs)
{
    return dual.xyz * rhs.w + QuatMulPos(real, rhs.xyz);
}

// Skins `vertex` with two bone influences read from the animation texture.
//   vertex        : position to skin; w scales the dual (offset) term.
//   texcoord      : x / z are per-influence texel offsets added to startData.z,
//                   y / w are the corresponding blend weights.
//   startData     : z is the base texel index; the whole vector is also passed
//                   to BoneIndexToTexUV as the texture layout parameter.
//   animTex       : texture holding the bone data; each bone occupies two
//                   consecutive texels (real at index, dual at index + 1).
//   animTexSample : sampler used for the LOD-0 texture reads.
// Returns the weighted sum of the two transformed positions; the w component is
// vertex.w * (texcoord.y + texcoord.w).
inline float4 DQTexSkinning(float4 vertex, float4 texcoord, float4 startData, Texture2D<float4> animTex, SamplerState animTexSample)
{
    // First influence.
    int index1 = startData.z + texcoord.x;
    float4 real1 = SAMPLE_TEXTURE2D_LOD(animTex, animTexSample, BoneIndexToTexUV(index1, startData), 0);
    float4 dual1 = SAMPLE_TEXTURE2D_LOD(animTex, animTexSample, BoneIndexToTexUV(index1 + 1, startData), 0);

    // Second influence.
    int index2 = startData.z + texcoord.z;
    float4 real2 = SAMPLE_TEXTURE2D_LOD(animTex, animTexSample, BoneIndexToTexUV(index2, startData), 0);
    float4 dual2 = SAMPLE_TEXTURE2D_LOD(animTex, animTexSample, BoneIndexToTexUV(index2 + 1, startData), 0);

    // Transform the vertex by each influence, then blend by the weights.
    float4 t0 = float4(QuatMulPos(real1, dual1, vertex), vertex.w);
    float4 t1 = float4(QuatMulPos(real2, dual2, vertex), vertex.w);

    return t0 * texcoord.y + t1 * texcoord.w;
}

// Entry point wrapping DQTexSkinning: writes the skinned object-space position
// to `output` with w forced to 1.
// NOTE(review): the `_float` suffix and `out` parameter look like the Unity
// Shader Graph Custom Function convention -- confirm against the graph that
// references this include.
inline void SkinningTex_float(float4 positionOS, float4 texcoord, float4 frameData, Texture2D<float4> animTex, SamplerState animTexSample, out float4 output)
{
    output = float4(DQTexSkinning(positionOS, texcoord, frameData, animTex, animTexSample).xyz, 1);
}

#endif