一般来说使用GpuSkinning 已经能得到很不错的性能了,那么能不能再快一点呢?
答案当然是肯定的。这一次我们使用 ECS 来榨干 CPU 端的性能。

先上性能对比图
1万个蒙皮角色,每个角色472面,带有uv0,uv1
测试设备硬件 win10, Intel i7-7700, GPU GTX-1060 6G

可以看到,ECS(Entity)方案的帧率在 110 帧以上,而传统 GPU Skinning 方案的帧率只有 29 帧左右。

这个 Demo 使用的 GPU 蒙皮方案是:将骨骼矩阵数据以双四元数的形式存储在纹理上。具体实现方法不是这个 Demo 的重点,大家可以参考下面这篇文章:

GPU Skinning 加速骨骼动画
https://github.com/chengkehan/GPUSkinning

接下来一步一步开始分解这个Demo
首先实现Shader Include
Skinning.hlsl

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#ifndef __AOI_GPUSKINNING
#define __AOI_GPUSKINNING

// Animation texture and its sampler, declared through the TEXTURE2D / SAMPLER
// macros (the macros themselves are defined outside this file).
// NOTE(review): presumably this is the texture holding the per-bone data that
// DQTexSkinning samples; the functions below receive the texture as a
// parameter, so confirm the binding at the call site.
TEXTURE2D(_AnimTex);
SAMPLER(sampler_AnimTex);

// Converts a linear texel index into a UV coordinate inside the animation texture.
//   index : linear texel index.
//   param : x = divisor used for the column (modulo), y = divisor used for the row,
//           w = scale applied to the integer texel coordinates to obtain UVs.
// Returns the UV of the texel's corner (no half-texel offset is added).
// NOTE(review): the row is derived from param.y while the column is derived from
// param.x; this is only consistent if both hold the same value (e.g. a square
// texture) — confirm against the code that fills this vector.
inline float2 BoneIndexToTexUV(float index, float4 param) {
    int texelY = (int)(index / param.y);
    int texelX = index % param.x;
    return float2(texelX, texelY) * param.w;
}

// Rotates a vector by a quaternion.
//   rotation : quaternion with the vector part in xyz and the scalar part in w.
//              Assumed to be unit length (not normalized here) — TODO confirm at the producer.
//   rhs      : vector to rotate.
// Returns rhs + 2 * (w * (q x rhs) + q x (q x rhs)), where q = rotation.xyz.
inline float3 QuatMulPos(float4 rotation, float3 rhs)
{
    // Keep the quaternion in full float precision. The previous half3() cast
    // quantized it on platforms where half is a true reduced-precision type and
    // then widened it back to float3, losing accuracy for no benefit.
    float3 qVec = rotation.xyz;
    float3 c1 = cross(qVec, rhs);
    float3 c2 = cross(qVec, c1);

    return rhs + 2 * (c1 * rotation.w + c2);
}

// Applies a rotation plus an offset to a homogeneous position.
//   real : rotation quaternion, forwarded to the two-argument QuatMulPos.
//   dual : only xyz is read; it is added as an offset scaled by rhs.w.
//   rhs  : position in xyz, with w scaling the offset term.
// Returns dual.xyz * rhs.w + rotate(real, rhs.xyz).
inline float3 QuatMulPos(float4 real, float4 dual, float4 rhs) {
    float3 offset = dual.xyz * rhs.w;
    float3 rotated = QuatMulPos(real, rhs.xyz);
    return offset + rotated;
}

// Skins a vertex with two bone influences whose data is read from a texture.
//   vertex        : position to skin; w scales the offset term and is carried into the result.
//   texcoord      : x / z = texel offsets of the first / second influence (added to startData.z),
//                   y / w = blend weights of the first / second influence.
//   startData     : z = base texel index; the whole vector is also forwarded to BoneIndexToTexUV.
//   animTex       : texture holding the bone data; two consecutive texels are read per influence
//                   (index -> "real" part, index + 1 -> "dual" part).
//   animTexSample : sampler used for the LOD-0 fetches.
// Returns the weighted sum of the two transformed positions. The weights are not
// renormalized here, so the result's w equals vertex.w * (texcoord.y + texcoord.w).
inline float4 DQTexSkinning(float4 vertex, float4 texcoord, float4 startData, Texture2D<float4> animTex, SamplerState animTexSample) {
    // First influence: fetch both texels and transform the vertex.
    int texelA = startData.z + texcoord.x;
    float4 realA = SAMPLE_TEXTURE2D_LOD(animTex, animTexSample, BoneIndexToTexUV(texelA, startData), 0);
    float4 dualA = SAMPLE_TEXTURE2D_LOD(animTex, animTexSample, BoneIndexToTexUV(texelA + 1, startData), 0);
    float4 skinnedA = float4(QuatMulPos(realA, dualA, vertex), vertex.w);

    // Second influence: same fetch/transform with the other offset.
    int texelB = startData.z + texcoord.z;
    float4 realB = SAMPLE_TEXTURE2D_LOD(animTex, animTexSample, BoneIndexToTexUV(texelB, startData), 0);
    float4 dualB = SAMPLE_TEXTURE2D_LOD(animTex, animTexSample, BoneIndexToTexUV(texelB + 1, startData), 0);
    float4 skinnedB = float4(QuatMulPos(realB, dualB, vertex), vertex.w);

    // Linear blend of the two transformed positions.
    return skinnedA * texcoord.y + skinnedB * texcoord.w;
}

// Entry point that skins an object-space position and writes it to an out parameter.
// NOTE(review): the `_float` suffix and out-parameter shape look like a Shader Graph
// Custom Function — confirm against the graph that references this include.
//   positionOS    : position to skin, forwarded unchanged to DQTexSkinning.
//   texcoord      : per-vertex influence offsets and weights, forwarded to DQTexSkinning.
//   frameData     : forwarded to DQTexSkinning as its startData argument.
//   animTex       : texture holding the bone data.
//   animTexSample : sampler used to read animTex.
//   output        : skinned position; xyz comes from DQTexSkinning and w is forced to 1.
// The texture parameter type was restored to Texture2D<float4>; the previous text
// contained a stray closing tag inside the template argument and did not compile.
inline void SkinningTex_float(float4 positionOS, float4 texcoord, float4 frameData, Texture2D<float4> animTex, SamplerState animTexSample, out float4 output) {
    output = float4(DQTexSkinning(positionOS, texcoord, frameData, animTex, animTexSample).xyz, 1);
}

#endif

继续阅读