2

我在 Compute Shader 中的 GPU 上的数组索引存在问题,我已经坚持了好几个星期。

我正在尝试使用 SV_DispatchThreadID 的 x 值作为我的粒子数组的索引(如网络上的某些示例所示)。

它正在工作......但threadID变量(在主函数中)总是返回0,3,6,9,12,15......而不是0,1,2,3,4,......

我在 CPU 端的调度调用是: Dispatch(64, 1, 1);

我尝试了许多调度配置 (32,16,1), (128,1,1),... 有许多配置 numtards (1,1,1), (32,32,1), (16 ,16,1)...但总是相同的结果... threadID 从来没有被很好地排序。

如何获得有序索引?:(...总是得到像0,3,6,9这样的索引,...

有什么建议吗?

非常感谢。

这是我的 CS 内核和 C# 源代码:

#pragma kernel CSMain

float  dt;
float  time;
float  pi;

uint   maxParticles = 1024;
float  maxAge;

struct Particle 
{
    int         index;
    float3      position;
    float3      velocity; 
    float       size;
    float       age;
    float       normAge; 
    int         type;    
};

RWStructuredBuffer <Particle> particles;

[numthreads( 1, 1, 1 )]

void CSMain ( uint3 Gid : SV_GroupID, uint3 DTid : SV_DispatchThreadID, uint3 GTid : SV_GroupThreadID, uint GI : SV_GroupIndex )        
{
    uint index = DTid.x;

    if (index < maxParticles)
    {
        Particle p = particles[index];
        p.position.y = p.index; //just check if the index is correct by giving a Y position
        particles[index] = p;   
    }

}

用于创建 ComputeBuffer 和其他内容的 C# 代码:

using UnityEngine;
using System.Collections;
using System.Runtime.InteropServices;

public class SimpleEmitter : MonoBehaviour 
{
   struct Particle 
    {
        public int          index;
        public Vector3      position;
        public Vector3      velocity;
        public float        size;
        public float        age;
        public float        normAge;
        public int          type;
    }

    public ComputeShader computeShader;
    public Material material;
    public int maxParticles = 1000000;
    public float maxAge = 3.0f; 
    public float particleSize = 0.5f;

    private ComputeBuffer particles;
    private int particleSizeOf;

    void Start () 
    {   
        Particle p = new Particle();
        particleSizeOf = Marshal.SizeOf(p);

        Particle[] pInitBuffer = new Particle[maxParticles];

        for (int i = 0; i < maxParticles; i++) 
        {
            p = new Particle();
            p.index = i;
            p.type = 0;            
            p.age = 0;
            p.normAge = 0.1f;
            p.size = particleSize * 0.5f + Random.value * particleSize;                     
            p.velocity = new Vector3(0, 0, 0);

            pInitBuffer[i] = p;
        }

        particles = new ComputeBuffer(maxParticles, particleSizeOf, ComputeBufferType.Default);
        particles.SetData(pInitBuffer);

        computeShader.SetBuffer(0, "particles", particles);
    }

    void Update() 
    {       
        computeShader.SetFloat("dt", Time.deltaTime);
        computeShader.SetFloat("time", Time.time);
        computeShader.SetFloat("pi", Mathf.PI);     
        computeShader.SetInt("maxParticles", maxParticles);                
        computeShader.SetFloat("maxAge", maxAge);

        computeShader.Dispatch(0, 64, 1, 1);            
    }

    public void OnPostRender() 
    {
        material.SetPass(0);
        material.SetFloat("maxAge", maxAge);
        material.SetBuffer("particles", particles);

        Graphics.DrawProcedural(MeshTopology.Triangles, maxParticles, 0);
    }

    void OnDisable() 
    {
        particles.Release();
    }
}

这里是顶点、几何和像素着色器:

Shader "Custom/SimpleRS" 
{
    Properties 
    {
        _ParticleTexture ("Diffuse Tex", 2D) = "white" {}
        _Ramp1Texture ("G_Ramp1", 2D) = "white" {}
    }

    SubShader 
    {
        Pass 
        {
            Tags { "Queue"="Transparent" "IgnoreProjector"="True" "RenderType"="Transparent" }
            Blend OneMinusDstColor One
            Cull Off 
            Lighting Off 
            ZWrite Off 
            Fog { Color (0,0,0,0) }


            CGPROGRAM
            #pragma target 5.0
            #pragma vertex VSMAIN
            #pragma fragment PSMAIN
            #pragma geometry GSMAIN
            #include "UnityCG.cginc" 

            struct Particle 
            {
                int    index;
                float3 position;
                float3 velocity;
                float  size;
                float  age;
                float  normAge;
                int    type;

            };

            StructuredBuffer<Particle>  particles;

            Texture2D                   _ParticleTexture;           
            SamplerState                sampler_ParticleTexture;

            Texture2D                   _Ramp1Texture;
            SamplerState                sampler_Ramp1Texture;

            float maxAge;
            float maxRad;

            struct VS_INPUT
            {
                uint vertexid           : SV_VertexID;
            };
            //--------------------------------------------------------------------------------
            struct GS_INPUT
            {
                float4 position         : SV_POSITION;
                float size              : TEXCOORD0;
                float age               : TEXCOORD1;
                float type              : TEXCOORD2;
            };
            //--------------------------------------------------------------------------------
            struct PS_INPUT
            {
                float4 position         : SV_POSITION;
                float2 texcoords        : TEXCOORD0;
                float size              : TEXCOORD1;
                float age               : TEXCOORD2;
                float type              : TEXCOORD3;
            };
            //--------------------------------------------------------------------------------
            GS_INPUT VSMAIN( in VS_INPUT input )
            {
                GS_INPUT output;

                output.position.xyz = particles[input.vertexid].position;
                output.position.w = 1.0;                
                output.age = particles[input.vertexid].normAge;
                output.size = particles[input.vertexid].size;
                output.type = particles[input.vertexid].type;
                return output;
            }
            //--------------------------------------------------------------------------------
            [maxvertexcount(4)]
            void GSMAIN( point GS_INPUT p[1], inout TriangleStream<PS_INPUT> triStream )
            {           
                float4 pos = mul(UNITY_MATRIX_MVP, p[0].position);

                float halfS = p[0].size * 0.5f;
                float4 offset = mul(UNITY_MATRIX_P, float4(halfS, halfS, 0, 1));

                float4 v[4];
                v[0] = pos + float4(offset.x, offset.y, 0, 1);
                v[1] = pos + float4(offset.x, -offset.y, 0, 1);
                v[2] = pos + float4(-offset.x, offset.y, 0, 1);
                v[3] = pos + float4(-offset.x, -offset.y, 0, 1);

                PS_INPUT pIn;
                pIn.position = v[0];
                pIn.texcoords = float2(1.0f, 0.0f);

                    pIn.size = p[0].size;
                    pIn.age = p[0].age;
                    pIn.type = p[0].type;                       

                triStream.Append(pIn);

                pIn.position =  v[1];
                pIn.texcoords = float2(1.0f, 1.0f);
                triStream.Append(pIn);

                pIn.position =  v[2];
                pIn.texcoords = float2(0.0f, 0.0f);
                triStream.Append(pIn);

                pIn.position =  v[3];
                pIn.texcoords = float2(0.0f, 1.0f);
                triStream.Append(pIn);                  
            }
            //--------------------------------------------------------------------------------
            float4 PSMAIN( in PS_INPUT input ) : COLOR
            {
                float4 color = _ParticleTexture.Sample( sampler_ParticleTexture, input.texcoords );
                float4 tint = _Ramp1Texture.Sample(sampler_Ramp1Texture, float2(min(1.0, input.age),0));
                color *= tint;

                if (input.age == 0) discard;

                return color;
            }
            //--------------------------------------------------------------------------------
            ENDCG
        }
    } 
}
4

1 回答 1

4

您应该使用 MeshTopology.Points 而不是三角形进行绘制。

由于几何着色器只传递了一个点,这就是您的拓扑,GS 然后根据您的着色器代码将其扩展为三角形。

这就解释了为什么只有每 3 个粒子才会出现。根据您请求的拓扑,它们以 3 个一组的形式传递给 GS,然后您将透明地丢弃第二个和第三个输入,GS 看到的下一个顶点是顶点 3,然后是 6,然后是 9,以此类推。

于 2013-12-22T21:42:52.247 回答