1

我一直在研究渲染草的不同技术。我决定主要使用几何着色器来生成草:把草的位置作为 GL_POINTS 渲染,再在几何着色器中动态生成三角形带(triangle strip),但性能没有达到我的预期。渲染 100,000 片草叶时,帧率大约只有 20-50 fps,而我的 GPU 并不差。我想知道是我的方法本身有问题、是已经达到了 GPU 的极限、还是我哪里写错了,或者是否存在更快的方法(我的目标是得到一片片独立的草叶,最好还能直接操纵它们的顶点)。我使用的纹理大小是 256x256。

我的渲染步骤是:

创建 VAO 和 VBO,存储草的位置数据,并且只绑定一次:

   // One xyz triple per grass blade: attribute 0 is declared with 3 components
   // and the draw call submits 100000 points, so the buffer needs 3 floats per
   // point (the original 100000-float array only covered a third of them).
   float[] GrassLocations= new float[100000 * 3];
   int vaoID = createVAO();
   //bind VBO to VAO
   // Upload the positions as attribute 0 with 3 floats per vertex.
   // NOTE(review): the meaning of the trailing (0, 0) arguments is not visible
   // here -- presumably stride/offset; confirm against storeDataInAttributeList.
   storeDataInAttributeList(0, 3, GrassLocations,0,0);

然后我渲染:

    // Bind the blade texture to texture unit 0 first; this state is independent
    // of the vertex array binding below.
    GL13.glActiveTexture(GL13.GL_TEXTURE0);
    GL11.glBindTexture(GL11.GL_TEXTURE_2D, texture);

    // Submit every blade position as one point; the geometry shader expands
    // each point into the blade's triangle strip.
    GL30.glBindVertexArray(VAO);
    GL20.glEnableVertexAttribArray(0);
    GL11.glDrawArrays(GL11.GL_POINTS, 0, 100000);

    // Restore the attribute and VAO state touched above.
    GL20.glDisableVertexAttribArray(0);
    GL30.glBindVertexArray(0);

然后是我的顶点着色器:

#version 400

// Vertex stage for point-per-blade grass: forwards per-blade data to the
// geometry shader and writes the transformed blade root to gl_Position.

// Blade root position, one per point.
layout (location = 0) in vec3 VertexLocation;

// The original body read `offset` without declaring it anywhere, which does
// not compile. It is declared here as an optional per-blade attribute; when no
// array is enabled for location 1 the current generic attribute value is used
// (initially 0,0,0).
// NOTE(review): confirm where this value was meant to come from.
layout (location = 1) in vec3 offset;

uniform float time;
uniform mat4 transformationMatrix;
uniform mat4 viewMatrix;       // not referenced in this stage
uniform mat4 MVPmatrix;
uniform mat4 modelViewMatrix;

out vec3 offsets;
out vec3 Position;
out vec3 Normal;
out vec2 TexCoord;             // declared but never written in this stage
out float visibility;
out float Time;

// Parameters of the exponential visibility falloff computed in main().
const float density = .007;
const float gradient = 1.5;

void main()
{
    vec4 localPosition = vec4(VertexLocation, 1.0);

    // Pass the animation time on as a per-vertex output.
    Time = time;

    // Position after transformationMatrix, and a constant up-facing normal.
    Position = vec3(transformationMatrix * localPosition);
    Normal = vec3(0, 1, 0);

    gl_Position = MVPmatrix * localPosition;

    // visibility = exp(-(d * density)^gradient), where d is the length of the
    // position after modelViewMatrix; clamped to [0, 1].
    float distToCamera = length((modelViewMatrix * localPosition).xyz);
    visibility = clamp(exp(-pow(distToCamera * density, gradient)), 0.0, 1.0);

    offsets = offset;
}

我试过把顶点着色器精简到只剩 gl_Position 的赋值,但性能没有改善,所以问题不在顶点着色器。我的几何着色器:

#version 400

// Geometry stage: expands each input point into a 10-vertex triangle strip
// (five left/right vertex pairs) representing one grass blade.

layout( points ) in;
layout( triangle_strip, max_vertices = 10 ) out;

// Per-blade inputs from the previous stage; index 0 is the only element
// because the input primitive is a point.
in vec3 Position[];
in vec3 Normal[];
in vec3 offsets[];
in float Time[];

out vec3 position;
out vec3 normal;
out vec2 TexCoord;
out vec3 color;

const float Size2 = 1.0;   // Half the width of the quad
const float width = 5.0;   // Multiplier applied to Size2 for the blade's half-width

// Emits one strip vertex at gl_in[0].gl_Position + positionOffset.
// Every output is rewritten here because the values of all output variables
// become undefined after EmitVertex(); the original assigned position, normal
// and color only once before the first vertex.
void emitBladeVertex(vec3 positionOffset, vec2 uv)
{
    position = Position[0];
    normal = Normal[0];
    color = offsets[0];
    TexCoord = uv;
    gl_Position = vec4(positionOffset, 0.0) + gl_in[0].gl_Position;
    EmitVertex();
}

// Emits the left then the right vertex of one horizontal slice of the blade.
// xShift moves both vertices sideways; v is the texture row for the slice.
void emitBladeRow(float xShift, float y, float z, float v)
{
    float halfWidth = Size2 * width;
    emitBladeVertex(vec3(-halfWidth + xShift, y, z), vec2(0.0, v));
    emitBladeVertex(vec3( halfWidth + xShift, y, z), vec2(1.0, v));
}

void main()
{
    // Time is a scalar; the original `Time[0].x` swizzle is not valid in
    // GLSL 4.00 (scalar swizzling arrived with 4.20).
    float sway = Time[0];

    // Offsets are added to the incoming gl_Position unchanged, so they are
    // expressed in whatever space the previous stage wrote.
    emitBladeRow(0.0,        -Size2,      0.0,  0.0);
    emitBladeRow(sway,        10.0,       0.0,  0.25);
    emitBladeRow(sway * 2.0,  15.0,       0.0,  0.50);
    emitBladeRow(sway * 3.0,  25.0,       0.0,  0.75);
    // NOTE(review): this last row sits at y = Size2 * 7 (below the previous
    // row at 25) and applies the time value to z instead of x. Values are
    // kept exactly as in the original; confirm they are intended.
    emitBladeRow(0.0,         Size2 * 7.0, sway, 1.0);
}

以及我的片段着色器:(这是在延迟渲染引擎中使用的;我也尝试过前向渲染,但我认为渲染方式在这里不会影响性能)

#version 400

// Fragment stage: alpha-tests the blade texture and writes three outputs
// (texture colour, position, normal) to colour outputs 0, 1 and 2.

in vec2 TexCoord;
in vec3 color;      // received from the previous stage; not used below
in vec3 normal;
in vec3 position;

// `layout(binding = 0)` requires GLSL 4.20 / ARB_shading_language_420pack and
// is not valid under #version 400. A sampler uniform defaults to 0, so this
// still samples texture unit 0 unless the application sets it otherwise.
uniform sampler2D SpriteTex;

// Explicit outputs replace gl_FragData[0..2], which is not available in the
// core profile that `#version 400` selects by default and cannot be mixed with
// user-declared fragment outputs. Locations match the original indices.
layout( location = 0 ) out vec4 FragColor;
layout( location = 1 ) out vec4 FragPosition;
layout( location = 2 ) out vec4 FragNormal;

void main() {
    vec4 texColor = texture(SpriteTex, TexCoord);

    // Alpha test first: a discarded fragment writes nothing, so there is no
    // point filling the other outputs beforehand.
    if (texColor.a < .5) {
        discard;
    }

    FragColor = texColor;
    FragPosition = vec4(position.xyz, 0.0);
    FragNormal = vec4(normal, 1.0);
}
4

1 回答 1

3

您想要的是一种称为Instancing的技术。我链接的教程非常适合弄清楚如何进行实例化。

我建议您避免使用几何着色器(当几何着色器的用途是扩充顶点数量时,它的性能通常无法很好地扩展),而是只定义一个缓冲区,其中包含绘制单片草叶(或一小片草丛)所需的所有顶点,然后使用实例化将该对象重复绘制数千次。

于 2015-09-01T13:42:15.097 回答