我正在尝试使用几何着色器将点膨胀为四边形,以进行一些简单的 2D 渲染。大多数帧的渲染完全符合我的预期,但有时某些顶点的某些属性会不正确。我花了一些时间从一个带有许多移动部件的更大程序中简化它,这样我在渲染过程中只做最低限度的工作,但不幸的是仍然有很多设置。完整的代码在这里:
#!/usr/bin/env python
# Copyright 2011-2013, Andrew Wilson
# Licensed under the MIT license:
# http://www.opensource.org/licenses/MIT
# memglitch.py
from OpenGL import GL
import sys
import pygame
import pygame.image
import pygame.key
import pygame as PG
import numpy
import hashlib
import collections
import ctypes
######## SHADERS ########
vertex_shader = '''\
#version 330
uniform vec2 screen_dimensions;
uniform vec2 cam_position;
uniform float zoom;
layout(location=0) in vec2 position;
layout(location=1) in vec2 size;
layout(location=2) in vec2 other;
out VertexData
{
vec2 position;
vec2 size;
float layer;
float rotation;
} outData;
void main()
{
outData.position = position;
outData.size = size;
outData.rotation = other.x;
outData.layer = other.y;
}
'''
geometry_shader = '''\
#version 330
#extension GL_EXT_gpu_shader4 : enable
layout (points) in;
layout (triangle_strip, max_vertices = 4) out;
uniform vec2 screen_dimensions;
uniform vec2 cam_position;
uniform float zoom;
in VertexData
{
vec2 position;
vec2 size;
float rotation;
float layer;
} vert[];
out FragData
{
smooth vec2 texcoord;
smooth float layer;
} vertOut;
vec4 calcPosition(in vec2 pos)
{
// Transform a position in world-space into screen-space
vec4 result;
result.xy =
(
pos
- cam_position
)
* zoom
/ screen_dimensions;
result.zw = vec2(0.0, 1.0);
return result;
}
void main()
{
// Inflate each input point into a quad.
float r = vert[0].rotation;
mat2 rotation_matrix = mat2(cos(r), -sin(r), sin(r), cos(r));
vec2 currentPos;
vec4 texcoords = vec4(0,0,1,1);
currentPos = vert[0].position + vert[0].size * vec2(-0.5, -0.5) * rotation_matrix;
gl_Position = calcPosition(currentPos);
vertOut.texcoord = texcoords.xy;
vertOut.layer = vert[0].layer;
gl_PrimitiveID = gl_PrimitiveIDIn;
EmitVertex();
currentPos = vert[0].position + vert[0].size * vec2(-0.5, 0.5) * rotation_matrix;
gl_Position = calcPosition(currentPos);
vertOut.texcoord = texcoords.xw;
vertOut.layer = vert[0].layer;
gl_PrimitiveID = gl_PrimitiveIDIn;
EmitVertex();
currentPos = vert[0].position + vert[0].size * vec2(0.5, -0.5) * rotation_matrix;
gl_Position = calcPosition(currentPos);
vertOut.texcoord = texcoords.zy;
vertOut.layer = vert[0].layer;
gl_PrimitiveID = gl_PrimitiveIDIn;
EmitVertex();
currentPos = vert[0].position + vert[0].size * vec2(0.5, 0.5) * rotation_matrix;
gl_Position = calcPosition(currentPos);
vertOut.texcoord = texcoords.zw;
vertOut.layer = vert[0].layer;
gl_PrimitiveID = gl_PrimitiveIDIn;
EmitVertex();
}
'''
fragment_shader = '''\
#version 330
#extension GL_EXT_gpu_shader4 : enable
uniform sampler2DArray texture_atlas;
uniform float zoom;
in FragData
{
smooth vec2 texcoord;
smooth float layer;
};
layout(location=0) out vec4 fragcolor;
void main()
{
fragcolor = texture(
texture_atlas,
vec3(texcoord, float(layer)));
}
'''
######## TEXTURE_SETUP ########
def make_texture_array(
image,
across=8,
down=8):
'''
Split up an input image with a grid and assemble a
texture array from all of the sub-images.
'''
source_width, source_height = image.get_size()
width = source_width // across
height = source_height // down
subpixels = []
for y in xrange(down):
for x in xrange(across):
subimage = image.subsurface((x*width, y*height, width, height))
subpixels.append(pygame.image.tostring(subimage, "RGBA", True))
pixels = "".join(subpixels)
texture = GL.glGenTextures(1)
GL.glBindTexture(GL.GL_TEXTURE_2D_ARRAY, texture)
def tex_param(name, value):
GL.glTexParameteri(GL.GL_TEXTURE_2D_ARRAY, name, value)
tex_param(GL.GL_TEXTURE_MIN_FILTER, GL.GL_NEAREST)
tex_param(GL.GL_TEXTURE_MAG_FILTER, GL.GL_NEAREST)
tex_param(GL.GL_TEXTURE_WRAP_S, GL.GL_CLAMP_TO_EDGE)
tex_param(GL.GL_TEXTURE_WRAP_T, GL.GL_CLAMP_TO_EDGE)
tex_param(GL.GL_TEXTURE_BASE_LEVEL, 0)
tex_param(GL.GL_TEXTURE_MAX_LEVEL, 0)
targetformat = GL.GL_RGBA8
sourceformat = GL.GL_RGBA
GL.glTexImage3D(
GL.GL_TEXTURE_2D_ARRAY,
0,
targetformat,
width,
height,
across*down,
0,
sourceformat,
GL.GL_UNSIGNED_BYTE,
pixels)
return texture
######## SHADER SETUP ########
def create_shader_program(resources):
'''
Compile the shader program. Populates resources.shader_program_object
with the OpenGL program object and active_uniforms with a dictionary
mapping uniform names to locations.
'''
writelog=sys.stderr.write
shaders = []
def compile_shader(source, gltype, name):
writelog("Compiling {0} shader...\n".format(name))
shader = make_shader(gltype, source)
infolog = GL.glGetShaderInfoLog(shader)
if len(infolog)==0:
writelog("...completed\n")
else:
writelog("...completed with messages:\n")
writelog(infolog)
writelog("\n")
shaders.append(shader)
compile_shader(vertex_shader, GL.GL_VERTEX_SHADER, 'vertex')
compile_shader(fragment_shader, GL.GL_FRAGMENT_SHADER, 'fragment')
compile_shader(geometry_shader, GL.GL_GEOMETRY_SHADER, 'geometry')
writelog("Compiling shader program...\n")
program = make_program(*shaders)
infolog = GL.glGetProgramInfoLog(program)
if len(infolog)==0:
writelog("...completed\n")
else:
writelog("...completed with messages:\n")
writelog(infolog)
writelog("\n")
active_uniforms = GL.glGetProgramiv(program, GL.GL_ACTIVE_UNIFORMS)
resources.uniform_locations = {}
for i in range(active_uniforms):
name, size, data_type = GL.glGetActiveUniform(program, i)
resources.uniform_locations[name] = i
resources.shader_program_object = program
def make_shader(shadertype, source):
'''
Compile and return an OpenGL shader object.
'''
shader = GL.glCreateShader(shadertype)
GL.glShaderSource(shader, source)
GL.glCompileShader(shader)
retval = ctypes.c_uint(GL.GL_UNSIGNED_INT)
GL.glGetShaderiv(shader, GL.GL_COMPILE_STATUS, retval)
if not retval:
print >> sys.stderr, "Failed to compile shader."
print GL.glGetShaderInfoLog(shader)
GL.glDeleteShader(shader)
raise Exception("Failed to compile shader.")
return shader
def make_program(*shaders):
'''
Compile and return an OpenGL program object.
'''
program = GL.glCreateProgram()
for shader in shaders:
GL.glAttachShader(program, shader)
GL.glLinkProgram(program)
retval = ctypes.c_int()
GL.glGetProgramiv(program, GL.GL_LINK_STATUS, retval)
if not retval:
print >> sys.stderr, "Failed to link shader program."
print GL.glGetProgramInfoLog(program)
GL.glDeleteProgram(program)
raise Exception("Failed to link shader program.")
return program
######## RESOURCE ALLOCATION ########
class Resources(object):
pass
def make_resources(screen_dimensions):
loadimg = pygame.image.load
spacemen_image = loadimg('diagnostic_numbers.png')
resources = Resources()
vertex_dtype = numpy.dtype([
("position", ("f4", 2)),
("size", ("f4", 2)),
("other", ("f4", 2))])
resources.vertex_stride = 24
resources.position_stream_offset = 0
resources.size_stream_offset = 8
resources.other_stream_offset = 16
resources.vertex_array = numpy.zeros(512, dtype=vertex_dtype)
resources.spacemen_texture = make_texture_array(spacemen_image, 16, 16)
create_shader_program(resources)
resources.array_buffer = GL.glGenBuffers(1)
w,h = screen_dimensions
resources.save_buffer = numpy.zeros((h,w,4),dtype="u1")
return resources
######## SCREENSHOT #########
# pygame.surfarray.make_surface is broken in 1.9.1. It reads uninitialized
# stack contents on 64-bit systems. :( Here we use numpy to do the copying
# instead.
def make_surface(array):
w,h,depth = array.shape
if depth == 4:
surf = pygame.Surface((w,h), depth=32, flags=pygame.SRCALPHA)
pixels = pygame.surfarray.pixels3d(surf)
pixels[:,:,:] = array[:,:,:3]
alpha = pygame.surfarray.pixels_alpha(surf)
alpha[:,:] = array[:,:,3]
elif depth == 3:
surf = pygame.Surface((w,h), depth=32)
pixels = pygame.surfarray.pixels3d(surf)
pixels[:,:,:depth] = array
else:
raise ValueError("Array must have minor dimension of 3 or 4.")
return surf
class Screenshotter(object):
'''
Captures screenshots from OpenGL and records them by SHA1 hash.
'''
def __init__(self, save_buffer, screen_dimensions):
self.hashes_seen = collections.Counter()
self.save_buffer = save_buffer
self.screen_dimensions = screen_dimensions
def get_filename(self, screen_hash):
return screen_hash + ".out.png"
def take_screenshot(self):
w,h = self.screen_dimensions
save_buffer = self.save_buffer
GL.glReadPixels(0, 0, w, h, GL.GL_RGBA, GL.GL_UNSIGNED_BYTE, self.save_buffer)
byte_view = save_buffer.view("u1")
screen_hash = hashlib.sha1(byte_view).hexdigest()
if self.hashes_seen[screen_hash] == 0:
oriented = numpy.swapaxes(save_buffer, 0, 1)[:,::-1,:]
surf = make_surface(oriented)
filename = self.get_filename(screen_hash)
pygame.image.save(surf,filename)
print filename
self.hashes_seen[screen_hash] += 1
def print_summary(self):
for screen_hash, count in sorted(self.hashes_seen.items(), key=lambda(h,c):-c):
print "{0} {1}".format(self.get_filename(screen_hash), count)
######## RENDERING ########
def prepare_context(resources, zoom, screen_dimensions):
'''
Prepare the OpenGL context for rendering.
'''
uniforms = resources.uniform_locations
screen_w, screen_h = screen_dimensions
GL.glViewport(0,0,screen_w,screen_h)
GL.glEnable(GL.GL_BLEND)
GL.glBlendFunc(GL.GL_SRC_ALPHA, GL.GL_ONE_MINUS_SRC_ALPHA)
GL.glUseProgram(resources.shader_program_object)
GL.glUniform2f(uniforms['cam_position'], 0, 0)
GL.glUniform1f(uniforms['zoom'], zoom)
GL.glUniform2f(uniforms['screen_dimensions'], screen_w, screen_h)
GL.glActiveTexture(GL.GL_TEXTURE0)
GL.glBindTexture(GL.GL_TEXTURE_2D_ARRAY, resources.spacemen_texture)
GL.glUniform1i(uniforms['texture_atlas'], 0)
GL.glBindBuffer(GL.GL_ARRAY_BUFFER, resources.array_buffer)
GL.glBufferData(GL.GL_ARRAY_BUFFER, resources.vertex_array.nbytes, resources.vertex_array, GL.GL_STATIC_DRAW)
GL.glEnableVertexAttribArray(0)
GL.glEnableVertexAttribArray(1)
GL.glEnableVertexAttribArray(2)
GL.glVertexAttribPointer(
0, 2, GL.GL_FLOAT, GL.GL_FALSE, resources.vertex_stride,
ctypes.cast(resources.position_stream_offset, ctypes.c_void_p))
GL.glVertexAttribPointer(
1, 2, GL.GL_FLOAT, GL.GL_FALSE, resources.vertex_stride,
ctypes.cast(resources.size_stream_offset, ctypes.c_void_p))
GL.glVertexAttribPointer(
2, 2, GL.GL_FLOAT, GL.GL_FALSE, resources.vertex_stride,
ctypes.cast(resources.other_stream_offset, ctypes.c_void_p))
def render(resources, zoom, vertex_count):
'''
Render one frame.
'''
GL.glClearColor(0.4, 0.4, 0.4, 1.0)
GL.glClear(GL.GL_COLOR_BUFFER_BIT)
GL.glDrawArrays(
GL.GL_POINTS,
0,
vertex_count)
pygame.display.flip()
######## MAIN LOOP ########
def main():
video_flags = PG.OPENGL|PG.DOUBLEBUF
pygame.init()
screen_dimensions = 512, 256
pygame.display.set_mode(screen_dimensions, video_flags)
resources = make_resources(screen_dimensions)
frames = 3000
done = 0
zoom = 32.0
vertex_count = 512
screenshotter = Screenshotter(resources.save_buffer, screen_dimensions)
for i in xrange(vertex_count):
scale = 32.0
y = (15 - i // 32) / 32.0 * scale - scale/4.0 + (scale/2.0/32.0)
x = (i % 32) / 32.0 * scale - scale/2.0 + (scale/2.0/32.0)
xx = i // 2
lo = xx % 16
hi = (xx // 16) % 16
flavour = hi if i%2==0 else lo
resources.vertex_array[i] = ((x,y), (1,1), (0, flavour))
prepare_context(resources, zoom, screen_dimensions)
for i in xrange(frames):
if done:
break
if i%100==0:
print "{0}/{1}".format(i, frames)
while 1:
event = pygame.event.poll()
if event.type == PG.NOEVENT:
break
if event.type == PG.QUIT:
done = 1
render(resources, zoom, vertex_count)
screenshotter.take_screenshot()
print "---"
screenshotter.print_summary()
if __name__ == '__main__':
main()
它还需要diagnostic_numbers.png
在工作目录中调用的输入 PNG:
这是它通常显示的示例:
每个正方形都是一个单独的点,由几何着色器膨胀为一个四边形。每个输入顶点都有一个 2D 位置(第一个顶点在左上角,然后它们在屏幕下方按行排列)、一个尺寸(它们的宽度和高度都是 1)、一个旋转(它们都有一个旋转 0)和一个层(0-15)。层确定要从纹理数组的哪一层进行渲染。
但是,对于某些顶点,某些帧的渲染层不正确。例如:
由于顶点都是按顺序排列的,因此可以看出每个毛刺是一个由 8 个相邻顶点组成的块。这似乎总是如此。也有这样的情况,毛刺的顶点已经用一个顶点块的层值渲染,该顶点块的层值从数组中先前的 80 个顶点开始。在每种情况下,我都检查过这是真的。我也不认为我曾经在前128 256 个顶点(屏幕的上半部分)内观察到故障,它只是在那之后的某个地方开始。
每一帧,这是所有运行的 GL 代码*:
def render(resources, zoom, vertex_count):
'''
Render one frame.
'''
GL.glClearColor(0.4, 0.4, 0.4, 1.0)
GL.glClear(GL.GL_COLOR_BUFFER_BIT)
GL.glDrawArrays(
GL.GL_POINTS,
0,
vertex_count)
pygame.display.flip()
该程序将运行 3000 帧,计算每个渲染帧的 SHA1 哈希,并为每个不同的输出帧保存一个 PNG。这是我机器上典型控制台输出的示例:
Compiling vertex shader...
...completed
Compiling fragment shader...
...completed
Compiling geometry shader...
...completed
Compiling shader program...
...completed
0/3000
6fdbf7d09076ef084e57b90d7d445d2e56c54ab8.out.png
100/3000
200/3000
300/3000
3c7558f5a257c6b53fae9815df0ee8d457db9b19.out.png
400/3000
500/3000
66edb4d0fb88951af944c717c37b92f5cfa37cd0.out.png
4c3844a6879af3992081807e1e429e8ac83753f5.out.png
c538c6cddea6c6f53c3c968ebf8ab46bceb017f9.out.png
600/3000
700/3000
800/3000
900/3000
1000/3000
443afa3ee4c28611f0cc6b6a60712de503e34f8e.out.png
1100/3000
1200/3000
1300/3000
1400/3000
1500/3000
231d09f859aac29aef23d0c590187071e4fad321.out.png
1600/3000
1700/3000
1800/3000
df3051821a6d8327c77cfcf3d8053fdfcaf13e32.out.png
1900/3000
50ac6e618e5b58a8709baf557a757d62b041ef36.out.png
c53a53ba0f4d2401094cf25b271ab2e50b3909bf.out.png
2000/3000
5cb6c6989c24dc16e123d74f46985ebb243b2935.out.png
183cb8faaf1241526bd74e1f2ca65e6d89ab6c74.out.png
2100/3000
3666fcaac2d9d9b555c23367e5f01bb4f435cb65.out.png
2200/3000
69e14d278f2e340f50acc6274922d0cecc932ecd.out.png
1620c62c2eb151244ce929aa37a7dbc97c5def54.out.png
2300/3000
2400/3000
5932cf4ce48ad73bee1b35dd88b59d745ac3c493.out.png
2500/3000
2600/3000
dce25b06206bf0e671fb46e3365b9a42f1146813.out.png
2700/3000
2800/3000
2900/3000
---
6fdbf7d09076ef084e57b90d7d445d2e56c54ab8.out.png 2821
c538c6cddea6c6f53c3c968ebf8ab46bceb017f9.out.png 93
3c7558f5a257c6b53fae9815df0ee8d457db9b19.out.png 46
5cb6c6989c24dc16e123d74f46985ebb243b2935.out.png 12
df3051821a6d8327c77cfcf3d8053fdfcaf13e32.out.png 7
183cb8faaf1241526bd74e1f2ca65e6d89ab6c74.out.png 4
c53a53ba0f4d2401094cf25b271ab2e50b3909bf.out.png 3
69e14d278f2e340f50acc6274922d0cecc932ecd.out.png 3
4c3844a6879af3992081807e1e429e8ac83753f5.out.png 3
dce25b06206bf0e671fb46e3365b9a42f1146813.out.png 1
1620c62c2eb151244ce929aa37a7dbc97c5def54.out.png 1
5932cf4ce48ad73bee1b35dd88b59d745ac3c493.out.png 1
443afa3ee4c28611f0cc6b6a60712de503e34f8e.out.png 1
3666fcaac2d9d9b555c23367e5f01bb4f435cb65.out.png 1
66edb4d0fb88951af944c717c37b92f5cfa37cd0.out.png 1
231d09f859aac29aef23d0c590187071e4fad321.out.png 1
50ac6e618e5b58a8709baf557a757d62b041ef36.out.png 1
大多数帧按预期呈现,但有相当多的帧显示了故障,并且一些故障模式更为常见。
如果要运行代码,则需要 Python 2.7、pygame、PyOpenGL 和 numpy。在 Ubuntu 上,我安装的软件包是 python-numpy、python-opengl 和 python-pygame。
我已经在 64 位 Ubuntu 上的 NVIDIA 驱动程序版本 310 和 313 上进行了尝试,并得到了相同的结果。我的硬件(如 lspci 所示)是“NVIDIA Corporation G98M [GeForce 9300M GS]”。
我不确定现在要检查什么。我想我已经正确上传了顶点数据,因为它至少在某些时候正确渲染,而且我只在开始时上传一次。但是我每一帧都做的太少了,以至于我不认为我在那里做错了什么。着色器编译器没有发出警告。接下来我应该尝试什么?有可能是驱动程序错误吗?我怎么知道是不是?
* - 除了用于捕获屏幕截图的代码,但可以禁用它并且故障仍然会发生。
我尝试过的事情:
- 将 EndPrimitive() 添加到几何着色器的末尾。没有不同。
- 重新排列顶点数组中的字段。没有不同。
- 在着色器中分配不同位置的属性。如果我将位置放在位置 2,则故障将影响位置的 y 元素。