HLSL新手问题:
我正在尝试移植以下 MATLAB 代码以在显卡上工作。
function diff_im = anisodiff2D(im, num_iter, delta_t, kappa, option)
im = double(im);
% PDE (partial differential equation) initial condition.
diff_im = im;
% Center pixel distances.
dx = 1;
dy = 1;
dd = sqrt(2);
% 2D convolution masks - finite differences.
hN = [0 1 0; 0 -1 0; 0 0 0];
hS = [0 0 0; 0 -1 0; 0 1 0];
hE = [0 0 0; 0 -1 1; 0 0 0];
hW = [0 0 0; 1 -1 0; 0 0 0];
hNE = [0 0 1; 0 -1 0; 0 0 0];
hSE = [0 0 0; 0 -1 0; 0 0 1];
hSW = [0 0 0; 0 -1 0; 1 0 0];
hNW = [1 0 0; 0 -1 0; 0 0 0];
% Anisotropic diffusion.
for t = 1:num_iter
% Finite differences. [imfilter(.,.,'conv') can be replaced by conv2(.,.,'same')]
nablaN = imfilter(diff_im,hN,'conv');
nablaS = imfilter(diff_im,hS,'conv');
nablaW = imfilter(diff_im,hW,'conv');
nablaE = imfilter(diff_im,hE,'conv');
nablaNE = imfilter(diff_im,hNE,'conv');
nablaSE = imfilter(diff_im,hSE,'conv');
nablaSW = imfilter(diff_im,hSW,'conv');
nablaNW = imfilter(diff_im,hNW,'conv');
% Diffusion function.
if option == 1
cN = exp(-(nablaN/kappa).^2);
cS = exp(-(nablaS/kappa).^2);
cW = exp(-(nablaW/kappa).^2);
cE = exp(-(nablaE/kappa).^2);
cNE = exp(-(nablaNE/kappa).^2);
cSE = exp(-(nablaSE/kappa).^2);
cSW = exp(-(nablaSW/kappa).^2);
cNW = exp(-(nablaNW/kappa).^2);
elseif option == 2
cN = 1./(1 + (nablaN/kappa).^2);
cS = 1./(1 + (nablaS/kappa).^2);
cW = 1./(1 + (nablaW/kappa).^2);
cE = 1./(1 + (nablaE/kappa).^2);
cNE = 1./(1 + (nablaNE/kappa).^2);
cSE = 1./(1 + (nablaSE/kappa).^2);
cSW = 1./(1 + (nablaSW/kappa).^2);
cNW = 1./(1 + (nablaNW/kappa).^2);
end
% Discrete PDE solution.
diff_im = diff_im + ...
delta_t*(...
(1/(dy^2))*cN.*nablaN + (1/(dy^2))*cS.*nablaS + ...
(1/(dx^2))*cW.*nablaW + (1/(dx^2))*cE.*nablaE + ...
(1/(dd^2))*cNE.*nablaNE + (1/(dd^2))*cSE.*nablaSE + ...
(1/(dd^2))*cSW.*nablaSW + (1/(dd^2))*cNW.*nablaNW );
% Iteration warning.
fprintf('\rIteration %d\n',t);
end
就在我一次通过的那一刻:
texture2D Input0;
sampler2D Input0Sampler = sampler_state
{
Texture = <Input0>;
MinFilter = Point;
MagFilter = Point;
MipFilter = Point;
AddressU = Clamp;
AddressV = Clamp;
};
struct VertexShaderInput
{
float4 Position : POSITION0;
float2 TextureCoordinate : TEXCOORD0;
};
struct VertexShaderOutput
{
float4 Position : POSITION0;
float2 TextureCoordinate : TEXCOORD0;
};
struct PixelShaderOutput
{
// TODO: Optionally add/remove output indices to match GPUProcessor.numOutputs
float4 Index0 : COLOR0;
};
// input texture dimensions
static float w = 1920 - 8;
static float h = 1080 - 8;
static const float2 pixel = float2(1.0 / w, 1.0 / h);
static const float2 halfPixel = float2(pixel.x / 2, pixel.y / 2);
static const float3x3 hN =
{
0, 1, 0,
0, -1, 0,
0, 0, 0
};
static const float3x3 hS =
{
0, 0, 0,
0, -1, 0,
0, 1, 0
};
static const float3x3 hE =
{
0, 0, 0,
0, -1, 1,
0, 0, 0
};
static const float3x3 hW =
{
0, 0, 0,
1, -1, 0,
0, 0, 0
};
static const float3x3 hNE =
{
0, 0, 1,
0, -1, 0,
0, 0, 0
};
static const float3x3 hSE =
{
0, 0, 0,
0, -1, 0,
0, 0, 1
};
static const float3x3 hSW =
{
0, 0, 0,
0, -1, 0,
1, 0, 0
};
static const float3x3 hNW =
{
1, 0, 0,
0, -1, 0,
0, 0, 0
};
VertexShaderOutput VertexShaderFunction(VertexShaderInput vsInput)
{
//VertexShaderOutput output;
//output.Position = vsInput.Position;
//output.TextureCoordinate = vsInput.TextureCoordinate;
VertexShaderOutput output;
vsInput.Position.x = vsInput.Position.x - 2*halfPixel.x;
vsInput.Position.y = vsInput.Position.y + 2*halfPixel.y;
output.Position = vsInput.Position;
output.TextureCoordinate = vsInput.TextureCoordinate ;
return output;
//return output;
}
float4 Convolution(VertexShaderOutput input, float3x3 kernel)
{
//PixelShaderOutput output;
float4 pixel = float4(0.0f, 0.0f, 0.0f, 0.0f);
for (int i = -1; i <= 1; ++i)
{
for (int j = -1; j <= 1; ++j)
{
pixel += kernel[i+1][j+1] * tex2D(Input0Sampler, input.TextureCoordinate + float2(i,j));
};
};
return pixel;
}
PixelShaderOutput PixelShaderFunction(VertexShaderOutput psInput)
{
PixelShaderOutput output;
output.Index0 = tex2D(Input0Sampler, psInput.TextureCoordinate);
float dx, dy, dd;
dx = 1; dy = 1; dd = pow(2, 0.5);
float delta_t = 1/7;
float4 nablaN = Convolution(psInput, hN);
float4 nablaS = Convolution(psInput, hS);
float4 nablaW = Convolution(psInput, hW);
float4 nablaE = Convolution(psInput, hE);
float4 nablaNE = Convolution(psInput, hNE);
float4 nablaSE = Convolution(psInput, hSE);
float4 nablaSW = Convolution(psInput, hSW);
float4 nablaNW = Convolution(psInput, hNW);
float4 cN = 1 / pow( 1 + (nablaN / 40), 2);
float4 cS = 1 / pow( 1 + (nablaS / 40), 2);
float4 cW = 1 / pow( 1 + (nablaW / 40), 2);
float4 cE = 1 / pow( 1 + (nablaE / 40), 2);
float4 cNE = 1 / pow( 1 + (nablaNE / 40), 2);
float4 cSE = 1 / pow( 1 + (nablaSE / 40), 2);
float4 cSW = 1 / pow( 1 + (nablaSW / 40), 2);
float4 cNW = 1 / pow( 1 + (nablaNW / 40), 2);
output.Index0 += delta_t *
(
mul(cN, nablaN) + mul(cS, nablaS) + mul(cW, nablaW) + mul(cE, nablaE) + (dd*dd)*(mul(cNE, nablaNE) + mul(cSE, nablaSE) + mul(cSW, nablaSW) + mul(cNW, nablaNW))
);
return output;
}
technique PeronaMalik
{
pass pass1
{
VertexShader = compile vs_2_0 VertexShaderFunction();
PixelShader = compile ps_2_0 PixelShaderFunction();
}
}
问题是我需要像在 matlab 代码中一样连续多次应用这种技术。我什至应该使用多次通行证来做到这一点吗?
编辑
如果我决定使用 C# 来控制通行证,那么我可以尝试:
byte[] theBytes = TemplateMatch.Bytes;
for (int iters = 0; iters < 3; iters++)
{
t.SetData<byte>(theBytes);
GraphicsDevice.SetRenderTarget(renOutput);
effect.Parameters["Input0"].SetValue(t);
quad.RenderFullScreenQuad(effect);
for (int i = 0; i < effect.Techniques.Count; i++)
{
for (int j = 0; j < effect.Techniques[i].Passes.Count; j++)
{
effect.Techniques[i].Passes[j].Apply();
}
}
GraphicsDevice.SetRenderTarget(null);
renOutput.GetData<float>(arrayOutput);
Buffer.BlockCopy(arrayOutput, 0, theBytes, 0, theBytes.Length);
}
但是在第二次迭代中我得到了一个错误
在 GraphicsDevice 上主动设置资源时,您不能在资源上调用 SetData。在调用 SetData 之前从设备取消设置它。
在线 t.SetData(theBytes);
编辑
我试过了
byte[] theBytes = TemplateMatch.Bytes;
for (int iters = 0; iters < 3; iters++)
{
t.SetData<byte>(theBytes);
GraphicsDevice.SetRenderTarget(renOutput);
effect.Parameters["Input0"].SetValue(t);
quad.RenderFullScreenQuad(effect);
for (int i = 0; i < effect.Techniques.Count; i++)
{
for (int j = 0; j < effect.Techniques[i].Passes.Count; j++)
{
effect.Techniques[i].Passes[j].Apply();
}
}
GraphicsDevice.SetRenderTarget(null);
renOutput.GetData<float>(arrayOutput);
Buffer.BlockCopy(arrayOutput, 0, theBytes, 0, theBytes.Length);
GraphicsDevice.Textures[0] = null;
}
这似乎修复了运行时错误,但这给了我相同的未过滤图像!
编辑
I've modified the above matlab code so that it runs in freemat. I've stepped through the code and I've found that the problem is related to the lines like this cN = exp(-(nablaN/kappa).^2); In the freemat version these do not evaluate to zeros (while my HLSL version does). This leads me to suspect that the problem is related to precision issues with HLSL or how I'm handling floating point arithmetic on the graphics card.