我正在尝试流水线化一个由 5 个乘法器和 5 个串联连接的加法器组成的模块。该模块是一个多项式计算器。在没有流水线的情况下,该模块到目前为止运行良好。
multipliers [31:0] m0,m1,m2,m3,m4; // separate module
adders [31:0] a0,a1,a2,a3,a4; // separate module
user_input [31:0] input; // register
constants [31:0] c0,c1,c2,c3,c4; // registers
pipeliners [31:0] p0,p1,p3,p4,p4; // pipelining resisters
wires [31:0] w0,w1,w2,w3,w4; // wires
没有流水线的结构如下所示,
[input]*[c0] => w0 => [w0]+[c1] => w1 => [w1]*[input] => w2 => [w2]+[c2] => w3 ... //goes on like this
由于它们都是串联的,因此关键路径由 10 个组件组成。
我实施的流水线想法如下,
[input]*[c0] => w0 => p0 => [p0]+[c1] => w1 => p1 => [p1]*[input] => w2=> p2 => [p2]+[c2] => w3 ... //goes on like this
我有一个错误,“不能由原语或连续赋值驱动。”这是由于 p0,p1,p3 ... 寄存器。将它们转换成线可以解决错误,但它们不再是寄存器。我使用iverilog 作为编译器。
我的问题是,如何进行流水线操作,以便使用尽可能少的时钟周期获得输出并解决错误?
******* 带代码的编辑版本 *******
`timescale 1ns / 1ps
module poly (
clk,
q,
result
);
input clk;
input [31:0] q; //user input
output [31:0] result;
reg [31:0] c,c0,c1,c2,c3,c4;
reg [31:0] p, p0, p1, p2, p3, p4, p5, p6,p7,p8,p9,p10,p11,p12;
always @(q)
if (q>=32'h08000000 && q<32'h0A000000) begin
c <= 32'h058B90C0;
c0 <= 32'h74599F60;
c1 <= 32'h79481740;
c2 <= 32'h445B7440;
c3 <= 32'h5AF892E0;
c4 <= 32'h9E2C2258;
end else if (q>=32'h0A000000 && q<32'h0C000000)
begin
c <= 32'h258B90C0;
c0 <= 32'hFB942240;
c1 <= 32'h21558EC0;
c2 <= 32'h5D882000;
c3 <= 32'h75F846E8;
c4 <= 32'hF48F5786;
end
wire [31:0] x0,x1,x2,x3,x4;
wire [31:0] y0,y1,y2,y3,y4;
multiplier m4 (.i_multiplicand(q),.i_multiplier(c4),.o_result(x4));
assign = x4;
adder a4 (.a(p0),.b(c3),.c(y4));
assign p1 = y4;
assign p2 = q;
multiplier m3 (.i_multiplicand(p2),.i_multiplier(p1),.o_result(x3));
assign p3 = x3;
adder a3 (.a(p3),.b(c2),.c(y3));
assign p4 = y3;
assign p5 = q;
multiplier m2 (.i_multiplicand(p5),.i_multiplier(p4),.o_result(x2));
assign p6 = x2;
adder a2 (.a(p6),.b(c1),.c(y2));
assign p7 = y2;
assign p8 = q;
multiplier m1 (.i_multiplicand(p8),.i_multiplier(p7),.o_result(x1));
assign p9 = x1;
adder a1 (.a(p9),.b(c0),.c(y1));
assign p10 = y1;
assign p11 = q;
adder a0 (.a(p10),.b(p11),.c(y0));
assign p12 = y0;
multiplier m0 (.i_multiplicand(p12),.i_multiplier(c),.o_result(x0));
assign result = x0;
endmodule