floating-point - VHDL 代码无法与测试平台正确连接

Question

我正在尝试用组合逻辑制作一个 32 位浮点乘法器。据我所知，我已经这样做了，除非我尝试在其中模拟我的测试台。如果我用我的代码进行模拟，我会在所有值中得到 U。如果我取出我的代码并进行模拟，我会得到我的输入向量。我知道我还没有完全标准化我的尾数。我完全不明白为什么。一切都完美编译。我遇到的唯一问题是在模拟测试台时获取值......一切都在正确调用一切。任何帮助将非常感激。

请注意测试台确实可以工作，我已经用其他确实返回正确值的 VHDL 对其进行了模拟

这是我的乘数代码：

library IEEE                   ;
use IEeE.std_logic_1164.all    ;
use ieee.std_logic_arith.all   ;
use ieee.numeric_std.all       ;
Use Ieee.std_logic_unsigned.all; 

entity FP_MULTIPLIER is
    port( A_VAL, B_VAL : in std_logic_vector (31 downto 0);
            F_VAL : out std_logic_vector (31 downto 0));
end FP_MULTIPLIER;--

architecture Behavioral of FP_MULTIPLIER is
signal exponant : std_logic_vector (8 downto 0);
signal holder : std_logic_vector (47 downto 0);
signal PPA1 : std_logic_vector (47 downto 0);
signal PPA2 : std_logic_vector (47 downto 0);
signal PPA3 : std_logic_vector (47 downto 0);
signal PPA4 : std_logic_vector (47 downto 0);
signal PPA5 : std_logic_vector (47 downto 0);
signal PPA6 : std_logic_vector (47 downto 0);
signal PPA7 : std_logic_vector (47 downto 0);
signal PPA8 : std_logic_vector (47 downto 0);
signal PPA9 : std_logic_vector (47 downto 0);
signal AD1  : std_logic_vector (47 downto 0);
signal AD2  : std_logic_vector (47 downto 0);
signal AD3  : std_logic_vector (47 downto 0);
signal almost:std_logic_vector (23 downto 0);
signal F0:std_logic_vector (47 downto 0);
signal F1:std_logic_vector (47 downto 0);
constant ZERO : std_logic_vector := "000000000000000000000000000000000000000000000000";     --zero vektir for imputs

type temp_MP is array (6 downto 1) of std_logic_vector (47 downto 0);
signal MP :temp_MP;
type temp_PPA is array (23 downto 0) of std_logic_vector (47 downto 0);                         --creates 24 vectir array of 49 length
signal temp :temp_PPA := (others => (others => '0'));
begin

exponant(8) <= A_VAL(31) xor B_VAL(31);                                                         --gets sign bit
exponant (7 downto 0) <= ((A_VAL (30 downto 23) + B_VAL (30 downto 23)) - 127 );                --gets exponant

partial : for I in 0 to 23 generate
    temp(I) (23+I downto I) <= '1'& A_VAL(22 downto 0) WHEN B_VAL(I)='1' ELSE                   --fills vector array and shifts it properly
                                    (others => '0');
end generate partial;


RRU1 : entity work.RRU7_3 port map (temp(0),temp(1),temp(2),temp(3),temp(4),temp(5),temp(6),PPA1,PPA2,PPA3);
RRU2 : entity work.RRU7_3 port map (temp(7),temp(8),temp(9),temp(10),temp(11),temp(12),temp(13),PPA4,PPA5,PPA6);
RRU3 : entity work.RRU7_3 port map (temp(14),temp(15),temp(16),temp(17),temp(18),temp(19),temp(20),PPA7,PPA8,PPA9);
RRU4 : entity work.RRU7_3 port map (temp(21),temp(22),temp(23),ZERO,PPA7,PPA8,PPA9,MP(1),MP(2),MP(3));
RRU5 : entity work.RRU7_3 port map (PPA1,PPA2,PPA3,PPA4,PPA5,PPA6,ZERO,MP(4),MP(5),MP(6));
RRU6 : entity work.RRU7_3 port map (MP(1),MP(2),MP(3),MP(4),MP(5),MP(6),ZERO,AD1,AD2,AD3);
RRU32: entity work.RRU3_2 port map (AD1,AD2,AD3,F1,F0);

holder <= F1+ F0;    --implement with CSA?

almost <=   holder(46 downto 22) WHEN holder(47)='1' ELSE  --fix for rounding and radix point
                holder(45 downto 21);

F_VAL <= exponant & almost;

end Behavioral;

这是我的测试台的代码：

library IEEE;
use IEEE.STD_LOGIC_1164.all;
use STD.TEXTIO.all;
use IEEE.STD_LOGIC_TEXTIO.all;

entity ATB_FPM is
end entity ATB_FPM;

architecture ATB_FPM of ATB_FPM is

component FP_MULTIPLIER is
  port (
    A_VAL : in  STD_LOGIC_VECTOR ( 31 downto 0 );
    B_VAL : in  STD_LOGIC_VECTOR ( 31 downto 0 );
    F_VAL : out STD_LOGIC_VECTOR ( 31 downto 0 )
  );
end component FP_MULTIPLIER;

type F_VALS_ARRAY is array ( 0 to 15 ) of STD_LOGIC_VECTOR ( 31 downto 0 );
constant A_VALS : F_VALS_ARRAY := (
  X"3F800000", X"3F800000", X"3FA33333", X"38D1B717",
  X"41160000", X"41020831", X"43000000", X"40300000",
  X"47129320", X"41B80A3D", X"42C80000", X"46EC8E00",
  X"49791900", X"45800000", X"46733D52", X"390164EF"
);
constant B_VALS : F_VALS_ARRAY := (
  X"40000000", X"449F6000", X"42C80000", X"3951B717",
  X"460CAF00", X"42F61EB8", X"43800000", X"46210100",
  X"47000600", X"3B8B4396", X"43480000", X"4641BC00",
  X"3C4A42AF", X"46000000", X"44FC8666", X"3959945B"
);

signal X_A_VAL : STD_LOGIC_VECTOR ( 31 downto 0 );
signal X_B_VAL : STD_LOGIC_VECTOR ( 31 downto 0 );
signal X_F_VAL : STD_LOGIC_VECTOR ( 31 downto 0 );

begin
UUT: FP_MULTIPLIER
  port map (
    A_VAL => X_A_VAL,
    B_VAL => X_B_VAL,
    F_VAL => X_F_VAL
  );

process
variable DLAY : TIME;
constant T50N : TIME := 50 ns;
file OUT_FILE : TEXT open WRITE_MODE is "results.txt";
variable BUF  : LINE;
constant SP2  : STRING( 1 to 2 ) := "  ";
constant SP4  : STRING( 1 to 4 ) := "    ";
constant HDR  : STRING( 1 to 38 ) := "  A_VAL     B_VAL       F_VAL     TIME";
variable LNO  : INTEGER := 1;
begin
   for I in 0 to 15 loop
     if LNO = 1 then
       LNO := LNO + 1;
       write ( BUF, HDR );
       writeline ( OUT_FILE, BUF );
     elsif LNO = 25 then
       LNO := 1;
     else
       LNO := LNO + 1;
     end if;
     X_A_VAL <= A_VALS(I) ;    X_B_VAL <= B_VALS(I);
     wait for 50 ns;
     DLAY := T50N - X_F_VAL'LAST_EVENT; 
   hwrite ( BUF, X_A_VAL );  write ( BUF, SP2 );
   hwrite ( BUF, X_B_VAL );  write ( BUF, SP4 );
   hwrite ( BUF, X_F_VAL );  write ( BUF, SP4 );
   write  ( BUF, DLAY    );
   writeline ( OUT_FILE, BUF );
   end loop;
end process;

end architecture ATB_FPM;

应大众需求，这是我的 RRU7to3；

library IEEE;
use IEEE.STD_LOGIC_1164.all;

entity RRU7_3 is
  port (
    A_VEC  : in  STD_LOGIC_VECTOR ( 47 downto 0 );
    B_VEC  : in  STD_LOGIC_VECTOR ( 47 downto 0 );
    C_VEC  : in  STD_LOGIC_VECTOR ( 47 downto 0 );
    D_VEC  : in  STD_LOGIC_VECTOR ( 47 downto 0 );
    E_VEC  : in  STD_LOGIC_VECTOR ( 47 downto 0 );
    F_VEC  : in  STD_LOGIC_VECTOR ( 47 downto 0 );
    G_VEC  : in  STD_LOGIC_VECTOR ( 47 downto 0 );
    F2_VEC : out STD_LOGIC_VECTOR ( 47 downto 0 );
    F1_VEC : out STD_LOGIC_VECTOR ( 47 downto 0 );
    F0_VEC : out STD_LOGIC_VECTOR ( 47 downto 0 )
  );
end entity RRU7_3;

architecture WASTEFUL of RRU7_3 is

component WTA7_3 is
  port (
    A  : in  STD_LOGIC;
    B  : in  STD_LOGIC;
    C  : in  STD_LOGIC;
    D  : in  STD_LOGIC;
    E  : in  STD_LOGIC;
    F  : in  STD_LOGIC;
    G  : in  STD_LOGIC;
    F0 : out STD_LOGIC;
    F1 : out STD_LOGIC;
    F2 : out STD_LOGIC
  );
end component WTA7_3;

signal ROW2 : STD_LOGIC_VECTOR ( 65 downto 0 );
signal ROW1 : STD_LOGIC_VECTOR ( 65 downto 0 );
signal ROW0 : STD_LOGIC_VECTOR ( 65 downto 0 );

begin

GEN_LABEL: for I in 0 to 47 generate
  WTAUNIT: WTA7_3
    port map (
      A  => A_VEC(I),
      B  => B_VEC(I),
      C  => C_VEC(I),
      D  => D_VEC(I),
      E  => E_VEC(I),
      F  => F_VEC(I),
      G  => G_VEC(I),
      F0 => ROW0(I),
      F1 => ROW1(I+1),
      F2 => ROW2(I+2)
  );
end generate;

  F0_VEC <= ROW0( 47 downto 0 );
  F1_VEC <= ROW1( 47 downto 1 ) & '0';
  F2_VEC <= ROW2( 47 downto 2 ) & "00";

end architecture WASTEFUL;

现在这里是 RRU3to2

library IEEE;
use IEEE.STD_LOGIC_1164.all;

entity RRU3_2 is
  port (
    A_VEC  : in  STD_LOGIC_VECTOR ( 47 downto 0 );
    B_VEC  : in  STD_LOGIC_VECTOR ( 47 downto 0 );
    C_VEC  : in  STD_LOGIC_VECTOR ( 47 downto 0 );
    F1_VEC : out STD_LOGIC_VECTOR ( 47 downto 0 );
    F0_VEC : out STD_LOGIC_VECTOR ( 47 downto 0 )
  );
end entity RRU3_2;

architecture WASTEFUL of RRU3_2 is

component CSADDER is
  generic (
    G_DELAY : TIME := 1 ns
  );
  port (
    A : in  STD_LOGIC;
    B : in  STD_LOGIC;
    C : in  STD_LOGIC;
    F0: out STD_LOGIC;
    F1: out STD_LOGIC
  );
end component CSADDER;

signal ROW1 : STD_LOGIC_VECTOR ( 48 downto 0 );
signal ROW0 : STD_LOGIC_VECTOR ( 48 downto 0 );

begin

GEN_LABEL: for I in 0 to 47 generate
  WTAUNIT: CSADDER
    port map (
      A  => A_VEC(I),
      B  => B_VEC(I),
      C  => C_VEC(I),
      F0 => ROW0(I),
      F1 => ROW1(I+1)
  );
end generate;

  F0_VEC <= ROW0( 47 downto 0 );
  F1_VEC <= ROW1( 47 downto 1 ) & '0';

end architecture WASTEFUL;

我还编辑了我上面的 FP_Multiplier 以反映为便于阅读而建议的更改（测试和编译仍然很好，根本没有值。）

这是模拟的图像：

模拟只生产 U 并且拒绝运行

score 1 · Accepted Answer

这个实现不是为了容易理解而写的，并且有很多可能的改进。

但我怀疑最重要的一点是：据我所知，您只分配了名为“temp”的数组中每个部分产品的一部分，而将每个部分产品的其余部分分配为“UUUU”。我们只能保证 RRU 单元“工作”，并且无法判断它们是否将这些“UUU”输入值解析为输出上的“UUU”；他们似乎很可能是。建议您将定义的值分配给每个“temp”值的所有值。这可能足以解决困难，但只有你才能知道，除非你用缺失的信息更新问题。

进一步的改进：

为什么不将部分产品数组称为partial_products而不是temp？
为什么不调用符号位sign而不是temp_man(8)？同上指数，而a_mantissa 不是'1'& A_VAL(22 downto 0)
正如 vermaete 所说，使用标准库numeric_std而不是 std_logic_arith 和 std_logic_unsigned （这可能相互矛盾）。然后将您的未签名数量（如部分产品）声明为“未签名”而不是 slv。
除非您打算修改ZERO(!) 的值，否则将其声明为常量而不是信号（并且该(others => '0')表单可以节省所有这些数字的计数！）。
请注意，holder您分配给的两个切片的almost大小不同。如果，如您所说，在这种状态下“一切都完美编译”，请针对您正在使用的工具提交错误报告，并在此处发布其名称和版本，以便我们避免使用它。

score 0 · Accepted Answer

您的测试平台应该至少驱动 FB_MULTIPIER 组件（又名DUT）的输入。在更好的情况下，它还将读取 DUT 的输出并检查它们是否符合预期。

在您的测试台中，我找不到您为 X_A_VAL 和 X_B_VAL 赋值的地方。如果您不驱动 std_logic，它将处于未初始化状态 ('U')。

作为旁注：

use ieee.std_logic_arith.all   ;
use ieee.numeric_std.all       ;
Use Ieee.std_logic_unsigned.all;

是旧式和新式库的混合体。您可以在VHDL FAQ中找到有关它的更多信息。但我建议你不要这样做，只使用use ieee.numeric_std.all

score 0 · Accepted Answer

向量数组与他们正在处理的函数不匹配存在一个稍微复杂的问题。解决这个问题，并取出向量被“填充”为零的位置；代码现在可以工作了。请注意，它尚未对数字进行标准化， 尽管它确实为某个点提供了一个有点准确的数字。

对于那些好奇的人，这里有一些工作描述

library IEEE                   ;
use IEeE.std_logic_1164.all    ;
use ieee.std_logic_arith.all   ;
use ieee.numeric_std.all       ;
Use Ieee.std_logic_unsigned.all; 

entity FP_MULTIPLIER is
    port( A_VAL, B_VAL : in std_logic_vector (31 downto 0);
            F_VAL : out std_logic_vector (31 downto 0));
end FP_MULTIPLIER;--

architecture Behavioral of FP_MULTIPLIER is
signal exponant : std_logic_vector (8 downto 0);
signal holder : std_logic_vector (47 downto 0);
signal PPA1 : std_logic_vector (47 downto 0);
signal PPA2 : std_logic_vector (47 downto 0);
signal PPA3 : std_logic_vector (47 downto 0);
signal PPA4 : std_logic_vector (47 downto 0);
signal PPA5 : std_logic_vector (47 downto 0);
signal PPA6 : std_logic_vector (47 downto 0);
signal PPA7 : std_logic_vector (47 downto 0);
signal PPA8 : std_logic_vector (47 downto 0);
signal PPA9 : std_logic_vector (47 downto 0);
signal AD1  : std_logic_vector (47 downto 0);
signal AD2  : std_logic_vector (47 downto 0);
signal AD3  : std_logic_vector (47 downto 0);
signal almost:std_logic_vector (23 downto 0);
signal there: std_logic_vector (23 downto 0);
signal F0:std_logic_vector (47 downto 0);
signal F1:std_logic_vector (47 downto 0);
constant ZERO : std_logic_vector := "000000000000000000000000000000000000000000000000";     --zero vektir for imputs

type temp_MP is array (6 downto 1) of std_logic_vector (47 downto 0);
signal MP :temp_MP;
type temp_PPA is array (23 downto 0) of std_logic_vector (47 downto 0);                         --creates 24 vectir array of 49 length
signal temp :temp_PPA := (others => (others => '0'));
begin

exponant(8) <= A_VAL(31) xor B_VAL(31);                                                         --gets sign bit
exponant (7 downto 0) <= ((A_VAL (30 downto 23) + B_VAL (30 downto 23)) - 127 );                --gets exponant

partial : for I in 0 to 23 generate
    temp(I) (23+I downto I) <= '1'& A_VAL(22 downto 0) WHEN B_VAL(I)='1' ELSE                   --fills vector array and shifts it properly
                                    (others => '0');
end generate partial;


RRU1 : entity work.RRU7_3 port map (temp(0),temp(1),temp(2),temp(3),temp(4),temp(5),temp(6),PPA1,PPA2,PPA3);
RRU2 : entity work.RRU7_3 port map (temp(7),temp(8),temp(9),temp(10),temp(11),temp(12),temp(13),PPA4,PPA5,PPA6);
RRU3 : entity work.RRU7_3 port map (temp(14),temp(15),temp(16),temp(17),temp(18),temp(19),temp(20),PPA7,PPA8,PPA9);
RRU4 : entity work.RRU7_3 port map (temp(21),temp(22),temp(23),ZERO,PPA7,PPA8,PPA9,MP(1),MP(2),MP(3));
RRU5 : entity work.RRU7_3 port map (PPA1,PPA2,PPA3,PPA4,PPA5,PPA6,ZERO,MP(4),MP(5),MP(6));
RRU6 : entity work.RRU7_3 port map (MP(1),MP(2),MP(3),MP(4),MP(5),MP(6),ZERO,AD1,AD2,AD3);
RRU32: entity work.RRU3_2 port map (AD1,AD2,AD3,F1,F0);

holder <= F1+ F0;

almost <=   holder(46 downto 23) WHEN holder(47)='1' ELSE
                holder(47 downto 24);

F_VAL <= exponant & almost (22 downto 0);

end Behavioral;

floating-point - VHDL 代码无法与测试平台正确连接

3 回答 3

Related

Reference