arrays - 如何在 DLX 汇编语言中放置具有多个数字的数组来表示矩阵

Question

我正在制作关于 Haar DWT 的汇编语言，如您所知，haar dwt 需要一个从图像表示的矩阵。我想要一种能够放置 64x64 或 128x128 甚至 256x256 矩阵的汇编语言。我怎么做？

我对这个组件很陌生，更不用说使用 DLX 微处理器了。我有用于计算 DWT 本身的汇编代码。但不知何故，我可以放入 haar_in 数组的最大矩阵仅为 4x4 矩阵/16 个值。我不知道是内存问题，还是我计算的大浮点对程序来说太重了，但我无法完成。我曾尝试更改 haar_out 的空间，即 M 和 N，但也没有用。

.data
  sqrt:
    .double 1.4142

  haar_in:
    .double 148, 142, 130, 168, 166, 117, 138, 120, 116, 118, 109, 136, 99, 89, 123, 120

  haar_out:
    .space 10000

  M:
    .word 4
  N:
    .word 4
  K:
    .word 1

; Register Usage
;   GP:
;     R0 => 0
;     R1 => M
;     R2 => N
;     R3.. => Internally used
;   FP:
;     F0 => 0
;     F8 => Square Root of 2
;     F2.. => Internally used
main:
  ld    f8, sqrt(r0)              ; F8 contains square root of 2
  lw    r1, M(r0)                   ; Get value of M in R1
  lw    r2, N(r0)                   ; Get value of N in R2
  addi  r3, r0, 0                   ; Set R3 to 0 as a counter for M
  addi  r4, r0, 0                   ; Set R4 to 0 as a counter for N

  ; Copy the haar_in to the haar_out array
copy_n_haar_out:
  sub   r2, r2, r4
  beqz  r2, n_loop_exit             ; Exit N-loop if counter reached N
  lw    r2, N(r0)                   ; Get value of N in R2
  slli  r4, r4, 3                   ; Align to the next pointer
  lw    r1, M(r0)                   ; Get value of M in R1
  addi  r3, r0, 0                   ; Set R3 to 0 as a counter for M
  copy_m_haar_out:
    sub   r1, r1, r3
    beqz  r1, m_loop_exit           ; Exit M-loop if counter reached M
    lw    r1, M(r0)                 ; Get value of M in R1
    slli  r3, r3, 3                 ; Align to the next pointer
    mult  r5, r4, r1                ; Form R5 = (j*m)
    add   r5, r5, r3                ;      R5 = i + j*m
    ld    f2, haar_in(r5)           ; Get the array from haar array
    sd    haar_out(r5), f2          ; Store it in internal array
    srli  r3, r3, 3                 ; Get the original count back
    addi  r3, r3, 1                 ; Increment the count by 1
    j     copy_m_haar_out           ; Loop back
  m_loop_exit:
  srli  r4, r4, 3                   ; Get the original count back
  addi  r4, r4, 1                   ; Increment the count by 1
  j     copy_n_haar_out             ; Loop back
n_loop_exit:
  lw    r1, M(r0)                   ; Get value of M in R1
  lw    r2, N(r0)                   ; Get value of N in R2

  ; Determine K, the largest power of 2 such that K <= M
  lw    r3, K(r0)                   ; Get the initial value of K in R3
k_less_than_m:
  slli  r5, r3, 1                   ; R5 = K*2
  slt   r4, r1, r5                  ; Set R4 if M <= K
  subi  r4, r4, 1                   ; Check if R4 is set
  beqz  r4, k_loop_m_exit           ; Exit loop if set
  sub   r4, r1, r3                  ; Check if K == M
  beqz  r4, k_loop_m_exit           ; Exit loop if equal
  slli  r3, r3, 1                   ; K = K*2
  j     k_less_than_m               ; Loop back
k_loop_m_exit:
  sw    K(r0), r3                   ; Store the value in K

  ; Transform all the columns now
  lw    r6, K(r0)                   ; Get the updated value of K in R6
  addi  r3, r0, 0                   ; Set R3 to 0 as a counter for K
  addi  r4, r0, 0                   ; Set R4 to 0 as a counter for N
col_transform:
  slti  r7, r6, 1                   ; Set R7 if K < 1
  bnez  r7, col_trans_exit          ; Exit Loop if R7 is set
  srli  r6, r6, 1                   ; K = K/2
  sw    K(r0), r6                   ; Store value of K

  ; Perform the actual transform
  col_trans_n:
    sub   r2, r2, r4
    beqz  r2, col_trans_n_exit        ; Exit N-loop if counter reached N
    lw    r2, N(r0)                   ; Get value of N in R2
    slli  r4, r4, 3                   ; Align to the next pointer
    lw    r6, K(r0)                   ; Get value of K in R6
    addi  r3, r0, 0                   ; Set R3 to 0 as a counter for M
    col_trans_k:
      sub   r6, r6, r3
      beqz  r6, col_trans_k_exit      ; Exit K-loop if counter reached K
      lw    r6, K(r0)                 ; Get value of K in R6
      slli  r3, r3, 3                 ; Align to the next pointer
      slli  r5, r3, 1                 ; Form R5 = (2*i)
      mult  r8, r4, r1                ; Form R8 = (j*m)
      add   r9, r5, r8                ;      R9 = (2*i) + (j*m)
      ld    f2, haar_in(r9)           ; Get the value from haar array in F2
      addi  r9, r9, 8                 ; Move to the next index
      ld    f6, haar_in(r9)           ; Get the next value from array in F6
      addd  f4, f2, f6                ; Add the results in F4
      divd  f4, f4, f8                ; F4 = F4/sqrt(2)
      subd  f10, f2, f6               ; Sub the results in F10
      divd  f10, f10, f8              ; F10 = F10/sqrt(2)
      add   r5, r3, r8                ; Form R5 = i + j*m
      sd    haar_out(r5), f4          ; Store the result in out array
      slli  r6, r6, 3                 ; Form the array index in K
      add   r5, r5, r6                ; Form R5 = (k+i+j*m)
      sd    haar_out(r5), f10         ; Store it in internal array
      srli  r6, r6, 3                 ; Get the original value back
      srli  r3, r3, 3                 ; Get the original count back
      addi  r3, r3, 1                 ; Increment the count by 1
      j     col_trans_k               ; Loop back
    col_trans_k_exit:
    srli  r4, r4, 3                   ; Get the original count back
    addi  r4, r4, 1                   ; Increment the count by 1
    j     col_trans_n                 ; Loop back
  col_trans_n_exit:

  j     col_transform
col_trans_exit:

这是我的 .data，所以每当我计算一个数字时，我都会将 dp 从 haar_in 加载到 FP 寄存器中，然后计算结果将存储在 haar_out 中。每当我放置超过 4x4 矩阵时，消息将是超时、溢出或非法数字。帮助..

arrays - 如何在 DLX 汇编语言中放置具有多个数字的数组来表示矩阵

0 回答 0

Related

Reference