我正在制作关于 Haar DWT 的汇编语言,如您所知,haar dwt 需要一个从图像表示的矩阵。我想要一种能够放置 64x64 或 128x128 甚至 256x256 矩阵的汇编语言。我怎么做?
我对这个组件很陌生,更不用说使用 DLX 微处理器了。我有用于计算 DWT 本身的汇编代码。但不知何故,我可以放入 haar_in 数组的最大矩阵仅为 4x4 矩阵/16 个值。我不知道是内存问题,还是我计算的大浮点对程序来说太重了,但我无法完成。我曾尝试更改 haar_out 的空间,即 M 和 N,但也没有用。
.data
sqrt:
.double 1.4142
haar_in:
.double 148, 142, 130, 168, 166, 117, 138, 120, 116, 118, 109, 136, 99, 89, 123, 120
haar_out:
.space 10000
M:
.word 4
N:
.word 4
K:
.word 1
; Register Usage
; GP:
; R0 => 0
; R1 => M
; R2 => N
; R3.. => Internally used
; FP:
; F0 => 0
; F8 => Square Root of 2
; F2.. => Internally used
main:
ld f8, sqrt(r0) ; F8 contains square root of 2
lw r1, M(r0) ; Get value of M in R1
lw r2, N(r0) ; Get value of N in R2
addi r3, r0, 0 ; Set R3 to 0 as a counter for M
addi r4, r0, 0 ; Set R4 to 0 as a counter for N
; Copy the haar_in to the haar_out array
copy_n_haar_out:
sub r2, r2, r4
beqz r2, n_loop_exit ; Exit N-loop if counter reached N
lw r2, N(r0) ; Get value of N in R2
slli r4, r4, 3 ; Align to the next pointer
lw r1, M(r0) ; Get value of M in R1
addi r3, r0, 0 ; Set R3 to 0 as a counter for M
copy_m_haar_out:
sub r1, r1, r3
beqz r1, m_loop_exit ; Exit M-loop if counter reached M
lw r1, M(r0) ; Get value of M in R1
slli r3, r3, 3 ; Align to the next pointer
mult r5, r4, r1 ; Form R5 = (j*m)
add r5, r5, r3 ; R5 = i + j*m
ld f2, haar_in(r5) ; Get the array from haar array
sd haar_out(r5), f2 ; Store it in internal array
srli r3, r3, 3 ; Get the original count back
addi r3, r3, 1 ; Increment the count by 1
j copy_m_haar_out ; Loop back
m_loop_exit:
srli r4, r4, 3 ; Get the original count back
addi r4, r4, 1 ; Increment the count by 1
j copy_n_haar_out ; Loop back
n_loop_exit:
lw r1, M(r0) ; Get value of M in R1
lw r2, N(r0) ; Get value of N in R2
; Determine K, the largest power of 2 such that K <= M
lw r3, K(r0) ; Get the initial value of K in R3
k_less_than_m:
slli r5, r3, 1 ; R5 = K*2
slt r4, r1, r5 ; Set R4 if M <= K
subi r4, r4, 1 ; Check if R4 is set
beqz r4, k_loop_m_exit ; Exit loop if set
sub r4, r1, r3 ; Check if K == M
beqz r4, k_loop_m_exit ; Exit loop if equal
slli r3, r3, 1 ; K = K*2
j k_less_than_m ; Loop back
k_loop_m_exit:
sw K(r0), r3 ; Store the value in K
; Transform all the columns now
lw r6, K(r0) ; Get the updated value of K in R6
addi r3, r0, 0 ; Set R3 to 0 as a counter for K
addi r4, r0, 0 ; Set R4 to 0 as a counter for N
col_transform:
slti r7, r6, 1 ; Set R7 if K < 1
bnez r7, col_trans_exit ; Exit Loop if R7 is set
srli r6, r6, 1 ; K = K/2
sw K(r0), r6 ; Store value of K
; Perform the actual transform
col_trans_n:
sub r2, r2, r4
beqz r2, col_trans_n_exit ; Exit N-loop if counter reached N
lw r2, N(r0) ; Get value of N in R2
slli r4, r4, 3 ; Align to the next pointer
lw r6, K(r0) ; Get value of K in R6
addi r3, r0, 0 ; Set R3 to 0 as a counter for M
col_trans_k:
sub r6, r6, r3
beqz r6, col_trans_k_exit ; Exit K-loop if counter reached K
lw r6, K(r0) ; Get value of K in R6
slli r3, r3, 3 ; Align to the next pointer
slli r5, r3, 1 ; Form R5 = (2*i)
mult r8, r4, r1 ; Form R8 = (j*m)
add r9, r5, r8 ; R9 = (2*i) + (j*m)
ld f2, haar_in(r9) ; Get the value from haar array in F2
addi r9, r9, 8 ; Move to the next index
ld f6, haar_in(r9) ; Get the next value from array in F6
addd f4, f2, f6 ; Add the results in F4
divd f4, f4, f8 ; F4 = F4/sqrt(2)
subd f10, f2, f6 ; Sub the results in F10
divd f10, f10, f8 ; F10 = F10/sqrt(2)
add r5, r3, r8 ; Form R5 = i + j*m
sd haar_out(r5), f4 ; Store the result in out array
slli r6, r6, 3 ; Form the array index in K
add r5, r5, r6 ; Form R5 = (k+i+j*m)
sd haar_out(r5), f10 ; Store it in internal array
srli r6, r6, 3 ; Get the original value back
srli r3, r3, 3 ; Get the original count back
addi r3, r3, 1 ; Increment the count by 1
j col_trans_k ; Loop back
col_trans_k_exit:
srli r4, r4, 3 ; Get the original count back
addi r4, r4, 1 ; Increment the count by 1
j col_trans_n ; Loop back
col_trans_n_exit:
j col_transform
col_trans_exit:
这是我的 .data,所以每当我计算一个数字时,我都会将 dp 从 haar_in 加载到 FP 寄存器中,然后计算结果将存储在 haar_out 中。每当我放置超过 4x4 矩阵时,消息将是超时、溢出或非法数字。帮助..