我正在使用 Omap3515(Arm Cortex A8)。启用 I-Cache、D-Cache、分支预测和 MMU。
如果我尝试将 600KB 的帧缓冲区从外部存储器区域复制到另一个外部存储器区域,我也会中止数据。
(如果我复制 100KB 少则没有中止。甚至有时复制 400KB 也没有数据中止。)数据中止后,我可以注意到 SDR 即 SDRAM 不可访问。
我以 PA=VA 的方式启用了 MMU。
如果我禁用 D-Cache,则不会中止并且工作正常。但我想启用 D-Cache 以加快访问速度。
这是代码
.arm
.sect EntryOnReset
;// Module Imports and exports
.global ResetHandler
.global RelocateImage
.global OasysEntry
.global BoardInit
.global __stack
.global __STACK_SIZE
.global NEONCopyPLD
.global EnableCaches
.global DisableCaches
.global MemmoryTest
.global core_init
;// Stack Size Definition.
UND_Stack_Size .EQU 0x00000100
SVC_Stack_Size .EQU 0x00001000 ;//Only main function stack is to be mentioned here.
ABT_Stack_Size .EQU 0x00000100
FIQ_Stack_Size .EQU 0x00000100
IRQ_Stack_Size .EQU 0x00000100
USR_Stack_Size .EQU 0x00000100
;//******************************************************************************
;// EQUATES
;//******************************************************************************
;// Standard definitions of Mode bits and Interrupt (I & F) flags in PSRs
Mode_USR .EQU 0x10
Mode_FIQ .EQU 0x11
Mode_IRQ .EQU 0x12
Mode_SVC .EQU 0x13
Mode_ABT .EQU 0x17
Mode_UND .EQU 0x1B
Mode_SYS .EQU 0x1F
I_Bit .EQU 0x80 ;// when I bit is set, IRQ is disabled
F_Bit .EQU 0x40 ;// when F bit is set, FIQ is disabled
;//******************************************************************************
;// EQUATES
;//******************************************************************************
FIQ_IRQ_DISABLE .EQU 0xC0 ;//Disable both FIQ and IRQ.
;//******************************************************************************
;// CODE GENERATION DIRECTIVES
;//******************************************************************************
;// Area Definition and Entry Point
;// Startup Code must be linked first at Address at which it expects to run.
;//------------------------------------------------------------------------------
;//******************************************************************************
;// Routine name : ENTRY
;// Description : Entry point for software.
;// Assumptions : <none>
;// Tainted registers : <none>
;// Functions called :
;// Low level
;// Requirements :
;//
;// 1. This routine shall initialize the Stack pointer.
;//
;// 2. This routine shall perform architecture specific
;// initializations by calling InitCPU routine.
;//******************************************************************************
ResetHandler:
;==================================================================
; Enable access to NEON/VFP by enabling access to Coprocessors 10 and 11.
; Enables Full Access i.e. in both privileged and non privileged modes
;==================================================================
MRC p15, #0, r0, c1, c0, #2 ; Read Coprocessor Access Control Register (CPACR)
ORR r0, r0, #(0xF << 20) ; Enable access to CP 10 & 11
MCR p15, #0, r0, c1, c0, #2 ; Write Coprocessor Access Control Register (CPACR)
ISB
;==================================================================
; Switch on the VFP and NEON hardware
;=================================================================
MOV r0, #0x40000000
VMSR FPEXC, r0 ; Write FPEXC register, EN bit set
;@ Disable MMU.
MRC p15, #0, r1, c1, c0, #0 ;@ Read Control Register configuration data.
BIC r1, r1, #0x1
MCR p15, #0, r1, c1, c0, #0 ;@ Write Control Register configuration data.
;@ Disable L1 Caches.
MRC p15, #0, r1, c1, c0, #0 ;@ Read Control Register configuration data.
BIC r1, r1, #(0x1 << 12) ;@ Disable I Cache.
BIC r1, r1, #(0x1 << 2) ;@ Disable D Cache.
MCR p15, #0, r1, c1, c0, #0 ;@ Write Control Register configuration data
;@ Invalidate L1 Caches.
;@ Invalidate Instruction cache.
MOV r1, #0
MCR p15, #0, r1, c7, c5, #0
;@ Invalidate Data cache.
;@ To make the code general purpose, calculate the
;@ cache size first and loop through each set + way.
MRC p15, #1, r0, c0, c0, #0 ;@ Read Cache Size ID.
;TBR LDR r3, #0x1FF
MOV r3, #0x1FF
AND r0, r3, r0, LSR #13 ;@ r0 = no. of sets - 1.
MOV r1, #0 ;@ r1 = way counter way_loop.
way_loop:
MOV r3, #0 ;@ r3 = set counter set_loop.
set_loop:
MOV r2, r1, LSL #30
;TBR ORR r2, r3, LSL #5 ;@ r2 = set/way cache operation format.
ORR r2, r2, r3, LSL #5 ;@ r2 = set/way cache operation format.
MCR p15, #0, r2, c7, c6, #2 ;@ Invalidate the line described by r2.
ADD r3, r3, #1 ;@ Increment set counter.
CMP r0, r3 ;@ Last set reached yet?
BGT set_loop ;@ If not, iterate set_loop,
ADD r1, r1, #1 ;@ else, next.
CMP r1, #4 ;@ Last way reached yet?
BNE way_loop ;@ if not, iterate way_loop.
;@ Invalidate TLB
MCR p15, #0, r1, c8, c7, #0
;@ Branch Prediction Enable.
MOV r1, #0
MRC p15, #0, r1, c1, c0, #0 ;@ Read Control Register configuration data.
ORR r1, r1, #(0x1 << 11) ;@ Global BP Enable bit.
MCR p15, #0, r1, c1, c0, #0 ;@ Write Control Register configuration data.
;@ Enable D-side Prefetch
MRC p15, #0, r1, c1, c0, #1 ;@ Read Auxiliary Control Register.
ORR r1, r1, #(0x1 <<2) ;@ Enable D-side prefetch.
MCR p15, #0, r1, c1, c0, #1; ;@ Write Auxiliary Control Register.
DSB
ISB
;@ DSB causes completion of all cache maintenance operations appearing in program
;@ order before the DSB instruction.
;@ An ISB instruction causes the effect of all branch predictor maintenance
;@ operations before the ISB instruction to be visible to all instructions
;@ after the ISB instruction.
;@ Initialize PageTable.
;@ It would be faster to create this in a read-only section in an assembly file.
;@ descriptor.
LDR r1, tlb_l1_base
MOV r2, #0
MOV r4, #1
write_pte
MOV r0, #0x0E
ORR r0, r0, r4, LSL #0xA
ORR r0, r0, r4, LSL #0xB
ORR r0, r0, r2, LSL #20
STR r0, [r1]
ADD r1, r1, #4
ADD r2, r2, #1 ;@ Decrement loop counter.
SUBS r3, r2, #4096
BNE write_pte
;@ Initialize MMU.
MOV r1,#0x0
MCR p15, #0, r1, c2, c0, #2 ;@ Write Translation Table Base Control Register.
LDR r1, tlb_l1_base
MCR p15, #0, r1, c2, c0, #0 ;@ Write Translation Table Base Register 0.
;@ In this simple example, do not use TRE or Normal Memory Remap Register.
;@ Set all Domains to Manger.
MOV r1, #0xFFFF ; Provied Manager access, so access premition bits (AP) shall not be checked.
ORR r1, r1, r1, LSL #0x10
MCR p15, #0, r1, c3, c0, #0 ;@ Write Domain Access Control Register.
;@ Enable MMU
MRC p15, #0, r1, c1, c0, #0 ;@ Read Control Register configuration data.
ORR r1, r1, #0x1 ;@ Bit 0 is the MMU enable.
MCR p15, #0, r1, c1, c0, #0 ;@ Write Control Register configuration data.
tlb_l1_base .word 0x40200000
;//******************************************************************************
;// SETUP STACK POINTERS FOR USR MODE
;//******************************************************************************
;*------------------------------------------------------
;* INITIALIZE THE USER MODE STACK
;*------------------------------------------------------
StackInit:
LDR sp, c_stack
LDR r0, c_STACK_SIZE
ADD sp, sp, r0
;*-----------------------------------------------------
;* Clear upper 3 bits for 64-bit alignment.
;*-----------------------------------------------------
BIC sp, sp, #0x07
;// Set IRQ and FIQ bits in CPSR to disable all interrupts.
MRS R0, CPSR
STMFD SP!, {R0} ;// Store it onto stack
ORR R1, R0, #FIQ_IRQ_DISABLE
MSR CPSR_c, R1
;//******************************************************************************
;// SETUP STACK FOR OTHER MODES
;//******************************************************************************
MOV R0, SP
;// Enter Undefined Instruction Mode and set its Stack Pointer
MSR CPSR_c, #Mode_UND|I_Bit|F_Bit
MOV SP, R0
SUB R0, R0, #UND_Stack_Size
;// Enter Abort Mode and set its Stack Pointer
MSR CPSR_c, #Mode_ABT|I_Bit|F_Bit
MOV SP, R0
SUB R0, R0, #ABT_Stack_Size
;// Enter FIQ Mode and set its Stack Pointer
MSR CPSR_c, #Mode_FIQ|I_Bit|F_Bit
MOV SP, R0
SUB R0, R0, #FIQ_Stack_Size
;// Enter IRQ Mode and set its Stack Pointer
MSR CPSR_c, #Mode_IRQ|I_Bit|F_Bit
MOV SP, R0
SUB R0, R0, #IRQ_Stack_Size
;// Enter Supervisor Mode and set its Stack Pointer
MSR CPSR_c, #Mode_SVC|I_Bit|F_Bit
MOV SP, R0
SUB R0, R0, #SVC_Stack_Size
MSR CPSR_c, #Mode_SVC|I_Bit
;//******************************************************************************
;// MOVE TO myfunc
;//******************************************************************************
;// The following routine copies the loaded image to execution region.
BL RelocateImage
;// The following routine initialises the Omap3515.
BL BoardInit
;// The following routine enaables the MMU.
;BL EnableMMU
;// The following routine enables the I cache.
BL EnableICaches
;// The following routine enables branch prediction.
BL EnableBrachPrediction
;// The following routine enables the D cache.
BL EnableDCaches
;// The following routine enables the L2 cache.
;BL EnableL2UnifiedCache
;// The following routine starts the OS.
BL MemmoryTest
;===================================================================
; Enable MMU and Branch to __main
; Leaving the caches disabled until after scatter loading.
;===================================================================
.global EnableMMU
;******************************************************************************
; c1, Control Register
; [0] M bit Banked Enables the MMU:
; 0 = MMU disabled, reset value
; 1 = MMU enabled.
;******************************************************************************
EnableMMU:
;Read the c1 register
mrc p15, #0, r0, c1, c0, #0
;Set b1 - Enables the MMU
orr r0, r0, #0x1
; Write back to c1 register to enable MMU
mcr p15, #0, r0, c1, c0, #0
BX lr
.global EnableICaches
;==================================================================
; This API enables instruction cache.
;==================================================================
EnableICaches:
MRC p15, #0, r0, c1, c0, #0 ; Read System Control Register
ORR r0, r0, #(0x1 << 12) ; Set I bit 12 to enable I Cache
;BIC r0, r0, #(0x1 <<12) ; Clear bit 0
MCR p15, #0, r0, c1, c0, #0 ; Write System Control Register
BX lr
.global EnableDCaches
;==================================================================
; This API enables data cache.
;==================================================================
EnableDCaches:
MRC p15, #0, r0, c1, c0, #0 ; Read System Control Register
ORR r0, r0, #(0x1 << 2) ; Set C bit 2 to enable D Cache
;BIC r0, r0, #(0x1 << 2) ; Clear bit 0
;BIC r0, r0, #(0x1 << 1) ; disable alignment checks
MCR p15, #0, r0, c1, c0, #0 ; Write System Control Register
BX lr
.global EnableL2UnifiedCache
;==================================================================
; Enable Cortex-A8 Level2 Unified Cache
;==================================================================
EnableL2UnifiedCache:
MRC p15, #0, r0, c1, c0, #1 ; Read Auxiliary Control Register
ORR r0, r0, #2 ; L2EN bit, enable L2 cache
;BIC r0, r0, #(0x1 << 1) ; L2EN bit, disable L2 cache
;ORR r0, r0, #(0x1 << 4) ;Enables speculative accesses on AXI
ORR r0, r0, #(0x1 << 4) ;Enables speculative accesses on AXI
ORR r0, r0, #(0x1 << 5) ;Enables caching NEON data within the L1 data cache
MCR p15, #0, r0, c1, c0, #1 ; Write Auxiliary Control Register
BX lr
.global EnableBrachPrediction
;==================================================================
; This API enables branch prediction
;==================================================================
EnableBrachPrediction:
MRC p15, #0, r0, c1, c0, #0 ; Read System Control Register
ORR r0, r0, #(0x1 << 11) ; Set Z bit 11 to enable branch prediction
;BIC r0, r0, #(0x1 << 11) ; Disable all forms of branch prediction
MCR p15, #0, r0, c1, c0, #0 ; Write System Control Register
BX lr
c_stack: .long __stack
c_STACK_SIZE: .long __STACK_SIZE
;//******************************************************************************
;// POINTERS TO VARIABLES
;//******************************************************************************
; ENDIF
.END
谢谢和问候, 克里什